コード例 #1
0
 def get_current_openings_page(self, job_page):
     """Locate the "current openings" page linked from *job_page*.

     Tries an anchor whose text matches "openings" first, then one
     matching "open positions"; when neither link is present the
     original *job_page* URL is returned unchanged.
     """
     page_source = PageFetcher().fetch_page(job_page)
     soup = BeautifulSoup(page_source, 'html.parser')
     # Check the candidate link texts in priority order.
     for pattern in ("openings", "open positions"):
         anchor = soup.find("a", string=re.compile(pattern, re.I), href=True)
         if anchor:
             return resolve_one_relative_page(self.url, anchor['href'])
     return job_page
コード例 #2
0
 def find_posting_links(self, soup):
     """Collect posting reference URLs from the paginated JSON API at self.url.

     Fetches pages 100 records at a time until the offset has covered
     the server-reported ``totalFound`` count, and returns the ``ref``
     field of every posting seen.
     """
     fetcher = PageFetcher()
     links = []
     offset, total = 0, 1
     while offset < total:
         payload = simplejson.loads(
             fetcher.fetch_page("{url}?offset={offset}".format(
                 url=self.url, offset=offset)))
         links.extend(posting['ref'] for posting in payload['content'])
         # The server reports the real total on every page; trust the
         # latest value and advance by the fixed page size.
         total = payload['totalFound']
         offset += 100
     return links
コード例 #3
0
 def get_current_openings_page(self, job_page):
     """Find the "current openings" page reachable from *job_page*.

     Prefers a link labelled "openings"; falls back to one labelled
     "open positions", and finally to *job_page* itself when no such
     link exists on the page.
     """
     html = PageFetcher().fetch_page(job_page)
     soup = BeautifulSoup(html, 'html.parser')
     # Guard-clause style: return as soon as a usable link is found.
     openings = soup.find("a", string=re.compile("openings", re.I),
                          href=True)
     if openings:
         return resolve_one_relative_page(self.url, openings['href'])
     positions = soup.find("a", string=re.compile("open positions", re.I),
                           href=True)
     if positions:
         return resolve_one_relative_page(self.url, positions['href'])
     return job_page
コード例 #4
0
 def start(self):
     """Fetch self.url and return the job pages found on it.

     If the primary page yields no job pages, every alternative page is
     tried in turn; the first alternative that succeeds wins. When all
     alternatives fail too, the original exception is re-raised.
     """
     self.logger.info("Starting job page finder on {orig}".format(orig=self.url))
     fetcher = PageFetcher()
     page_source = fetcher.fetch_page(self.url)
     soup = BeautifulSoup(page_source, 'html.parser')
     try:
         return self.find_job_pages(soup)
     except Exception:
         for link in self.find_alternative_pages(soup):
             self.logger.info("Checking alternative page: {u}".format(u=link))
             try:
                 page_source = fetcher.fetch_page(link)
                 alternative_soup = BeautifulSoup(page_source, 'html.parser')
                 return self.find_job_pages(alternative_soup)
             except Exception:
                 # Best-effort: a failing alternative just moves on to
                 # the next candidate. (Was a bare "except:", which also
                 # swallowed KeyboardInterrupt/SystemExit.)
                 continue
         # No alternative produced job pages; re-raise the original
         # error with its full traceback (the old "success" flag was
         # dead code: it was only set True right before a return).
         raise
コード例 #5
0
 def start(self):
     """Fetch self.url and return the job pages found on it.

     Falls back to each alternative page when the primary page raises;
     re-raises the primary page's exception if no alternative works.
     """
     self.logger.info(
         "Starting job page finder on {orig}".format(orig=self.url))
     fetcher = PageFetcher()
     page_source = fetcher.fetch_page(self.url)
     soup = BeautifulSoup(page_source, 'html.parser')
     try:
         return self.find_job_pages(soup)
     except Exception:
         for link in self.find_alternative_pages(soup):
             self.logger.info(
                 "Checking alternative page: {u}".format(u=link))
             try:
                 page_source = fetcher.fetch_page(link)
                 alternative_soup = BeautifulSoup(page_source,
                                                  'html.parser')
                 return self.find_job_pages(alternative_soup)
             except Exception:
                 # Narrowed from a bare "except:" so that
                 # KeyboardInterrupt/SystemExit propagate; a failing
                 # alternative simply advances to the next one.
                 continue
         # All alternatives exhausted: re-raise the original exception.
         # A bare "raise" keeps the traceback; the previous "success"
         # flag could never be True here and was removed as dead code.
         raise
コード例 #6
0
ファイル: ats_analyzers.py プロジェクト: ghoranyi/ats_crawler
 def fetch_page(self):
     """Download self.url and return its content parsed as a soup tree."""
     raw_html = PageFetcher().fetch_page(self.url)
     return BeautifulSoup(raw_html, 'html.parser')