Example #1
0
    def get_jobs():
        """
        Yield the page content and URL of every job that still needs scraping.

        Iterates over the links produced by
        ``EuDiakokScraper.get_job_links()``, skips each one for which
        ``is_job_already_scraped(url)`` is truthy, and downloads the remaining
        pages lazily (one HTTP GET per yielded item).

        :return: generator of (job page html, job url) tuples
        :raises requests.exceptions.Timeout: if a job page does not answer
            within the timeout
        """
        for url in EuDiakokScraper.get_job_links():
            if is_job_already_scraped(url):
                continue
            # requests waits forever by default; bound each download so a
            # single unresponsive server cannot stall the whole scrape.
            yield requests.get(url, timeout=30).text, url
Example #2
0
 def get_jobs(self):
     """
     Iterate over every job linked from the all-jobs listing page.

     Downloads the listing at ``urljoin(self.base_url, self.all_job_url)``,
     collects the ``<a>`` tags whose class matches
     ``self.single_job_href_tag`` and fetches each linked job page for which
     ``is_job_already_scraped(href)`` is falsy. The yielded pairs are meant
     to be passed on to ``gather_specific_job_info()``.

     :return: generator of (html content, job url) tuples
     :raises requests.exceptions.Timeout: if the listing or a job page does
         not answer within the timeout
     """
     # Seconds; requests never times out unless told to, so an unresponsive
     # server would otherwise hang the generator indefinitely.
     timeout = 30
     listing_url = urljoin(self.base_url, self.all_job_url)
     root_html = requests.get(listing_url, timeout=timeout).text
     soup = BeautifulSoup(root_html, 'html.parser')
     # href=True keeps only anchors that actually carry a link, so a stray
     # <a> without an href is skipped instead of raising KeyError.
     for job in soup.find_all("a", class_=self.single_job_href_tag, href=True):
         job_url = job['href']
         if is_job_already_scraped(job_url):
             continue
         yield requests.get(job_url, timeout=timeout).text, job_url