def get_jobs():
    """Yield the HTML content and URL of every job page not yet scraped.

    :return: generator of (job page html, job url) tuples
    """
    # BUG FIX: the original bound the class object itself (eu = EuDiakokScraper),
    # so eu.get_job_links() would raise TypeError if get_job_links is an
    # instance method. Instantiate the scraper instead.
    eu = EuDiakokScraper()
    for url in eu.get_job_links():
        # Only fetch pages we have not already stored.
        if not is_job_already_scraped(url):
            yield requests.get(url).text, url
def get_jobs(self):
    """Iterate over every job linked from the all-jobs listing page and
    yield each unscraped job's page content for downstream processing
    (e.g. gather_specific_job_info()).

    :return: generator of (html content, job url) tuples
    """
    listing_url = urljoin(self.base_url, self.all_job_url)
    listing_html = requests.get(listing_url).text
    listing_soup = BeautifulSoup(listing_html, 'html.parser')
    for anchor in listing_soup.find_all("a", class_=self.single_job_href_tag):
        job_url = anchor['href']
        # Skip jobs that are already in the store; fetch only new ones.
        if not is_job_already_scraped(job_url):
            yield requests.get(job_url).text, job_url