def download_datasets(link, folder):
    """Scrape the dataset links from a course page and download each file."""
    page = con.session.get(helper.fix_link(link))
    soup = BeautifulSoup(page.text, 'html.parser')
    # Dataset downloads are the absolute-https, "link-borderless" anchors.
    datasets = soup.findAll('a', {
        'href': re.compile('^https'),
        'class': re.compile('^link-borderless')
    })
    if len(datasets) == 0:
        sys.stdout.write(
            f'{bcolors.FAIL}No dataset found!{bcolors.ENDC}\n')
        return
    titles = [x.text.strip() for x in datasets]
    all_links = [x['href'] for x in datasets]
    sys.stdout.write(
        f'{bcolors.BOLD}Downloading dataset...{bcolors.ENDC}\n')
    # Create <folder>/Dataset, including any missing parents.
    os.makedirs(os.path.join(folder, 'Dataset'), exist_ok=True)
    for file_link, title in zip(all_links, titles):
        # Keep the original file extension from the download URL.
        path = os.path.join(folder, 'Dataset', title) + \
            '.' + file_link.split('.')[-1]
        helper.download_file(con, file_link, path)
def get_course_id_and_title(course_url):
    """Return the numeric course id and title parsed from a course page."""
    page = con.session.get(helper.fix_link(course_url))
    soup = BeautifulSoup(page.text, 'html.parser')
    try:
        # The <title> looks like "Course Name | DataCamp".
        title = soup.find('title').getText().split('|')[0].strip()
    except Exception:
        sys.stdout.write(
            f'{bcolors.FAIL}Unable to parse course title!{bcolors.ENDC}\n')
        return None, None
    # The course id appears in asset paths such as /course_1234/; guard
    # against a missing match instead of crashing on .group(1).
    match = re.search(r'/course_(\d+)/', page.text)
    if match is None:
        return None, None
    return match.group(1), title
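# Usage sketch (the URL below is a hypothetical example): callers should
# handle the (None, None) failure case before unpacking the result, e.g.:
#
#   course_id, title = get_course_id_and_title(
#       'https://www.datacamp.com/courses/intro-to-python-for-data-science')
#   if course_id is not None:
#       print(f'Course {course_id}: {title}')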
def download_track(url, folder, videos_download, exercise_download,
                   datasets_download):
    """Download every course that belongs to a DataCamp track."""
    page = con.session.get(helper.fix_link(url))
    soup = BeautifulSoup(page.text, 'html.parser')
    all_courses = soup.findAll('a', {
        'href': re.compile('^/courses/'),
        'class': re.compile('^course')
    })
    track_title = soup.find('title').getText().split('|')[0].strip()
    folder = os.path.join(folder, track_title)
    all_links = ['https://www.datacamp.com' + x['href'] for x in all_courses]
    # Drop ".../continue" resume links; removing items while iterating over
    # the same list skips elements, so filter into a new list instead.
    all_links = [x for x in all_links if not x.endswith('/continue')]
    # De-duplicate while preserving the track's course order.
    all_links = list(dict.fromkeys(all_links))
    sys.stdout.write(f'{bcolors.BKBLUE} {track_title} {bcolors.BKENDC}\n')
    for i, link in enumerate(all_links):
        download_course(link, folder, videos_download, exercise_download,
                        datasets_download, i + 1)
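# Usage sketch (hypothetical track URL and flag values): this would download
# every course in the track into <downloads>/<track title>/:
#
#   download_track(
#       'https://www.datacamp.com/tracks/data-scientist-with-python',
#       'downloads', videos_download=True, exercise_download=True,
#       datasets_download=True)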