import os
import re
import sys

from bs4 import BeautifulSoup

# `con` (authenticated session wrapper), `helper`, `bcolors`, and
# `download_course` are assumed to be defined elsewhere in this project.


def get_course_id_and_title(course_url):
    """Fetch a course page and return its numeric id and title."""
    page = con.session.get(helper.embbed_link(course_url))
    soup = BeautifulSoup(page.text, 'html.parser')
    try:
        title = soup.find('title').getText().split('|')[0].strip()
    except Exception:
        # The page had no parsable <title>; signal failure to the caller.
        return None, None
    course_id = soup.find('input', {'name': 'course_id'})['value']
    return course_id, title
def download_track(track, folder, videos_download):
    """Download every course that belongs to a track, in page order."""
    page = con.session.get(helper.embbed_link(track.link))
    soup = BeautifulSoup(page.text, 'html.parser')
    # Course links on a track page look like <a class="course..." href="/courses/...">.
    all_courses = soup.find_all('a', {
        'href': re.compile('^/courses/'),
        'class': re.compile('^course')
    })
    track_title = soup.find('title').getText().split('|')[0].strip()
    folder = os.path.join(folder, track_title)
    all_links = ['https://www.datacamp.com' + x['href'] for x in all_courses]
    sys.stdout.write(f'{bcolors.BKBLUE} {track_title} {bcolors.BKENDC}\n')
    for i, link in enumerate(all_links):
        download_course(link, folder, videos_download, i + 1)
def download_course_data(conn, link, path):
    """Download every dataset file linked from a course page."""
    # Use the `conn` parameter consistently; the original body ignored it
    # and relied on the module-level `con` instead.
    page = conn.session.get(helper.embbed_link(link))
    soup = BeautifulSoup(page.text, 'html.parser')
    # Dataset entries are external, borderless links on the course page.
    dataset = soup.find_all('a', {
        'href': re.compile('^https'),
        'class': re.compile('^link-borderless')
    })
    titles = [x.text.strip() for x in dataset]
    all_links = [x['href'] for x in dataset]
    sys.stdout.write(f'{bcolors.BOLD}Downloading dataset...{bcolors.ENDC}\n')
    os.makedirs(os.path.join(path, 'Dataset'), exist_ok=True)
    for link, title in zip(all_links, titles):
        # Name each file after its link text, keeping the original extension.
        file_path = os.path.join(path, 'Dataset', title + '.' + link.split('.')[-1])
        download_file(conn, link, file_path)
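
# `download_file` is called above but not defined in this section. The sketch
# below is a minimal, assumed implementation: it streams the response through
# the requests session on `conn` (matching the `conn.session.get` usage above)
# and writes it to `file_path` in chunks. The chunk size and error handling
# are illustrative choices, not the project's confirmed code.
def download_file(conn, link, file_path):
    """Stream a remote file to disk in 8 KiB chunks (assumed helper)."""
    response = conn.session.get(link, stream=True)
    response.raise_for_status()
    with open(file_path, 'wb') as f:
        for chunk in response.iter_content(chunk_size=8192):
            if chunk:  # skip keep-alive chunks
                f.write(chunk)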