Example #1
0
def download_datasets(link, folder):
    """Download every dataset file linked from the page at ``link``.

    The page is fetched through the module-level ``con.session`` and parsed
    with BeautifulSoup. Anchors whose ``href`` starts with ``https`` and whose
    ``class`` starts with ``link-borderless`` are treated as dataset links.
    Each one is saved as ``<folder>/Dataset/<anchor text>.<extension>``, where
    the extension is whatever follows the last ``.`` in the URL.

    Args:
        link: URL of the page to scan (passed through ``helper.fix_link``).
        folder: Destination directory; it and its ``Dataset`` subdirectory
            are created when missing, including any missing parents.

    Returns:
        None. When no matching anchor is found, a failure message is written
        to stdout and nothing is downloaded.
    """
    page = con.session.get(helper.fix_link(link))
    soup = BeautifulSoup(page.text, 'html.parser')

    anchors = soup.findAll('a', {
        'href': re.compile('^https'),
        'class': re.compile('^link-borderless')
    })
    if not anchors:
        sys.stdout.write(
            f'{bcolors.FAIL}No dataset found!{bcolors.ENDC}\n')
        return

    sys.stdout.write(
        f'{bcolors.BOLD}Downloading dataset...{bcolors.ENDC}\n')

    # makedirs(exist_ok=True) creates missing parents and tolerates the
    # directory already existing, unlike an exists()-then-mkdir() pair.
    dataset_dir = os.path.join(folder, 'Dataset')
    os.makedirs(dataset_dir, exist_ok=True)

    for anchor in anchors:
        file_url = anchor['href']
        title = anchor.text.strip()
        extension = file_url.split('.')[-1]
        target_path = os.path.join(dataset_dir, title) + '.' + extension
        helper.download_file(con, file_url, target_path)
Example #2
0
def get_course_id_and_title(course_url):
    """Fetch a course page and extract its numeric id and title.

    The title is the part of the page's ``<title>`` text before the first
    ``|``, stripped of surrounding whitespace. The id is the first run of
    digits matching ``/course_<digits>/`` anywhere in the page text.

    Args:
        course_url: URL of the course page (passed through
            ``helper.fix_link``).

    Returns:
        A ``(course_id, title)`` tuple of strings, or ``None`` when the page
        has no ``<title>`` tag or no course id can be found in it.
    """
    page = con.session.get(helper.fix_link(course_url))
    soup = BeautifulSoup(page.text, 'html.parser')

    title_tag = soup.find('title')
    if title_tag is None:
        # Same outcome as before for a page without a title: no result.
        return None
    title = title_tag.getText().split('|')[0].strip()

    # re.search returns None on no match; guard it instead of letting
    # .group() raise AttributeError.
    id_match = re.search(r'/course_(\d+)/', page.text)
    if id_match is None:
        return None
    return id_match.group(1), title
Example #3
0
def download_track(url, folder, videos_download, exercise_download,
                   datasets_download):
    """Download every course listed on a track page.

    The track page is fetched through the module-level ``con.session``.
    Anchors whose ``href`` starts with ``/courses/`` and whose ``class``
    starts with ``course`` are collected, links ending in ``/continue`` are
    dropped, duplicates are removed (first occurrence wins, order kept), and
    ``download_course`` is called for each remaining link with a 1-based
    position number.

    Args:
        url: URL of the track page (passed through ``helper.fix_link``).
        folder: Base directory; courses go into ``<folder>/<track title>``.
        videos_download: Forwarded unchanged to ``download_course``.
        exercise_download: Forwarded unchanged to ``download_course``.
        datasets_download: Forwarded unchanged to ``download_course``.

    Raises:
        AttributeError: If the page has no ``<title>`` tag.
    """
    page = con.session.get(helper.fix_link(url))
    soup = BeautifulSoup(page.text, 'html.parser')
    all_courses = soup.findAll('a', {
        'href': re.compile('^/courses/'),
        'class': re.compile('^course')
    })
    track_title = soup.find('title').getText().split('|')[0].strip()
    folder = os.path.join(folder, track_title)

    # Filter while building the list. Removing items from a list that is
    # being iterated skips the element after each removal, which let
    # consecutive '/continue' links slip through.
    all_links = [
        'https://www.datacamp.com' + course['href']
        for course in all_courses
        if not course['href'].endswith('/continue')
    ]
    # dict.fromkeys de-duplicates while preserving first-seen order.
    all_links = list(dict.fromkeys(all_links))

    sys.stdout.write(f'{bcolors.BKBLUE}  {track_title}  {bcolors.BKENDC}\n')
    for number, course_link in enumerate(all_links, start=1):
        download_course(course_link, folder, videos_download,
                        exercise_download, datasets_download, number)