Example #1
0
def download_syllabus_icourse163(session, leclist, path = '', overwrite = False):
    """Download every lecture in ``leclist`` into ``path/<week>/<lesson>/``.

    A fixed set of browser-like request headers is installed on ``session``
    (the session object is modified in place), then each lecture is fetched
    with ``resume_download_file``. Downloads that raise are collected and
    retried for up to three further rounds. Progress and errors are reported
    with ``print``; nothing is returned.

    Args:
        session: Object exposing ``headers.update(...)``; it is passed
            through to ``resume_download_file`` (presumably a
            ``requests.Session`` -- confirm against the caller).
        leclist: Iterable of indexable ``(week_name, lessons)`` entries.
            Each ``lessons`` is an iterable of indexable
            ``(lesson_name, lectures)`` entries, and each ``lectures`` is an
            iterable of ``(lecture_url, lecture_name)`` pairs.
        path: Base directory under which the week/lesson tree is created.
        overwrite: Forwarded unchanged to ``resume_download_file``.
    """
    headers = {
                'Accept':'*/*',
                'Accept-Encoding':'gzip, deflate, sdch',
                'Accept-Language':'zh-CN,zh;q=0.8,en;q=0.6,zh-TW;q=0.4',
                'Connection':'keep-alive',
                # NOTE(review): this pins the Host header for every request
                # made through the session -- confirm all lecture URLs are
                # served from this host.
                'Host':'v.stu.126.net',
                'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36',
                'X-Requested-With':'ShockwaveFlash/15.0.0.239',
               }

    session.headers.update(headers)

    max_retry_rounds = 3

    retry_list = []
    for week in leclist:
        # Clean the week name exactly once. Re-cleaning it on every lesson
        # iteration could yield a different directory name for later lessons
        # if clean_filename is not idempotent.
        cur_week = clean_filename(week[0])
        lessons = week[1]
        for lesson in lessons:
            cur_lesson = clean_filename(lesson[0])
            lectures = lesson[1]
            lesson_dir = os.path.join(path, cur_week, cur_lesson)
            if not os.path.exists(lesson_dir):
                mkdir_p(lesson_dir)

            for lecnum, (lecture_url, lecture_name) in enumerate(lectures, 1):
                lecture_name = clean_filename(lecture_name)
                # The file extension is taken to be the last three characters
                # of the URL (assumes URLs end in e.g. "mp4"/"pdf" with no
                # query string -- confirm).
                filename = os.path.join(
                    lesson_dir,
                    "%02d_%s.%s" % (lecnum, lecture_name, lecture_url[-3:]))
                print (filename)
                print (lecture_url)
                try:
                    resume_download_file(session, lecture_url, filename, overwrite )
                except Exception as e:
                    # Best effort: remember the failure and keep going.
                    print(e)
                    print('Error, add it to retry list')
                    retry_list.append((lecture_url, filename))

    for _ in range(max_retry_rounds):
        if not retry_list:
            break
        print('%d items should be retried, retrying...' % len(retry_list))
        # Build the next round's list instead of removing items from the
        # list that is being walked.
        still_failing = []
        for (url, filename) in retry_list:
            try:
                print(url)
                print(filename)
                resume_download_file(session, url, filename, overwrite )
            except Exception as e:
                print(e)
                print('Error, add it to retry list')
                still_failing.append((url, filename))
        retry_list = still_failing

    if retry_list:
        print('%d items failed, please check it' % len(retry_list))
    else:
        print('All done.')
Example #2
0
def download_syllabus_icourse163(session, leclist, path = '', overwrite = False):
    """Download every lecture in ``leclist`` into ``path/<week>/<lesson>/``.

    A fixed set of browser-like request headers is installed on ``session``
    (the session object is modified in place), then each lecture is fetched
    with ``resume_download_file``. Downloads that raise are collected and
    retried for up to three further rounds. Progress and errors are reported
    with ``print``; nothing is returned.

    Args:
        session: Object exposing ``headers.update(...)``; it is passed
            through to ``resume_download_file`` (presumably a
            ``requests.Session`` -- confirm against the caller).
        leclist: Iterable of indexable ``(week_name, lessons)`` entries.
            Each ``lessons`` is an iterable of indexable
            ``(lesson_name, lectures)`` entries, and each ``lectures`` is an
            iterable of ``(lecture_url, lecture_name)`` pairs.
        path: Base directory under which the week/lesson tree is created.
        overwrite: Forwarded unchanged to ``resume_download_file``.
    """
    headers = {
                'Accept':'*/*',
                'Accept-Encoding':'gzip, deflate, sdch',
                'Accept-Language':'zh-CN,zh;q=0.8,en;q=0.6,zh-TW;q=0.4',
                'Connection':'keep-alive',
                # NOTE(review): this pins the Host header for every request
                # made through the session -- confirm all lecture URLs are
                # served from this host.
                'Host':'v.stu.126.net',
                'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36',
                'X-Requested-With':'ShockwaveFlash/15.0.0.239',
               }

    session.headers.update(headers)

    max_retry_rounds = 3

    retry_list = []
    for week in leclist:
        # Clean the week name exactly once. Re-cleaning it on every lesson
        # iteration could yield a different directory name for later lessons
        # if clean_filename is not idempotent.
        cur_week = clean_filename(week[0])
        lessons = week[1]
        for lesson in lessons:
            cur_lesson = clean_filename(lesson[0])
            lectures = lesson[1]
            lesson_dir = os.path.join(path, cur_week, cur_lesson)
            if not os.path.exists(lesson_dir):
                mkdir_p(lesson_dir)

            for lecnum, (lecture_url, lecture_name) in enumerate(lectures, 1):
                lecture_name = clean_filename(lecture_name)
                # The file extension is taken to be the last three characters
                # of the URL (assumes URLs end in e.g. "mp4"/"pdf" with no
                # query string -- confirm).
                filename = os.path.join(
                    lesson_dir,
                    "%02d_%s.%s" % (lecnum, lecture_name, lecture_url[-3:]))
                print (filename)
                print (lecture_url)
                try:
                    resume_download_file(session, lecture_url, filename, overwrite )
                except Exception as e:
                    # Best effort: remember the failure and keep going.
                    print(e)
                    print('Error, add it to retry list')
                    retry_list.append((lecture_url, filename))

    for _ in range(max_retry_rounds):
        if not retry_list:
            break
        print('%d items should be retried, retrying...' % len(retry_list))
        # Build the next round's list instead of removing items from the
        # list that is being walked.
        still_failing = []
        for (url, filename) in retry_list:
            try:
                print(url)
                print(filename)
                resume_download_file(session, url, filename, overwrite )
            except Exception as e:
                print(e)
                print('Error, add it to retry list')
                still_failing.append((url, filename))
        retry_list = still_failing

    if retry_list:
        print('%d items failed, please check it' % len(retry_list))
    else:
        print('All done.')
Example #3
0
def download_file(term, output_folder):
    """Download every file of ``term`` into ``output_folder/<week>/<lesson>/``.

    URLs containing ``.m3u8`` are handed to the ffmpeg binary named by
    ``FFMPEG_BIN``; every other URL is fetched with ``resume_download_file``
    using the module-level ``sess``. Failed downloads are retried for up to
    three rounds against the URL returned by ``replace_url_host``. A summary
    with the success and failure counts is logged at the end; nothing is
    returned.

    Args:
        term: Mapping ``week_name -> {lesson_name -> {file_name -> file_url}}``.
            Lessons with no files and entries with a falsy URL are skipped.
        output_folder: Base directory; it and the week/lesson directories
            are created when missing.
    """
    # Local import so the block does not depend on the file's import header.
    import subprocess

    def _fetch(file_url, full_file_path):
        # Dispatch on URL type; used for both the first attempt and retries so
        # that a playlist URL is never saved verbatim as if it were the video.
        if '.m3u8' in file_url:
            # Argument list instead of a shell string: URLs routinely contain
            # '&' and other shell metacharacters. check_call raises on a
            # non-zero exit so a failed conversion is not counted as success.
            # Assumes FFMPEG_BIN is the executable path alone, without
            # embedded arguments -- confirm.
            subprocess.check_call([
                FFMPEG_BIN, '-i', file_url,
                '-c', 'copy', '-bsf:a', 'aac_adtstoasc',
                full_file_path,
            ])
        else:
            resume_download_file(sess, file_url, full_file_path)

    failure_list = []
    success_count = 0
    for week_name, lessons in term.items():
        week_path = os.path.join(output_folder, week_name)
        if not os.path.exists(week_path):
            # makedirs also creates output_folder itself when it is missing.
            os.makedirs(week_path)
        for lesson_name, files in lessons.items():
            if len(files) == 0:  # skip lessons without files, e.g. `discussion` or `experiment` lessons
                continue
            lesson_path = os.path.join(week_path, lesson_name)
            if not os.path.exists(lesson_path):
                os.makedirs(lesson_path)
            for file_name, file_url in files.items():
                if not file_url:
                    continue
                logger.info('[downloading] %s ---> %s', file_name, lesson_path)
                full_file_path = os.path.join(lesson_path, file_name)
                try:
                    _fetch(file_url, full_file_path)
                except Exception:
                    logger.warning('下载失败,下载链接:{}'.format(file_url))
                    # Retries go to the alternative host.
                    failure_list.append((replace_url_host(file_url), full_file_path))
                else:
                    success_count += 1

    retries = 3
    # A bounded for-loop guarantees termination even when a file never
    # succeeds (the counter was previously never advanced).
    for curr_retry in range(1, retries + 1):
        if not failure_list:
            break
        # Collect the survivors in a new list rather than removing items from
        # the list being iterated, which would skip entries.
        still_failing = []
        for file_url, full_file_path in failure_list:
            try:
                logger.info('第{}次重试,文件:{}'.format(curr_retry, full_file_path))
                _fetch(file_url, full_file_path)
            except Exception:
                logger.warning('第{}次重试失败,下载链接:{}'.format(curr_retry, file_url))
                still_failing.append((file_url, full_file_path))
            else:
                success_count += 1
        failure_list = still_failing
    logger.info('下载完成, 成功:{}个, 失败:{}'.format(success_count, len(failure_list)))
Example #4
0
def download_file(term, output_folder):
    """Download every file of ``term`` into ``output_folder/<week>/<lesson>/``.

    Each file is fetched with ``resume_download_file`` using the module-level
    ``sess``. Downloads that raise are retried for up to three rounds. A
    summary with the success and failure counts is logged at the end; nothing
    is returned.

    Args:
        term: Mapping ``week_name -> {lesson_name -> files}`` where ``files``
            is a sized iterable of ``(file_name, file_url)`` pairs. Lessons
            with no files and entries with a falsy URL are skipped.
        output_folder: Base directory; it and the week/lesson directories
            are created when missing.
    """
    failure_list = []
    success_count = 0
    for week_name, lessons in term.items():
        week_path = os.path.join(output_folder, week_name)
        if not os.path.exists(week_path):
            # makedirs also creates output_folder itself when it is missing.
            os.makedirs(week_path)
        for lesson_name, files in lessons.items():
            if len(files) == 0:  # skip lessons without files, e.g. `discussion` or `experiment` lessons
                continue
            lesson_path = os.path.join(week_path, lesson_name)
            if not os.path.exists(lesson_path):
                os.makedirs(lesson_path)
            for file_name, file_url in files:
                if not file_url:
                    continue
                logger.info('[downloading] %s ---> %s', file_name, lesson_path)
                full_file_path = os.path.join(lesson_path, file_name)
                try:
                    resume_download_file(sess, file_url, full_file_path)
                except Exception:
                    logger.warning('download %s fail', file_name)
                    failure_list.append((file_url, full_file_path))
                else:
                    success_count += 1

    retries = 3
    # A bounded for-loop guarantees termination even when a file never
    # succeeds (the counter was previously never advanced).
    for attempt in range(1, retries + 1):
        if not failure_list:
            break
        # Collect the survivors in a new list rather than removing items from
        # the list being iterated, which would skip entries.
        still_failing = []
        for file_url, full_file_path in failure_list:
            try:
                resume_download_file(sess, file_url, full_file_path)
            except Exception:
                # Catch Exception rather than everything so Ctrl-C still
                # aborts, and report the failure instead of hiding it.
                logger.warning('retry %d/%d failed for %s',
                               attempt, retries, full_file_path)
                still_failing.append((file_url, full_file_path))
            else:
                success_count += 1
        failure_list = still_failing
    logger.info('download complete, success %d, fail %d', success_count,
                len(failure_list))