def download_syllabus_icourse163(session, leclist, path = '', overwrite = False):
    """Download every lecture listed in ``leclist`` beneath ``path``.

    ``leclist`` is walked as a sequence of entries whose first item is the
    week name and whose second item is that week's lessons; each lesson has
    the lesson name first and its lectures second; each lecture is a
    ``(lecture_url, lecture_name)`` pair.  A lecture is saved as
    ``path/<week>/<lesson>/NN_<lecture_name>.<last three characters of URL>``,
    where ``NN`` is the 1-based position of the lecture inside its lesson.

    The request headers below are merged into ``session.headers`` before
    anything is fetched.  A download that raises is printed and queued; the
    queue is retried for at most three rounds and a summary line is printed
    at the end.  Returns ``None``.
    """
    session.headers.update({
        'Accept': '*/*',
        'Accept-Encoding': 'gzip, deflate, sdch',
        'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6,zh-TW;q=0.4',
        'Connection': 'keep-alive',
        'Host': 'v.stu.126.net',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36',
        'X-Requested-With': 'ShockwaveFlash/15.0.0.239',
    })

    # (url, filename) pairs whose download raised and still has to be retried.
    pending = []

    for week in leclist:
        week_title, week_lessons = week[0], week[1]
        for lesson in week_lessons:
            lesson_title, lesson_lectures = lesson[0], lesson[1]

            # The week name is sanitised again for every lesson, so the
            # already-cleaned value is what gets passed on the next pass.
            week_title = clean_filename(week_title)
            lesson_title = clean_filename(lesson_title)

            target_dir = os.path.join(path, week_title, lesson_title)
            if not os.path.exists(target_dir):
                mkdir_p(target_dir)

            for position, (lecture_url, lecture_name) in enumerate(lesson_lectures, 1):
                safe_name = clean_filename(lecture_name)
                # The extension is simply the last three characters of the URL.
                basename = "%02d_%s.%s" % (position, safe_name, lecture_url[-3:])
                filename = os.path.join(target_dir, basename)
                print(filename)
                print(lecture_url)
                try:
                    resume_download_file(session, lecture_url, filename, overwrite)
                except Exception as err:
                    print(err)
                    print('Error, add it to retry list')
                    pending.append((lecture_url, filename))

    # Up to three retry rounds; stop early once nothing is left to retry.
    for _ in range(3):
        if not pending:
            break
        print('%d items should be retried, retrying...' % len(pending))
        # Walk a snapshot so successful items can be dropped from ``pending``.
        for url, filename in list(pending):
            try:
                print(url)
                print(filename)
                resume_download_file(session, url, filename, overwrite)
            except Exception as err:
                print(err)
                print('Error, add it to retry list')
            else:
                pending.remove((url, filename))

    if pending:
        print('%d items failed, please check it' % len(pending))
    else:
        print('All done.')
def _download_one(file_url, full_file_path):
    """Fetch a single file into ``full_file_path``; raise if the transfer fails.

    URLs containing ``.m3u8`` are handed to the ffmpeg binary named by
    ``FFMPEG_BIN``; every other URL goes through ``resume_download_file``
    with the module-level ``sess``.
    """
    if '.m3u8' in file_url:
        # Function-scope import keeps this block self-contained.
        import subprocess

        # An argument list (no shell) keeps URLs and paths that contain
        # spaces or shell metacharacters intact.  check_call raises on a
        # non-zero exit status, so a failed ffmpeg run is reported as a
        # failure instead of being counted as a success.
        # NOTE(review): assumes FFMPEG_BIN is just the executable path (no
        # embedded quotes or extra arguments) - confirm against its definition.
        subprocess.check_call([
            FFMPEG_BIN, '-i', file_url,
            '-c', 'copy', '-bsf:a', 'aac_adtstoasc',
            full_file_path,
        ])
    else:
        resume_download_file(sess, file_url, full_file_path)


def download_file(term, output_folder):
    """Download every file of ``term`` below ``output_folder`` and log a summary.

    ``term`` maps week names to lessons, each lesson collection maps lesson
    names to files, and each file collection maps a file name to its URL.
    Files are written to ``output_folder/<week_name>/<lesson_name>/<file_name>``;
    week and lesson directories are created on demand (``output_folder``
    itself must already exist).

    Lessons without files and entries with an empty URL are skipped.  A
    download that raises is logged and queued with the URL returned by
    ``replace_url_host``; queued items are retried for at most three rounds.
    The final log line reports the success and failure counts.  Returns
    ``None``.
    """
    failure_list = []
    success_count = 0

    for week_name, lessons in term.items():
        week_path = os.path.join(output_folder, week_name)
        if not os.path.exists(week_path):
            os.mkdir(week_path)

        for lesson_name, files in lessons.items():
            if not files:
                # Skip lessons such as discussions or labs that have no files.
                continue
            lesson_path = os.path.join(week_path, lesson_name)
            if not os.path.exists(lesson_path):
                os.mkdir(lesson_path)

            for file_name, file_url in files.items():
                if not file_url:
                    continue
                logger.info('[downloading] %s ---> %s', file_name, lesson_path)
                full_file_path = os.path.join(lesson_path, file_name)
                try:
                    _download_one(file_url, full_file_path)
                except Exception:
                    logger.warning('下载失败,下载链接:{}'.format(file_url))
                    failure_list.append((replace_url_host(file_url), full_file_path))
                else:
                    success_count += 1

    retries = 3
    # The counter now advances every round, so the loop always terminates.
    for curr_retry in range(1, retries + 1):
        if not failure_list:
            break
        # Collect the items that fail again in a fresh list instead of
        # removing from the list being iterated, which would skip entries.
        still_failing = []
        for file_url, full_file_path in failure_list:
            logger.info('第{}次重试,文件:{}'.format(curr_retry, full_file_path))
            try:
                _download_one(file_url, full_file_path)
            except Exception:
                logger.warning('第{}次重试失败,下载链接:{}'.format(curr_retry, file_url))
                still_failing.append((file_url, full_file_path))
            else:
                success_count += 1
        failure_list = still_failing

    logger.info('下载完成, 成功:{}个, 失败:{}'.format(success_count, len(failure_list)))
def download_file(term, output_folder):
    """Download every file of ``term`` below ``output_folder`` and log a summary.

    ``term`` maps week names to lessons, each lesson collection maps lesson
    names to files, and each file collection is a sized iterable of
    ``(file_name, file_url)`` pairs.  Files are written to
    ``output_folder/<week_name>/<lesson_name>/<file_name>`` through
    ``resume_download_file`` with the module-level ``sess``; week and lesson
    directories are created on demand (``output_folder`` itself must already
    exist).

    Lessons without files and entries with an empty URL are skipped.  A
    download that raises is logged and queued; queued items are retried for
    at most three rounds.  The final log line reports the success and
    failure counts.  Returns ``None``.
    """
    failure_list = []
    success_count = 0

    for week_name, lessons in term.items():
        week_path = os.path.join(output_folder, week_name)
        if not os.path.exists(week_path):
            os.mkdir(week_path)

        for lesson_name, files in lessons.items():
            if len(files) == 0:
                # Skip lessons such as discussions or labs that have no files.
                continue
            lesson_path = os.path.join(week_path, lesson_name)
            if not os.path.exists(lesson_path):
                os.mkdir(lesson_path)

            for file_name, file_url in files:
                if not file_url:
                    continue
                logger.info('[downloading] %s ---> %s', file_name, lesson_path)
                full_file_path = os.path.join(lesson_path, file_name)
                try:
                    resume_download_file(sess, file_url, full_file_path)
                except Exception:
                    logger.warning('download %s fail', file_name)
                    failure_list.append((file_url, full_file_path))
                else:
                    success_count += 1

    retries = 3
    # The round counter advances on every pass, so the loop always terminates
    # even when some downloads never succeed.
    for attempt in range(1, retries + 1):
        if not failure_list:
            break
        # Collect the items that fail again in a fresh list instead of
        # removing from the list being iterated, which would skip entries.
        still_failing = []
        for file_url, full_file_path in failure_list:
            try:
                resume_download_file(sess, file_url, full_file_path)
            except Exception:
                # ``Exception`` rather than a bare ``except`` so Ctrl-C and
                # interpreter shutdown are not swallowed by the retry loop.
                logger.warning('retry %d failed for %s', attempt, full_file_path)
                still_failing.append((file_url, full_file_path))
            else:
                success_count += 1
        failure_list = still_failing

    logger.info('download complete, success %d, fail %d', success_count, len(failure_list))