def get_resource(term_id, token, file_types=(VIDEO, PDF, RICH_TEXT)):
    """Collect the downloadable courseware entries of a term.

    Walks chapters -> lessons -> units of the course info returned by
    ``get_courseinfo`` and records every unit whose ``contentType`` is in
    *file_types*. The directory of each recorded path is created via
    ``touch_dir``, and video paths are additionally written to the global
    ``playlist``.

    Args:
        term_id: Term identifier forwarded to ``get_courseinfo``.
        token: Login token forwarded to ``get_courseinfo``.
        file_types: Content types to keep (any container supporting ``in``).
            Defaults to videos, PDFs and rich-text attachments.

    Returns:
        A list of tuples, one per kept unit:
        ``(VIDEO, file_path, unit_id, content_id)``,
        ``(PDF, file_path, unit_id, content_id)`` or
        ``(RICH_TEXT, file_path, json_content)``.
    """
    resource_list = []
    course_info = get_courseinfo(term_id, token)
    chapters = course_info.get("results").get("termDto").get("chapters")

    for chapter_num, chapter in enumerate(chapters):
        # ``or []``: a chapter/lesson whose list is missing or null is
        # skipped instead of crashing in enumerate(None).
        for lesson_num, lesson in enumerate(chapter.get("lessons") or []):
            for unit_num, unit in enumerate(lesson.get("units") or []):
                content_type = unit["contentType"]
                if content_type not in file_types:
                    continue

                courseware_num = (chapter_num + 1, lesson_num + 1, unit_num + 1)

                if content_type == VIDEO:
                    file_path = _format_courseware_path(
                        courseware_num, chapter, lesson,
                        COURSEWARE.get(content_type, "Unknown"),
                        repair_filename(unit["name"]),
                    )
                    touch_dir(os.path.dirname(file_path))
                    file_path += ".mp4"
                    playlist.write_path(file_path)
                    resource_list.append(
                        (VIDEO, file_path, unit["id"], unit["contentId"]))
                elif content_type == PDF:
                    file_path = _format_courseware_path(
                        courseware_num, chapter, lesson,
                        COURSEWARE.get(content_type, "Unknown"),
                        repair_filename(unit["name"]),
                    )
                    touch_dir(os.path.dirname(file_path))
                    file_path += ".pdf"
                    resource_list.append(
                        (PDF, file_path, unit["id"], unit["contentId"]))
                elif content_type == RICH_TEXT and unit.get("jsonContent"):
                    # SECURITY NOTE(review): eval() executes text received
                    # from the server. If the payload is strict JSON,
                    # json.loads would be the safe replacement - confirm the
                    # payload format before switching.
                    json_content = eval(unit["jsonContent"])
                    # Only the stem is sanitised; the extension is kept as is.
                    stem, ext = os.path.splitext(json_content["fileName"])
                    file_path = _format_courseware_path(
                        courseware_num, chapter, lesson,
                        "File",
                        repair_filename(stem) + ext,
                    )
                    # Create a directory only for the path that is really
                    # used (no directory for attachments without content).
                    touch_dir(os.path.dirname(file_path))
                    resource_list.append(
                        (RICH_TEXT, file_path, json_content))

    return resource_list


def _format_courseware_path(courseware_num, chapter, lesson,
                            courseware_type, unit_name):
    """Fill ``CONFIG["file_path_template"]`` for one courseware unit."""
    return CONFIG["file_path_template"].format(
        base_dir=base_dir,
        sep=os.path.sep,
        type=courseware_type,
        cnt_1=get_section_num(courseware_num, level=1),
        cnt_2=get_section_num(courseware_num, level=2),
        cnt_3=get_section_num(courseware_num, level=3),
        chapter_name=repair_filename(chapter["name"]),
        lesson_name=repair_filename(lesson["name"]),
        unit_name=unit_name,
    )
def get_resource(term_id, token):
    """Collect the courseware entries of a term.

    Walks chapters -> lessons -> units of the course info returned by
    ``get_courseinfo``. For every unit the lesson directory is created via
    ``touch_dir``; videos, PDFs and rich-text units carrying ``jsonContent``
    are recorded, and video paths are additionally written to the global
    ``playlist``.

    Args:
        term_id: Term identifier forwarded to ``get_courseinfo``.
        token: Login token forwarded to ``get_courseinfo``.

    Returns:
        A list of tuples, one per recorded unit:
        ``(VIDEO, file_path, unit_id, content_id)``,
        ``(PDF, file_path, unit_id, content_id)`` or
        ``(RICH_TEXT, file_path, json_content)``.
    """
    resource_list = []
    course_info = get_courseinfo(term_id, token)
    chapters = course_info.get('results').get('termDto').get('chapters')

    for chapter_num, chapter in enumerate(chapters):
        # ``or []``: a chapter/lesson whose list is missing or null is
        # skipped instead of crashing in enumerate(None).
        for lesson_num, lesson in enumerate(chapter.get('lessons') or []):
            for unit_num, unit in enumerate(lesson.get('units') or []):
                courseware_num = (chapter_num + 1, lesson_num + 1, unit_num + 1)

                # "<base>/<n> <chapter>/<n.m> <lesson>" is shared by every
                # path built for this unit.
                lesson_dir = os.path.join(
                    base_dir,
                    get_section_num(courseware_num, level=1) + " "
                    + repair_filename(chapter["name"]),
                    get_section_num(courseware_num, level=2) + " "
                    + repair_filename(lesson["name"]),
                )
                unit_prefix = get_section_num(courseware_num, level=3) + " "

                file_path = os.path.join(
                    lesson_dir, unit_prefix + repair_filename(unit["name"]))
                touch_dir(os.path.dirname(file_path))

                content_type = unit['contentType']
                if content_type == VIDEO:
                    file_path += '.mp4'
                    playlist.write_path(file_path)
                    resource_list.append(
                        (VIDEO, file_path, unit['id'], unit['contentId']))
                elif content_type == PDF:
                    file_path += ".pdf"
                    resource_list.append(
                        (PDF, file_path, unit['id'], unit['contentId']))
                elif content_type == RICH_TEXT and unit.get('jsonContent'):
                    # SECURITY NOTE(review): eval() executes text received
                    # from the server. If the payload is strict JSON,
                    # json.loads would be the safe replacement - confirm the
                    # payload format before switching.
                    json_content = eval(unit['jsonContent'])
                    # Attachments are stored under their own file name.
                    file_path = os.path.join(
                        lesson_dir,
                        unit_prefix + repair_filename(json_content["fileName"]))
                    resource_list.append(
                        (RICH_TEXT, file_path, json_content))

    return resource_list
def parse(url, config):
    """Prepare the download of the videos behind *url*.

    Merges *config* into the global ``CONFIG``, creates the output
    directories and the optional playlist, fetches the video list, keeps only
    the selected episodes, parses their segment info and publishes the result
    through the global ``exports`` mapping.

    Args:
        url: Address passed to ``get_title`` and ``get_videos``.
        config: Mapping merged into ``CONFIG``; its ``"cookies"`` entry is
            handed to ``spider.set_cookies``.
    """
    # Apply the configuration and fetch the title
    CONFIG.update(config)
    spider.set_cookies(config["cookies"])
    title = get_title(url)
    print(title)

    # Create the required directory layout
    base_path = os.path.join(CONFIG['dir'], title + " - bilibili")
    CONFIG["base_dir"] = touch_dir(repair_filename(base_path))
    video_path = os.path.join(CONFIG['base_dir'], "Videos")
    CONFIG["video_dir"] = touch_dir(video_path)

    # Create the playlist matching the configured type, if any
    if CONFIG["playlist_type"] == "dpl":
        playlist_file = os.path.join(CONFIG['base_dir'], 'Playlist.dpl')
        CONFIG['playlist'] = Dpl(
            playlist_file, path_type=CONFIG["playlist_path_type"])
    elif CONFIG["playlist_type"] == "m3u":
        playlist_file = os.path.join(CONFIG['base_dir'], 'Playlist.m3u')
        CONFIG['playlist'] = M3u(
            playlist_file, path_type=CONFIG["playlist_path_type"])
    else:
        CONFIG['playlist'] = None

    # Fetch the video list
    all_videos = get_videos(url)
    CONFIG["videos"] = all_videos
    if CONFIG['playlist'] is not None:
        CONFIG['playlist'].flush()

    # Resolve the episode selection and drop the videos that are not wanted
    episodes = parse_episodes(CONFIG["episodes"], len(all_videos))
    selected = [video for video in all_videos if video.id in episodes]
    CONFIG["videos"] = selected

    # Parse segment info and video urls
    total = len(selected)
    for index, video in enumerate(selected):
        progress = "{:02}/{:02} parsing segments info...".format(index, total)
        print(progress, end="\r")
        parse_segment_info(video)

    # Export the data needed by the download step
    exported = {
        "videos": selected,
        "video_dir": CONFIG["video_dir"]
    }
    exports.update(exported)
def convert_danmaku(video_path_list):
    """Convert the XML danmaku next to each video into ASS subtitles.

    The ``danmaku2ass`` plugin is downloaded to ``plugins/danmaku2ass.py`` on
    first use. For every path in *video_path_list*, the extension is
    stripped; when both ``<name>.mp4`` and ``<name>.xml`` exist, the video
    resolution is read from the first frame and ``<name>.ass`` is written by
    the plugin. Videos missing either file, or whose first frame cannot be
    read, are skipped.

    Args:
        video_path_list: Iterable of video file paths.

    Raises:
        requests.HTTPError: If the plugin download returns an error status.
    """
    # Make sure the plugin is available
    plugin_url = "https://raw.githubusercontent.com/m13253/danmaku2ass/master/danmaku2ass.py"
    plugin_path = "plugins/danmaku2ass.py"
    plugin_dir = os.path.dirname(plugin_path)
    touch_dir(plugin_dir)
    touch_file(os.path.join(plugin_dir, "__init__.py"))
    if not os.path.exists(plugin_path):
        print("下载插件中……")
        res = requests.get(plugin_url)
        # Fail before writing: an error page saved as the plugin would break
        # this import on every later run, because the file then "exists".
        res.raise_for_status()
        with open(plugin_path, "w", encoding="utf8") as f:
            f.write(res.text)

    # Convert with the plugin
    from plugins.danmaku2ass import Danmaku2ASS

    for video_path in video_path_list:
        name = os.path.splitext(video_path)[0]
        print("convert {} ".format(os.path.split(name)[-1]), end="\r")
        if not os.path.exists(name + ".mp4") or \
                not os.path.exists(name + ".xml"):
            continue

        # Only the first frame is needed to learn the resolution; release
        # the capture handle even if reading raises.
        cap = cv2.VideoCapture(name + ".mp4")
        try:
            ok, frame = cap.read()
        finally:
            cap.release()
        if not ok or frame is None:
            print("[warn] cannot read a frame from {}.mp4, skipped".format(name))
            continue

        h, w = frame.shape[:2]
        Danmaku2ASS(
            name + ".xml", "autodetect", name + ".ass",
            w, h,
            reserve_blank=0,
            font_face=_('(FONT) sans-serif')[7:],
            font_size=w / 40,
            text_opacity=0.8,
            duration_marquee=15.0,
            duration_still=10.0,
            comment_filter=None,
            is_reduce_comments=False,
            progress_callback=None)
def get_resource(term_id, token, file_types=(VIDEO, PDF, RICH_TEXT)):
    """Collect the downloadable courseware entries of a term.

    Walks chapters -> lessons -> units of the course info returned by
    ``get_courseinfo`` and records every unit whose ``contentType`` is in
    *file_types*. The directory of each recorded path is created via
    ``touch_dir``, and video paths are additionally written to the global
    ``playlist``.

    Args:
        term_id: Term identifier forwarded to ``get_courseinfo``.
        token: Login token forwarded to ``get_courseinfo``.
        file_types: Content types to keep (any container supporting ``in``).
            Defaults to videos, PDFs and rich-text attachments.

    Returns:
        A list of tuples, one per kept unit:
        ``(VIDEO, file_path, unit_id, content_id)``,
        ``(PDF, file_path, unit_id, content_id)`` or
        ``(RICH_TEXT, file_path, json_content)``.
    """
    resource_list = []
    course_info = get_courseinfo(term_id, token)
    chapters = course_info.get('results').get('termDto').get('chapters')

    for chapter_num, chapter in enumerate(chapters):
        # ``or []``: a chapter/lesson whose list is missing or null is
        # skipped instead of crashing in enumerate(None).
        for lesson_num, lesson in enumerate(chapter.get('lessons') or []):
            for unit_num, unit in enumerate(lesson.get('units') or []):
                content_type = unit['contentType']
                if content_type not in file_types:
                    continue

                courseware_num = (chapter_num + 1, lesson_num + 1, unit_num + 1)

                if content_type == VIDEO:
                    file_path = _format_courseware_path(
                        courseware_num, chapter, lesson,
                        COURSEWARE.get(content_type, 'Unknown'),
                        repair_filename(unit["name"]),
                    )
                    touch_dir(os.path.dirname(file_path))
                    file_path += '.mp4'
                    playlist.write_path(file_path)
                    resource_list.append(
                        (VIDEO, file_path, unit['id'], unit['contentId']))
                elif content_type == PDF:
                    file_path = _format_courseware_path(
                        courseware_num, chapter, lesson,
                        COURSEWARE.get(content_type, 'Unknown'),
                        repair_filename(unit["name"]),
                    )
                    touch_dir(os.path.dirname(file_path))
                    file_path += ".pdf"
                    resource_list.append(
                        (PDF, file_path, unit['id'], unit['contentId']))
                elif content_type == RICH_TEXT and unit.get('jsonContent'):
                    # SECURITY NOTE(review): eval() executes text received
                    # from the server. If the payload is strict JSON,
                    # json.loads would be the safe replacement - confirm the
                    # payload format before switching.
                    json_content = eval(unit['jsonContent'])
                    # Only the stem is sanitised; the extension is kept as is.
                    stem, ext = os.path.splitext(json_content["fileName"])
                    file_path = _format_courseware_path(
                        courseware_num, chapter, lesson,
                        'File',
                        repair_filename(stem) + ext,
                    )
                    # Create a directory only for the path that is really
                    # used (no directory for attachments without content).
                    touch_dir(os.path.dirname(file_path))
                    resource_list.append(
                        (RICH_TEXT, file_path, json_content))

    return resource_list


def _format_courseware_path(courseware_num, chapter, lesson,
                            courseware_type, unit_name):
    """Fill ``CONFIG['file_path_template']`` for one courseware unit."""
    return CONFIG['file_path_template'].format(
        base_dir=base_dir,
        sep=os.path.sep,
        type=courseware_type,
        cnt_1=get_section_num(courseware_num, level=1),
        cnt_2=get_section_num(courseware_num, level=2),
        cnt_3=get_section_num(courseware_num, level=3),
        chapter_name=repair_filename(chapter["name"]),
        lesson_name=repair_filename(lesson["name"]),
        unit_name=unit_name,
    )
if __name__ == "__main__": root = CONFIG["root"] num_thread = CONFIG["num_thread"] url = sys.argv[1] # 登录并获取信息 token = login(CONFIG["username"], CONFIG["password"]) term_id, course_name = get_summary(url) course_id = re.match(r"https?://www.icourse163.org/(course|learn)/\w+-(\d+)", url).group(2) print(course_name) print(course_id) # 创建必要环境 base_dir = touch_dir(os.path.join(root, course_name)) playlist = Dpl(os.path.join(base_dir, 'Playlist.dpl')) # 获取资源列表 resource_list = get_resource(term_id, token, file_types=CONFIG['file_types']) # 解析资源 resources = [] merge_list = [] for i, resource in enumerate(resource_list): print("parse_resource {}/{}".format(i, len(resource_list)), end="\r") url, file_path, params = parse_resource(resource, token) # 过滤掉已经下载的资源 if os.path.exists(file_path) and not CONFIG['overwrite']: print('[info] {} already exists!'.format(file_path)) continue