Esempio n. 1
0
def get_videos(url):
    """Build the list of videos for the season page at ``url``.

    The page is fetched and searched for its ``season_id``; that id is
    substituted into ``info_api`` and every entry under
    ``result.main_section.episodes`` of the JSON answer becomes one
    ``BililiVideo`` (ids start at 1, in the order the API returns them).

    Side effect: when ``CONFIG['playlist']`` is not ``None`` each target
    file path is written to it.

    Returns:
        list of ``BililiVideo``.
    """
    page_html = spider.get(url).text
    season_match = re.search(
        r'"param":{"season_id":(\d+),"season_type":\d+}', page_html)
    season_id = season_match.group(1)

    response = spider.get(info_api.format(season_id=season_id))
    episodes = response.json()["result"]["main_section"]["episodes"]

    videos = []
    for number, episode in enumerate(episodes, start=1):
        title = episode["title"]
        # Titles made only of digits (optionally with one dot) get the
        # episode-number wrapper; any other title is used verbatim.
        if re.match(r'^\d*\.?\d*$', title):
            title = '第{}话'.format(title)
        name = repair_filename(' '.join([title, episode["long_title"]]))
        file_name = repair_filename('{}.mp4'.format(name))
        file_path = os.path.join(CONFIG['video_dir'], file_name)

        if CONFIG['playlist'] is not None:
            CONFIG['playlist'].write_path(file_path)

        meta = {
            "aid": episode["aid"],
            "cid": episode["cid"],
            "epid": episode["id"],
        }
        video = BililiVideo(
            id=number,
            name=name,
            path=file_path,
            meta=meta,
            segment_dl=CONFIG["segment_dl"],
            segment_size=CONFIG["segment_size"],
            overwrite=CONFIG["overwrite"],
            spider=spider,
        )
        videos.append(video)
    return videos
Esempio n. 2
0
def get_videos(url):
    """Build the list of videos (one per part) for an av-numbered ``url``.

    Accepted addresses are ``http(s)://www.bilibili.com/video/av<N>`` and
    ``http(s)://b23.tv/av<N>``. The av number is stored in
    ``CONFIG["avid"]``, substituted into ``info_api``, and every entry of
    the JSON answer's ``data`` list becomes one ``BililiVideo`` (ids start
    at 1, in the order the API returns them).

    Side effect: when ``CONFIG['playlist']`` is not ``None`` each target
    file path is written to it.

    Args:
        url: address of the video page.

    Returns:
        list of ``BililiVideo``.

    Raises:
        ValueError: if no av number can be extracted from ``url``.
    """
    # One pattern covers both hosts; the dots are escaped so that only the
    # literal host names match.
    match = re.match(
        r"https?://(?:www\.bilibili\.com/video|b23\.tv)/av(\d+)", url)
    if match is None:
        # Previously this path fell through and failed later with an
        # unbound-variable error on ``avid``.
        raise ValueError(
            "unsupported url, cannot extract av number: {}".format(url))
    avid = match.group(1)
    CONFIG["avid"] = avid

    info_url = info_api.format(avid=avid)
    res = spider.get(info_url)

    videos = []
    for i, item in enumerate(res.json()["data"]):
        file_path = os.path.join(CONFIG['video_dir'], repair_filename(
            '{}.mp4'.format(item["part"])))
        if CONFIG['playlist'] is not None:
            CONFIG['playlist'].write_path(file_path)
        videos.append(BililiVideo(
            id=i + 1,
            name=item["part"],
            path=file_path,
            meta={
                "cid": item["cid"]
            },
            segment_dl=CONFIG["segment_dl"],
            segment_size=CONFIG["segment_size"],
            overwrite=CONFIG["overwrite"],
            spider=spider
        ))
    return videos
Esempio n. 3
0
def get_summary(url):
    """Read the term id and the course name from the course home page.

    ``learn/`` in ``url`` is replaced by ``course/`` before the page is
    fetched. The term id is the first ``termId : "<digits>"`` occurrence;
    the course name is built from every ``name:"..."`` hit except the
    first one, joined with ``" - "``.

    Returns:
        tuple ``(term_id, course_name)`` where ``course_name`` has been
        passed through ``repair_filename``.
    """
    course_url = url.replace('learn/', 'course/')
    page = spider.get(course_url).text

    term_match = re.search(r'termId : "(\d+)"', page)
    term_id = term_match.group(1)

    # The first match is skipped on purpose; only the remaining ones make
    # up the displayed course name.
    name_parts = re.findall(r'name:"(.+)"', page)[1:]
    course_name = repair_filename(" - ".join(name_parts))

    return term_id, course_name
Esempio n. 4
0
def get_resource(term_id, token, file_types=(VIDEO, PDF, RICH_TEXT)):
    """Collect the downloadable courseware of a term.

    The course structure returned by ``get_courseinfo(term_id, token)`` is
    walked chapter -> lesson -> unit. For every unit whose ``contentType``
    is in ``file_types`` the target directory is created and an entry is
    appended to the result.

    Args:
        term_id: forwarded to ``get_courseinfo``.
        token: forwarded to ``get_courseinfo``.
        file_types: container of content types to keep (any object
            supporting ``in``). The default is an immutable tuple so the
            default value cannot be altered between calls.

    Returns:
        list of tuples, in traversal order:
        ``(VIDEO, path, unit_id, content_id)``,
        ``(PDF, path, unit_id, content_id)`` or
        ``(RICH_TEXT, path, json_content)``. Rich-text units without a
        ``jsonContent`` value produce no entry.

    Side effects: creates directories through ``touch_dir`` and writes
    every video path to the module-level ``playlist``.
    """

    def build_path(numbering, chapter, lesson, type_name, unit_name):
        """Fill ``CONFIG["file_path_template"]`` for one unit."""
        return CONFIG["file_path_template"].format(
            base_dir=base_dir,
            sep=os.path.sep,
            type=type_name,
            cnt_1=get_section_num(numbering, level=1),
            cnt_2=get_section_num(numbering, level=2),
            cnt_3=get_section_num(numbering, level=3),
            chapter_name=repair_filename(chapter["name"]),
            lesson_name=repair_filename(lesson["name"]),
            unit_name=unit_name,
        )

    resource_list = []

    course_info = get_courseinfo(term_id, token)
    chapters = course_info.get("results").get("termDto").get("chapters")
    for chapter_num, chapter in enumerate(chapters):
        for lesson_num, lesson in enumerate(chapter.get("lessons")):
            for unit_num, unit in enumerate(lesson.get("units")):
                content_type = unit["contentType"]
                if content_type not in file_types:
                    continue

                # 1-based (chapter, lesson, unit) position used for the
                # section numbers in the path.
                courseware_num = (chapter_num + 1, lesson_num + 1,
                                  unit_num + 1)
                file_path = build_path(
                    courseware_num, chapter, lesson,
                    COURSEWARE.get(content_type, "Unknown"),
                    repair_filename(unit["name"]),
                )
                touch_dir(os.path.dirname(file_path))

                if content_type == VIDEO:
                    file_path += ".mp4"
                    playlist.write_path(file_path)
                    resource_list.append(
                        (VIDEO, file_path, unit["id"], unit["contentId"]))
                elif content_type == PDF:
                    file_path += ".pdf"
                    resource_list.append(
                        (PDF, file_path, unit["id"], unit["contentId"]))
                elif content_type == RICH_TEXT and unit.get("jsonContent"):
                    # NOTE(review): eval() executes text received from the
                    # server. Kept to preserve behaviour; replace with a
                    # data-only parser (json.loads / ast.literal_eval) once
                    # the payload format is confirmed.
                    json_content = eval(unit["jsonContent"])
                    # Attachments are named after the attached file; only
                    # the stem is sanitised, the extension is kept as is.
                    stem, ext = os.path.splitext(json_content["fileName"])
                    file_path = build_path(
                        courseware_num, chapter, lesson,
                        "File",
                        repair_filename(stem) + ext,
                    )
                    touch_dir(os.path.dirname(file_path))

                    resource_list.append(
                        (RICH_TEXT, file_path, json_content))

    return resource_list
Esempio n. 5
0
def parse_segment_info(video):
    """Download subtitles and danmaku for ``video`` and resolve its segments.

    Steps, in order:
      1. Query ``subtitle_api``; for every listed subtitle write a ``.srt``
         file next to the video (video path stem + language label).
      2. Download the danmaku XML to ``<video path stem>.xml``.
      3. Probe ``parse_api`` with ``qn=80``; if the answer's ``code`` is not
         0 a warning is printed and the function returns without segments.
      4. Pick a quality from ``CONFIG['qn_seq']``, query ``parse_api`` again
         and append one ``BililiVideoSegment`` per ``durl`` entry to
         ``video.segments``.

    Args:
        video: object providing ``meta["cid"]``, ``path``, ``name`` and a
            ``segments`` list.

    Returns:
        None. Results are stored on ``video.segments``.
    """

    cid, avid = video.meta["cid"], CONFIG["avid"]
    path_stem = os.path.splitext(video.path)[0]

    # Check for subtitles and download them.
    subtitle_url = subtitle_api.format(avid=avid, cid=cid)
    res = spider.get(subtitle_url)
    subtitle_match = re.search(r"<subtitle>(.+)</subtitle>", res.text)
    # A response without a <subtitle> element means there is nothing to
    # download; previously this crashed on ``None.group``.
    if subtitle_match is not None:
        subtitles_info = json.loads(subtitle_match.group(1))
        for sub_info in subtitles_info["subtitles"]:
            sub_path = path_stem + sub_info["lan_doc"] + ".srt"
            subtitle = Subtitle(sub_path)
            # subtitle_url is concatenated after "https:" as-is.
            sub_lines = spider.get(
                "https:" + sub_info["subtitle_url"]).json()["body"]
            for sub_line in sub_lines:
                subtitle.write_line(
                    sub_line["content"], sub_line["from"], sub_line["to"])

    # Download the danmaku.
    danmaku_url = danmaku_api.format(cid=cid)
    res = spider.get(danmaku_url)
    res.encoding = "utf-8"
    danmaku_path = path_stem + ".xml"
    with open(danmaku_path, "w", encoding="utf-8") as f:
        f.write(res.text)

    # Check that the video can be downloaded and find out which qualities
    # are offered, then choose the best match.
    touch_message = spider.get(parse_api.format(
        avid=avid, cid=cid, qn=80)).json()
    if touch_message["code"] != 0:
        print("warn: 无法下载 {} ,原因: {}".format(
            video.name, touch_message["message"]))
        return

    accept_quality = touch_message['data']['accept_quality']
    # First preferred quality that is offered wins. If none is offered the
    # loop runs to completion and ``qn`` stays at the last preference.
    for qn in CONFIG['qn_seq']:
        if qn in accept_quality:
            break

    parse_url = parse_api.format(avid=avid, cid=cid, qn=qn)
    res = spider.get(parse_url)

    for i, segment in enumerate(res.json()['data']['durl']):
        segment_id = i + 1
        file_path = os.path.join(CONFIG['video_dir'], repair_filename(
            '{}_{:02d}.flv'.format(video.name, segment_id)))
        video.segments.append(BililiVideoSegment(
            id=segment_id,
            path=file_path,
            url=segment["url"],
            qn=qn,
            video=video
        ))
Esempio n. 6
0
def parse_segment_info(video):
    """Download the danmaku for ``video`` and resolve its segments.

    Steps, in order:
      1. Download the danmaku XML to ``<video path stem>.xml``.
      2. Probe ``parse_api`` with ``qn=80``; if the answer's ``code`` is not
         0 a warning is printed, the video is switched to ``DONE`` and the
         function returns without segments.
      3. Print a warning when the answer marks the video as a preview.
      4. Pick a quality from ``CONFIG['qn_seq']``, query ``parse_api`` again
         and append one ``BililiVideoSegment`` per ``durl`` entry to
         ``video.segments``.

    Args:
        video: object providing ``meta`` (keys ``aid``, ``cid``, ``epid``),
            ``path``, ``name``, ``switch_status`` and a ``segments`` list.

    Returns:
        None. Results are stored on ``video.segments``.
    """

    aid, cid, ep_id = video.meta["aid"], video.meta["cid"], video.meta["epid"]

    # Download the danmaku.
    danmaku_url = danmaku_api.format(cid=cid)
    res = spider.get(danmaku_url)
    res.encoding = "utf-8"
    danmaku_path = os.path.splitext(video.path)[0] + ".xml"
    with open(danmaku_path, "w", encoding="utf-8") as f:
        f.write(res.text)

    # Check that the video can be downloaded and find out which qualities
    # are offered, then choose the best match.
    touch_message = spider.get(parse_api.format(
        avid=aid, cid=cid, ep_id=ep_id, qn=80)).json()
    if touch_message["code"] != 0:
        print("warn: 无法下载 {} ,原因: {}".format(
            video.name, touch_message["message"]))
        video.switch_status(DONE)
        return
    if touch_message["result"]["is_preview"] == 1:
        print("warn: {} 为预览版视频".format(video.name))

    accept_quality = touch_message['result']['accept_quality']
    # First preferred quality that is offered wins. If none is offered the
    # loop runs to completion and ``qn`` stays at the last preference.
    for qn in CONFIG['qn_seq']:
        if qn in accept_quality:
            break

    parse_url = parse_api.format(avid=aid, cid=cid, ep_id=ep_id, qn=qn)
    res = spider.get(parse_url)

    for i, segment in enumerate(res.json()['result']['durl']):
        segment_id = i + 1
        file_path = os.path.join(CONFIG['video_dir'], repair_filename(
            '{}_{:02d}.flv'.format(video.name, segment_id)))
        video.segments.append(BililiVideoSegment(
            id=segment_id,
            path=file_path,
            url=segment["url"],
            qn=qn,
            video=video
        ))
Esempio n. 7
0
def get_resource(term_id, token):
    """Collect the downloadable courseware of a term.

    Walks chapter -> lesson -> unit of the structure returned by
    ``get_courseinfo(term_id, token)``. Each unit is placed at
    ``base_dir/<n> <chapter>/<n> <lesson>/<n> <unit>``; the directory part
    is created through ``touch_dir`` for every unit.

    Returns:
        list of tuples, in traversal order:
        ``(VIDEO, path, unit_id, content_id)``,
        ``(PDF, path, unit_id, content_id)`` or
        ``(RICH_TEXT, path, json_content)``. Units of any other type, and
        rich-text units without ``jsonContent``, produce no entry.

    Side effect: every video path is written to the module-level
    ``playlist``.
    """

    def numbered_path(numbering, chapter, lesson, leaf_name):
        """Join base_dir with the three numbered, sanitised components."""
        return os.path.join(
            base_dir,
            get_section_num(numbering, level=1) + " " + repair_filename(chapter["name"]),
            get_section_num(numbering, level=2) + " " + repair_filename(lesson["name"]),
            get_section_num(numbering, level=3) + " " + repair_filename(leaf_name),
        )

    collected = []

    info = get_courseinfo(term_id, token)
    chapters = info.get('results').get('termDto').get('chapters')
    for chapter_no, chapter in enumerate(chapters, start=1):
        for lesson_no, lesson in enumerate(chapter.get('lessons'), start=1):
            for unit_no, unit in enumerate(lesson.get('units'), start=1):
                numbering = (chapter_no, lesson_no, unit_no)
                target = numbered_path(numbering, chapter, lesson, unit["name"])
                touch_dir(os.path.dirname(target))

                if unit['contentType'] == VIDEO:
                    target = target + '.mp4'
                    playlist.write_path(target)
                    entry = (VIDEO, target, unit['id'], unit['contentId'])
                    collected.append(entry)
                elif unit['contentType'] == PDF:
                    target = target + ".pdf"
                    entry = (PDF, target, unit['id'], unit['contentId'])
                    collected.append(entry)
                elif unit['contentType'] == RICH_TEXT:
                    if not unit.get('jsonContent'):
                        continue
                    # NOTE(review): eval() runs text that comes from the
                    # course API; flagged for replacement with a data-only
                    # parser, left unchanged here.
                    json_content = eval(unit['jsonContent'])
                    # Attachments are named after the attached file rather
                    # than the unit.
                    target = numbered_path(
                        numbering, chapter, lesson, json_content["fileName"])
                    collected.append((RICH_TEXT, target, json_content))

    return collected
Esempio n. 8
0
def parse(url, config):
    """Prepare everything needed to download the videos behind ``url``.

    Merges ``config`` into ``CONFIG``, sets the spider's cookies, prints the
    title, creates ``<dir>/<title> - bilibili`` and its ``Videos``
    sub-directory, optionally opens a playlist, collects the video list,
    keeps only the selected episodes, resolves the segments of each one and
    finally publishes ``videos`` and ``video_dir`` through ``exports``.

    Args:
        url: address handed to ``get_title`` and ``get_videos``.
        config: mapping merged into ``CONFIG``; must contain ``"cookies"``.
    """
    # Settings, cookies and title.
    CONFIG.update(config)
    spider.set_cookies(config["cookies"])
    title = get_title(url)
    print(title)

    # Directory layout.
    base_dir = touch_dir(repair_filename(os.path.join(
        CONFIG['dir'], title + " - bilibili")))
    CONFIG["base_dir"] = base_dir
    CONFIG["video_dir"] = touch_dir(os.path.join(base_dir, "Videos"))

    # Playlist writer; any type other than "dpl" / "m3u" disables it.
    playlist_type = CONFIG["playlist_type"]
    if playlist_type == "dpl":
        playlist = Dpl(
            os.path.join(base_dir, 'Playlist.dpl'),
            path_type=CONFIG["playlist_path_type"])
    elif playlist_type == "m3u":
        playlist = M3u(
            os.path.join(base_dir, 'Playlist.m3u'),
            path_type=CONFIG["playlist_path_type"])
    else:
        playlist = None
    CONFIG['playlist'] = playlist

    # Full video list; the playlist is flushed before any filtering.
    all_videos = get_videos(url)
    CONFIG["videos"] = all_videos
    if playlist is not None:
        playlist.flush()

    # Keep only the requested episodes.
    episodes = parse_episodes(CONFIG["episodes"], len(all_videos))
    videos = [video for video in all_videos if video.id in episodes]
    CONFIG["videos"] = videos

    # Resolve segment information and URLs for each remaining video.
    total = len(videos)
    for i, video in enumerate(videos):
        progress = "{:02}/{:02} parsing segments info...".format(i, total)
        print(progress, end="\r")
        parse_segment_info(video)

    # Hand the data required for downloading to the caller.
    exported = {
        "videos": videos,
        "video_dir": CONFIG["video_dir"],
    }
    exports.update(exported)
Esempio n. 9
0
def get_resource(term_id, token, file_types=(VIDEO, PDF, RICH_TEXT)):
    """Collect the downloadable courseware of a term.

    Walks chapter -> lesson -> unit of the structure returned by
    ``get_courseinfo(term_id, token)``. Units whose ``contentType`` is not
    in ``file_types`` are skipped; for every other unit the path is built
    from ``CONFIG['file_path_template']`` and its directory is created.

    Args:
        term_id: forwarded to ``get_courseinfo``.
        token: forwarded to ``get_courseinfo``.
        file_types: container of content types to keep (any object
            supporting ``in``). The default is a tuple rather than a list
            so the shared default value cannot be mutated.

    Returns:
        list of tuples, in traversal order:
        ``(VIDEO, path, unit_id, content_id)``,
        ``(PDF, path, unit_id, content_id)`` or
        ``(RICH_TEXT, path, json_content)``. Rich-text units without a
        ``jsonContent`` value produce no entry.

    Side effects: creates directories through ``touch_dir`` and writes
    every video path to the module-level ``playlist``.
    """
    resource_list = []

    course_info = get_courseinfo(term_id, token)
    chapters = course_info.get('results').get('termDto').get('chapters')
    for chapter_num, chapter in enumerate(chapters):
        for lesson_num, lesson in enumerate(chapter.get('lessons')):
            for unit_num, unit in enumerate(lesson.get('units')):
                if unit['contentType'] not in file_types:
                    continue
                # 1-based (chapter, lesson, unit) position.
                courseware_num = (chapter_num+1, lesson_num+1, unit_num+1)
                # Template fields shared by the unit path and, for rich
                # text, the attachment path.
                common_fields = dict(
                    base_dir=base_dir,
                    sep=os.path.sep,
                    cnt_1=get_section_num(courseware_num, level=1),
                    cnt_2=get_section_num(courseware_num, level=2),
                    cnt_3=get_section_num(courseware_num, level=3),
                    chapter_name=repair_filename(chapter["name"]),
                    lesson_name=repair_filename(lesson["name"]),
                )
                file_path = CONFIG['file_path_template'].format(
                    type=COURSEWARE.get(unit['contentType'], 'Unknown'),
                    unit_name=repair_filename(unit["name"]),
                    **common_fields
                )
                touch_dir(os.path.dirname(file_path))

                if unit['contentType'] == VIDEO:
                    file_path += '.mp4'
                    playlist.write_path(file_path)
                    resource_list.append((
                        VIDEO,
                        file_path,
                        unit['id'],
                        unit['contentId']
                    ))
                elif unit['contentType'] == PDF:
                    file_path += ".pdf"
                    resource_list.append((
                        PDF,
                        file_path,
                        unit['id'],
                        unit['contentId']
                    ))
                elif unit['contentType'] == RICH_TEXT:
                    if unit.get('jsonContent'):
                        # NOTE(review): eval() executes text received from
                        # the server. Kept to preserve behaviour; switch to
                        # a data-only parser (json.loads /
                        # ast.literal_eval) once the payload format is
                        # confirmed.
                        json_content = eval(unit['jsonContent'])
                        # Only the stem of the attachment name is
                        # sanitised; the extension is appended unchanged.
                        stem, ext = os.path.splitext(json_content["fileName"])
                        file_path = CONFIG['file_path_template'].format(
                            type='File',
                            unit_name=repair_filename(stem) + ext,
                            **common_fields
                        )
                        touch_dir(os.path.dirname(file_path))

                        resource_list.append((
                            RICH_TEXT,
                            file_path,
                            json_content
                        ))

    return resource_list