Ejemplo n.º 1
0
def get_parse_dict(danmaku_list):
    """Segment every danmaku's content and index the results by row id.

    Loads the jieba user dictionary from ``constants.USER_DICT_PATH`` and the
    emotion dictionary from ``WordSegment/emotion_dict.txt`` under the project
    root, then calls ``wordSegment`` on each entry that has content.

    Args:
        danmaku_list: iterable of objects exposing ``rowId`` and ``content``.

    Returns:
        A dict mapping each ``rowId`` to the ``wordSegment`` result for that
        entry, or to ``None`` when the entry's ``content`` is ``None``.
    """
    logging.info("Starting parsing sentences in Danmaku...")
    jieba.load_userdict(constants.USER_DICT_PATH)
    project_root = FileUtil.get_project_root_path()
    emotions = load_emotion_dict(
        os.path.join(project_root, "WordSegment", "emotion_dict.txt"))

    segmented = {}
    for entry in danmaku_list:
        key = entry.rowId
        # Entries without content are still recorded, with a None placeholder.
        segmented[key] = (None if entry.content is None
                          else wordSegment(emotions, entry.content))

    logging.info("parse dictionary has generated!")
    return segmented
Ejemplo n.º 2
0
    parse_dict = dict()
    jieba.load_userdict(constants.USER_DICT_PATH)
    emotion_dict_path = os.path.join(FileUtil.get_project_root_path(),
                                     "WordSegment", "emotion_dict.txt")
    emotion_dict = load_emotion_dict(emotion_dict_path)
    for danmaku in danmaku_list:
        rowId = danmaku.rowId
        if danmaku.content is not None:
            words = wordSegment(emotion_dict, danmaku.content)
            parse_dict[rowId] = words
        else:
            parse_dict[rowId] = None
    logging.info("parse dictionary has generated!")
    return parse_dict


if __name__ == "__main__":
    # 测试代码
    danmaku_list = getDataSource(constants.DATASOURCE)
    emotion_dict_path = os.path.join(FileUtil.get_project_root_path(),
                                     "WordSegment", "emotion_dict.txt")
    emotion_dict = load_emotion_dict(emotion_dict_path)
    for (key, value_set) in emotion_dict.items():
        print key, u"\t", u"\t".join(value for value in value_set), u"\n"
    for danmaku in danmaku_list:
        if danmaku.content is None:
            continue
        words = wordSegment(emotion_dict, danmaku.content)
        for word in words:
            print word.content
Ejemplo n.º 3
0
    for barrage_tuple in barrages:
        barrage = Barrage(play_timestamp=barrage_tuple[0],
                          type=barrage_tuple[1],
                          font_size=barrage_tuple[2],
                          font_color=barrage_tuple[3],
                          unix_timestamp=barrage_tuple[4],
                          pool=barrage_tuple[5],
                          sender_id=barrage_tuple[6],
                          row_id=barrage_tuple[7],
                          content=barrage_tuple[8])
        barrage_list.append(barrage)
    return barrage_list


if __name__ == "__main__":
    # 测试代码,测试从xml文件中读取的数据。
    # danmakuList = getDanmakuListFromTxtFile(FILE_PATH)
    # for danmaku in danmakuList:
    #     print danmaku.videoSecond, u"\t", danmaku.content

    # 将xml文件的弹幕数据读取出来,排序后写入当前的文件夹下。
    # gen_sorted_danmaku_file_from_xml(os.path.join(FileUtil.get_project_root_path(), "data", "movie", "2065063.xml"))

    # 从弹幕的live数据中读取信息。
    danmaku_list = getDanmakuListFromLiveTextFile(
        os.path.join(FileUtil.get_project_root_path(), "data", "AlphaGo",
                     "bilibili", "2016-03-09.txt"))
    for danmaku in danmaku_list:
        print str(danmaku.videoSecond
                  ), u"\t", danmaku.senderId, u"\t", danmaku.content, u"\n"