Ejemplo n.º 1
0
 def get_refresh_video_barrage(self, cid, row_barrages):
     """Return the entries of ``row_barrages`` that still need to be stored.

     If no barrage file exists yet for ``cid``, every fetched barrage is
     returned unchanged. Otherwise the last 5 lines of the file are read and
     the fetched list is scanned from its end for the first entry that
     ``self.__is_same_barrage`` reports as matching those lines; only the
     entries after that position are returned. Progress is reported through
     ``Logger.print_console_info``.

     :param cid: identifier used to look up the barrage file path.
     :param row_barrages: freshly fetched barrage sequence.
     :return: the not-yet-stored part of ``row_barrages`` (possibly empty).
     """
     time_format = "%Y-%m-%d %H:%M:%S"
     barrage_file_path = FileUtil.get_barrage_file_path(cid)
     stored_total = 0  # number of lines already present in the file
     if FileUtil.is_file_exists(barrage_file_path):
         stored_total = FileUtil.get_file_line_count(barrage_file_path)
         tail_lines = FileUtil.get_file_last_n_line_content(barrage_file_path, 5)
         Logger.print_console_info(u"当前文件的最后n条弹幕:\n" + u"\n".join(tail_lines))

         # Scan backwards for the position, inside the fetched list, of the
         # newest barrage that is already in the file (-1 when none matches).
         fetched_total = len(row_barrages)
         matched_at = -1
         cursor = fetched_total - 1
         while cursor >= 0:
             if self.__is_same_barrage(tail_lines, row_barrages[cursor]):
                 matched_at = cursor
                 break
             cursor -= 1

         # Entries located after the match are the ones not stored yet.
         pending = fetched_total - matched_at - 1
         if pending == 0:
             # The match is the last fetched entry: nothing new.
             row_barrages = []
             Logger.print_console_info(
                 unicode(DateTimeUtil.get_cur_timestamp(time_format)) +
                 u"\t" + u"弹幕数据没有更新。")
         elif matched_at >= 0:
             # Only the tail after the match is new.
             Logger.print_console_info(
                 unicode(DateTimeUtil.get_cur_timestamp(time_format)) +
                 u"\t" + u"有弹幕数据更新:" +
                 u"\t" + str(pending))
             row_barrages = row_barrages[matched_at + 1:]
         else:
             # No overlap with the file: every fetched entry is new.
             Logger.print_console_info(
                 unicode(DateTimeUtil.get_cur_timestamp(time_format)) + u"\t" +
                 u"有弹幕数据更新:" + u"\t" + str(pending))
     stored_total += len(row_barrages)
     Logger.print_console_info(
         unicode(DateTimeUtil.get_cur_timestamp(time_format)) +
         u" 当前弹幕总条数:" + unicode(stored_total) + u"\n\n")
     return row_barrages
Ejemplo n.º 2
0
def build_window(danmaku_list, window_size, step_length, parse_dict):
    """Slice a danmaku sequence into sliding time windows.

    Windows start at second 0. Each one covers ``[start, start + window_size]``
    (both ends inclusive) and the next window starts ``step_length`` later.
    Windows are produced while the window start is strictly smaller than the
    ``videoSecond`` of the last element of ``danmaku_list``.

    Any existing ``constants.WINDOW_LOG`` file is deleted first.

    :param danmaku_list: sequence of objects exposing ``videoSecond``. The
        collection loop stops at the first item past the window end, so the
        sequence is presumably sorted by ``videoSecond`` -- confirm with caller.
    :param window_size: length of each window, in the unit of ``videoSecond``.
    :param step_length: distance between consecutive window starts; must be
        greater than zero.
    :param parse_dict: currently unused; only the disabled entropy and
        user-feature steps below consume it.
    :return: list of ``TimeWindow`` objects, empty when ``danmaku_list`` is empty.
    :raises ValueError: if ``step_length`` is not positive.
    """
    # A non-positive step never advances the window start, which would make
    # the loop below run forever while appending windows.
    if step_length <= 0:
        raise ValueError("step_length must be positive, got %r" % (step_length,))

    if FileUtil.is_file_exists(constants.WINDOW_LOG):
        os.remove(constants.WINDOW_LOG)

    # Nothing to window; also avoids an IndexError on danmaku_list[-1].
    if not danmaku_list:
        return []

    last_second = danmaku_list[-1].videoSecond
    window_list = []
    current_index = 0
    current_start = 0
    while current_start < last_second:
        current_end = current_start + window_size
        logging.info("Building time window %d...", current_index)

        # Collect the danmaku whose timestamp falls inside the window.
        current_danmaku = []
        for danmaku in danmaku_list:
            if danmaku.videoSecond > current_end:
                break
            if danmaku.videoSecond >= current_start:
                current_danmaku.append(danmaku)

        # write_window_log(current_index, current_start, current_end, current_danmaku)
        time_window = TimeWindow(current_index, current_start, current_end)
        time_window.buildUsers(danmakuutil.extract_users(current_danmaku))
        time_window.buildTSCs(len(current_danmaku))
        time_window.buildTSCLength(current_danmaku)
        # time_window.buildEntropy(current_danmaku, parse_dict)
        # time_window.buildUserFeature(danmakuutil.extract_user_feature(current_danmaku, parse_dict, "Word-Frequency"))
        window_list.append(time_window)

        current_index += 1
        current_start += step_length

    return window_list
 def get_refresh_video_barrage(self, cid, row_barrages):
     """Return the entries of ``row_barrages`` that still need to be stored.

     If no barrage file exists yet for ``cid``, every fetched barrage is
     returned unchanged. Otherwise the last 5 lines of the file are read and
     the fetched list is scanned from its end for the first entry that
     ``self.__is_same_barrage`` reports as matching those lines; only the
     entries after that position are returned. Progress is reported through
     ``Logger.print_console_info``.

     :param cid: identifier used to look up the barrage file path.
     :param row_barrages: freshly fetched barrage sequence.
     :return: the not-yet-stored part of ``row_barrages`` (possibly empty).
     """
     time_format = "%Y-%m-%d %H:%M:%S"
     barrage_file_path = FileUtil.get_barrage_file_path(cid)
     stored_total = 0  # number of lines already present in the file
     if FileUtil.is_file_exists(barrage_file_path):
         stored_total = FileUtil.get_file_line_count(barrage_file_path)
         tail_lines = FileUtil.get_file_last_n_line_content(barrage_file_path, 5)
         Logger.print_console_info(u"当前文件的最后n条弹幕:\n" + u"\n".join(tail_lines))

         # Scan backwards for the position, inside the fetched list, of the
         # newest barrage that is already in the file (-1 when none matches).
         fetched_total = len(row_barrages)
         matched_at = -1
         cursor = fetched_total - 1
         while cursor >= 0:
             if self.__is_same_barrage(tail_lines, row_barrages[cursor]):
                 matched_at = cursor
                 break
             cursor -= 1

         # Entries located after the match are the ones not stored yet.
         pending = fetched_total - matched_at - 1
         if pending == 0:
             # The match is the last fetched entry: nothing new.
             row_barrages = []
             Logger.print_console_info(
                 unicode(DateTimeUtil.get_cur_timestamp(time_format)) +
                 u"\t" + u"弹幕数据没有更新。")
         elif matched_at >= 0:
             # Only the tail after the match is new.
             Logger.print_console_info(
                 unicode(DateTimeUtil.get_cur_timestamp(time_format)) +
                 u"\t" + u"有弹幕数据更新:" +
                 u"\t" + str(pending))
             row_barrages = row_barrages[matched_at + 1:]
         else:
             # No overlap with the file: every fetched entry is new.
             Logger.print_console_info(
                 unicode(DateTimeUtil.get_cur_timestamp(time_format)) + u"\t" +
                 u"有弹幕数据更新:" + u"\t" + str(pending))
     stored_total += len(row_barrages)
     Logger.print_console_info(
         unicode(DateTimeUtil.get_cur_timestamp(time_format)) +
         u" 当前弹幕总条数:" + unicode(stored_total) + u"\n\n")
     return row_barrages
Ejemplo n.º 4
0
            if feature1 is not None and feature2 is not None:
                sim = simutil.word_frequency_cos_sim(feature1, feature2)
                if sim > 0:
                    count += 1
                cmatrix[index1, index2] = sim
                cmatrix[index2, index1] = sim
    print count
    return cmatrix


if __name__ == "__main__":
    # Script entry point: load the danmaku data source, segment it and build
    # the list of time windows.
    logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s',
                        level=logging.INFO)
    # First make sure the danmaku output directory exists; create it if it does not.
    FileUtil.create_dir_if_not_exist(constants.DUMP_PATH)
    # Delete the parse log file if one is already present.
    if FileUtil.is_file_exists(constants.PARSE_LOG):
        os.remove(constants.PARSE_LOG)
    danmakuList = getDataSource(constants.DATASOURCE)
    # Store the extracted users as a list on the constants module
    # (presumably read by other modules -- confirm).
    constants.USERID = list(danmakuutil.extract_users(danmakuList))
    parse_dict = WordSegment.get_parse_dict(danmakuList)
    # GensimSupport.get_corpus(parse_dict)
    windowList = build_window(danmakuList, constants.WINDOW_SIZE,
                              constants.STEP_LENGTH, parse_dict)
    # Disabled below: statistics, and per-window matrix generation with a dump
    # of each matrix to a text file under constants.DUMP_PATH.
    # getStatistics(windowList)
    # for time_window in windowList:
    #     logging.info("Start generating matrix" + str(time_window.index) + "...")
    #     matrix = generateMatrix(time_window)
    #     matrix_file_name = "matrix"+str(time_window.index)+".txt"
    #     with open(os.path.join(constants.DUMP_PATH, matrix_file_name), mode="w") as f:
    #         np.savetxt(f, matrix, fmt='%.2f', newline='\n')