def get_danmaku_from_range(all_children, he_range, offset=45):
    """Return the text of every danmaku element inside a shifted time range.

    Args:
        all_children: Iterable of elements exposing ``tag`` and ``text``
            attributes. Only elements whose ``tag`` is ``'d'`` are timed
            (via ``get_time``) and considered.
        he_range: ``(start, end)`` pair, expressed in whatever unit
            ``get_time`` returns.
        offset: Amount added to both bounds before filtering. Defaults to
            45, the value that used to be hard-coded here.
            NOTE(review): presumably this compensates for a lag between
            the detected range and the matching comments -- confirm.

    Returns:
        A list with ``item.text`` for each ``'d'`` element whose time lies
        in ``[start + offset, end + offset]`` (both ends inclusive), in
        input order. Texts are returned unfiltered, so a ``None`` text is
        passed through as-is.
    """
    start, end = he_range
    lower = start + offset
    upper = end + offset
    return [
        item.text
        for item in all_children
        # Tag check comes first so get_time() only sees 'd' elements.
        if item.tag == 'd' and lower <= get_time(item) <= upper
    ]
def get_heat_time(all_children):
    """Build a sliding-window heat curve and locate its high-energy ranges.

    The function works in two phases:

    1. For every integer ``center`` from 0 up to the time of the last
       element, sum ``get_value`` over the elements admitted to a window
       around ``center`` (elements with time ``< center + 2`` are pushed,
       elements with time ``< center - 2`` are dropped).
    2. Smooth the resulting curve twice with ``half_gaussian_filter``
       (``sigma=50`` and ``sigma=1000``, the latter scaled by 1.2) and
       record every stretch where the first curve rises above the second
       and later falls below it.

    Args:
        all_children: Indexable, non-empty sequence of elements accepted by
            ``get_time`` and ``get_value``.
            NOTE(review): the single forward pass assumes the elements are
            ordered by ascending time -- confirm with the caller.

    Returns:
        A 5-tuple ``(heat_time, ratio, root, he_points, he_range)``:

        * ``heat_time``: ``[centers, heats]``, two parallel lists.
        * ``ratio``: the ``sigma=50`` curve divided by ``root``.
        * ``root``: ``np.sqrt`` of the scaled ``sigma=1000`` curve.
        * ``he_points``: ``[peak_indices, peak_values]`` with one entry per
          completed range (the index and value of the highest
          ``sigma=50`` sample seen in that range).
        * ``he_range``: list of ``(start_index, end_index)`` tuples; the
          end index is the first sample that dropped below the baseline.

    Raises:
        IndexError: If ``all_children`` is empty.
    """
    half_window = 2
    last_time = get_time(all_children[-1])
    total = len(all_children)

    # ---- Phase 1: sliding-window heat for every integer center ----------
    centers = []
    heats = []
    heat_time = [centers, heats]
    window = deque()
    window_heat = 0
    next_idx = 0
    center = 0
    while center <= last_time:
        window_start = center - half_window
        window_end = center + half_window

        # Admit every not-yet-consumed element that starts before the
        # window's right edge.
        while (next_idx < total
               and get_time(all_children[next_idx]) < window_end):
            entering = all_children[next_idx]
            window.append(entering)
            window_heat += get_value(entering)
            next_idx += 1

        # Evict elements that fell off the window's left edge.
        while window and get_time(window[0]) < window_start:
            leaving = window.popleft()
            window_heat -= get_value(leaving)

        centers.append(center)
        heats.append(window_heat)
        center += 1

    # ---- Phase 2: compare a short-term curve against a long-term one ----
    heat_value = heat_time[1]
    heat_value_gaussian = half_gaussian_filter(heat_value, sigma=50)
    heat_value_gaussian2 = half_gaussian_filter(heat_value, sigma=1000) * 1.2

    peak_indices = []
    peak_values = []
    he_points = [peak_indices, peak_values]
    he_range = []
    peak_value = -1
    peak_idx = -1      # -1 means "not currently inside a range"
    range_start = -1
    for idx, value in enumerate(heat_value_gaussian):
        baseline = heat_value_gaussian2[idx]

        if peak_idx == -1:
            # Outside a range: open one as soon as we exceed the baseline.
            assert range_start == -1
            if value > baseline:
                peak_value = value
                peak_idx = idx
                range_start = idx
            continue

        # Inside a range.
        assert range_start != -1
        if value < baseline:
            # Dropped below the baseline: close the range and record it.
            peak_indices.append(peak_idx)
            peak_values.append(peak_value)
            he_range.append((range_start, idx))
            peak_idx = -1
            range_start = -1
        elif value > peak_value:
            peak_value = value
            peak_idx = idx

    # A range still open when the data ends is deliberately NOT recorded:
    # usually the HE point at the end of a live stream is just to say
    # goodbye.

    root = np.sqrt(heat_value_gaussian2)
    return heat_time, heat_value_gaussian / root, root, he_points, he_range
if args.he_map is not None: he_pairs = heat_values[3] all_timestamps = heat_values[0][0] heat_comments = [] xml_list_iter = iter(xml_list) tr4s = TextRank4Sentence() for start, end in tqdm(heat_values[4]): comment_list = [] while True: try: element = next(xml_list_iter) except StopIteration: break if get_time(element) <= start + 45: continue if get_time(element) > end + 45: break if element.tag == 'd': text = element.text if text is not None and not text.replace( " ", "").replace("哈", "") == "": comment_list += [text] print(len(comment_list)) if len(comment_list) > 1000: comment_list = random.sample(comment_list, 1000) tr4s.analyze("\n".join(comment_list), lower=True, source='no_filter') key_sentences = tr4s.get_key_sentences(1)