# For every video known to `bridge`, gather all per-frame detections into a
# single tensor and pick the highest-scoring box. Gradient tracking is
# disabled because this is pure post-processing of detection results.
with torch.no_grad():
    # NOTE(review): this loop only rebinds `video_index` and nothing in the
    # visible code reads it; it looks like a leftover. Kept in case code
    # outside this section relies on the name -- confirm and remove.
    for video_name in bridge.video_index.keys():
        video_index = bridge.video_index[video_name]

    for video_name, frame_num_list in bridge.video_index.items():
        # Not used in the visible part of the loop body; presumably consumed
        # further down -- verify against the rest of the file.
        ids = [
            bridge.video_frame_2_id(video_name, frame_num)
            for frame_num in frame_num_list
        ]

        # Convert this video clip into a tensor with one row per detection:
        # [image_id, bbox[0], bbox[1], bbox[2], bbox[3], score].
        det_rows = [
            torch.tensor([
                det["image_id"],
                det["bbox"][0],
                det["bbox"][1],
                det["bbox"][2],
                det["bbox"][3],
                det["score"],
            ]).unsqueeze(dim=0)
            for frame_num in frame_num_list
            for det in bridge.get_frame_det(video_name, frame_num)
        ]
        # Concatenate once rather than calling torch.cat per detection, which
        # re-copied all accumulated rows on every append. A video without any
        # detections keeps the empty tensor the original code produced.
        video_dets = torch.cat(det_rows) if det_rows else torch.Tensor()

        # Select the box with the highest score in this video.
        # `used_mask` has one entry per detection row; it is created with
        # torch.zeros, so it is a floating-point tensor and the `True` written
        # below is stored as 1.0.
        # NOTE(review): `select_max_det` is defined elsewhere; its handling of
        # an empty `video_dets` is not visible here -- confirm.
        used_mask = torch.zeros(video_dets.shape[0])
        max_score, max_score_index = select_max_det(video_dets, used_mask)
        # Mark the box that was just selected so it is excluded on the next
        # pass; the intent is to iterate until the max score drops below the
        # 0.2 threshold.
        used_mask[max_score_index] = True