def _gen_detection_video(video_list,
                         cls_data,
                         thu_label_id,
                         video_dict,
                         cfg,
                         num_prop=200,
                         topk=2):
    """Merge per-window predictions of each video into ranked detections.

    For every video the pickled window results found under
    ``cfg.output_path/<video_name>`` are accumulated onto a whole-video
    snippet grid, averaged where windows overlap, turned into scored
    (xmin, xmax) candidates, filtered with ``Soft_NMS`` and finally combined
    with the ``topk`` best video-level class scores.  The resulting list of
    ``{"label", "score", "segment"}`` dicts is written to the module-level
    ``result_dict`` under the video name; nothing is returned.

    Videos whose output folder cannot be listed, or is empty, are reported
    and skipped (no entry is written to ``result_dict`` for them).

    Args:
        video_list: Iterable of video names (used as folder names and as keys
            of ``video_dict``).
        cls_data: Iterable aligned with ``video_list``; each item is a
            sequence of video-level class scores.
        thu_label_id: Indexable mapping from a position in the class-score
            vector to a label id; ``id + 1`` is used to index
            ``thumos_class``.
        video_dict: Mapping ``video_name -> {"frame": ..., "duration": ...}``.
        cfg: Config object providing ``DATASET.dscale``, ``output_path``,
            ``FEATURE.snippet_stride`` and ``DETECTION_POST.iou_threshold`` /
            ``DETECTION_POST.sigma``.
        num_prop: Maximum number of candidates kept per video before they are
            expanded by ``topk`` classes.
        topk: Number of top-scoring video-level classes attached to each
            candidate.
    """
    dscale = cfg.DATASET.dscale
    cols = ["xmin", "xmax", "score"]

    for video_name, video_cls in zip(video_list, cls_data):
        # Collect all window result files of this video.
        video_folder = os.path.join(cfg.output_path, video_name)
        try:
            files = [
                os.path.join(video_folder, f)
                for f in os.listdir(video_folder)
            ]
        except OSError:
            # Folder missing or unreadable: nothing to merge for this video.
            print("Missing result for video {}".format(video_name))
            continue
        if not files:
            # Without any window result there is nothing meaningful to emit;
            # skip exactly like the unreadable-folder case above.
            print("Missing result for video {}".format(video_name))
            continue
        files.sort()

        # Prepare the whole-video accumulators.
        num_frames = video_dict[video_name]["frame"]
        seconds = video_dict[video_name]["duration"]
        snippet_stride = cfg.FEATURE.snippet_stride
        snippet_num = num_frames // snippet_stride + 1
        v_snippets = [snippet_stride * i for i in range(snippet_num)]
        snippet_num = len(v_snippets)

        v_iou_map = np.zeros((dscale, snippet_num))
        v_tem_s = np.zeros(snippet_num)
        v_tem_e = np.zeros(snippet_num)

        # Sliding windows overlap, so the same snippet may be predicted
        # several times; keep counts to average afterwards.
        v_iou_map_cnt = np.zeros((dscale, snippet_num))
        v_tem_cnt = np.zeros(snippet_num)

        for snippet_file in files:
            # NOTE: pickle.load executes arbitrary code from the file; these
            # files are expected to be this pipeline's own inference output.
            with open(snippet_file, "rb") as infile:
                result_data = pickle.load(infile)

            (tmp_snippet, pred_local_s, pred_local_e, pred_global_s,
             pred_global_e, pred_iou_map) = result_data
            # Geometric mean of the local and global boundary predictions.
            pred_start = np.sqrt(pred_local_s * pred_global_s)
            pred_end = np.sqrt(pred_local_e * pred_global_e)

            # Restrict the window to snippets with a valid time stamp
            # (0 <= stamp <= num_frames).
            start_idx = int(np.argwhere(tmp_snippet >= 0).min())
            end_idx = int(np.argwhere(tmp_snippet <= num_frames).max())
            true_start = tmp_snippet[start_idx]
            true_end = tmp_snippet[end_idx]

            # Position of the valid part inside the whole-video grid.
            v_s_idx = v_snippets.index(true_start)
            v_e_idx = v_snippets.index(true_end)
            valid_len = end_idx - start_idx + 1

            # Push the window data onto the whole-video accumulators.
            v_tem_s[v_s_idx:v_e_idx + 1] += pred_start[start_idx:end_idx + 1]
            v_tem_e[v_s_idx:v_e_idx + 1] += pred_end[start_idx:end_idx + 1]

            iou_mask = get_valid_mask(dscale, valid_len)
            # NOTE(review): the map is cut to its first ``valid_len`` columns
            # rather than ``start_idx:end_idx + 1``; identical when
            # start_idx == 0 -- confirm this is intended otherwise.
            pred_iou_map = pred_iou_map[:, :, :valid_len]
            pred_iou_map = pred_iou_map[0, :, :] * pred_iou_map[1, :, :]
            v_iou_map[:, v_s_idx:v_e_idx + 1] += pred_iou_map * iou_mask

            # Update counts.
            v_tem_cnt[v_s_idx:v_e_idx + 1] += 1
            v_iou_map_cnt[:, v_s_idx:v_e_idx + 1] += iou_mask

        # Average; the epsilon keeps never-visited cells at 0 instead of NaN.
        v_tem_s /= v_tem_cnt + 1e-6
        v_tem_e /= v_tem_cnt + 1e-6
        v_iou_map /= v_iou_map_cnt + 1e-6

        # The first snippet is always a start candidate and the last snippet
        # always an end candidate.
        start_mask = boundary_choose(v_tem_s)
        start_mask[0] = 1.0
        end_mask = boundary_choose(v_tem_e)
        end_mask[-1] = 1.0

        # Enumerate (start, end) pairs; ``idx`` is the duration in snippets.
        score_vector_list = []
        for idx in range(1, dscale):
            for jdx in range(snippet_num - idx):
                end_idx = jdx + idx
                if start_mask[jdx] == 1 and end_mask[end_idx] == 1:
                    xmin = v_snippets[jdx]
                    xmax = v_snippets[end_idx]
                    conf_score = (v_tem_s[jdx] * v_tem_e[end_idx] *
                                  v_iou_map[idx, jdx])
                    score_vector_list.append([xmin, xmax, conf_score])

        # np.stack raises ValueError on an empty list, so build an empty
        # (0, 3) array when no candidate survived the boundary masks.
        if score_vector_list:
            score_array = np.stack(score_vector_list)
        else:
            score_array = np.empty((0, len(cols)))
        df = pd.DataFrame(score_array, columns=cols)

        if len(df) > 1:
            df = Soft_NMS(
                df,
                iou_threshold=cfg.DETECTION_POST.iou_threshold,
                sigma=cfg.DETECTION_POST.sigma,
            )
        df = df.sort_values(by="score", ascending=False)

        # Rank the video-level classification scores (ascending).
        video_cls_rank = sorted((e, i) for i, e in enumerate(video_cls))
        unet_classes = [
            thu_label_id[video_cls_rank[-k - 1][1]] + 1 for k in range(topk)
        ]
        unet_scores = [video_cls_rank[-k - 1][0] for k in range(topk)]

        # Column arrays are loop-invariant; fetch them once.
        score_values = df.score.values
        xmin_values = df.xmin.values
        xmax_values = df.xmax.values

        proposal_list = []
        for j in range(min(num_prop, len(df))):
            # Frame positions -> clamped [0, 1] ratio -> seconds.
            tmp_xmin = max(0, xmin_values[j] / num_frames) * seconds
            tmp_xmax = min(1, xmax_values[j] / num_frames) * seconds
            for k in range(topk):
                tmp_proposal = {
                    "label": thumos_class[int(unet_classes[k])],
                    "score": float(
                        round(score_values[j] * unet_scores[k], 6)),
                    "segment": [
                        float(round(tmp_xmin, 1)),
                        float(round(tmp_xmax, 1)),
                    ],
                }
                proposal_list.append(tmp_proposal)

        result_dict[video_name] = proposal_list
def _get_valid_mask(self):
    """Store the result of ``get_valid_mask(dscale, tscale)`` on the instance.

    Reads ``self.dscale`` and ``self.tscale`` and writes ``self.valid_mask``.
    """
    mask = get_valid_mask(self.dscale, self.tscale)
    self.valid_mask = mask
def __init__(self, cfg):
    """Keep the config and precompute the valid mask as a tensor.

    Args:
        cfg: Config object; ``cfg.DATASET.dscale`` and ``cfg.DATASET.tscale``
            are passed to ``get_valid_mask``.
    """
    super(TAL_loss, self).__init__()
    self.cfg = cfg
    dataset_cfg = cfg.DATASET
    valid_mask = get_valid_mask(dataset_cfg.dscale, dataset_cfg.tscale)
    # Converted once here and stored on ``self.mask``.
    self.mask = torch.Tensor(valid_mask)