# Imports needed by the snippets below; BMN and VideoDataSet are assumed to be
# importable from the project's own model/dataset modules (exact paths not shown here).
import numpy as np
import pandas as pd
import torch


def BMN_inference(opt):
    model = BMN(opt)
    model = torch.nn.DataParallel(model, device_ids=[0, 1]).cuda()
    checkpoint = torch.load(opt["checkpoint_path"] + "/BMN_best.pth.tar")
    model.load_state_dict(checkpoint['state_dict'])
    model.eval()

    test_loader = torch.utils.data.DataLoader(
        VideoDataSet(opt, subset="validation"),
        batch_size=1, shuffle=False,
        num_workers=8, pin_memory=True, drop_last=False)

    tscale = opt["temporal_scale"]
    with torch.no_grad():
        for idx, input_data in test_loader:
            video_name = test_loader.dataset.video_list[idx[0]]
            input_data = input_data.cuda()
            confidence_map, start, end = model(input_data)
            # print(start.shape, end.shape, confidence_map.shape)

            start_scores = start[0].detach().cpu().numpy()
            end_scores = end[0].detach().cpu().numpy()
            clr_confidence = (confidence_map[0][1]).detach().cpu().numpy()
            reg_confidence = (confidence_map[0][0]).detach().cpu().numpy()

            # Enumerate all combinations of start and end boundary points
            new_props = []
            for idx in range(tscale):
                for jdx in range(tscale):
                    start_index = idx
                    end_index = jdx + 1
                    if start_index < end_index and end_index < tscale:
                        xmin = start_index / tscale
                        xmax = end_index / tscale
                        xmin_score = start_scores[start_index]
                        xmax_score = end_scores[end_index]
                        clr_score = clr_confidence[idx, jdx]
                        reg_score = reg_confidence[idx, jdx]
                        score = xmin_score * xmax_score * clr_score * reg_score
                        new_props.append([xmin, xmax, xmin_score, xmax_score,
                                          clr_score, reg_score, score])
            new_props = np.stack(new_props)

            #########################################################################
            col_name = ["xmin", "xmax", "xmin_score", "xmax_score",
                        "clr_score", "reg_score", "score"]
            new_df = pd.DataFrame(new_props, columns=col_name)
            new_df.to_csv("./output/BMN_results/" + video_name + ".csv", index=False)
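The nested loop above densely scores every valid (start, end) cell of the boundary-matching map. As a minimal sketch (not part of the original code), the same enumeration can be written with numpy alone; `start_scores`, `end_scores`, `clr_confidence`, `reg_confidence` and `tscale` are assumed to be exactly the values computed inside the loop body above.

import numpy as np

def dense_proposals(start_scores, end_scores, clr_confidence, reg_confidence, tscale):
    """Vectorised sketch of the nested loop above: one row per valid proposal."""
    row, col = np.meshgrid(np.arange(tscale), np.arange(tscale), indexing="ij")
    start_index = row                  # plays the role of the outer loop variable idx
    end_index = col + 1                # inner loop variable jdx, shifted by one
    valid = (start_index < end_index) & (end_index < tscale)

    xmin = start_index[valid] / tscale
    xmax = end_index[valid] / tscale
    xmin_score = start_scores[start_index[valid]]
    xmax_score = end_scores[end_index[valid]]
    clr_score = clr_confidence[valid]  # same cells the loop reads as [idx, jdx]
    reg_score = reg_confidence[valid]
    score = xmin_score * xmax_score * clr_score * reg_score
    return np.stack([xmin, xmax, xmin_score, xmax_score,
                     clr_score, reg_score, score], axis=1)

Calling `dense_proposals(start_scores, end_scores, clr_confidence, reg_confidence, tscale)` should return the same rows as `new_props` from the loop, in the same order, without the Python-level double loop.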
def BMN_inference(opt):
    model = BMN(opt).cuda()
    checkpoint = torch.load(opt["checkpoint_path"] + "/BMN_best.pth.tar")
    model.load_state_dict(checkpoint['state_dict'])
    model.eval()

    test_loader = torch.utils.data.DataLoader(
        VideoDataSet(opt, subset="validation"),
        batch_size=1, shuffle=False,
        num_workers=8, pin_memory=True, drop_last=False)

    tscale = opt["temporal_scale"]
    tgap = 1. / tscale
    peak_thres = opt["pgm_threshold"]

    with torch.no_grad():
        for idx, input_data in test_loader:
            video_name = test_loader.dataset.video_list[idx[0]]
            input_data = input_data.cuda()
            start_end, confidence_map = model(input_data)

            start_scores = start_end[0][0].detach().cpu().numpy()
            end_scores = start_end[0][1].detach().cpu().numpy()
            # confidence_mask is assumed to be defined elsewhere: a 0/1 mask over the
            # (duration, start) grid that zeroes out invalid cells of the BM map.
            clr_confidence = (confidence_map[0][0] * confidence_mask).detach().cpu().numpy()
            reg_confidence = (confidence_map[0][1] * confidence_mask).detach().cpu().numpy()

            max_start = max(start_scores)
            max_end = max(end_scores)

            ####################################################################
            # Generate the set of candidate start points and end points
            start_bins = np.zeros(len(start_scores))
            start_bins[[0, -1]] = 1  # [1,0,0,...,0,1]: always keep the first and last positions
            for idx in range(1, tscale - 1):
                if start_scores[idx] > start_scores[idx + 1] and start_scores[idx] > start_scores[idx - 1]:
                    start_bins[idx] = 1  # local peak
                elif start_scores[idx] > (peak_thres * max_start):
                    start_bins[idx] = 1  # above the relative threshold

            end_bins = np.zeros(len(end_scores))
            end_bins[[0, -1]] = 1
            for idx in range(1, tscale - 1):
                if end_scores[idx] > end_scores[idx + 1] and end_scores[idx] > end_scores[idx - 1]:
                    end_bins[idx] = 1
                elif end_scores[idx] > (peak_thres * max_end):
                    end_bins[idx] = 1

            ####################################################################
            xmin_list = []
            xmin_score_list = []
            xmax_list = []
            xmax_score_list = []
            for j in range(tscale):
                if start_bins[j] == 1:
                    # If a bin such as [0.01, 0.02] overlaps the ground truth well,
                    # the bin centre is taken as the actual boundary point.
                    xmin_list.append(tgap / 2 + tgap * j)
                    xmin_score_list.append(start_scores[j])
                if end_bins[j] == 1:
                    xmax_list.append(tgap / 2 + tgap * j)
                    xmax_score_list.append(end_scores[j])

            ####################################################################
            # Enumerate combinations of start and end boundary points
            new_props = []
            for ii in range(len(xmax_list)):
                tmp_xmax = xmax_list[ii]
                tmp_xmax_score = xmax_score_list[ii]
                for ij in range(len(xmin_list)):
                    tmp_xmin = xmin_list[ij]
                    tmp_xmin_score = xmin_score_list[ij]
                    if tmp_xmin >= tmp_xmax:
                        break
                    start_point = int((tmp_xmin - tgap / 2) / tgap)
                    end_point = int((tmp_xmax - tgap / 2) / tgap)
                    duration = end_point - start_point
                    clr_score = clr_confidence[duration, start_point]
                    reg_score = reg_confidence[duration, start_point]
                    # Fuse the two boundary scores with the BM confidence scores
                    # (the flattened original multiplied tmp_xmax_score by itself,
                    # which looks like a typo).
                    score = tmp_xmin_score * tmp_xmax_score * np.sqrt(clr_score * reg_score)
                    if score == 0:
                        print(video_name, tmp_xmin, tmp_xmax, tmp_xmin_score,
                              tmp_xmax_score, clr_score, reg_score, score,
                              confidence_map[0, 0, duration, start_point],
                              duration, start_point)
                    new_props.append([tmp_xmin, tmp_xmax, tmp_xmin_score,
                                      tmp_xmax_score, clr_score, reg_score, score])
            new_props = np.stack(new_props)

            ####################################################################
            col_name = ["xmin", "xmax", "xmin_score", "xmax_score",
                        "clr_score", "reg_score", "score"]
            new_df = pd.DataFrame(new_props, columns=col_name)
            new_df.to_csv("./output/BMN_results/" + video_name + ".csv", index=False)
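This variant multiplies the confidence map by a `confidence_mask` that is never defined in the snippet. A plausible definition (an assumption, not taken from the original) follows from how the map is indexed as `[duration, start_point]`: keep only cells whose proposal still ends inside the clip.

import torch

def build_confidence_mask(tscale):
    # Hypothetical helper: 1 where a proposal starting at `start` with the given
    # duration index still ends inside the clip (start + duration < tscale), else 0.
    mask = torch.zeros(tscale, tscale)
    for duration in range(tscale):
        for start in range(tscale - duration):
            mask[duration, start] = 1.0
    return mask

# Assumed usage: built once from opt["temporal_scale"] and moved to the GPU,
# since it multiplies the raw model output before .cpu() is called, e.g.
# confidence_mask = build_confidence_mask(100).cuda()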
def BMN_inference(opt):
    model = BMN(opt)
    model = torch.nn.DataParallel(model, device_ids=[0, 1]).cuda()
    checkpoint = torch.load(opt["checkpoint_path"] + "/BMN_best.pth.tar")
    model.load_state_dict(checkpoint['state_dict'])
    model.eval()

    test_loader = torch.utils.data.DataLoader(
        VideoDataSet(opt, subset="validation"),
        batch_size=1, shuffle=False,
        num_workers=8, pin_memory=True, drop_last=False)

    tscale = opt["temporal_scale"]  # 100
    with torch.no_grad():
        for idx, input_data in test_loader:
            video_name = test_loader.dataset.video_list[idx[0]]
            input_data = input_data.cuda()
            # shapes: (1, 2, 100, 100), (1, 100), (1, 100)
            confidence_map, start, end = model(input_data)
            # print(start.shape, end.shape, confidence_map.shape)

            start_scores = start[0].detach().cpu().numpy()  # (100,)
            end_scores = end[0].detach().cpu().numpy()      # (100,)
            clr_confidence = (confidence_map[0][1]).detach().cpu().numpy()  # (100, 100)
            reg_confidence = (confidence_map[0][0]).detach().cpu().numpy()  # (100, 100)

            # Peak values of the boundary scores
            max_start = max(start_scores)
            max_end = max(end_scores)

            ####################################################################
            # Generate the set of candidate start points and end points
            start_bins = np.zeros(len(start_scores))  # 100 temporal positions, all zero
            start_bins[0] = 1  # the first position is always a candidate start
            for idx in range(1, tscale - 1):
                if start_scores[idx] > start_scores[idx + 1] and start_scores[idx] > start_scores[idx - 1]:
                    start_bins[idx] = 1
                elif start_scores[idx] > (0.5 * max_start):
                    start_bins[idx] = 1

            end_bins = np.zeros(len(end_scores))
            end_bins[-1] = 1  # the last position is always a candidate end
            for idx in range(1, tscale - 1):
                if end_scores[idx] > end_scores[idx + 1] and end_scores[idx] > end_scores[idx - 1]:
                    end_bins[idx] = 1
                elif end_scores[idx] > (0.5 * max_end):
                    end_bins[idx] = 1

            ####################################################################
            # Enumerate combinations of start and end boundary points;
            # this visits every starting position for every proposal duration.
            new_props = []
            for idx in range(tscale):      # duration index: all proposals for a given idx share the same length
                for jdx in range(tscale):  # iterates over the 100 starting positions
                    start_index = jdx
                    end_index = start_index + idx + 1
                    if end_index < tscale and start_bins[start_index] == 1 and end_bins[end_index] == 1:
                        xmin = start_index / tscale
                        xmax = end_index / tscale
                        xmin_score = start_scores[start_index]
                        xmax_score = end_scores[end_index]
                        clr_score = clr_confidence[idx, jdx]
                        reg_score = reg_confidence[idx, jdx]
                        score = xmin_score * xmax_score * clr_score * reg_score
                        new_props.append([xmin, xmax, xmin_score, xmax_score,
                                          clr_score, reg_score, score])
            new_props = np.stack(new_props)

            ####################################################################
            col_name = ["xmin", "xmax", "xmin_score", "xmax_score",
                        "clr_score", "reg_score", "score"]
            new_df = pd.DataFrame(new_props, columns=col_name)
            new_df.to_csv("./output/BMN_results/" + video_name + ".csv", index=False)
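The candidate-boundary rule used above keeps a position if it is a local maximum of the score curve or if it exceeds half of the global maximum. A small self-contained sketch with toy scores (not real model output) makes the behaviour easy to check:

import numpy as np

def pick_boundaries(scores, rel_thresh=0.5, keep_first=False, keep_last=False):
    # Same rule as above: keep local peaks, plus anything above
    # rel_thresh * max(scores); optionally force the first/last position.
    bins = np.zeros(len(scores))
    if keep_first:
        bins[0] = 1
    if keep_last:
        bins[-1] = 1
    peak = scores.max()
    for i in range(1, len(scores) - 1):
        is_local_max = scores[i] > scores[i - 1] and scores[i] > scores[i + 1]
        if is_local_max or scores[i] > rel_thresh * peak:
            bins[i] = 1
    return bins

toy = np.array([0.10, 0.80, 0.75, 0.20, 0.30, 0.25, 0.05])
print(pick_boundaries(toy, keep_first=True))
# [1. 1. 1. 0. 1. 0. 0.]  -> indices 1 and 4 are local peaks,
#                            index 2 is kept only by the 0.5 * max threshold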