# Imports assumed by the examples on this page: PyTorch, NumPy, pandas, plus
# the project's own BMN model and VideoDataSet loader (module paths may differ
# between the projects these snippets come from).
import numpy as np
import pandas as pd
import torch

from dataset import VideoDataSet
from models import BMN


def BMN_inference(opt):
    model = BMN(opt)
    model = torch.nn.DataParallel(model, device_ids=[0, 1]).cuda()
    checkpoint = torch.load(opt["checkpoint_path"] + "/BMN_best.pth.tar")
    model.load_state_dict(checkpoint['state_dict'])
    model.eval()

    test_loader = torch.utils.data.DataLoader(VideoDataSet(
        opt, subset="validation"),
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=8,
                                              pin_memory=True,
                                              drop_last=False)
    tscale = opt["temporal_scale"]
    with torch.no_grad():
        for idx, input_data in test_loader:
            video_name = test_loader.dataset.video_list[idx[0]]
            input_data = input_data.cuda()
            confidence_map, start, end = model(input_data)

            # print(start.shape,end.shape,confidence_map.shape)
            start_scores = start[0].detach().cpu().numpy()  # (tscale,)
            end_scores = end[0].detach().cpu().numpy()  # (tscale,)
            clr_confidence = (confidence_map[0][1]).detach().cpu().numpy()  # (tscale, tscale)
            reg_confidence = (confidence_map[0][0]).detach().cpu().numpy()  # (tscale, tscale)

            # iterate over all combinations of start and end boundary points
            new_props = []
            for idx in range(tscale):
                for jdx in range(tscale):
                    start_index = idx
                    end_index = jdx + 1
                    if start_index < end_index and end_index < tscale:
                        xmin = start_index / tscale
                        xmax = end_index / tscale
                        xmin_score = start_scores[start_index]
                        xmax_score = end_scores[end_index]
                        clr_score = clr_confidence[idx, jdx]
                        reg_score = reg_confidence[idx, jdx]
                        score = xmin_score * xmax_score * clr_score * reg_score
                        new_props.append([
                            xmin, xmax, xmin_score, xmax_score, clr_score,
                            reg_score, score
                        ])
            new_props = np.stack(new_props)
            #########################################################################

            col_name = [
                "xmin", "xmax", "xmin_score", "xmax_score", "clr_score",
                "reg_socre", "score"
            ]
            new_df = pd.DataFrame(new_props, columns=col_name)
            new_df.to_csv("./output/BMN_results/" + video_name + ".csv",
                          index=False)
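The examples on this page only show the inference function itself. From the keys they read, a configuration dict along the following lines is expected; this is an illustrative sketch only (the values are placeholders, and BMN(opt) / VideoDataSet(opt) read further keys, such as feature paths and dimensions, from the same dict defined in the project's option parser):

# Illustrative only: the opt keys referenced by the snippets on this page.
opt = {
    "temporal_scale": 100,               # number of temporal bins (tscale)
    "checkpoint_path": "./checkpoint",   # directory holding BMN_best.pth.tar
    "pgm_threshold": 0.5,                # peak threshold read by the second variant
}

import os
os.makedirs("./output/BMN_results", exist_ok=True)  # the CSV output directory must exist

BMN_inference(opt)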
def BMN_inference(opt):
    model = BMN(opt).cuda()
    checkpoint = torch.load(opt["checkpoint_path"] + "/BMN_best.pth.tar")
    model.load_state_dict(checkpoint['state_dict'])
    model.eval()

    test_loader = torch.utils.data.DataLoader(VideoDataSet(
        opt, subset="validation"),
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=8,
                                              pin_memory=True,
                                              drop_last=False)
    tscale = opt["temporal_scale"]
    tgap = 1. / tscale
    peak_thres = opt["pgm_threshold"]
    with torch.no_grad():
        for idx, input_data in test_loader:
            video_name = test_loader.dataset.video_list[idx[0]]
            input_data = input_data.cuda()
            start_end, confidence_map = model(input_data)

            start_scores = start_end[0][0].detach().cpu().numpy()
            end_scores = start_end[0][1].detach().cpu().numpy()
            # NOTE: confidence_mask is assumed to be a precomputed (tscale, tscale)
            # CUDA tensor that zeroes out invalid (duration, start) pairs; it is
            # not defined in this snippet (see the sketch after this example).
            clr_confidence = (confidence_map[0][0] *
                              confidence_mask).detach().cpu().numpy()
            reg_confidence = (confidence_map[0][1] *
                              confidence_mask).detach().cpu().numpy()

            max_start = max(start_scores)
            max_end = max(end_scores)

            ####################################################################################################
            # generate the set of start points and end points
            start_bins = np.zeros(len(start_scores))
            start_bins[[0, -1]] = 1  # [1, 0, 0, ..., 0, 1]: mark the first and last time steps
            for idx in range(1, tscale - 1):
                if start_scores[idx] > start_scores[
                        idx + 1] and start_scores[idx] > start_scores[idx - 1]:
                    start_bins[idx] = 1
                elif start_scores[idx] > (peak_thres * max_start):
                    start_bins[idx] = 1

            end_bins = np.zeros(len(end_scores))
            end_bins[[0, -1]] = 1
            for idx in range(1, tscale - 1):
                if end_scores[idx] > end_scores[
                        idx + 1] and end_scores[idx] > end_scores[idx - 1]:
                    end_bins[idx] = 1
                elif end_scores[idx] > (peak_thres * max_end):
                    end_bins[idx] = 1
            ########################################################################################################

            xmin_list = []
            xmin_score_list = []
            xmax_list = []
            xmax_score_list = []
            for j in range(tscale):
                if start_bins[j] == 1:
                    # the midpoint of the temporal bin is taken as the boundary
                    # (a bin such as [0.01, 0.02] is represented by its centre)
                    xmin_list.append(tgap / 2 + tgap * j)
                    xmin_score_list.append(start_scores[j])
                if end_bins[j] == 1:
                    xmax_list.append(tgap / 2 + tgap * j)
                    xmax_score_list.append(end_scores[j])

            #########################################################################
            # iterate over all combinations of start and end boundary points
            new_props = []
            for ii in range(len(xmax_list)):
                tmp_xmax = xmax_list[ii]
                tmp_xmax_score = xmax_score_list[ii]
                for ij in range(len(xmin_list)):
                    tmp_xmin = xmin_list[ij]
                    tmp_xmin_score = xmin_score_list[ij]
                    if tmp_xmin >= tmp_xmax:
                        break
                    start_point = int((tmp_xmin - tgap / 2) / tgap)
                    end_point = int((tmp_xmax - tgap / 2) / tgap)
                    duration = end_point - start_point
                    clr_score = clr_confidence[duration, start_point]
                    reg_score = reg_confidence[duration, start_point]
                    # fuse the boundary scores with the geometric mean of both confidences
                    score = tmp_xmin_score * tmp_xmax_score * np.sqrt(
                        clr_score * reg_score)
                    if score == 0:
                        print(video_name, tmp_xmin, tmp_xmax, tmp_xmin_score,
                              tmp_xmax_score, clr_score, reg_score, score,
                              confidence_map[0, 0, duration, start_point],
                              duration, start_point)
                    new_props.append([
                        tmp_xmin, tmp_xmax, tmp_xmin_score, tmp_xmax_score,
                        clr_score, reg_score, score
                    ])
            new_props = np.stack(new_props)
            #########################################################################

            col_name = [
                "xmin", "xmax", "xmin_score", "xmax_score", "clr_score",
                "reg_socre", "score"
            ]
            new_df = pd.DataFrame(new_props, columns=col_name)
            new_df.to_csv("./output/BMN_results/" + video_name + ".csv",
                          index=False)
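The variant above multiplies the confidence map by a confidence_mask that is never defined in the snippet; presumably it is a module-level mask that zeroes out invalid (duration, start) pairs, i.e. proposals that would run past the end of the video. A plausible construction, assuming the map is indexed as [duration, start] over tscale bins (get_confidence_mask is a hypothetical helper, not part of the original code):

import numpy as np
import torch


def get_confidence_mask(tscale):
    # Hypothetical helper: entry (d, s) is kept only when a proposal starting
    # at bin s with duration d still ends inside the video, matching the
    # clr_confidence[duration, start_point] indexing used above.
    mask = np.zeros((tscale, tscale), dtype=np.float32)
    for d in range(tscale):      # duration index (end_point - start_point)
        for s in range(tscale):  # start index
            if s + d < tscale:   # the end bin stays within the temporal range
                mask[d, s] = 1.0
    return torch.from_numpy(mask).cuda()


confidence_mask = get_confidence_mask(100)  # tscale = opt["temporal_scale"]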
Example 3
File: main.py  Project: cxqj/44-BMN
def BMN_inference(opt):
    model = BMN(opt)
    model = torch.nn.DataParallel(model, device_ids=[0, 1]).cuda()
    checkpoint = torch.load(opt["checkpoint_path"] + "/BMN_best.pth.tar")
    model.load_state_dict(checkpoint['state_dict'])
    model.eval()

    test_loader = torch.utils.data.DataLoader(VideoDataSet(opt, subset="validation"),
                                              batch_size=1, shuffle=False,
                                              num_workers=8, pin_memory=True, drop_last=False)
    tscale = opt["temporal_scale"]  # 100
    with torch.no_grad():
        for idx, input_data in test_loader:
            video_name = test_loader.dataset.video_list[idx[0]]
            input_data = input_data.cuda()
            confidence_map, start, end = model(input_data)  #(1,2,100,100),(1,100),(1,100)

            #print(start.shape,end.shape,confidence_map.shape)
            start_scores = start[0].detach().cpu().numpy()  # (100,)
            end_scores = end[0].detach().cpu().numpy()  # (100,)
            clr_confidence = (confidence_map[0][1]).detach().cpu().numpy()  # (100,100)
            reg_confidence = (confidence_map[0][0]).detach().cpu().numpy()  # (100,100)

            # peak (maximum) values of the boundary scores
            max_start = max(start_scores)
            max_end = max(end_scores)

            ####################################################################################################
            # generate the set of start points and end points
            start_bins = np.zeros(len(start_scores))  # [0, 0, ..., 0], one entry per temporal point
            start_bins[0] = 1  # mark the first temporal point as a start boundary
            for idx in range(1, tscale - 1):
                if start_scores[idx] > start_scores[idx + 1] and start_scores[idx] > start_scores[idx - 1]:
                    start_bins[idx] = 1
                elif start_scores[idx] > (0.5 * max_start):
                    start_bins[idx] = 1

            end_bins = np.zeros(len(end_scores))
            end_bins[-1] = 1  # mark the last temporal point as an end boundary
            for idx in range(1, tscale - 1):
                if end_scores[idx] > end_scores[idx + 1] and end_scores[idx] > end_scores[idx - 1]:
                    end_bins[idx] = 1
                elif end_scores[idx] > (0.5 * max_end):
                    end_bins[idx] = 1
            ########################################################################################################

            #########################################################################
            # iterate over all combinations of start and end boundary points,
            # i.e. over every start position for every proposal duration
            new_props = []
            for idx in range(tscale):  # idx indexes the duration; all proposals for a given idx have the same length
                for jdx in range(tscale):  # jdx iterates over the temporal (start) points
                    start_index = jdx
                    end_index = start_index + idx + 1
                    if end_index < tscale and start_bins[start_index] == 1 and end_bins[end_index] == 1:
                        xmin = start_index/tscale
                        xmax = end_index/tscale
                        xmin_score = start_scores[start_index]
                        xmax_score = end_scores[end_index]
                        clr_score = clr_confidence[idx, jdx]
                        reg_score = reg_confidence[idx, jdx]
                        score = xmin_score * xmax_score * clr_score * reg_score
                        new_props.append([xmin, xmax, xmin_score, xmax_score, clr_score, reg_score, score])
            new_props = np.stack(new_props)
            #########################################################################

            col_name = ["xmin", "xmax", "xmin_score", "xmax_score", "clr_score", "reg_score", "score"]
            new_df = pd.DataFrame(new_props, columns=col_name)
            new_df.to_csv("./output/BMN_results/" + video_name + ".csv", index=False)
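Each variant above writes one CSV of candidate proposals per video. In the full BMN pipeline these files are consumed by a separate post-processing step (typically Soft-NMS followed by evaluation), which is not shown on this page; the minimal sketch below just reads one file back and ranks its proposals by the fused score (the video name v_example is a placeholder):

import pandas as pd

df = pd.read_csv("./output/BMN_results/v_example.csv")
top = df.sort_values("score", ascending=False).head(100)  # keep the 100 best proposals
print(top[["xmin", "xmax", "score"]])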