def evaluate(args, model, dataset, proportion=0.15):
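    # note: the 'proportion' keyword argument is unused; the loop below sweeps
    # the budgets listed in sum_proportions instead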

    with torch.no_grad():
        model.eval()
        meanFscores = []
        meanPrecision = []
        meanRecall = []

        if args.verbose: table = [["No.", "Video", "F-score"]]
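        # summary-length budgets (fraction of the video) swept during evaluation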
        sum_proportions = [0.15, 0.25, 0.35, 0.45]
        for key_idx, key in enumerate(args.test_keys):

            # load gt_score txt file
            gtscore = np.loadtxt(
                osp.join(args.gtpath, 'gtscore_' + key + '_5fps.txt'))

            video_info = dataset[key]['video_1']
            positions = video_info['picks'][...]

            seq = video_info['features'][...]
            seq = torch.from_numpy(seq).unsqueeze(0)

            seq = seq.cuda()
            sig_probs = model(seq)

            probs_importance = sig_probs.data.cpu().squeeze().numpy()

            # generate the ground-truth summary on the fly for each summary-length
            # constraint in sum_proportions
            fscore_prop = []
            precision_prop = []
            recall_prop = []
            for prop in sum_proportions:
                gt_summary, _, _ = vsum_tools.generate_summary(gtscore,
                                                               video_info,
                                                               positions,
                                                               proportion=prop)
                machine_summary, pred_probs_up, pick_segs = vsum_tools.generate_summary(
                    probs_importance, video_info, positions, proportion=prop)
                f_score, precision, recall = eval_tools.evaluate_summary(
                    machine_summary, gt_summary)
                fscore_prop.append(f_score)
                precision_prop.append(precision)
                recall_prop.append(recall)

            meanFscores.append(fscore_prop)
            meanPrecision.append(precision_prop)
            meanRecall.append(recall_prop)

            if args.verbose:
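                # f_score at this point is the value from the last budget in sum_proportions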
                table.append([key_idx + 1, key, "{:.1%}".format(f_score)])

    if args.verbose:
        print(tabulate(table))

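    # average over test videos; one value remains per budget in sum_proportions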
    mean_fscores = np.mean(meanFscores, axis=0)
    mean_precision = np.mean(meanPrecision, axis=0)
    mean_recall = np.mean(meanRecall, axis=0)
    return mean_fscores, mean_precision, mean_recall
Example #2
def evaluate(model, dataset, test_keys, use_gpu):
	print("==> Test")
	with torch.no_grad():
		model.eval()
		fms = []
		taus = []
		human_taus = []
		eval_metric = 'avg' if args.metric == 'tvsum' else 'max'

		if args.verbose: table = [["No.", "Video", "F-score", "Kendall's Tau", "Avg human score"]]

		if args.save_results:
			h5_res = h5py.File(osp.join(args.save_dir, 'result.h5'), 'w')

		for key_idx, key in enumerate(test_keys):
			seq = dataset[key]['features'][...]
			seq = torch.from_numpy(seq).unsqueeze(0)
			if use_gpu: seq = seq.cuda()
			probs = model(seq)
			probs = probs.data.cpu().squeeze().numpy()

			cps = dataset[key]['change_points'][...]
			num_frames = dataset[key]['n_frames'][()]
			nfps = dataset[key]['n_frame_per_seg'][...].tolist()
			positions = dataset[key]['picks'][...]
			user_summary = dataset[key]['user_summary'][...]

			machine_summary = vsum_tools.generate_summary(probs, cps, num_frames, nfps, positions)
			fm, _, _ = vsum_tools.evaluate_summary(machine_summary, user_summary, eval_metric)
			fms.append(fm)
			kendaltau, human_avg_score = vsum_tools.kendaltau(machine_summary, user_summary)
			taus.append(kendaltau)
			human_taus.append(human_avg_score)
			if args.verbose:
				table.append([key_idx+1, key, "{:.4f}".format(fm), "{:.4f}".format(kendaltau), "{:.4f}".format(human_avg_score)])

			if args.save_results:
				h5_res.create_dataset(key + '/score', data=probs)
				h5_res.create_dataset(key + '/machine_summary', data=machine_summary)
				h5_res.create_dataset(key + '/gtscore', data=dataset[key]['gtscore'][...])
				h5_res.create_dataset(key + '/fm', data=fm)
				h5_res.create_dataset(key + '/tau', data=kendaltau)

	if args.verbose:
		print(tabulate(table))

	if args.save_results: h5_res.close()

	mean_fm = np.mean(fms)
	print("Average F-score {:.4%}".format(mean_fm))

	mean_tau = np.mean(taus)
	print("Average Kendall's tau {:.4f}".format(mean_tau))
	human_tau = np.mean(human_taus)
	print("Average Human tau {:.4f}".format(human_tau))
	return mean_fm, mean_tau
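The evaluate() variant above also reports Kendall's tau through a vsum_tools.kendaltau helper whose implementation is not shown on this page. Below is a minimal, purely illustrative sketch of that kind of rank-correlation check built on scipy.stats.kendalltau; the function name kendall_vs_users, the aggregation over annotators, and the returned "human reference" value are assumptions, not the repository's code.

import numpy as np
from scipy.stats import kendalltau

def kendall_vs_users(machine_summary, user_summary):
    # hypothetical sketch: correlate the binary machine summary with the mean
    # per-frame user annotation
    n = min(len(machine_summary), user_summary.shape[1])
    mean_user = user_summary[:, :n].mean(axis=0)
    tau, _ = kendalltau(machine_summary[:n], mean_user)
    # correlation of each annotator with the mean annotation, as a rough human reference
    pair_taus = [kendalltau(u, mean_user)[0] for u in user_summary[:, :n]]
    return tau, float(np.mean(pair_taus))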
Example #3
    def evaluate(self, epoch_i, step, writer):
        self.summarizer.eval()

        # ======================== testing set test ================================ #
        # ========================================================================== #
        out_dict = {}
        acc_list = []
        loss_list = []
        # for [video_tensor, video_gt, video_name] in self.test_loader:
        for [
                video_tensor, gtsummary, gtscore, cps, num_frames, nfps,
                positions, user_summary, name
        ] in self.test_loader:
            # video_name = video_name[0]
            video_name = name[0]
            video_gt = gtsummary
            # video_pos = Variable(pos).cuda()
            video_feature = Variable(video_tensor).cuda()
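            # the summarizer is expected to return a 1-tuple here, hence the trailing-comma unpacking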
            scores, = self.summarizer(video_feature)
            # scores = self.summarizer(video_pos, video_feature)
            classify_loss = self.classify_loss(
                scores,
                Variable(video_gt).view(-1, 1, 1).cuda())
            # classify_loss = self.weighted_binary_cross_entropy(scores, Variable(video_gt.view(-1,1,1)).cuda())
            scores = scores.cpu().detach().numpy().squeeze()
            cps = cps.numpy().squeeze(0)
            num_frames = num_frames.numpy().squeeze(0)
            nfps = nfps.numpy().squeeze(0).tolist()
            positions = positions.numpy().squeeze(0)
            user_summary = user_summary.numpy().squeeze(0)
            # print(user_summary.shape[0])

            machine_summary = vsum_tools.generate_summary(
                scores, cps, num_frames, nfps, positions)
            fm, _, _ = vsum_tools.evaluate_summary(machine_summary,
                                                   user_summary, 'avg')

            # out_dict[video_name] = scores.squeeze(1).tolist()
            # P, R, f_score = self.f_score(scores.squeeze(), video_gt.squeeze(), True)
            loss_list.append(classify_loss.item())
            # acc_list.append(f_score)
            acc_list.append(fm)
            # log(f'video_name: {video_name:<9} P: {P:.3f} R:{R:.3f} f_score:{f_score:.3f}')
            log(f'video_name: {video_name:<9} f_score:{fm:.3f}')

        seclog([
            f'testing loss : {np.mean(loss_list):.3f} mean of f_score : {np.mean(acc_list):.3f}',
            'light_red'
        ])
        # seclog([f'testing f_score: {np.mean(acc_list):.3f}', 'blue'])
        # writer.add_scalar('test_loss',np.mean(loss_list),step)
        writer.add_scalar('test_f_score', np.mean(acc_list), step)
        writer.add_scalar('test_loss', np.mean(loss_list), step)

        return np.mean(acc_list)
Example #4
def evaluate(model, dataset, test_keys, use_gpu):
    print("==> Test")
    with torch.no_grad():
        model.eval()
        fms = []
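        # note: this variant only exports scores and summaries to result.h5;
        # 'fms' and 'table' are declared but never filled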

        table = [["No.", "Video", "F-score"]]
        if not os.path.isdir(args.save_dir):
            os.mkdir(args.save_dir)

        h5_res = h5py.File(os.path.join(args.save_dir, 'result.h5'), 'w')

        for key_idx, key in enumerate(test_keys):
            seq = dataset[key]['features'][...]
            seq = torch.from_numpy(seq).unsqueeze(0)
            if use_gpu: seq = seq.cuda()
            probs = model(seq)
            probs = probs.data.cpu().squeeze().numpy()

            cps = dataset[key]['change_points'][...]
            num_frames = dataset[key]['n_frames'][()]
            nfps = dataset[key]['n_frame_per_seg'][...].tolist()
            positions = dataset[key]['picks'][...]
            video_name = dataset[key]['video_name'][()]
            video_dir = dataset[key]['video_dir'][()]
            fps = dataset[key]['fps'][()]
            # print(cps)
            # print(nfps)
            total_seg_frames = sum(nfps)  # debug check: segment lengths should add up to num_frames
            # print(total_seg_frames)
            # print(positions)
            # video_name = 'train'
            machine_summary = vsum_tools.generate_summary(
                probs, cps, num_frames, nfps, positions)
            #print(video_name)
            #print(":")
            print(machine_summary.shape)
            h5_res.create_dataset(key + '/score', data=probs)
            h5_res.create_dataset(key + '/machine_summary',
                                  data=machine_summary)
            h5_res.create_dataset(key + '/video_name', data=video_name)
            h5_res.create_dataset(key + '/fps', data=fps)
            h5_res.create_dataset(key + '/video_dir', data=video_dir)

    h5_res.close()
Example #5
def evaluate(model, dataset, test_keys, use_gpu):
    print("==> Test")
    with torch.no_grad():
        model.eval()
        fms = []
        eval_metric = 'avg' if args.metric == 'tvsum' else 'max'

        table = [["No.", "Video", "F-score"]]

        h5_res = h5py.File(os.path.join(args.save_dir, 'result.h5'), 'w')

        for key_idx, key in enumerate(test_keys):
            seq = dataset[key]['features'][...]
            seq = torch.from_numpy(seq).unsqueeze(0)
            if use_gpu: seq = seq.cuda()
            probs = model(seq)
            probs = probs.data.cpu().squeeze().numpy()

            cps = dataset[key]['change_points'][...]
            num_frames = dataset[key]['n_frames'][()]
            nfps = dataset[key]['n_frame_per_seg'][...].tolist()
            positions = dataset[key]['picks'][...]
            #user_summary = dataset[key]['user_summary'][...]

            machine_summary = vsum_tools.generate_summary(
                probs, cps, num_frames, nfps, positions)
            #fm, _, _ = vsum_tools.evaluate_summary(machine_summary, user_summary, eval_metric)
            #fms.append(fm)

            #table.append([key_idx+1, key, "{:.1%}".format(fm)])

            h5_res.create_dataset(key + '/score', data=probs)
            h5_res.create_dataset(key + '/machine_summary',
                                  data=machine_summary)
            #h5_res.create_dataset(key + '/gtscore', data=dataset[key]['gtscore'][...])
            #h5_res.create_dataset(key + '/fm', data=fm)

    print(tabulate(table))

    h5_res.close()

    # guard against np.mean on an empty list (evaluate_summary is commented out above)
    mean_fm = np.mean(fms) if fms else float('nan')
    print("Average F-score {:.1%}".format(mean_fm))

    return mean_fm
Example #6
def test(
    n_episodes=5,
    input_dim=1024,
    hidden_dim=256,
    W_init='normal',
    U_init='normal',
    weight_decay=1e-5,
    regularizer='L2',
    optimizer='adam',
    alpha=0.01,
    model_file='',
    eval_dataset='summe',
    verbose=True,
):
    assert eval_dataset in ['summe', 'tvsum']
    assert os.path.isfile(model_file)

    if eval_dataset == 'summe':
        eval_metric = 'max'
    elif eval_dataset == 'tvsum':
        eval_metric = 'avg'
    model_options = locals().copy()

    log_dir = 'log-test'
    if not os.path.exists(log_dir):
        os.mkdir(log_dir)

    logging.basicConfig(filename=log_dir + '/log.txt',
                        filemode='w',
                        format='%(asctime)s %(message)s',
                        datefmt='[%d/%m/%Y %I:%M:%S]',
                        level=logging.INFO)

    logger = logging.getLogger()
    ch = logging.StreamHandler()
    ch.setLevel(logging.INFO)
    formatter = logging.Formatter(fmt='%(asctime)s %(message)s',
                                  datefmt='[%d/%m/%Y %I:%M:%S]')
    ch.setFormatter(formatter)
    logger.addHandler(ch)

    logger.info('initializing net model')
    net = reinforceRNN(model_options)

    logger.info('loading %s data' % (eval_dataset))
    h5f_path = 'datasets/eccv16_dataset_' + eval_dataset + '_google_pool5.h5'
    dataset = h5py.File(h5f_path, 'r')
    dataset_keys = list(dataset.keys())
    n_videos = len(dataset_keys)

    logger.info('=> testing')
    start_time = time.time()
    fms = []
    precs = []
    recs = []

    for i_video in range(n_videos):
        key = dataset_keys[i_video]
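        # _DTYPE is assumed to be a module-level dtype constant (e.g. np.float32) in the original script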
        data_x = dataset[key]['features'][...].astype(_DTYPE)
        probs = net.model_inference(data_x)

        cps = dataset[key]['change_points'][...]
        n_frames = dataset[key]['n_frames'][()]
        nfps = dataset[key]['n_frame_per_seg'][...].tolist()
        positions = dataset[key]['picks'][...]

        machine_summary = vsum_tools.generate_summary(probs, cps, n_frames,
                                                      nfps, positions)
        user_summary = dataset[key]['user_summary'][...]
        fm, prec, rec = vsum_tools.evaluate_summary(machine_summary,
                                                    user_summary, eval_metric)
        fms.append(fm)
        precs.append(prec)
        recs.append(rec)
        if verbose: logger.info('video %s. fm=%f' % (key, fm))

    mean_fm = np.mean(fms)
    mean_prec = np.mean(precs)
    mean_rec = np.mean(recs)

    logger.info(
        '========================= conclusion =========================')
    logger.info('-- recap of model options')
    logger.info(str(model_options))
    logger.info('-- final outcome')
    logger.info('f-measure {:.1%}. precision {:.1%}. recall {:.1%}.'.format(
        mean_fm, mean_prec, mean_rec))
    elapsed_time = time.time() - start_time
    logger.info('elapsed time %.2f s' % (elapsed_time))
    logger.info(
        '==============================================================')

    dataset.close()
Example #7
    def visualize(self):
        # model_path = 'meeting2/tvsum/tvsum_standard_3layer18head/model/score-0.60574.pkl'
        model_path = 'log/tvsum_11.5_atten_only_posffn/model/epoch12_score-0.18391.pkl'
        self.summarizer.load_state_dict(torch.load(model_path))
        self.summarizer.eval()

        # ======================== testing set test ================================ #
        # ========================================================================== #
        out_dict = {}
        acc_list = []
        loss_list = []
        for [
                video_tensor, gtsummary, gtscore, cps, num_frames, nfps,
                positions, user_summary, name
        ] in self.test_loader:
            video_name = name[0]
            video_feature = Variable(video_tensor).cuda()
            scores, att_map = self.summarizer(video_feature, return_attns=True)
            scores = scores.cpu().detach().numpy().squeeze()
            gtsummary = gtsummary.numpy().squeeze(0)
            gtscore = gtscore.numpy().squeeze(0)
            cps = cps.numpy().squeeze(0)
            num_frames = num_frames.numpy().squeeze(0)
            nfps = nfps.numpy().squeeze(0).tolist()
            positions = positions.numpy().squeeze(0)
            user_summary = user_summary.numpy().squeeze(0)
            save_path = f'log/tvsum_2layer8head_11.1/feature_map/'
            if not os.path.exists(save_path):
                os.mkdir(save_path)
            save_path = save_path + f'{video_name}/'

            machine_summary = vsum_tools.generate_summary(
                scores, cps, num_frames, nfps, positions)
            fm, P, R = vsum_tools.evaluate_summary(machine_summary,
                                                   user_summary, 'avg')

            user_score = np.zeros(len(user_summary[0]))
            for user in user_summary:
                user_score += user


            # =========================== Encoder attentive Decoder ================================== #
            # [seq, head, layer, seq]
            attention_map = np.zeros((len(att_map), att_map[0][0][0].shape[0],
                                      len(att_map[0][0]), len(att_map)))
            for i in range(len(att_map)):
                for j in range(len(att_map[0][0])):
                    attention_map[i, :, j, :] = att_map[i][0][j].cpu().detach(
                    ).numpy().squeeze()
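            # save one attention heatmap per (layer, head) pair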
            for layer in range(attention_map.shape[2]):
                for h in range(attention_map.shape[1]):

                    df_cm = pd.DataFrame(
                        attention_map[60:, h, layer, :],
                        index=[i for i in range(attention_map.shape[0] - 60)],
                        columns=[i for i in range(attention_map.shape[0])])
                    # plt.figure(figsize = (10,7))
                    # sn.heatmap(df_cm, annot=True)
                    f, ax = plt.subplots(figsize=(14 * 2, 14 * 2))

                    sn.heatmap(df_cm, cmap='YlGnBu', linewidths=0.05, ax=ax)
                    # sn.heatmap(df_cm, annot=True, ax = ax)
                    # set the Axes title
                    ax.set_title(f'Accuracy = {fm*100:.2f}')

                    if not os.path.exists(save_path):
                        os.mkdir(save_path)
                    f.savefig(save_path + f'layer{layer}head_{h}.jpg',
                              dpi=100,
                              bbox_inches='tight')
                    plt.close()

            # ======================================================================================== #

            # =========================== original =================================================== #
            # att_map = att_map[0]
            # for i in range(3):
            #     att_map0 = att_map[i].cpu().detach().numpy()
            #     for h in range(len(att_map0)):

            #         df_cm = pd.DataFrame(att_map0[h], index = [i for i in range(att_map0[h].shape[0])],
            #               columns = [i for i in range(att_map0[h].shape[1])])
            #         # plt.figure(figsize = (10,7))
            #         # sn.heatmap(df_cm, annot=True)
            #         f, ax= plt.subplots(figsize = (14*2, 14*2))

            #         sn.heatmap(df_cm,cmap='YlGnBu', linewidths = 0.05, ax = ax)
            #         # sn.heatmap(df_cm, annot=True, ax = ax)
            #         # set the Axes title
            #         ax.set_title(f'Accuracy = {fm*100:.2f}')

            #         if not os.path.exists(save_path):
            #             os.mkdir(save_path)
            #         f.savefig(save_path+f'layer{i}head_{h}.jpg', dpi=100, bbox_inches='tight')
            #         plt.close()
            # ======================================================================================= #

            # plot score vs gtscore
            fig, axs = plt.subplots(3)
            n = len(gtscore)

            limits = int(math.floor(len(scores) * 0.15))
            order = np.argsort(scores)[::-1].tolist()
            # keep the indices of the top 15% highest-scoring frame positions
            picks = order[:limits]
            y_scores = np.zeros(len(scores))
            y_scores[picks] = gtscore[picks]

            y_summary = np.zeros(len(scores))
            y_summary[picks] = gtsummary[picks]

            # machine_summary = user_score*machine_summary
            # set_trace()
            axs[0].bar(range(n), gtsummary, width=1, color='lightgray')
            axs[0].bar(range(n), y_summary, width=1, color='orange')
            axs[0].set_title("tvsum {} F-score {:.1%}".format(video_name, fm))

            axs[1].bar(range(n), gtscore, width=1, color='lightgray')
            axs[1].bar(range(n), y_scores, width=1, color='orange')
            plt.xticks(np.linspace(0, n, n // 20, endpoint=False, dtype=int))

            axs[2].bar(range(n), scores.tolist(), width=1, color='orange')
            plt.xticks(np.linspace(0, n, n // 20, endpoint=False, dtype=int))
            # axs[2].bar(range(len(user_score)), user_score, width=1, color='lightgray')
            # axs[2].bar(range(len(user_score)), user_score*machine_summary, width=1, color='orange')

            # for i in range(15):
            #     axs[i+3].bar(range(len(user_score)), user_summary[i], width=1, color='lightgray')
            #     axs[i+3].bar(range(len(user_score)), user_summary[i]*machine_summary, width=1, color='orange')
            # print(i)

            # fig = plt.figure(figsize=(10,60))
            fig.tight_layout()
            fig.savefig(save_path + f'visualization3.png', bbox_inches='tight')
            plt.close()

            acc_list.append(fm)
            log(f'video_name: {video_name:<9} P: {P:.3f} R:{R:.3f} f_score:{fm:.3f}'
                )
            break

        seclog([f'testing f_score: {np.mean(acc_list):.3f}', 'blue'])
Example #8
def evaluate(model, dataset, userscoreset, test_keys, use_gpu):
    print("==> Test")
    with torch.no_grad():
        model.eval()
        fms = []
        eval_metric = 'avg' if args.metric == 'tvsum' else 'max'
        if args.verbose: table = [["No.", "Video", "F-score"]]

        if args.save_results:
            h5_res = h5py.File(
                osp.join(
                    args.save_dir, 'result_ep{}_split_{}_{}.h5'.format(
                        args.max_epoch, args.split_id, args.rnn_cell)), 'w')

        spear_avg_corrs = []
        kendal_avg_corrs = []

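        # when no single dataset is specified, test keys have the form '<dataset_name>/<video_key>'
        # and are resolved through the nested dataset mapping; otherwise keys index one dataset directly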
        if args.dataset is None:
            for key_idx, _ in enumerate(test_keys):
                key_parts = test_keys[key_idx].split('/')
                name, key = key_parts
                seq = dataset[name][key]['features'][...]
                seq = torch.from_numpy(seq).unsqueeze(0)
                if use_gpu: seq = seq.cuda()
                probs, _, _ = model(seq)
                probs = probs.data.cpu().squeeze().numpy()
                cps = dataset[name][key]['change_points'][...]
                num_frames = dataset[name][key]['n_frames'][()]
                nfps = dataset[name][key]['n_frame_per_seg'][...].tolist()
                positions = dataset[name][key]['picks'][...]
                user_summary = dataset[name][key]['user_summary'][...]

                gtscore = dataset[name][key]['gtscore'][...]

                machine_summary, gt_frame_score = vsum_tools.generate_summary(
                    probs, gtscore, cps, num_frames, nfps, positions)
                fm, _, _ = vsum_tools.evaluate_summary(machine_summary,
                                                       user_summary,
                                                       eval_metric)
                fms.append(fm)

                #### Calculate correlation matrices ####
                user_scores = userscoreset[key]["user_scores"][...]
                machine_scores = generate_scores(probs, num_frames, positions)
                spear_avg_corr = evaluate_scores(machine_scores,
                                                 user_scores,
                                                 metric="spearmanr")
                kendal_avg_corr = evaluate_scores(machine_scores,
                                                  user_scores,
                                                  metric="kendalltau")

                spear_avg_corrs.append(spear_avg_corr)
                kendal_avg_corrs.append(kendal_avg_corr)

                if args.verbose:
                    table.append([key_idx + 1, key, "{:.1%}".format(fm)])

                if args.save_results:
                    h5_res.create_dataset(key + '/gt_frame_score',
                                          data=gt_frame_score)
                    h5_res.create_dataset(key + '/score', data=probs)
                    h5_res.create_dataset(key + '/machine_summary',
                                          data=machine_summary)
                    h5_res.create_dataset(
                        key + '/gtscore',
                        data=dataset[name][key]['gtscore'][...])
                    h5_res.create_dataset(key + '/fm', data=fm)
        else:
            for key_idx, key in enumerate(test_keys):
                seq = dataset[key]['features'][...]
                seq = torch.from_numpy(seq).unsqueeze(0)
                if use_gpu: seq = seq.cuda()
                probs, _, _ = model(seq)
                probs = probs.data.cpu().squeeze().numpy()
                cps = dataset[key]['change_points'][...]
                num_frames = dataset[key]['n_frames'][()]
                nfps = dataset[key]['n_frame_per_seg'][...].tolist()
                positions = dataset[key]['picks'][...]
                user_summary = dataset[key]['user_summary'][...]

                gtscore = dataset[key]['gtscore'][...]

                machine_summary, gt_frame_score = vsum_tools.generate_summary(
                    probs, gtscore, cps, num_frames, nfps, positions)
                fm, _, _ = vsum_tools.evaluate_summary(machine_summary,
                                                       user_summary,
                                                       eval_metric)
                fms.append(fm)

                #### Calculate correlation matrices ####
                user_scores = userscoreset[key]["user_scores"][...]
                machine_scores = generate_scores(probs, num_frames, positions)
                spear_avg_corr = evaluate_scores(machine_scores,
                                                 user_scores,
                                                 metric="spearmanr")
                kendal_avg_corr = evaluate_scores(machine_scores,
                                                  user_scores,
                                                  metric="kendalltau")

                spear_avg_corrs.append(spear_avg_corr)
                kendal_avg_corrs.append(kendal_avg_corr)

                if args.verbose:
                    table.append([key_idx + 1, key, "{:.1%}".format(fm)])

                if args.save_results:
                    h5_res.create_dataset(key + '/gt_frame_score',
                                          data=gt_frame_score)
                    h5_res.create_dataset(key + '/score', data=probs)
                    h5_res.create_dataset(key + '/machine_summary',
                                          data=machine_summary)
                    h5_res.create_dataset(key + '/gtscore',
                                          data=dataset[key]['gtscore'][...])
                    h5_res.create_dataset(key + '/fm', data=fm)

    if args.verbose:
        print(tabulate(table))

    if args.save_results: h5_res.close()

    mean_fm = np.mean(fms)
    print("Average F1-score {:.1%}".format(mean_fm))

    mean_spear_avg = np.mean(spear_avg_corrs)
    mean_kendal_avg = np.mean(kendal_avg_corrs)
    print("Average Kendall's tau {}".format(mean_kendal_avg))
    print("Average Spearman's rho {}".format(mean_spear_avg))

    return mean_fm
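The examples above share the same evaluation skeleton: score the subsampled features, expand the scores into a frame-level summary with vsum_tools.generate_summary, and compare against the human annotations with vsum_tools.evaluate_summary. The sketch below condenses that pattern for an eccv16-style .h5 dataset. It assumes the five-argument generate_summary and the three-value evaluate_summary used in most of the examples (the signatures differ between repositories), and the helper name eval_h5 and the score_fn callback are illustrative, not part of any of the codebases shown.

import h5py
import numpy as np

import vsum_tools


def eval_h5(h5_path, score_fn, eval_metric='avg'):
    """Evaluate a scoring function over every video in an eccv16-style .h5 file.

    score_fn maps a (n_picks, feature_dim) array to per-pick importance scores.
    """
    fms = []
    with h5py.File(h5_path, 'r') as dataset:
        for key in dataset.keys():
            probs = score_fn(dataset[key]['features'][...])
            # expand the subsampled scores into a frame-level 0/1 summary
            machine_summary = vsum_tools.generate_summary(
                probs,
                dataset[key]['change_points'][...],
                dataset[key]['n_frames'][()],
                dataset[key]['n_frame_per_seg'][...].tolist(),
                dataset[key]['picks'][...])
            # compare against the human-annotated summaries
            fm, _, _ = vsum_tools.evaluate_summary(
                machine_summary, dataset[key]['user_summary'][...], eval_metric)
            fms.append(fm)
    return float(np.mean(fms)) if fms else float('nan')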