Example #1
def test(model, crit, dataset, vocab, opt):
    model.eval()  # evaluation mode: disable dropout / freeze batch-norm statistics
    loader = DataLoader(dataset, batch_size=opt.batch_size, shuffle=True)
    scorer = COCOScorer()
    gt_dataframe = json_normalize(json.load(open(opt.input_json))['sentences'])
    gts = convert_data_to_coco_scorer_format(gt_dataframe)
    results = []
    samples = {}
    for data in loader:
        # forward the model to get loss
        # Variable is deprecated since PyTorch 0.4; tensors work directly
        fc_feats = data['fc_feats'].cuda()
        labels = data['labels'].long().cuda()
        with torch.no_grad():
            # forward the model to also get generated samples for each image
            seq_probs, seq_preds = model(fc_feats, labels, teacher_forcing_ratio=0)
            print(seq_preds)

        sents = utils.decode_sequence(vocab, seq_preds)

        for k, sent in enumerate(sents):
            video_id = 'video' + str(data['ix'][k])
            samples[video_id] = [{'image_id': video_id, 'caption': sent}]

    with suppress_stdout_stderr():
        valid_score = scorer.score(gts, samples, samples.keys())
    results.append(valid_score)
    print(valid_score)

    if not os.path.exists(opt.results_path):
        os.makedirs(opt.results_path)

    with open(os.path.join(opt.results_path, "scores.txt"), 'a') as scores_table:
        scores_table.write(json.dumps(results[0]) + "\n")
    with open(os.path.join(opt.results_path, opt.model.split("/")[-1].split('.')[0] + ".json"), 'w') as prediction_results:
        json.dump({"predictions": samples, "scores": valid_score}, prediction_results)
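Every example wraps scorer.score(...) in suppress_stdout_stderr() to silence the
verbose caption-metric tools. The helper itself is not shown in any snippet; a
minimal file-descriptor-level sketch (an assumption about the repositories'
utility code, not their exact implementation) could look like this:

import os
from contextlib import contextmanager

@contextmanager
def suppress_stdout_stderr():
    # Redirect fd 1/2 to /dev/null so even C- or Java-level output is silenced.
    devnull = os.open(os.devnull, os.O_WRONLY)
    saved_out, saved_err = os.dup(1), os.dup(2)
    try:
        os.dup2(devnull, 1)
        os.dup2(devnull, 2)
        yield
    finally:
        os.dup2(saved_out, 1)
        os.dup2(saved_err, 2)
        for fd in (devnull, saved_out, saved_err):
            os.close(fd)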
Example #2
def test(model, crit, dataset, vocab, opt):
    model.eval()
    loader = DataLoader(dataset, batch_size=opt["batch_size"], shuffle=False)
    scorer = COCOScorer()
    gt_dataframe = json_normalize(
        json.load(open(opt["input_json"]))['sentences'])
    gts = convert_data_to_coco_scorer_format(gt_dataframe)
    #results = []
    samples = {}
    for index, data in enumerate(loader):
        print('batch: ' + str((index + 1) * opt["batch_size"]))
        # forward the model to get loss
        fc_feats = data['fc_feats'].cuda()
        labels = data['labels'].long().cuda()
        masks = data['masks'].cuda()
        video_ids = data['video_ids']

        # forward the model to also get generated samples for each image
        # (volatile=True Variables are gone since PyTorch 0.4; torch.no_grad() replaces them)
        with torch.no_grad():
            seq_probs, seq_preds = model(fc_feats, mode='inference', opt=opt)
        # print(seq_preds)

        sents = utils.decode_sequence(vocab, seq_preds)

        for k, sent in enumerate(sents):
            video_id = video_ids[k]
            samples[video_id] = [{'image_id': video_id, 'caption': sent}]
        # break
    with suppress_stdout_stderr():
        valid_score = scorer.score(gts, samples, samples.keys())
    #results.append(valid_score)
    #print(valid_score)

    if not os.path.exists(opt["results_path"]):
        os.makedirs(opt["results_path"])
    result = OrderedDict()
    result['checkpoint'] = opt["saved_model"][opt["saved_model"].rfind('/') + 1:]
    score_sum = 0
    for key, value in valid_score.items():
        score_sum += float(value)
    result['sum'] = str(score_sum)
    #result = OrderedDict(result, **valid_score)
    # dict_items cannot be concatenated with + in Python 3; build from lists instead
    result = OrderedDict(list(result.items()) + list(valid_score.items()))
    print(result)
    with open(os.path.join(opt["results_path"], "scores.txt"),
              'a') as scores_table:
        scores_table.write(json.dumps(result) + "\n")
    with open(
            os.path.join(opt["results_path"],
                         opt["model"].split("/")[-1].split('.')[0] + ".json"),
            'w') as prediction_results:
        json.dump({
            "predictions": samples,
            "scores": valid_score
        }, prediction_results)
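All of these test functions lean on convert_data_to_coco_scorer_format to turn the
flattened 'sentences' records into the reference dictionary that COCOScorer.score
consumes. A sketch consistent with how gts is used above, assuming the normalized
dataframe carries 'caption' and 'video_id' columns (as in MSR-VTT's videodatainfo):

def convert_data_to_coco_scorer_format(data_frame):
    # Group per-video reference captions into the dict-of-lists COCOScorer expects.
    gts = {}
    for caption, video_id in zip(data_frame['caption'], data_frame['video_id']):
        refs = gts.setdefault(video_id, [])
        refs.append({'image_id': video_id, 'cap_id': len(refs), 'caption': caption})
    return gts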
Example #3
File: eval.py  Project: stillarrow/S2VT_ACT
def test(model, crit, dataset, vocab, device, opt):
    model.eval()
    loader = DataLoader(dataset, batch_size=opt["batch_size"], shuffle=True)
    scorer = COCOScorer()
    gt_dataframe = json_normalize(
        json.load(open(opt["input_json"]))['sentences'])
    gts = convert_data_to_coco_scorer_format(gt_dataframe)
    results = []
    samples = {}
    for data in loader:
        # forward the model to get loss
        fc_feats = data['fc_feats'].to(device)
        labels = data['labels'].to(device)
        masks = data['masks'].to(device)
        video_ids = data['video_ids']
        if opt["model"] == "S2VTACTModel":
            action = data['action'].to(device)
        # forward the model to also get generated samples for each image
        with torch.no_grad():
            if opt["model"] == "S2VTModel":
                seq_probs, seq_preds = model(fc_feats,
                                             mode='inference',
                                             opt=opt)
            else:
                seq_probs, seq_preds = model(fc_feats,
                                             action=action,
                                             device=device,
                                             mode='inference',
                                             opt=opt)

        sents = utils.decode_sequence(vocab, seq_preds)

        for k, sent in enumerate(sents):
            video_id = video_ids[k]
            samples[video_id] = [{'image_id': video_id, 'caption': sent}]

    with suppress_stdout_stderr():
        valid_score = scorer.score(gts, samples, samples.keys())
    results.append(valid_score)
    print(valid_score)

    if not os.path.exists(opt["results_path"]):
        os.makedirs(opt["results_path"])

    with open(os.path.join(opt["results_path"], "scores.txt"),
              'a') as scores_table:
        scores_table.write(json.dumps(results[0]) + "\n")
    with open(
            os.path.join(opt["results_path"],
                         opt["model"].split("/")[-1].split('.')[0] + ".json"),
            'w') as prediction_results:
        json.dump({
            "predictions": samples,
            "scores": valid_score
        }, prediction_results)
Example #4
def test(model, crit, dataset, vocab, opt):
    model.eval()
    loader = DataLoader(dataset, batch_size=opt["batch_size"], shuffle=True)
    scorer = COCOScorer()
    gt_dataframe = json_normalize(
        json.load(open(opt["input_json"]))['sentences'])
    gts = convert_data_to_coco_scorer_format(gt_dataframe)
    results = []
    samples = {}
    seq_probs_list = []
    seq_preds_list = []
    masks_list = []
    labels_list = []

    for data in loader:
        # forward the model to get loss
        fc_feats = data['fc_feats'].cuda()
        if opt["with_mean"] == 0:
            feats_3d = data['feats_3d'].cuda()
        labels = data['labels'].cuda()
        masks = data['masks'].cuda()
        video_ids = data['video_ids']
      
        # forward the model to also get generated samples for each image
        with torch.no_grad():
            if opt["with_mean"] == 1:
                seq_probs, seq_preds = model(
                    fc_feats, mode='inference', opt=opt)
            else:
                seq_probs, seq_preds = model(
                    fc_feats, feats_3d, mode='inference', opt=opt)

        sents = utils.decode_sequence(vocab, seq_preds)

        for k, sent in enumerate(sents):
            video_id = video_ids[k]
            samples[video_id] = [{'image_id': video_id, 'caption': sent}]
        
        seq_preds_list.append(seq_preds)
        seq_probs_list.append(seq_probs)
        masks_list.append(masks)
        labels_list.append(labels)

    with suppress_stdout_stderr():
        valid_score = scorer.score(gts, samples, samples.keys())
    results.append(valid_score)
    print(valid_score)

    seq_probs_list = torch.cat(seq_probs_list, 0)
    seq_preds_list = torch.cat(seq_preds_list, 0)
    labels_list = torch.cat(labels_list, 0)
    masks_list = torch.cat(masks_list, 0)

    return valid_score, samples, seq_probs_list, seq_preds_list, labels_list, masks_list
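Unlike the other variants, this version returns the concatenated model outputs
instead of writing result files, so a caller can inspect them or compute a
validation loss. A hypothetical caller (names and shapes are assumptions, since
the exact tensor layout is model-dependent):

# The returned tensors are concatenated over the whole split.
valid_score, samples, seq_probs, seq_preds, labels, masks = test(
    model, crit, dataset, vocab, opt)
print(seq_preds.shape)  # roughly (num_videos, max_seq_len) token ids
print(seq_probs.shape)  # per-step scores; trailing dims depend on the model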
Example #5
def test(model, crit, dataset, vocab, opt):
    model.eval()
    loader = DataLoader(dataset, batch_size=opt["batch_size"], shuffle=True)
    scorer = COCOScorer()

    gt_dataframe = json_normalize(
        json.load(open('data_subset/vatex_subsample_v1.0.json')))

    gts = convert_data_to_coco_scorer_format(gt_dataframe, 'chinese')

    results = []
    samples = {}
    for data in loader:
        # forward the model to get loss
        i3d_feats = data['i3d_feats'].squeeze(1)  #.cuda()
        labels = data['labels']  #.cuda()
        masks = data['masks']  #.cuda()
        video_ids = data['video_ids']

        # forward the model to also get generated samples for each image
        with torch.no_grad():
            seq_probs, seq_preds = model(i3d_feats, mode='inference', opt=opt)
        sents = utils.decode_sequence(vocab, seq_preds)

        for k, sent in enumerate(sents):
            video_id = video_ids[k]
            samples[video_id] = [{'image_id': video_id, 'caption': sent}]

    with suppress_stdout_stderr():
        valid_score = scorer.score(gts, samples, samples.keys())
    results.append(valid_score)
    print(valid_score)

    if not os.path.exists(opt["results_path"]):
        os.makedirs(opt["results_path"])

    with open(
            os.path.join(opt["results_path"],
                         "chinese_LSTM_OPT_epoch601_scores.txt"),
            'a') as scores_table:
        scores_table.write(json.dumps(results[0]) + "\n")
    with open(
            os.path.join(
                opt["results_path"],
                opt["model"].split("/")[-1].split('.')[0] +
                "_chinese_LSTM_OPT_epoch601.json"), 'w') as prediction_results:
        json.dump({
            "predictions": samples,
            "scores": valid_score
        },
                  prediction_results,
                  indent=2)
Example #6
def cal_score(opt,
              samples={'video9992': [{'image_id': 'video9992', 'caption': 'a man is riding a surfboard on a surfboard on the ocean'}],
                       'video9997': [{'image_id': 'video9997', 'caption': 'a woman is applying her face'}]},
              scorer=COCOScorer()):
    gt_dataframe = json_normalize(json.load(open(opt["input_json"]))['sentences'])
    gts = convert_data_to_coco_scorer_format(gt_dataframe)
    with suppress_stdout_stderr():
        valid_score = scorer.score(gts, samples, samples.keys())
    print(valid_score)
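Note the mutable and stateful default arguments: samples={...} and scorer=COCOScorer()
are evaluated once at definition time, so every call shares the same dict and scorer
instance. A safer equivalent using None sentinels (a hypothetical rewrite with the
same behavior):

def cal_score_safe(opt, samples=None, scorer=None):
    # None sentinels avoid sharing one dict/scorer object across calls.
    if samples is None:
        samples = {
            'video9992': [{'image_id': 'video9992',
                           'caption': 'a man is riding a surfboard on a surfboard on the ocean'}],
            'video9997': [{'image_id': 'video9997',
                           'caption': 'a woman is applying her face'}],
        }
    if scorer is None:
        scorer = COCOScorer()
    gt_dataframe = json_normalize(json.load(open(opt["input_json"]))['sentences'])
    gts = convert_data_to_coco_scorer_format(gt_dataframe)
    with suppress_stdout_stderr():
        valid_score = scorer.score(gts, samples, samples.keys())
    print(valid_score)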
Example #7
def test(model, crit, dataset, vocab, opt):
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    model.eval()
    loader = DataLoader(dataset, batch_size=opt["batch_size"], shuffle=False)
    scorer = COCOScorer()
    dataset_meta = json.load(open(opt["dataset_json"]))

    vid_to_meta = dataset_meta["vid_to_meta"]
    vid_ids = dataset_meta["split_to_ids"]["test"]

    # gt_dataframe = json_normalize(json.load(open(opt["dataset_json"]))['sentences'])
    gts = convert_data_to_coco_scorer_format(vid_ids, vid_to_meta)
    #results = []
    samples = {}

    with torch.no_grad():
        for index, data in enumerate(loader):
            print('batch: '+str((index+1)*opt["batch_size"]))
            # forward the model to get loss
            fc_feats = data['fc_feats'].to(device)
            video_id = data['video_ids'].cpu()

            # forward the model to also get generated samples for each image
            seq_probs, seq_preds = model(fc_feats, mode='inference', opt=opt)

            sents = utils.decode_sequence(vocab, seq_preds)

            for k, sent in enumerate(sents):
                # Iter through each video in batch and convert id back to original msvd key
                vid_key = vid_ids[video_id[k]]
                samples[vid_key] = [{'image_id': vid_key, 'caption': sent}]

    with suppress_stdout_stderr():
        valid_score = scorer.score(gts, samples, samples.keys())

    print(valid_score)
Example #8
def test(model, crit, dataset, vocab, opt, writer):
    model.eval()
    loss_avg = averager()
    writer = SummaryWriter()
    loader = DataLoader(dataset, batch_size=opt["batch_size"], shuffle=True)
    scorer = COCOScorer()
    gt_dataframe = json_normalize(
        json.load(open(opt["input_json"]))['sentences'])
    gts = convert_data_to_coco_scorer_format(gt_dataframe)
    results = []
    samples = {}
    for data in loader:
        # forward the model to get loss
        fc_feats = data['fc_feats'].cuda()
        labels = data['labels'].cuda()
        masks = data['masks'].cuda()
        video_ids = data['video_ids']
        # clip_nums = data['clip_num']
        # sorted_clip_nums, indices = torch.sort(clip_nums, descending=True)
        # _, desorted_indices = torch.sort(indices, descending=False)
        # fc_feats = fc_feats[indices]
        # pack = rnn.pack_padded_sequence(fc_feats, sorted_clip_nums, batch_first=True)
        # forward the model to also get generated samples for each image
        with torch.no_grad():
            seq_probs, seq_preds = model(fc_feats, mode='inference', opt=opt)

        sents = utils.decode_sequence(vocab, seq_preds)

        for k, sent in enumerate(sents):
            video_id = video_ids[k]
            samples[video_id] = [{'image_id': video_id, 'caption': sent}]

    with suppress_stdout_stderr():
        valid_score = scorer.score(gts, samples, samples.keys())
    results.append(valid_score)
    print(valid_score)
Example #9
def test(model, crit, dataset, vocab, opt):
    model.eval()
    loader = DataLoader(dataset, batch_size=opt["batch_size"], shuffle=True)
    scorer = COCOScorer()
    gt_dataframe = json_normalize(
        json.load(open(opt["input_json"]))['sentences'])
    gts = convert_data_to_coco_scorer_format(gt_dataframe)
    results = []
    samples = {}
    print("=================================================================")
    for data in loader:
        # forward the model to get loss
        fc_feats = data['fc_feats'].cuda()
        labels = data['labels'].cuda()
        masks = data['masks'].cuda()
        video_ids = data['video_ids']
        #print(opt)
        # forward the model to also get generated samples for each image
        with torch.no_grad():
            seq_probs, seq_preds = model(fc_feats, mode='inference', opt=opt)

        sents = utils.decode_sequence(vocab, seq_preds)
        print("[MODEL_OUT] The " + video_ids[0] + ": " + sents[0])
    print("=================================================================")
Example #10
def test(model, rem, crit, dataset, vocab, opt):
    videos = json.load(open('caption1.json', 'r'))
    model.eval()
    loader = DataLoader(dataset, batch_size=opt["batch_size"], shuffle=False)
    scorer = COCOScorer()
    gt_dataframe = json_normalize(
        json.load(open(opt["input_json"]))['sentence'])
    gts = convert_data_to_coco_scorer_format(gt_dataframe)
    results = []
    results_f = []
    results_avg = []
    samples = {}
    samples_f = {}
    samples_avg = {}
    sample_all = {}
    for i, data in enumerate(loader):
        # forward the model to get loss
        fc_feats = data['fc_feats'].cuda()
        labels = data['labels'].cuda()
        masks = data['masks'].cuda()
        video_ids = data['video_ids']

        # forward the model to also get generated samples for each image
        with torch.no_grad():
            seq_probs, seq_preds, en_hn, de_hn = model(fc_feats,
                                                       mode='inference',
                                                       opt=opt)
            fake_en_hn = rem(de_hn, seq_probs)
            f_seq_probs, f_seq_preds, __, __ = model(fc_feats,
                                                     mode='inference',
                                                     h=fake_en_hn,
                                                     opt=opt)
            avg_en_hn = (en_hn + fake_en_hn) / 2
            avg_f_seq_probs, avg_f_seq_preds, __, __ = model(fc_feats,
                                                             mode='inference',
                                                             h=avg_en_hn,
                                                             opt=opt)
        sents = utils.decode_sequence(vocab, seq_preds)
        f_sents = utils.decode_sequence(vocab, f_seq_preds)
        avg_sents = utils.decode_sequence(vocab, avg_f_seq_preds)

        for k, sent in enumerate(sents):
            video_id = video_ids[k]
            # COCOScorer expects the key 'caption' (singular), as in the other examples
            samples[video_id] = [{'image_id': video_id, 'caption': sent}]
            samples_f[video_id] = [{
                'image_id': video_id,
                'caption': f_sents[k]
            }]
            samples_avg[video_id] = [{
                'image_id': video_id,
                'caption': avg_sents[k]
            }]
            sample_all[video_id] = [{
                'ground truth':
                videos[video_id]['captions'],
                'caption_origin':
                sent,
                'caption_fake':
                f_sents[k],
                'caption_average':
                avg_sents[k]
            }]

        if i > 1:
            print(seq_preds.size())
            break

    with suppress_stdout_stderr():
        valid_score = scorer.score(gts, samples, samples.keys())
        valid_score_f = scorer.score(gts, samples_f, samples_f.keys())
        valid_score_avg = scorer.score(gts, samples_avg, samples_avg.keys())
    results.append(valid_score)
    results_f.append(valid_score_f)
    results_avg.append(valid_score_avg)
    print(valid_score)

    if not os.path.exists(opt["results_path"]):
        os.makedirs(opt["results_path"])

    with open(os.path.join(opt["results_path"], "scores2.txt"),
              'w') as scores_table:
        scores_table.write(json.dumps(results[0]) + "\n")
    with open(os.path.join(opt["results_path"], "scores_f2.txt"),
              'w') as scores_table:
        scores_table.write(json.dumps(results_f[0]) + "\n")
    with open(os.path.join(opt["results_path"], "scores_avg2.txt"),
              'w') as scores_table:
        scores_table.write(json.dumps(results_avg[0]) + "\n")
    with open(
            os.path.join(opt["results_path"],
                         opt["model"].split("/")[-1].split('.')[0] + "2.json"),
            'w') as prediction_results:
        json.dump({
            "predictions": samples,
            "scores": valid_score
        }, prediction_results)
    with open(
            os.path.join(
                opt["results_path"],
                opt["model"].split("/")[-1].split('.')[0] + "_f2.json"),
            'w') as prediction_results:
        json.dump({
            "predictions": samples_f,
            "scores": valid_score_f
        }, prediction_results)
    with open(
            os.path.join(
                opt["results_path"],
                opt["model"].split("/")[-1].split('.')[0] + "_avg2.json"),
            'w') as prediction_results:
        json.dump({
            "predictions": samples_avg,
            "scores": valid_score_avg
        }, prediction_results)
    with open('./results/total_caption2.json', 'w') as f:
        json.dump({"total": sample_all}, f)
Example #11
print(valid_score)

with suppress_stdout_stderr():
    samples = {}
    for key in keylist:
        samples[key] = []
        index = np.array(res2[key]).argmax()
        samples[key].append({'image_id': key, 'caption': sent[key][5::6][index]})
    valid_score, detail_scores = scorer.score(gts, samples, samples.keys())
print(valid_score)
'''
sent = pickle.load(
    open(
        "/home/yangbang/VideoCaptioning/ARVC/AR_topk_collect_results/msrvtt_5.pkl",
        'rb'))
scorer = COCOScorer()
gts = pickle.load(
    open("/home/yangbang/VideoCaptioning/MSRVTT/msrvtt_refs.pkl", 'rb'))

B = 5
keylist = sent.keys()
res = {}
res2 = {}
for k in keylist:
    res[k] = []
    res2[k] = []
with suppress_stdout_stderr():
    for i in range(B):
        samples = {}
        for key in keylist:
            samples[key] = []
Example #12
def run_eval(opt,
             model,
             crit,
             loader,
             vocab,
             device,
             json_path='',
             json_name='',
             scorer=COCOScorer(),
             teacher_model=None,
             dict_mapping={},
             no_score=False,
             print_sent=False,
             analyze=False,
             collect_best_candidate_iterative_results=False,
             collect_path=None,
             extra_opt={},
             summarywriter=None,
             global_step=0):
    opt.update(extra_opt)
    model.eval()
    if teacher_model is not None:
        teacher_model.eval()

    gt_captions = loader.dataset.get_references()
    pred_captions = defaultdict(list)

    opt['collect_best_candidate_iterative_results'] = collect_best_candidate_iterative_results
    translator = Translator(model=model,
                            opt=opt,
                            teacher_model=teacher_model,
                            dict_mapping=dict_mapping)

    best_candidate_sents = defaultdict(list)
    best_candidate_score = defaultdict(list)

    best_ar_sent = []
    all_time = 0

    if crit is not None:
        crit.reset_loss_recorder()

    collect_ar_flag = (opt['decoding_type'] == 'ARFormer'
                       and collect_best_candidate_iterative_results)

    for data in tqdm(loader, ncols=150, leave=False):
        with torch.no_grad():
            encoder_outputs, category, labels = get_forword_results(
                opt, model, data, device=device, only_data=True, vocab=vocab)
            if crit is not None:
                _ = crit.get_loss(encoder_outputs)

            if teacher_model is not None:
                teacher_encoder_outputs, *_ = get_forword_results(
                    opt,
                    teacher_model,
                    data,
                    device=device,
                    only_data=True,
                    vocab=vocab)
            else:
                teacher_encoder_outputs = None

            if opt['batch_size'] == 1:
                start_time = time.time()
            all_hyp, all_scores = translator.translate_batch(
                encoder_outputs,
                category,
                labels,
                vocab,
                teacher_encoder_outputs=teacher_encoder_outputs)
            if opt['batch_size'] == 1:
                all_time += (time.time() - start_time)

            if isinstance(all_hyp, torch.Tensor):
                if len(all_hyp.shape) == 2:
                    all_hyp = all_hyp.unsqueeze(1)
                all_hyp = all_hyp.tolist()
            if isinstance(all_scores, torch.Tensor):
                if len(all_scores.shape) == 2:
                    all_scores = all_scores.unsqueeze(1)
                all_scores = all_scores.tolist()

            video_ids = np.array(data['video_ids']).reshape(-1)

        for k, hyps in enumerate(all_hyp):
            video_id = video_ids[k]
            if not no_score:
                assert len(hyps) == 1

            for j, hyp in enumerate(hyps):
                sent = to_sentence(hyp, vocab)
                if opt.get('duplicate',
                           False) and opt['decoding_type'] == 'NARFormer':
                    sent, _ = duplicate(sent)
                if print_sent:
                    tqdm.write(video_id + ': ' + sent)

                if not collect_ar_flag:
                    # for evaluation
                    pred_captions[video_id].append({
                        'image_id': video_id,
                        'caption': sent
                    })
                else:
                    # for collection
                    pred_captions[video_id].append({
                        'caption': sent,
                        'score': all_scores[k][j]
                    })

        if collect_best_candidate_iterative_results and not collect_ar_flag:
            assert isinstance(all_scores, tuple)
            all_sents = all_scores[0].tolist()
            all_score = all_scores[1].tolist()

            if len(video_ids) != len(all_sents):
                video_ids = np.array(data['video_ids'])[:, np.newaxis].repeat(
                    opt['length_beam_size'], axis=1).reshape(-1)
                assert len(video_ids) == len(all_sents)

            for k, (hyps, scores) in enumerate(zip(all_sents, all_score)):
                video_id = video_ids[k]
                pre_sent_len = 0
                assert len(hyps) == len(scores)

                for j, (hyp, score) in enumerate(zip(hyps, scores)):
                    sent = to_sentence(hyp, vocab)

                    if not pre_sent_len:
                        pre_sent_len = len(sent.split(' '))
                    else:
                        assert len(sent.split(' ')) == pre_sent_len

                    tqdm.write(('%10s' % video_id) +
                               '(iteration %d Length %d): ' %
                               (j, len(sent.split(' '))) + sent)

                    best_candidate_sents[video_id].append(sent)
                    best_candidate_score[video_id].append(score)

    if collect_best_candidate_iterative_results:
        assert collect_path is not None
        if not collect_ar_flag:
            pickle.dump([best_candidate_sents, best_candidate_score],
                        open(collect_path, 'wb'))
        else:
            pickle.dump(pred_captions, open(collect_path, 'wb'))

    if opt['batch_size'] == 1:
        latency = all_time / len(loader)
        print(latency, len(loader))

    res = {}
    if analyze:
        ave_length, novel, unique, usage, hy_res, gram4 = analyze_length_novel_unique(
            loader.dataset.captions,
            pred_captions,
            vocab,
            splits=loader.dataset.splits,
            n=1)
        res.update({
            'ave_length': ave_length,
            'novel': novel,
            'unique': unique,
            'usage': usage,
            'gram4': gram4
        })

    if not no_score:
        with suppress_stdout_stderr():
            valid_score, detail_scores = scorer.score(gt_captions,
                                                      pred_captions,
                                                      pred_captions.keys())

        res.update(valid_score)
        metric_sum = opt.get('metric_sum', [1, 1, 1, 1])
        candidate = [
            res["Bleu_4"], res["METEOR"], res["ROUGE_L"], res["CIDEr"]
        ]
        res['Sum'] = sum([
            item for index, item in enumerate(candidate) if metric_sum[index]
        ])
        if crit is not None:
            names, metrics = crit.get_loss_info()
            for n, m in zip(names, metrics):
                res[n] = m

    if summarywriter is not None:
        for k, v in res.items():
            summarywriter.add_scalar(k, v, global_step=global_step)

    if json_path:
        if not os.path.exists(json_path):
            os.makedirs(json_path)

        with open(os.path.join(json_path, json_name),
                  'w') as prediction_results:
            json.dump({
                "predictions": pred_captions,
                "scores": valid_score
            }, prediction_results)
            prediction_results.close()

    return res
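A hypothetical invocation of run_eval (val_loader, writer, and epoch are
placeholder names, not part of the snippet):

# Score the validation split, log metrics to TensorBoard, and dump predictions.
res = run_eval(opt, model, crit, val_loader, vocab, device,
               json_path='results', json_name='val.json',
               analyze=True, summarywriter=writer, global_step=epoch)
print(res.get('Sum'))  # Bleu_4 + METEOR + ROUGE_L + CIDEr, per the metric_sum mask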
Example #13
def run_eval_ensemble(opt, opt_list, models, crit, loader, vocab, device,
                      json_path='', json_name='', scorer=COCOScorer(),
                      print_sent=False, no_score=False, analyze=False):
    translator = Translator_ensemble(model=models, opt=opt)

    videodatainfo = json.load(open(opt["input_json"]))
    gt_dataframe = json_normalize(videodatainfo['sentences'])
    gts = convert_data_to_coco_scorer_format(gt_dataframe) 
    samples = {}
    sentences = []
    
    for data in tqdm(loader, ncols=150, leave=False):
        with torch.no_grad():
            enc_output = []
            enc_hidden = []
            for i, model in enumerate(models):
                encoder_outputs, category, _ = get_forword_results(opt_list[i], model, data, device=device, only_data=True)
                enc_output.append(encoder_outputs['enc_output'])
                enc_hidden.append(encoder_outputs['enc_hidden'])

            all_hyp, all_scores = translator.translate_batch(enc_output, enc_hidden, category)

            if isinstance(all_hyp, torch.Tensor):
                if len(all_hyp.shape) == 2:
                    all_hyp = all_hyp.unsqueeze(1)
                all_hyp = all_hyp.tolist()
            video_ids = np.array(data['video_ids']).reshape(-1)

        for k, hyps in enumerate(all_hyp):
            video_id = video_ids[k]
            samples[video_id] = []
            if not no_score: 
                assert len(hyps) == 1
            index = 0
            for j, hyp in enumerate(hyps):
                sent = to_sentence(hyp, vocab)
                if not opt.get('no_duplicate', False) and opt['decoder_type'] == 'NARFormer':
                    sent, _ = duplicate(sent)
                if print_sent:
                    tqdm.write(video_id + ': ' + sent)
                samples[video_id].append({'image_id': video_id, 'caption': sent})
                if len(sent.split(' ')) <= 3:
                    continue
                sentences.append({'caption': sent, 'video_id': video_id, 'sen_id': index})
                index += 1
    res = {}
    if analyze:
        gt_caption = json.load(open(opt['caption_json']))
        ave_length, novel, unique, usage, hy_res = analyze_length_novel_unique(gt_caption, samples, n=1, dataset=opt['dataset'])
        res.update({'ave_length': ave_length, 'novel': novel, 'unique': unique, 'usage': usage})   

    if not no_score:
        with suppress_stdout_stderr():
            valid_score = scorer.score(gts, samples, samples.keys())

        res.update(valid_score)
        res['loss'] = 0

        metric_sum = opt.get('metric_sum', [1, 1, 1, 1])
        res['Sum'] = 0
        candidate = [res["Bleu_4"], res["METEOR"], res["ROUGE_L"], res["CIDEr"]]
        for i, item in enumerate(metric_sum):
            if item: res['Sum'] += candidate[i]

        if json_path:
            if not os.path.exists(json_path):
                os.makedirs(json_path)
            #print('364 364 364 364')
            with open(os.path.join(json_path, json_name), 'w') as prediction_results:
                json.dump({"predictions": samples, "scores": valid_score}, prediction_results)
                prediction_results.close()
        return res

    return None
Example #14
def run_eval(opt, model, crit, loader, vocab, device, json_path='', json_name='',
             scorer=COCOScorer(), print_sent=False, teacher_model=None, length_crit=None,
             no_score=False, save_videodatainfo=False, saved_with_pickle=False,
             pickle_path=None, dict_mapping={}, analyze=False,
             collect_best_candidate_iterative_results=False, collect_path=None,
             write_time=False, save_embs=False, save_to_spice=False,
             calculate_novel=True, evaluate_iterative_results=False, update_gram4=False):
    model.eval()
    if teacher_model is not None:
        teacher_model.eval()

    gts = loader.dataset.get_references()
    refs = defaultdict(list)
    spice_res = []
    samples = {}

    id_to_vid, now_mode = loader.dataset.get_mode()
    vatex = (opt['dataset'].lower() == 'vatex' and now_mode == 'test')
    vatex_samples = {}
    if vatex:
        assert id_to_vid is not None
        vid_to_id = {v: k for k, v in id_to_vid.items()}

    opt['collect_best_candidate_iterative_results'] = collect_best_candidate_iterative_results
    translator = Translator(model=model, opt=opt, teacher_model=teacher_model, dict_mapping=dict_mapping)

    best_candidate_sents = defaultdict(list)
    best_candidate_score = defaultdict(list)
    '''
    if opt.get('collect_last', False):
        best_candidate_sents = [[]]
        best_candidate_score = [[]]
    else:
        best_candidate_sents = [[] for _ in range(opt['iterations'] if opt.get("nv_scale", 0) != 100 else opt['iterations']+1)]
        best_candidate_score = [[] for _ in range(opt['iterations'] if opt.get("nv_scale", 0) != 100 else opt['iterations']+1)]
    '''
    best_ar_sent = []

    all_time = 0
    if save_embs:
        import h5py
        word_to_ix = {k:v for v, k in vocab.items()}
        embs_pth = './collect_embs'
        if not os.path.exists(embs_pth):
            os.makedirs(embs_pth)
        embs_db_name = os.path.basename(collect_path).split('.')[0] + '.hdf5'
        embs_db = h5py.File(os.path.join(embs_pth, embs_db_name), 'a')
        index_set = defaultdict(set)

    #target_sent = "a man is playing a video game"
    #target_count = 0
    #unique_sent = set()

    for data in tqdm(loader, ncols=150, leave=True):
        with torch.no_grad():
            encoder_outputs, category, labels, feats_t = get_forword_results(opt, model, data, device=device, only_data=True, vocab=vocab)
            if teacher_model is not None:
                teacher_encoder_outputs, _, _, _ = get_forword_results(opt, teacher_model, data, device=device, only_data=True, vocab=vocab)
            else:
                teacher_encoder_outputs = None

            if opt['batch_size'] == 1:
                start_time = time.time()
            all_hyp, all_scores = translator.translate_batch(encoder_outputs, category, labels, vocab, teacher_encoder_outputs=teacher_encoder_outputs, tags=feats_t)
            if opt['batch_size'] == 1:
                all_time += (time.time() - start_time)

            if isinstance(all_hyp, torch.Tensor):
                if len(all_hyp.shape) == 2:
                    all_hyp = all_hyp.unsqueeze(1)
                all_hyp = all_hyp.tolist()
            if isinstance(all_scores, torch.Tensor):
                if len(all_scores.shape) == 2:
                    all_scores = all_scores.unsqueeze(1)
                all_scores = all_scores.tolist()

            video_ids = np.array(data['video_ids']).reshape(-1)

        for k, hyps in enumerate(all_hyp):
            video_id = video_ids[k]
            samples[video_id] = []
            if not no_score: 
                assert len(hyps) == 1

            for j, hyp in enumerate(hyps):
                sent = to_sentence(hyp, vocab)
                if not opt.get('no_duplicate', False) and opt['decoder_type'] == 'NARFormer':
                    sent, _ = duplicate(sent)
                if print_sent:
                    tqdm.write(video_id + ': ' + sent)
                    #sent, res = duplicate(sent)
                    #tqdm.write(video_id + ': ' + res)
                    #tqdm.write(video_id + ': ' + sent)
                samples[video_id].append({'image_id': video_id, 'caption': sent})

                #if target_sent in sent:
                #    target_count += 1
                #    print(video_id)
                #unique_sent.add(sent)

                if vatex:
                    vatex_samples[vid_to_id[video_id]] = sent

                #if len(sent.split(' ')) <= 3:
                #    continue
                if save_to_spice:
                    tmp2 = []
                    for item in gts[video_id]:
                        tmp2.append(item['caption'])
                    tmp = {'image_id': video_id, 'test': sent, 'refs': tmp2}
                    spice_res.append(tmp)

                if save_videodatainfo:
                    refs[video_id].append({'image_id': video_id, 'cap_id': len(refs[video_id]), 'caption': sent, 'score': all_scores[k][j]})

        if collect_best_candidate_iterative_results:
            assert isinstance(all_scores, tuple)
            all_sents = all_scores[0].tolist()
            all_score = all_scores[1].tolist()

            if len(video_ids) != len(all_sents):
                video_ids = np.array(data['video_ids'])[:, np.newaxis].repeat(opt['length_beam_size'], axis=1).reshape(-1)
                assert len(video_ids) == len(all_sents)

            for k, (hyps, scores) in enumerate(zip(all_sents, all_score)):
                #video_id = video_ids[k]

                video_id = video_ids[k]
                pre_sent_len = 0
                assert len(hyps) == len(scores)  

                for j, (hyp, score) in enumerate(zip(hyps, scores)):
                    sent = to_sentence(hyp, vocab)
                    if save_embs:
                        utils.get_words_with_specified_tags(word_to_ix, sent, index_set[video_id])

                    if not pre_sent_len: 
                        pre_sent_len = len(sent.split(' '))
                    else:
                        assert len(sent.split(' ')) == pre_sent_len

                    tqdm.write(('%10s' % video_id) + '(iteration %d Length %d): ' % (j, len(sent.split(' '))) + sent)
                    '''
                    repetition_rate_result = calculate_repetition_rate(sent)
                    for n_gram in range(4):
                        repetition_rate_results[j][n_gram][0] += repetition_rate_result[n_gram][0]
                        repetition_rate_results[j][n_gram][1] += repetition_rate_result[n_gram][1]
                    '''
                    #best_candidate_sents[j].append([video_id, sent])
                    #best_candidate_score[j].append([video_id, score])
                    best_candidate_sents[video_id].append(sent)
                    best_candidate_score[video_id].append(score)

    #print(target_sent)
    #print(target_count)
    #print(list(unique_sent))

    if evaluate_iterative_results:
        assert collect_best_candidate_iterative_results
        keylist = list(best_candidate_sents.keys())
        itrs = len(best_candidate_sents[keylist[0]])
        b4 = []
        m = []
        r = []
        c = []
        for i in range(itrs):
            samples = {}
            for key in keylist:
                samples[key] = []
                samples[key].append({'image_id': key, 'caption': best_candidate_sents[key][i]})     
            
            with suppress_stdout_stderr():
                valid_score, detail_scores = scorer.score(gts, samples, samples.keys())
                b4.append(valid_score["Bleu_4"])
                m.append(valid_score["METEOR"])
                r.append(valid_score["ROUGE_L"])
                c.append(valid_score["CIDEr"])
        print(b4)
        print(m)
        print(r)
        print(c)
        #exit()
        no_score = True


    if save_to_spice:
        pth = './spice_json'
        if not os.path.exists(pth):
            os.makedirs(pth)
        if opt['na']:
            filename = '%s_%s_%s%s_lbs%d_i%d.json'%(opt['dataset'], opt['method'], 'AE' if opt['nv_scale'] else '', opt['paradigm'], opt['length_beam_size'], opt['iterations'])
        else:
            filename = '%s_%d.json' % (opt['dataset'], opt['beam_size'])
        json.dump(spice_res, open(os.path.join(pth, filename), 'w'))

    if collect_best_candidate_iterative_results:
        if save_embs:
            for k in index_set.keys():
                tmp = torch.LongTensor(list(index_set[k])).to(device)
                emb = model.decoder.bert.embedding.word_embeddings(tmp).mean(0).detach().cpu().numpy()
                print(emb.shape)
                embs_db[k] = emb

        assert collect_path is not None
        if not save_embs:
            pickle.dump([best_candidate_sents, best_candidate_score],
                        open(collect_path, 'wb'))

    if opt['batch_size'] == 1:
        latency = all_time/len(loader)

        if write_time:
            f = open('latency.txt', 'a')
            if opt['ar']:
                f.write('AR%d %s %.1f\n'%(opt['beam_size'], opt['dataset'], 1000*latency))
            else:
                f.write('NA %s %s %s %s %d%s%d %.1f\n'%(opt['method'], opt['dataset'], 'AE' if opt['nv_scale'] else '_', opt['paradigm'], opt['length_beam_size'], ' ' if opt['paradigm']=='mp' else (" %d "%opt['q']), opt['iterations'], 1000*latency))

        print(latency, len(loader))            

    res = {}
    if analyze:
        ave_length, novel, unique, usage, hy_res, gram4 = analyze_length_novel_unique(loader.dataset.captions, samples, vocab, splits=loader.dataset.splits, n=1, calculate_novel=calculate_novel)
        if update_gram4:
            res.update({'gram4': gram4})
        res.update({'ave_length': ave_length, 'novel': novel, 'unique': unique, 'usage': usage})   

    if not no_score:
        with suppress_stdout_stderr():
            valid_score, detail_scores = scorer.score(gts, samples, samples.keys())

        #json.dump(detail_scores, open('./nacf_ctmp_b6i5_135.json', 'w'))
        #json.dump(detail_scores, open('./nab_mp_b6i5_135.json', 'w'))
        #json.dump(detail_scores, open('./arb_b5.json', 'w'))
        #json.dump(detail_scores, open('./arb2_b5.json', 'w'))

        #print(detail_scores)
        res.update(valid_score)
        res['loss'] = 0
        if write_time:
            f.write("B4: %.2f\tM: %.2f\tR: %.2f\tC: %.2f\n" % (100 * res["Bleu_4"], 100 * res['METEOR'], 100 * res["ROUGE_L"], 100 * res["CIDEr"]))

        metric_sum = opt.get('metric_sum', [1, 1, 1, 1])
        res['Sum'] = 0
        candidate = [res["Bleu_4"], res["METEOR"], res["ROUGE_L"], res["CIDEr"]]
        for i, item in enumerate(metric_sum):
            if item: res['Sum'] += candidate[i]

        if json_path:
            if not os.path.exists(json_path):
                os.makedirs(json_path)
            #print('364 364 364 364')
            with open(os.path.join(json_path, json_name), 'w') as prediction_results:
                json.dump({"predictions": samples, "scores": valid_score}, prediction_results)
                prediction_results.close()

        def calculate_repetition_rate(sent, n):
            length = len(sent)
            rec = {}
            dd_count = 0
            for i in range(length-n+1):
                key = ' '.join(sent[i:i+n])
                if key in rec.keys():
                    dd_count += 1
                else:
                    rec[key] = i
            return dd_count, length-n+1

        dc, ac = 0, 0
        for key in samples.keys():
            tmp_dc, tmp_ac = calculate_repetition_rate(samples[key][0]['caption'].split(' '), n=1)
            dc += tmp_dc
            ac += tmp_ac
        #print('1-gram repetition rate: %.2f' % (100 * dc / ac))

        return res

    if json_path and vatex:
        if not os.path.exists(json_path):
            os.makedirs(json_path)
        with open(os.path.join(json_path, json_name), 'w') as prediction_results:
            json.dump(vatex_samples, prediction_results)

    if save_videodatainfo:
        if saved_with_pickle:
            assert pickle_path is not None
            pickle.dump(refs, open(pickle_path, 'wb'))
        else:
            model_name = '%s_%s' % (opt['encoder_type'], opt['decoder_type'])
            description = 'The sentences generated by %s where each video has %d captions.' % (model_name, opt['topk'])
            pth = os.path.join(opt['base_dir'], opt['dataset'], 'arvc%d_refs.pkl' % opt['topk'])

            pickle.dump(refs, open(pth, 'wb'))
            tqdm.write('Teacher videodatainfo has been saved to %s' % pth)
            cmd = 'python msvd_prepross.py -tdc %s -ori_wct %d -topk %d' % (opt['dataset'], opt['word_count_threshold'], opt['topk'])
            os.system(cmd)

    return res
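The nested calculate_repetition_rate above counts how many n-grams in a caption
repeat an earlier occurrence. A self-contained copy with a worked example (the
sample sentence is illustrative only):

def calculate_repetition_rate(words, n):
    # Return (duplicate n-gram count, total n-gram count) for a token list.
    seen = set()
    dd_count = 0
    total = len(words) - n + 1
    for i in range(total):
        key = ' '.join(words[i:i + n])
        if key in seen:
            dd_count += 1
        else:
            seen.add(key)
    return dd_count, total

# 'a man is a man' yields unigrams [a, man, is, a, man]; 'a' and 'man' each
# repeat once, so 2 of the 5 unigrams are duplicates (a 40% repetition rate).
dup, total = calculate_repetition_rate('a man is a man'.split(' '), n=1)
print('%d/%d = %.0f%%' % (dup, total, 100 * dup / total))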