Example 1
def language_eval(predictions, cocofmt_file, opt):
    logger.info('>>> Language evaluating ...')
    tmp_checkpoint_json = opt.model_file + str(uuid.uuid4()) + '.json'
    with open(tmp_checkpoint_json, 'w') as f:
        json.dump(predictions, f)
    lang_stats = utils.language_eval(cocofmt_file, tmp_checkpoint_json)
    os.remove(tmp_checkpoint_json)
    return lang_stats
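The uuid-suffixed temporary file above works, but the same dump-evaluate-remove dance is more robust with the standard tempfile module, which guarantees a unique path and cleanup even when evaluation raises. A minimal sketch, assuming the same utils.language_eval as in the example:

import json
import os
import tempfile

def language_eval_tmpfile(predictions, cocofmt_file):
    # write predictions to a unique temporary file, evaluate, always clean up
    with tempfile.NamedTemporaryFile(mode='w', suffix='.json',
                                     delete=False) as f:
        json.dump(predictions, f)
        tmp_json = f.name
    try:
        return utils.language_eval(cocofmt_file, tmp_json)
    finally:
        os.remove(tmp_json)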
Example 2
@torch.no_grad()  # inference only: no autograd state is needed during validation
def validate(model, criterion, loader, opt):
    model.eval()
    loader.reset()

    num_videos = loader.get_num_videos()
    batch_size = loader.get_batch_size()
    num_iters = int(math.ceil(num_videos * 1.0 / batch_size))
    last_batch_size = num_videos % batch_size
    seq_per_img = loader.get_seq_per_img()
    model.set_seq_per_img(seq_per_img)

    loss_sum = 0
    logger.info('#num_iters: %d, batch_size: %d, seq_per_img: %d', num_iters,
                batch_size, seq_per_img)
    predictions = []
    gt_avglogps = []
    test_avglogps = []
    for ii in range(num_iters):
        data = loader.get_batch()
        feats = data['feats']
        if loader.has_label:
            labels = data['labels']
            masks = data['masks']

        if ii == (num_iters - 1) and last_batch_size > 0:
            feats = [f[:last_batch_size] for f in feats]
            if loader.has_label:
                labels = labels[:last_batch_size *
                                seq_per_img]  # labels shape is DxN
                masks = masks[:last_batch_size * seq_per_img]

        if torch.cuda.is_available():
            feats = [feat.cuda() for feat in feats]
            if loader.has_label:
                labels = labels.cuda()
                masks = masks.cuda()

        if loader.has_label:
            pred, gt_seq, gt_logseq = model(feats, labels)
            if opt.output_logp == 1:
                gt_avglogp = utils.compute_avglogp(gt_seq, gt_logseq.data)
                gt_avglogps.extend(gt_avglogp)

            loss = criterion(pred, labels[:, 1:], masks[:, 1:])
            loss_sum += loss.item()

        seq, logseq = model.sample(feats, {'beam_size': opt.beam_size})
        sents = utils.decode_sequence(opt.vocab, seq)
        if opt.output_logp == 1:
            test_avglogp = utils.compute_avglogp(seq, logseq)
            test_avglogps.extend(test_avglogp)

        for jj, sent in enumerate(sents):
            if opt.output_logp == 1:
                entry = {
                    'image_id': data['ids'][jj],
                    'caption': sent,
                    'avglogp': test_avglogp[jj]
                }
            else:
                entry = {'image_id': data['ids'][jj], 'caption': sent}
            predictions.append(entry)
            logger.debug('[%d] video %s: %s' %
                         (jj, entry['image_id'], entry['caption']))

    loss = round(loss_sum / num_iters, 3)
    results = {}
    lang_stats = {}

    if opt.language_eval == 1 and loader.has_label:
        logger.info('>>> Language evaluating ...')
        tmp_checkpoint_json = opt.model_file + str(uuid.uuid4()) + '.json'
        with open(tmp_checkpoint_json, 'w') as f:
            json.dump(predictions, f)
        lang_stats = utils.language_eval(loader.cocofmt_file,
                                         tmp_checkpoint_json)
        os.remove(tmp_checkpoint_json)

    results['predictions'] = predictions
    results['scores'] = {'Loss': -loss}
    results['scores'].update(lang_stats)

    if opt.output_logp == 1:
        avglogp = sum(test_avglogps) / float(len(test_avglogps))
        results['scores'].update({'avglogp': avglogp})

        gt_avglogps = np.array(gt_avglogps).reshape(-1, seq_per_img)
        assert num_videos == gt_avglogps.shape[0]

        gt_avglogps_file = opt.model_file.replace('.pth', '_gt_avglogps.pkl',
                                                  1)
        with open(gt_avglogps_file, 'wb') as f:  # pickle requires binary mode
            cPickle.dump(gt_avglogps, f, protocol=cPickle.HIGHEST_PROTOCOL)

        logger.info('Wrote GT logp to: %s', gt_avglogps_file)

    return results
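Both examples delegate scoring to utils.language_eval, which is not shown here. In captioning codebases of this style it is typically a thin wrapper around the coco-caption toolkit; a hypothetical sketch, assuming pycocotools and pycocoevalcap are installed:

from pycocotools.coco import COCO
from pycocoevalcap.eval import COCOEvalCap

def language_eval(cocofmt_file, pred_file):
    # ground truth in COCO caption format; predictions as
    # [{'image_id': ..., 'caption': ...}, ...]
    coco = COCO(cocofmt_file)
    coco_res = coco.loadRes(pred_file)
    coco_eval = COCOEvalCap(coco, coco_res)
    # score only the images that actually have predictions
    coco_eval.params['image_id'] = coco_res.getImgIds()
    coco_eval.evaluate()
    return coco_eval.eval  # e.g. {'Bleu_4': ..., 'METEOR': ..., 'CIDEr': ...}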
Example 3
@torch.no_grad()  # inference only: no autograd state is needed during validation
def validate(model, criterion, loader, opt, max_iters=None, type='val'):
    # `type` names the split ('val' or 'test') used for the dumped predictions file
    model.eval()
    loader.reset()

    num_videos = loader.get_num_videos()
    batch_size = loader.get_batch_size()
    if max_iters is None:
        num_iters = int(math.ceil(num_videos * 1.0 / batch_size))
    else:
        num_iters = max_iters
    last_batch_size = num_videos % batch_size
    seq_per_img = loader.get_seq_per_img()
    model.set_seq_per_img(seq_per_img)

    loss_sum = 0
    logger.info('#num_iters: %d, batch_size: %d, seq_per_img: %d', num_iters,
                batch_size, seq_per_img)
    predictions = []
    gt_avglogps = []
    test_avglogps = []
    prec_recs = dict()
    for ii in range(num_iters):
        data = loader.get_batch()
        feats = data['feats']
        bfeats = data['bfeats']
        if loader.has_label:
            labels = data['labels']
            masks = data['masks']
            labels_svo = data['labels_svo']

        if ii == (num_iters - 1) and last_batch_size > 0:
            feats = [f[:last_batch_size] for f in feats]
            bfeats = [f[:last_batch_size] for f in bfeats]
            if loader.has_label:
                labels = labels[:last_batch_size *
                                seq_per_img]  # labels shape is DxN
                masks = masks[:last_batch_size * seq_per_img]
                labels_svo = labels_svo[:last_batch_size *
                                        seq_per_img]  # labels shape is DxN

        if torch.cuda.is_available():
            feats = [feat.cuda() for feat in feats]
            bfeats = [bfeat.cuda() for bfeat in bfeats]
            if loader.has_label:
                labels = labels.cuda()
                masks = masks.cuda()
                labels_svo = labels_svo.cuda()

        if loader.has_label and model.gt_concepts_while_testing == 0:
            pred, gt_seq, gt_logseq, _, _, _ = model(feats, bfeats, labels,
                                                     labels_svo)
            if opt.output_logp == 1:
                gt_avglogp = utils.compute_avglogp(gt_seq, gt_logseq.data)
                gt_avglogps.extend(gt_avglogp)

            loss = criterion(pred, labels[:, 1:], masks[:, 1:])
            loss_sum += loss.item()
            del pred, gt_seq, gt_logseq
            torch.cuda.empty_cache()

        seq, logseq, _, concept_seq = model.sample(
            feats, bfeats, labels_svo, {'beam_size': opt.beam_size})
        sents = utils.decode_sequence(opt.vocab, seq)
        if opt.output_logp == 1:
            test_avglogp = utils.compute_avglogp(seq, logseq)
            test_avglogps.extend(test_avglogp)

        if concept_seq is not None:
            # collapse per-caption repeats of the SVO labels to one row per video
            labels_svo = torch.reshape(
                labels_svo, (-1, opt.test_seq_per_img, opt.num_concepts))[:, 0]

            concept_seq_words = utils.decode_sequence(opt.vocab, concept_seq)

            # Calculate TP,FP,FN for precision and recall calcs
            if opt.grounder_type in ['niuc', 'nioc', 'iuc', 'ioc']:
                gt_concept_seq_words = utils.decode_sequence(
                    opt.vocab, labels_svo)
                gt_concept_seq_words = [
                    g.split(' ') for g in gt_concept_seq_words
                ]
                for bi in range(len(gt_concept_seq_words)):
                    pr_words = list()
                    # each predicted concept string covers `repeat` ground-truth rows
                    repeat = int(
                        len(gt_concept_seq_words) / len(concept_seq_words))

                    for pr_word in concept_seq_words[bi // repeat].split(' '):
                        pr_words.append(pr_word)
                        if pr_word not in prec_recs:
                            prec_recs[pr_word] = [0, 0, 0]
                        if pr_word in gt_concept_seq_words[bi]:
                            prec_recs[pr_word][0] += 1  # TP
                        else:
                            prec_recs[pr_word][1] += 1  # FP
                    for gt in gt_concept_seq_words[bi]:
                        if gt not in prec_recs:
                            prec_recs[gt] = [0, 0, 0]
                        if gt not in pr_words:
                            prec_recs[gt][2] += 1  # FN
            try:
                for jj, (sent,
                         sent_svo) in enumerate(zip(sents, concept_seq_words)):
                    if opt.output_logp == 1:
                        entry = {
                            'image_id': data['ids'][jj],
                            'caption': sent,
                            'svo': sent_svo,
                            'avglogp': test_avglogp[jj],
                            'box_att': model.attention_record[jj].tolist()
                        }
                    else:
                        entry = {
                            'image_id': data['ids'][jj],
                            'caption': sent,
                            'svo': sent_svo
                        }  # 'box_att': model.attention_record[jj].tolist() removed for the transformer model
                    predictions.append(entry)
                    logger.debug('[%d] video %s: %s pr(%s) gt(%s)' %
                                 (jj, entry['image_id'], entry['caption'],
                                  entry['svo'], gt_concept_seq_words[jj]))
            except IndexError:
                logger.warning('caption/concept count mismatch in batch %d', ii)
        else:

            for jj, sent in enumerate(sents):
                if opt.output_logp == 1:
                    entry = {
                        'image_id': data['ids'][jj],
                        'caption': sent,
                        'avglogp': test_avglogp[jj],
                        'box_att': model.attention_record[jj].tolist()
                    }
                else:
                    entry = {'image_id': data['ids'][jj], 'caption': sent}
                predictions.append(entry)
                logger.debug('[%d] video %s: %s' %
                             (jj, entry['image_id'], entry['caption']))

        del feats, seq, logseq
        if loader.has_label:
            del labels, masks, labels_svo
        torch.cuda.empty_cache()

    loss = round(loss_sum / num_iters, 3)
    results = {}
    lang_stats = {}

    if opt.language_eval == 1 and loader.has_label:
        logger.info('>>> Language evaluating ...')
        tmp_checkpoint_json = os.path.splitext(
            opt.model_file)[0] + '_' + type + '.json'
        with open(tmp_checkpoint_json, 'w') as f:
            json.dump(predictions, f)
        lang_stats = utils.language_eval(loader.cocofmt_file,
                                         tmp_checkpoint_json)
        # os.remove(tmp_checkpoint_json)

    results['predictions'] = predictions
    results['scores'] = {'Loss': -loss}
    results['scores'].update(lang_stats)

    if opt.output_logp == 1:
        avglogp = sum(test_avglogps) / float(len(test_avglogps))
        results['scores'].update({'avglogp': avglogp})

        gt_avglogps = np.array(gt_avglogps).reshape(-1, seq_per_img)
        assert num_videos == gt_avglogps.shape[0]

        gt_avglogps_file = opt.model_file.replace('.pth', '_gt_avglogps.pkl',
                                                  1)
        with open(gt_avglogps_file, 'wb') as f:  # pickle requires binary mode
            cPickle.dump(gt_avglogps, f, protocol=cPickle.HIGHEST_PROTOCOL)

        logger.info('Wrote GT logp to: %s', gt_avglogps_file)

    if len(prec_recs.keys()) > 0:
        prec = dict()
        rec = dict()
        for k, v in prec_recs.items():
            if v[0] + v[1] > 0:
                prec[k] = v[0] / float(v[0] + v[1])
            else:
                prec[k] = 0
            if v[0] + v[2] > 0:
                rec[k] = v[0] / float(v[0] + v[2])
            else:
                rec[k] = 0

        precv = sum(prec.values()) / len(prec_recs)
        recv = sum(rec.values()) / len(prec_recs)
        results['scores'].update({'prec': precv, 'rec': recv})
        print('prec: ', precv, ' .. rec: ', recv)
        logger.debug('prec: ' + str(prec))
        logger.debug('rec: ' + str(rec))
    return results
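utils.compute_avglogp is likewise external to these examples. Given how its outputs are used (one scalar per sampled sequence, later reshaped to (num_videos, seq_per_img)), a plausible sketch is the mean per-token log-probability truncated at the end-of-sequence token; treating id 0 as EOS is an assumption:

def compute_avglogp(seq, logseq, eos_token=0):
    # seq: (batch, T) sampled token ids; logseq: (batch, T) per-token log-probs
    avglogps = []
    for tokens, logps in zip(seq.tolist(), logseq.tolist()):
        total, count = 0.0, 0
        for tok, lp in zip(tokens, logps):
            total += lp
            count += 1
            if tok == eos_token:  # stop at end-of-sequence
                break
        avglogps.append(total / max(count, 1))
    return avglogps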
Example 4 (truncated)
            # append the caption to either the predictions or the fake groundtruth
            cap_id = 0
            for index, cap in enumerate(caps[id]):
                if index == sample_index:  # this is the 'predicted' caption
                    predictions.append({'image_id': id, 'caption': cap})
                else:  # this remains a groundtruth caption
                    gte['annotations'].append({'caption': cap, 'image_id': id, 'id': cap_id})
                    cap_id += 1

        # dump out the new groundtruth and prediction json files
        json.dump(gte, open(tmp_file_gt, 'w'))
        json.dump(predictions, open(tmp_file_pr, 'w'))

        # calculate the language stats
        lang_stats = utils.language_eval(tmp_file_gt, tmp_file_pr)
        for k, v in lang_stats.items():
            if k not in scores:
                scores[k] = list()
            scores[k].append(v)

    print('------------ scores after %d runs ------------' % runs)
    print(scores)
    for k, v in scores.items():
        print(k, statistics.mean(v), statistics.stdev(v))

    if 0:
        ######################################## Compare training scores with overfitting
        # setup paths
        cocofmt_file = os.path.join('datasets', 'msvd', 'metadata', 'msvd_train_cocofmt.json')
        # cocofmt_file = os.path.join('datasets', 'msrvtt', 'metadata', 'msrvtt_train_cocofmt.json')
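Stepping back, the truncated loop at the top of Example 4 implements a leave-one-out human baseline: for each video one ground-truth caption is held out as the 'prediction' and scored against the remaining references, and mean/stdev over several randomized runs summarize the spread. A hypothetical reconstruction of the missing outer loop, with caps (a video-id to caption-list dict), runs, and the temp-file names all inferred from the fragment:

import json
import random
import statistics

def holdout_eval(caps, runs=5,
                 tmp_file_gt='tmp_gt.json', tmp_file_pr='tmp_pr.json'):
    scores = dict()
    for _ in range(runs):
        # COCO-format ground truth minus one randomly held-out caption per video
        gte = {'annotations': [], 'images': [{'id': vid} for vid in caps],
               'type': 'captions', 'info': {}, 'licenses': []}
        predictions = []
        cap_id = 0
        for vid, captions in caps.items():
            sample_index = random.randrange(len(captions))
            for index, cap in enumerate(captions):
                if index == sample_index:  # this becomes the 'predicted' caption
                    predictions.append({'image_id': vid, 'caption': cap})
                else:  # this remains a ground-truth reference
                    gte['annotations'].append(
                        {'caption': cap, 'image_id': vid, 'id': cap_id})
                    cap_id += 1
        with open(tmp_file_gt, 'w') as f:
            json.dump(gte, f)
        with open(tmp_file_pr, 'w') as f:
            json.dump(predictions, f)
        for k, v in utils.language_eval(tmp_file_gt, tmp_file_pr).items():
            scores.setdefault(k, []).append(v)
    for k, v in scores.items():
        print(k, statistics.mean(v), statistics.stdev(v))
    return scores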