def language_eval_excoco(predictions, predictions_bleu, sents_label_eval, loader):
    Scorer = CiderD()
    Bleu_scorer = Bleu(4)
    METEOR_scorer = Meteor()
    ROUGE_scorer = Rouge()

    c_score, _ = Scorer.compute_score(sents_label_eval, predictions)
    b_score, _ = Bleu_scorer.compute_score(sents_label_eval, predictions_bleu)
    m_score, _ = METEOR_scorer.compute_score(sents_label_eval, predictions_bleu)
    r_score, _ = ROUGE_scorer.compute_score(sents_label_eval, predictions_bleu)

    print('Evaluating {} samples'.format(len(predictions)))
    print('Bleu_1 : ' + str(b_score[0]))
    print('Bleu_2 : ' + str(b_score[1]))
    print('Bleu_3 : ' + str(b_score[2]))
    print('Bleu_4 : ' + str(b_score[3]))
    print('METEOR : ' + str(m_score))
    print('ROUGE_L : ' + str(r_score))
    print('CIDEr : ' + str(c_score))

    lang_stat = {
        'BLEU_1': b_score[0],
        'BLEU_2': b_score[1],
        'BLEU_3': b_score[2],
        'BLEU_4': b_score[3],
        'METEOR': m_score,
        'ROUGE_L': r_score,
        'CIDEr': c_score,
    }
    return lang_stat
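# A minimal usage sketch for language_eval_excoco (assumed data, following the
# conventions above): CiderD consumes a list of {'image_id', 'caption'}
# records, while the Bleu/METEOR/ROUGE scorers consume an
# {image_id: [caption, ...]} dict keyed like the references. The captions are
# illustrative placeholders; the call is left commented since Meteor spawns a
# Java subprocess.
sents_label_eval = {0: ['a dog runs on the grass', 'a dog is running outside']}
predictions = [{'image_id': 0, 'caption': ['a dog runs in a field']}]
predictions_bleu = {0: ['a dog runs in a field']}
# lang_stat = language_eval_excoco(predictions, predictions_bleu, sents_label_eval, loader=None)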
def get_reward_cirder(gen_result, gts_data, opt):
    global CiderD_scorer
    if CiderD_scorer is None:
        # lazily import and build the scorer from the configured paths
        path_cider = opt.path_cider
        path_idxs = opt.path_idxs
        sys.path.append(path_cider)
        from pyciderevalcap.ciderD.ciderD import CiderD
        CiderD_scorer = CiderD(df='coco-train-idxs', path=path_idxs)

    batch_size = gen_result.size(0)  # batch_size = sample_size * seq_per_img
    seq_per_img = batch_size // len(gts_data)

    res = OrderedDict()
    gen_result = gen_result.cpu().numpy()
    # sampled results
    for i in range(batch_size):
        res[i] = [array_to_str(gen_result[i])]

    gts = OrderedDict()
    for i in range(len(gts_data)):
        gts[i] = [array_to_str(gts_data[i][j]) for j in range(len(gts_data[i]))]

    res = [{'image_id': i, 'caption': res[i]} for i in range(batch_size)]
    gts = {i: gts[i % batch_size // seq_per_img] for i in range(batch_size)}

    _, scores = CiderD_scorer.compute_score(gts, res)
    sample_mean = np.mean(scores)
    print('Cider scores: {:.3f} sample:{:.3f}'.format(_, sample_mean))

    # rewards: batch_size * seq_length (per-sentence score repeated per step)
    rewards = np.repeat(scores[:, np.newaxis], gen_result.shape[1], 1)
    return rewards, sample_mean
def init_scorer(cached_tokens):
    global CiderD_scorer
    CiderD_scorer = CiderD_scorer or CiderD(df=cached_tokens)
    global Bleu_scorer
    # Bleu(4) computes BLEU-1 through BLEU-4; the argument is the maximum
    # n-gram order, not a selection of a single BLEU variant.
    Bleu_scorer = Bleu_scorer or Bleu(4)
def init_scorer(cached_tokens):
    global CiderD_scorer
    CiderD_scorer = CiderD_scorer or CiderD(df=cached_tokens)
    global Cider_scorer
    Cider_scorer = Cider_scorer or Cider(df=cached_tokens)
    global Bleu_scorer
    Bleu_scorer = Bleu_scorer or Bleu(4)
def get_scorers(cider_idx_path):
    return {
        'cider': CiderD(df=cider_idx_path),
        'bleu': Bleu(),
        'rouge': Rouge(),
        'meteor': Meteor(),
    }
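# Hypothetical usage of get_scorers; 'coco-train-idxs' and the captions are
# assumptions, and compute_score follows the (gts, res) convention used
# throughout these snippets.
scorers = get_scorers('coco-train-idxs')
gts = {0: ['a man rides a horse', 'a person on a horse']}
res = [{'image_id': 0, 'caption': ['a man riding a horse']}]
cider_score, per_image_scores = scorers['cider'].compute_score(gts, res)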
def __init__(self, opt, dataset):
    super(ReinforceCriterion, self).__init__()
    self.dataset = dataset
    self.reward_type = opt.reward_type
    self.bleu = None
    if self.reward_type == 'METEOR':
        from vist_eval.meteor.meteor import Meteor
        self.reward_scorer = Meteor()
    elif self.reward_type == 'CIDEr':
        sys.path.append("cider")
        from pyciderevalcap.ciderD.ciderD import CiderD
        self.reward_scorer = CiderD(df=opt.cached_tokens)
    elif self.reward_type == 'Bleu_4' or self.reward_type == 'Bleu_3':
        from vist_eval.bleu.bleu import Bleu
        self.reward_scorer = Bleu(4)
        # Bleu returns scores for n = 1..4; index n-1 selects the wanted one
        self.bleu = int(self.reward_type[-1]) - 1
    elif self.reward_type == 'ROUGE_L':
        from vist_eval.rouge.rouge import Rouge
        self.reward_scorer = Rouge()
    else:
        err_msg = "{} scorer hasn't been implemented".format(self.reward_type)
        logging.error(err_msg)
        raise Exception(err_msg)
class Cider:
    def __init__(self, args):
        self.cider = CiderD(df='coco-train')
        # pickle files must be opened in binary mode
        with open('data/train_references.pkl', 'rb') as fid:
            self.references = pickle.load(fid)

    def get_scores(self, seqs, images):
        captions = self._get_captions(seqs)
        res = [{'image_id': i, 'caption': [caption]}
               for i, caption in enumerate(captions)]
        gts = {i: self.references[image_id]
               for i, image_id in enumerate(images)}
        _, scores = self.cider.compute_score(gts, res)
        return scores

    def _get_captions(self, seqs):
        return [self._get_caption(seq) for seq in seqs]

    def _get_caption(self, seq):
        words = []
        for word in seq:
            words.append(str(word))
            if word == 0:
                break
        return ' '.join(words)
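# Note on the decoding convention used by _get_caption above: token ids are
# joined as strings and the sequence stops after the first 0 (EOS). A small
# equivalent check with illustrative ids:
seq = [5, 12, 9, 0, 7]
caption = ' '.join(str(w) for w in seq[:seq.index(0) + 1])  # -> '5 12 9 0'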
def init_scorer(cached_tokens):
    global CiderD_scorer
    CiderD_scorer = CiderD_scorer or CiderD(df=cached_tokens)
    global Bleu_scorer
    Bleu_scorer = Bleu_scorer or Bleu(4)
    global Bert_scorer
    Bert_scorer = Bert_scorer or BertScorer(
        verbose=False, all_layers=False, lang='en')
def init_scorer(cached_tokens):
    global CiderD_scorer
    CiderD_scorer = CiderD_scorer or CiderD(df=cached_tokens)
    global Bleu_scorer
    Bleu_scorer = Bleu_scorer or Bleu(4)
    global Meteor_scorer
    # reuse cached instances like the scorers above; Meteor in particular
    # spawns a Java subprocess, so avoid re-instantiating it on every call
    Meteor_scorer = Meteor_scorer or Meteor()
    global Rouge_scorer
    Rouge_scorer = Rouge_scorer or Rouge()
def init_scorer(cache_tokens):
    global CiderD_scorer
    CiderD_scorer = CiderD_scorer or CiderD(df=cache_tokens)
    global Bleu_scorer
    Bleu_scorer = Bleu_scorer or Bleu(4)
from __future__ import division
from __future__ import print_function

import sys
import time
from collections import OrderedDict

import numpy as np
import torch
from torch.autograd import Variable

import misc.utils as utils

sys.path.append("cider")
from pyciderevalcap.ciderD.ciderD import CiderD
# from pyciderevalcap.cider.cider import Cider

CiderD_scorer = CiderD(df='coco-train-idxs')
# CiderD_scorer = CiderD(df='corpus')


def array_to_str(arr):
    out = ''
    for i in range(len(arr)):
        out += str(arr[i]) + ' '
        if arr[i] == 0:
            break
    return out.strip()


def get_self_critical_reward(model, fc_feats, att_feats, data, gen_result):
    batch_size = gen_result.size(0)  # batch_size = sample_size * seq_per_img
    seq_per_img = batch_size // len(data['gts'])
    # get greedy decoding baseline
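# A quick check of the array_to_str convention defined above: the id sequence
# becomes the space-separated token string the scorers consume, keeping
# everything up to and including the first 0 (EOS). Ids are illustrative.
assert array_to_str([3, 41, 7, 0, 9]) == '3 41 7 0'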
def init_cider_scorer(cached_tokens):
    global CiderD_scorer
    CiderD_scorer = CiderD_scorer or CiderD(df=cached_tokens)
def train(model, criterion, optimizer, train_loader, val_loader, opt,
          rl_criterion=None):
    infos = {
        'iter': 0,
        'epoch': 0,
        'start_epoch': 0,
        'best_score': float('-inf'),
        'best_iter': 0,
        'best_epoch': opt.max_epochs
    }

    checkpoint_checked = False
    rl_training = False
    seq_per_img = train_loader.get_seq_per_img()
    infos_history = {}

    if os.path.exists(opt.start_from):
        # loading the same model file at a different experiment dir
        start_from_file = os.path.join(
            opt.start_from, os.path.basename(opt.model_file)) \
            if os.path.isdir(opt.start_from) else opt.start_from
        logger.info('Loading state from: %s', start_from_file)
        checkpoint = torch.load(start_from_file)
        model.load_state_dict(checkpoint['model'])
        infos = checkpoint['infos']
        infos['start_epoch'] = infos['epoch']
        checkpoint_checked = True  # this epoch is already checked
    else:
        logger.info('No checkpoint found! Training from scratch')

    if opt.use_rl == 1 and opt.use_rl_after == 0:
        opt.use_rl_after = infos['epoch']
        opt.use_cst_after = infos['epoch']
    train_loader.set_current_epoch(infos['epoch'])

    while True:
        t_start = time.time()
        model.train()
        data = train_loader.get_batch()
        feats = [Variable(feat, volatile=False) for feat in data['feats']]
        labels = Variable(data['labels'], volatile=False)
        masks = Variable(data['masks'], volatile=False)

        if torch.cuda.is_available():
            feats = [feat.cuda() for feat in feats]
            labels = labels.cuda()
            masks = masks.cuda()

        # implement scheduled sampling
        opt.ss_prob = 0
        if opt.use_ss == 1 and infos['epoch'] >= opt.use_ss_after:
            annealing_prob = opt.ss_k / (opt.ss_k + np.exp(
                (infos['epoch'] - opt.use_ss_after) / opt.ss_k))
            opt.ss_prob = min(1 - annealing_prob, opt.ss_max_prob)
            model.set_ss_prob(opt.ss_prob)

        if opt.use_rl == 1 and infos['epoch'] >= opt.use_rl_after \
                and not rl_training:
            logger.info('Using RL objective...')
            rl_training = True
            bcmr_scorer = {
                'Bleu_4': Bleu(),
                'CIDEr': CiderD(df=opt.train_cached_tokens),
                'METEOR': Meteor(),
                'ROUGE_L': Rouge()
            }[opt.eval_metric]
            # logger.info('loading gt refs: %s', train_loader.cocofmt_file)
            # gt_refs = utils.load_gt_refs(train_loader.cocofmt_file)

        mixer_from = opt.mixer_from
        if opt.use_mixer == 1 and rl_training:
            # -1 for annealing
            if opt.mixer_from == -1:
                annealing_mixer = opt.seq_length - int(
                    np.ceil((infos['epoch'] - opt.use_rl_after + 1) /
                            float(opt.mixer_descrease_every)))
                mixer_from = max(1, annealing_mixer)
            model.set_mixer_from(mixer_from)

        scb_captions = opt.scb_captions
        if opt.use_cst == 1 and rl_training:
            if opt.scb_captions == -1:
                annealing_robust = int(
                    np.ceil((infos['epoch'] - opt.use_cst_after + 1) /
                            float(opt.cst_increase_every)))
                scb_captions = min(annealing_robust, seq_per_img - 1)

        optimizer.zero_grad()
        model.set_seq_per_img(seq_per_img)

        if rl_training:
            # using mixer
            pred, model_res, logprobs = model(feats, labels)
            if opt.use_cst == 0:
                # greedy decoding baseline in the SCST paper
                greedy_baseline, _ = model.sample(
                    [Variable(f.data, volatile=True) for f in feats],
                    {'sample_max': 1, 'expand_feat': opt.expand_feat})
            if opt.use_cst == 1:
                bcmrscores = data['bcmrscores']
                reward, m_score, g_score = utils.get_cst_reward(
                    model_res, data['gts'], bcmr_scorer,
                    bcmrscores=bcmrscores,
                    expand_feat=opt.expand_feat,
                    seq_per_img=train_loader.get_seq_per_img(),
                    scb_captions=scb_captions,
                    scb_baseline=opt.scb_baseline,
                    use_eos=opt.use_eos,
                    use_mixer=opt.use_mixer)
            else:
                # use greedy baseline by default, compute self-critical reward
                reward, m_score, g_score = utils.get_self_critical_reward(
                    model_res, greedy_baseline, data['gts'], bcmr_scorer,
                    expand_feat=opt.expand_feat,
                    seq_per_img=train_loader.get_seq_per_img(),
                    use_eos=opt.use_eos)
            loss = rl_criterion(
                model_res, logprobs,
                Variable(torch.from_numpy(reward).float().cuda(),
                         requires_grad=False))
        else:
            pred = model(feats, labels)[0]
            loss = criterion(pred, labels[:, 1:], masks[:, 1:])

        loss.backward()
        clip_grad_norm(model.parameters(), opt.grad_clip)
        optimizer.step()

        infos['TrainLoss'] = loss.data[0]
        infos['mixer_from'] = mixer_from
        infos['scb_captions'] = scb_captions

        if infos['iter'] % opt.print_log_interval == 0:
            elapsed_time = time.time() - t_start
            log_info = [('Epoch', infos['epoch']),
                        ('Iter', infos['iter']),
                        ('Loss', infos['TrainLoss'])]
            if rl_training:
                log_info += [('Reward', np.mean(reward[:, 0])),
                             ('{} (m)'.format(opt.eval_metric), m_score),
                             ('{} (b)'.format(opt.eval_metric), g_score)]
            if opt.use_ss == 1:
                log_info += [('ss_prob', opt.ss_prob)]
            if opt.use_mixer == 1:
                log_info += [('mixer_from', mixer_from)]
            if opt.use_cst == 1:
                log_info += [('scb_captions', scb_captions)]
            log_info += [('Time', elapsed_time)]
            logger.info('%s', '\t'.join(
                ['{}: {}'.format(k, v) for (k, v) in log_info]))

        infos['iter'] += 1
        if infos['epoch'] < train_loader.get_current_epoch():
            infos['epoch'] = train_loader.get_current_epoch()
            checkpoint_checked = False
            learning_rate = utils.adjust_learning_rate(
                opt, optimizer, infos['epoch'] - infos['start_epoch'])
            logger.info('===> Learning rate: %f: ', learning_rate)

        if (infos['epoch'] >= opt.save_checkpoint_from
                and infos['epoch'] % opt.save_checkpoint_every == 0
                and not checkpoint_checked):
            # evaluate the validation performance
            results = validate(model, criterion, val_loader, opt)
            logger.info('Validation output: %s',
                        json.dumps(results['scores'], indent=4, sort_keys=True))
            infos.update(results['scores'])
            check_model(model, opt, infos, infos_history)
            checkpoint_checked = True

        if (infos['epoch'] >= opt.max_epochs
                or infos['epoch'] - infos['best_epoch'] > opt.max_patience):
            logger.info('>>> Terminating...')
            break

    return infos
import cPickle as pickle
import os
import sys
sys.path.append('/home/llj/caffe/examples/caption-eval/coco-caption')

from collections import defaultdict

from pycocoevalcap.bleu.bleu import Bleu
from pycocoevalcap.rouge.rouge import Rouge
from pycocoevalcap.cider.cider import Cider
from pycocoevalcap.meteor.meteor import Meteor
from pyciderevalcap.ciderD.ciderD import CiderD

CiderD_scorer = CiderD(df='msvd')  # need to change for msvd


def score_all(ref, hypo):
    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Meteor(), "METEOR"),
        (Rouge(), "ROUGE_L"),
        (Cider(), "CIDEr"),
    ]
    final_scores = {}
    for scorer, method in scorers:
        score, scores = scorer.compute_score(ref, hypo)
        if type(score) == list:
            for m, s in zip(method, score):
                final_scores[m] = s
        else:
            final_scores[method] = score
    return final_scores


def score(ref, hypo):
def get_sample_reward_aic(sample_res, gts_data, gamma, vocab, opt):
    batch_size = sample_res.size(0)  # batch_size = sample_size * seq_per_img
    seq_length = sample_res.size(1)

    global CiderD_scorer
    global Bleu_scorer
    global Rouge_scorer
    # 'global Meteor_scorer' was missing in the original: without it the
    # assignment below only creates a local, leaving the global undefined
    # on later calls
    global Meteor_scorer
    if CiderD_scorer is None:
        # lazily import and build the scorers from the configured paths
        path_cider = opt.path_cider
        path_idxs = opt.path_idxs
        sys.path.append(path_cider)
        from pyciderevalcap.ciderD.ciderD import CiderD
        from pyciderevalcap.bleu.bleu import Bleu
        from pyciderevalcap.rouge.rouge import Rouge
        from pyciderevalcap.meteor.meteor import Meteor
        CiderD_scorer = CiderD(df=opt.cider_idxs, path=path_idxs)
        Bleu_scorer = Bleu()
        Rouge_scorer = Rouge()
        Meteor_scorer = Meteor()

    seq_per_img = batch_size // len(gts_data)

    res = OrderedDict()
    sample_res = sample_res.cpu().numpy()
    # sampled results
    for i in range(batch_size):
        res[i] = [array_to_str_aic(sample_res[i], vocab)]

    gts = OrderedDict()
    for i in range(len(gts_data)):
        gts[i] = [array_to_str_aic(gts_data[i][j], vocab)
                  for j in range(len(gts_data[i]))]

    res = [{'image_id': i, 'caption': res[i]} for i in range(batch_size)]
    gts = {i: gts[i // seq_per_img] for i in range(batch_size)}

    if opt.rl_metric == 'CIDEr':
        _, scores = CiderD_scorer.compute_score(gts, res)
    elif opt.rl_metric == 'ROUGE_L':
        _, scores = Rouge_scorer.compute_score(gts, res)
    elif opt.rl_metric == 'Bleu_4':
        _, scores = Bleu_scorer.compute_score(gts, res)
        _ = _[-1]
        scores = np.array(scores[-1])
    elif opt.rl_metric == 'AVG':
        d_, d_scores = CiderD_scorer.compute_score(gts, res)
        b_, b_scores = Bleu_scorer.compute_score(gts, res)
        r_, r_scores = Rouge_scorer.compute_score(gts, res)
        b_ = b_[-1]
        b_scores = np.array(b_scores[-1])
        _ = (d_ + b_ + r_) / 3
        scores = (d_scores + b_scores + r_scores) / 3
    elif opt.rl_metric == 'Meteor':
        _, scores = Meteor_scorer.compute_score(gts, res)

    sample_mean = np.mean(scores)  # mean over the sampled batch
    print('scores: {:.3f} sample:{:.3f}'.format(_, sample_mean))

    sample_reward = scores  # batch_size
    # discount weights over timesteps: [gamma**(seq_length-1), ..., gamma**0]
    list_gamma = np.logspace(seq_length - 1, 0, seq_length, base=gamma)
    # batch_size * seq_length
    batch_gamma = np.repeat(list_gamma[np.newaxis, :], batch_size, 0)
    batch_sample_reward = np.repeat(sample_reward[:, np.newaxis],
                                    seq_length, 1)
    full_sample_reward = batch_gamma * batch_sample_reward

    # full_sample_reward : batch_size * seq_length
    # sample_mean        : scalar
    return full_sample_reward, sample_mean
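# A small worked check of the discount schedule above (gamma=0.9 and
# seq_length=4 are assumed values): np.logspace(3, 0, 4, base=0.9) yields
# [0.9**3, 0.9**2, 0.9**1, 0.9**0], so earlier tokens are discounted most.
import numpy as np
list_gamma = np.logspace(3, 0, 4, base=0.9)   # [0.729, 0.81, 0.9, 1.0]
sample_reward = np.array([1.2, 0.4])          # e.g. per-sentence CIDEr-D
full = sample_reward[:, np.newaxis] * list_gamma[np.newaxis, :]
# full.shape == (2, 4); row i equals sample_reward[i] * list_gamma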
class get_self_critical_reward(nn.Module):
    def __init__(self, opt):
        super(get_self_critical_reward, self).__init__()
        self.vocab_size = opt.vocab_size
        self.st2towidx = opt.st2towidx
        self.opt = opt
        # self.st2towidx.requires_grad = False
        self.CiderD_scorer = CiderD(df=opt.cached_tokens)

    def forward(self, gen_input, greedy_input, gt_gts, ncap):
        gen_txt_seq, gen_bn_seq, gen_vis_seq = gen_input
        greedy_txt_seq, greedy_bn_seq, greedy_vis_seq = greedy_input
        self.st2towidx = self.st2towidx.type_as(gen_txt_seq)

        batch_size = gen_txt_seq.size(0)
        seq_per_img = batch_size // gt_gts.size(0)
        gen_result = gen_txt_seq.new(gen_txt_seq.size()).zero_()
        greedy_result = greedy_txt_seq.new(greedy_txt_seq.size()).zero_()

        gen_mask = gen_txt_seq < self.vocab_size
        gen_vis_seq = gen_vis_seq.view(batch_size, -1)
        gen_bn_seq = gen_bn_seq.view(batch_size, -1)
        # compose the sampled sequence: textual ids are kept, visual-word ids
        # are mapped back to word ids through st2towidx
        gen_result[gen_mask] = gen_txt_seq[gen_mask]
        gen_vis_idx = gen_vis_seq[gen_mask == 0] * 2 + gen_bn_seq[gen_mask == 0] - 1
        gen_result[gen_mask == 0] = self.st2towidx[gen_vis_idx]

        greedy_mask = greedy_txt_seq < self.vocab_size
        greedy_vis_seq = greedy_vis_seq.view(batch_size, -1)
        greedy_bn_seq = greedy_bn_seq.view(batch_size, -1)
        # compose the greedy sequence the same way
        greedy_result[greedy_mask] = greedy_txt_seq[greedy_mask]
        greedy_vis_idx = greedy_vis_seq[greedy_mask == 0] * 2 + greedy_bn_seq[greedy_mask == 0] - 1
        greedy_result[greedy_mask == 0] = self.st2towidx[greedy_vis_idx]

        res = OrderedDict()
        gen_result = gen_result.cpu().numpy()
        greedy_result = greedy_result.cpu().numpy()
        for i in range(batch_size):
            res[i] = [array_to_str(gen_result[i])]
        for i in range(batch_size):
            res[batch_size + i] = [array_to_str(greedy_result[i])]

        gts = OrderedDict()
        for i in range(batch_size):
            gts_np = gt_gts[i][:ncap.data[i]].data.cpu().numpy()
            gts[i] = [array_to_str(gts_np[j]) for j in range(len(gts_np))]

        res = [{'image_id': i, 'caption': res[i]} for i in range(2 * batch_size)]
        gts = {i: gts[i % batch_size // seq_per_img] for i in range(2 * batch_size)}

        _, scores = self.CiderD_scorer.compute_score(gts, res)
        # self-critical advantage: sampled score minus greedy baseline
        scores = scores[:batch_size] - scores[batch_size:]
        rewards = np.repeat(scores[:, np.newaxis], gen_result.shape[1], 1)
        return rewards, _
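# How the gts index mapping above fans references out to both the sampled and
# greedy halves of res (illustrative values: batch_size=4, seq_per_img=2):
batch_size, seq_per_img = 4, 2
idx = [i % batch_size // seq_per_img for i in range(2 * batch_size)]
# idx == [0, 0, 1, 1, 0, 0, 1, 1]: entries i and batch_size + i share the
# same reference set, so sample and greedy captions are scored identically.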
def train(opt):
    # set up the dataloader
    loader = DataLoader(opt)
    opt.vocab_size = loader.vocab_size
    opt.seq_length = loader.seq_length

    # set the checkpoint path
    opt.checkpoint_path = os.path.join(opt.checkpoint_path, opt.id)
    if not os.path.exists(opt.checkpoint_path):
        os.makedirs(opt.checkpoint_path)
        os.makedirs(opt.checkpoint_path + '/logs')
        print(opt.checkpoint_path + ' creating !')
    else:
        print(opt.checkpoint_path + ' already exists!')

    tb_summary_writer = tb and tb.SummaryWriter(opt.checkpoint_path)

    infos = {}
    histories = {}
    if opt.start_from is not None:
        # open old infos and check if models are compatible
        with open(os.path.join(
                opt.checkpoint_path,
                'infos_' + opt.id + format(int(opt.start_from), '04') + '.pkl')) as f:
            infos = cPickle.load(f)
            saved_model_opt = infos['opt']
            need_be_same = ["caption_model", "att_feat_size", "rnn_size",
                            "input_encoding_size"]
            for checkme in need_be_same:
                assert vars(saved_model_opt)[checkme] == vars(opt)[checkme], \
                    "Command line argument and saved model disagree on '%s'" % checkme
        if os.path.isfile(os.path.join(
                opt.checkpoint_path,
                'histories_' + opt.id + format(int(opt.start_from), '04') + '.pkl')):
            with open(os.path.join(
                    opt.checkpoint_path,
                    'histories_' + opt.id + format(int(opt.start_from), '04') + '.pkl')) as f:
                histories = cPickle.load(f)

    iteration = infos.get('iter', 0)
    epoch = infos.get('epoch', 0)

    val_result_history = histories.get('val_result_history', {})
    loss_history = histories.get('loss_history', {})
    word_loss_history = histories.get('word_loss_history', {})
    MAD_loss_history = histories.get('MAD_loss_history', {})
    SAP_loss_history = histories.get('SAP_loss_history', {})
    ss_prob_history = histories.get('ss_prob_history', {})
    lr_history = histories.get('lr_history', {})

    loader.iterators = infos.get('iterators', loader.iterators)
    loader.split_ix = infos.get('split_ix', loader.split_ix)
    if opt.load_best_score == 1:
        best_val_score = infos.get('best_val_score', None)

    # set up the model and make sure it is in training mode
    threshold = opt.threshold
    sc_flag = False
    num_gpu = opt.num_gpu
    model = models.setup(opt).cuda(device=0)
    model.train()
    update_lr_flag = True
    dp_model = torch.nn.parallel.DataParallel(model)

    optimizer = optim.Adam(model.parameters(),
                           opt.learning_rate,
                           (opt.optim_alpha, opt.optim_beta),
                           opt.optim_epsilon,
                           weight_decay=opt.weight_decay)

    # load the optimizer
    if vars(opt).get('start_from', None) is not None and os.path.isfile(
            os.path.join(opt.checkpoint_path,
                         'optimizer' + opt.id + format(int(opt.start_from), '04') + '.pth')):
        optimizer.load_state_dict(torch.load(
            os.path.join(opt.checkpoint_path,
                         'optimizer' + opt.id + format(int(opt.start_from), '04') + '.pth')))

    if epoch > opt.scheduled_sampling_start and opt.scheduled_sampling_start >= 0:
        frac = (epoch - opt.scheduled_sampling_start) // opt.scheduled_sampling_increase_every
        opt.ss_prob = min(opt.scheduled_sampling_increase_prob * frac,
                          opt.scheduled_sampling_max_prob)
        model.ss_prob = opt.ss_prob

    optimizer.zero_grad()
    accumulate_iter = 0
    train_loss = 0

    subsequent_mat = np.load('data/markov_mat.npy')
    subsequent_mat = torch.from_numpy(subsequent_mat).cuda(device=0).float()
    subsequent_mat_all = subsequent_mat.clone()
    # replicate for multi-GPU training
    for i in range(opt.num_gpu - 1):
        subsequent_mat_all = torch.cat([subsequent_mat_all, subsequent_mat], dim=0)

    while True:
        if update_lr_flag:
            # assign the learning rate
            if epoch > opt.learning_rate_decay_start and opt.learning_rate_decay_start >= 0:
                frac = (epoch - opt.learning_rate_decay_start) // opt.learning_rate_decay_every
                decay_factor = opt.learning_rate_decay_rate ** frac
                opt.current_lr = opt.learning_rate * decay_factor
            else:
                opt.current_lr = opt.learning_rate
            for group in optimizer.param_groups:
                group['lr'] = opt.current_lr

            # assign the scheduled sampling prob
            if epoch > opt.scheduled_sampling_start and opt.scheduled_sampling_start >= 0:
                frac = (epoch - opt.scheduled_sampling_start) // opt.scheduled_sampling_increase_every
                opt.ss_prob = min(opt.scheduled_sampling_increase_prob * frac,
                                  opt.scheduled_sampling_max_prob)
                model.ss_prob = opt.ss_prob

            # start self-critical training if scheduled
            if sc_flag == False and opt.self_critical_after != -1 and epoch >= opt.self_critical_after:
                print('initializing CIDEr scorer...')
                s = time.time()
                global CiderD_scorer
                if CiderD_scorer is None:
                    CiderD_scorer = CiderD(df=opt.cached_tokens)  # takes about 30s
                print('initialized CIDEr scorer in {:.3f}s'.format(time.time() - s))
                sc_flag = True
                opt.learning_rate_decay_every = opt.learning_rate_decay_every * 2  # default 5 for xe, 10 for scst

            update_lr_flag = False
            print('current_lr is {}'.format(opt.current_lr))

        start = time.time()
        data = loader.get_batch('train', opt.batch_size)
        torch.cuda.synchronize()

        fc_feats = None
        att_feats = None
        tmp = [data['fc_feats'], data['labels'], data['masks'],
               data['att_feats'], data['attr_labels'], data['subsequent_labels']]
        tmp = [_ if _ is None else torch.from_numpy(_).cuda(device=0) for _ in tmp]
        fc_feats, labels, masks, att_feats, attr_labels, subsequent_labels = tmp

        # convert 1-1000 to 0-999 (perhaps done in preprocessing)
        subsequent_labels = subsequent_labels - 1
        subsequent_mask = (subsequent_labels[:, 1:] >= 0).float()
        subsequent_labels = torch.where(
            subsequent_labels > 0, subsequent_labels,
            torch.zeros_like(subsequent_labels).int().cuda(device=0))
        print('Read and process data:', time.time() - start)

        if not sc_flag:
            SAP_loss, word_loss, MAD_loss = dp_model(
                fc_feats, att_feats, labels, masks, attr_labels,
                subsequent_labels, subsequent_mask, subsequent_mat_all)
            SAP_loss = SAP_loss.mean()
            word_loss = word_loss.mean()
            MAD_loss = MAD_loss.mean()
            accumulate_iter = accumulate_iter + 1
            loss = (word_loss + 0.2 * SAP_loss + 0.2 * MAD_loss) / opt.accumulate_number
            loss.backward()
        else:
            st = time.time()
            # indexes for sampling by probabilities
            sm = torch.zeros([num_gpu, 1]).cuda(device=0)
            gen_result, sample_logprobs, _ = dp_model(
                fc_feats, att_feats, attr_labels, subsequent_mat_all, sm,
                mode='sample')
            dp_model.eval()
            with torch.no_grad():
                greedy_res, _, _ = dp_model(fc_feats, att_feats, attr_labels,
                                            subsequent_mat_all, mode='sample')
            dp_model.train()
            ed = time.time()
            print('GPU time is : {}s'.format(ed - st))
            reward = get_self_critical_reward(gen_result, greedy_res, data['gts'])
            word_loss = dp_model(sample_logprobs, gen_result.data,
                                 torch.from_numpy(reward).float().cuda(),
                                 mode='scst_forward')
            word_loss = word_loss.mean()
            loss = word_loss

            # forward to minimize SAP loss and MAD loss
            SAP_loss, _, MAD_loss = dp_model(
                fc_feats, att_feats, labels, masks, attr_labels,
                subsequent_labels, subsequent_mask, subsequent_mat_all)
            SAP_loss = SAP_loss.mean()
            MAD_loss = MAD_loss.mean()
            loss = loss + 0.2 * SAP_loss + 0.2 * MAD_loss
            loss.backward()
            accumulate_iter = accumulate_iter + 1

        if accumulate_iter % opt.accumulate_number == 0:
            utils.clip_gradient(optimizer, opt.grad_clip)
            optimizer.step()
            optimizer.zero_grad()
            iteration += 1
            accumulate_iter = 0
            train_loss = loss.item() * opt.accumulate_number
            end = time.time()
            # you can record the training log if you need
            # text_file = open(opt.checkpoint_path + '/logs/train_log_' + opt.id + '.txt', 'a')
            if not sc_flag:
                print("iter {} (epoch {}), SAP_loss = {:.3f}, word_loss = {:.3f}, MAD_loss = {:.3f}, time/batch = {:.3f}"
                      .format(iteration, epoch, SAP_loss, word_loss, MAD_loss, end - start))
            else:
                print("iter {} (epoch {}), SAP_loss = {:.3f}, avg_reward = {:.3f}, MAD_loss = {:.3f}, time/batch = {:.3f}"
                      .format(iteration, epoch, SAP_loss, np.mean(reward[:, 0]), MAD_loss, end - start))
            torch.cuda.synchronize()

        # update the iteration and epoch
        if data['bounds']['wrapped']:
            epoch += 1
            update_lr_flag = True

        # write the training loss summary
        if (iteration % opt.losses_log_every == 0) and (accumulate_iter % opt.accumulate_number == 0):
            add_summary_value(tb_summary_writer, 'word_loss', word_loss.item(), iteration)
            add_summary_value(tb_summary_writer, 'MAD_loss', MAD_loss.item(), iteration)
            add_summary_value(tb_summary_writer, 'SAP_loss', SAP_loss.item(), iteration)
            add_summary_value(tb_summary_writer, 'learning_rate', opt.current_lr, iteration)
            add_summary_value(tb_summary_writer, 'scheduled_sampling_prob', model.ss_prob, iteration)
            if sc_flag:
                add_summary_value(tb_summary_writer, 'avg_reward', np.mean(reward[:, 0]), iteration)
            loss_history[iteration] = train_loss if not sc_flag else np.mean(reward[:, 0])
            word_loss_history[iteration] = word_loss.item()
            SAP_loss_history[iteration] = SAP_loss.item()
            MAD_loss_history[iteration] = MAD_loss.item()
            lr_history[iteration] = opt.current_lr
            ss_prob_history[iteration] = model.ss_prob

        # make evaluation on validation set, and save model
        if (iteration % opt.save_checkpoint_every == 0) and (accumulate_iter % opt.accumulate_number == 0):
            # eval model
            eval_kwargs = {'split': 'val',
                           'dataset': opt.input_json,
                           'num_images': -1,
                           'index_eval': 1,
                           'id': opt.id,
                           'beam': opt.beam,
                           'verbose_loss': 1,
                           'checkpoint_path': opt.checkpoint_path}
            eval_kwargs.update(vars(opt))
            val_loss, predictions, lang_stats, precision, recall = eval_utils.eval_split(
                dp_model, loader, subsequent_mat_all, eval_kwargs)

            # write validation result into summary
            add_summary_value(tb_summary_writer, 'validation loss', val_loss, iteration)
            if lang_stats is not None:
                for k, v in lang_stats.items():
                    add_summary_value(tb_summary_writer, k, v, iteration)
            val_result_history[iteration] = {'loss': val_loss,
                                             'lang_stats': lang_stats,
                                             'predictions': predictions}

            # save lang stats ('a' append mode; the original "aw" is not a valid mode)
            f_lang = open(opt.checkpoint_path + '/logs/lang_' + opt.id + '.txt', 'a')
            f_lang.write(str(iteration) + ' ' + str(iteration / opt.save_checkpoint_every) + '\n')
            f_lang.write('val loss ' + str(val_loss) + '\n')
            for key_lang in lang_stats:
                f_lang.write(key_lang + ' ' + str(lang_stats[key_lang]) + '\n')
            f_lang.write('precision ' + str(precision) + ' recall ' + str(recall) + '\n')
            f_lang.close()

            # save the model if it is improving on the validation result
            if opt.language_eval == 1:
                current_score = lang_stats['CIDEr']
            else:
                current_score = -val_loss

            best_flag = False
            save_id = iteration / opt.save_checkpoint_every
            if best_val_score is None or current_score > best_val_score or current_score > threshold:
                best_val_score = current_score
                best_flag = True
                # only save improved models, or models whose CIDEr-D is
                # larger than a given threshold
                checkpoint_path = os.path.join(
                    opt.checkpoint_path,
                    'model' + opt.id + format(int(save_id), '04') + '.pth')
                torch.save(model.state_dict(), checkpoint_path)
                print("model saved to {}".format(checkpoint_path))
                optimizer_path = os.path.join(
                    opt.checkpoint_path,
                    'optimizer' + opt.id + format(int(save_id), '04') + '.pth')
                torch.save(optimizer.state_dict(), optimizer_path)

                # record the lang stats for the saved model
                f_lang = open(opt.checkpoint_path + '/logs/Best_lang_' + opt.id + '.txt', 'a')
                f_lang.write(str(iteration) + ' ' + str(iteration / opt.save_checkpoint_every) + '\n')
                f_lang.write('val loss ' + str(val_loss) + '\n')
                for key_lang in lang_stats:
                    f_lang.write(key_lang + ' ' + str(lang_stats[key_lang]) + '\n')
                f_lang.write('precision ' + str(precision) + ' recall ' + str(recall) + '\n')
                f_lang.close()

                # dump miscellaneous information
                infos['iter'] = iteration
                infos['epoch'] = epoch
                infos['iterators'] = loader.iterators
                infos['split_ix'] = loader.split_ix
                infos['best_val_score'] = best_val_score
                infos['opt'] = opt
                infos['vocab'] = loader.get_vocab()
                histories['val_result_history'] = val_result_history
                histories['loss_history'] = loss_history
                # fixed: the original wrote loss_history under this key
                histories['word_loss_history'] = word_loss_history
                histories['MAD_loss_history'] = MAD_loss_history
                histories['SAP_loss_history'] = SAP_loss_history
                histories['lr_history'] = lr_history
                histories['ss_prob_history'] = ss_prob_history
                with open(os.path.join(
                        opt.checkpoint_path,
                        'infos_' + opt.id + format(int(save_id), '04') + '.pkl'), 'wb') as f:
                    cPickle.dump(infos, f)
                with open(os.path.join(
                        opt.checkpoint_path,
                        'histories_' + opt.id + format(int(save_id), '04') + '.pkl'), 'wb') as f:
                    cPickle.dump(histories, f)

        # stop if reaching max epochs
        if epoch >= opt.max_epochs and opt.max_epochs != -1:
            break
def eval_split(model, crit, loader, eval_kwargs={}):
    verbose = eval_kwargs.get('verbose', True)
    verbose_beam = eval_kwargs.get('verbose_beam', 1)
    verbose_loss = eval_kwargs.get('verbose_loss', 1)
    num_images = eval_kwargs.get('num_images',
                                 eval_kwargs.get('val_images_use', -1))
    split = eval_kwargs.get('split', None)
    if split == 'onlinetest':
        split = None
    lang_eval = eval_kwargs.get('language_eval', 0)
    dataset = eval_kwargs.get('dataset', 'coco')
    beam_size = eval_kwargs.get('beam_size', 1)
    ciderd = eval_kwargs.get('ciderd', False)
    annFile = eval_kwargs.get('annfile', None)

    # make sure we are in evaluation mode
    model.eval()
    loader.reset_iterator(split)

    n = 0
    loss = 0
    loss_sum = 0
    loss_evals = 1e-8
    predictions = []

    # produce a CIDEr-D score for each generated caption
    if ciderd:
        CiderD_scorer = CiderD(df='coco-train-idxs')

    while True:
        data = loader.get_batch(split)
        n = n + loader.batch_size

        if data.get('labels', None) is not None and verbose_loss:
            # forward the model to get loss
            tmp = [data['fc_feats'], data['att_feats'], data['labels'],
                   data['masks'], data['att_masks']]
            tmp = [torch.from_numpy(_).cuda() if _ is not None else _
                   for _ in tmp]
            fc_feats, att_feats, labels, masks, att_masks = tmp
            with torch.no_grad():
                loss = crit(model(fc_feats, att_feats, labels, att_masks)[0],
                            labels[:, 1:], masks[:, 1:]).item()
            loss_sum = loss_sum + loss
            loss_evals = loss_evals + 1

        # forward the model to also get generated samples for each image;
        # only leave one feature per image, in case of duplicate samples
        tmp = [data['fc_feats'][np.arange(loader.batch_size) * loader.seq_per_img],
               data['att_feats'][np.arange(loader.batch_size) * loader.seq_per_img],
               data['att_masks'][np.arange(loader.batch_size) * loader.seq_per_img]
               if data['att_masks'] is not None else None]
        tmp = [torch.from_numpy(_).cuda() if _ is not None else _ for _ in tmp]
        fc_feats, att_feats, att_masks = tmp
        with torch.no_grad():
            outputs = model(fc_feats, att_feats, att_masks,
                            opt=eval_kwargs, mode='sample')
        seq = outputs[0].data

        # to produce the CIDEr-D score, first build the ground-truth dict gts
        # and the evaluated list res
        if ciderd:
            gts = {}
            for i in range(len(data['gts'])):
                gts[data['infos'][i]['id']] = [
                    array_to_str(data['gts'][i][j])
                    for j in range(len(data['gts'][i]))]
            gen_result = seq.data.cpu().numpy()
            res = {}
            for i in range(len(gen_result)):
                res[data['infos'][i]['id']] = [array_to_str(gen_result[i])]
            res_ = [{'image_id': k, 'caption': v} for k, v in res.items()]
            _, cider_scores = CiderD_scorer.compute_score(gts, res_)

        # print beam search
        if beam_size > 1 and verbose_beam:
            for i in range(loader.batch_size):
                print('\n'.join([
                    utils.decode_sequence(loader.get_vocab(),
                                          _['seq'].unsqueeze(0))[0]
                    for _ in model.done_beams[i]]))
                print('--' * 10)

        sents = utils.decode_sequence(loader.get_vocab(), seq)
        for k, sent in enumerate(sents):
            if ciderd:
                entry = {'image_id': data['infos'][k]['id'],
                         'caption': sent,
                         'cider': cider_scores[k]}
            else:
                entry = {'image_id': data['infos'][k]['id'], 'caption': sent}
            if eval_kwargs.get('dump_path', 0) == 1:
                entry['file_name'] = data['infos'][k]['file_path']
            predictions.append(entry)
            if eval_kwargs.get('dump_images', 0) == 1:
                # dump the raw image to the vis/ folder
                cmd = 'cp "' + os.path.join(
                    eval_kwargs['image_root'],
                    data['infos'][k]['file_path']) + '" vis/imgs/img' + str(
                    len(predictions)) + '.jpg'  # bit gross
                print(cmd)
                os.system(cmd)
            if verbose:
                if ciderd:
                    print('image %s: %s; ciderd: %.3f' %
                          (entry['image_id'], entry['caption'], entry['cider']))
                else:
                    print('image %s: %s' % (entry['image_id'], entry['caption']))

        # if we wrapped around the split or used up the val-image budget, bail
        ix0 = data['bounds']['it_pos_now']
        ix1 = data['bounds']['it_max']
        if num_images != -1:
            ix1 = min(ix1, num_images)
        for i in range(n - ix1):
            predictions.pop()
        if verbose:
            print('evaluating validation performance... %d/%d (%f)' %
                  (ix0 - 1, ix1, loss))
        if data['bounds']['wrapped']:
            break
        if num_images >= 0 and n >= num_images:
            break

    lang_stats = None
    if lang_eval == 1:
        lang_stats = language_eval(dataset, predictions, eval_kwargs['id'],
                                   split, annFile)

    # switch back to training mode
    model.train()
    return loss_sum / loss_evals, predictions, lang_stats
def _init_cider_scorer(self):
    cached_tokens, _ = os.path.splitext(os.path.basename(self.ngram_file))
    self.CiderD_scorer = self.CiderD_scorer or CiderD(
        df=cached_tokens, ngram_file=self.ngram_file)
import sys
import time
from collections import OrderedDict

import torch

import misc.utils as utils

# sys.path.append("cider")
sys.path.append("cider-master")
from pyciderevalcap.ciderD.ciderD import CiderD
sys.path.append("coco-caption")
# from pycocoevalcap.cider.cider import CiderD
from pycocoevalcap.bleu.bleu import Bleu

CiderD_scorer = None
Bleu_scorer = None
# NOTE: this eager initialization means the `or` in init_scorer below never
# replaces the scorer, so its cached_tokens argument is effectively ignored.
CiderD_scorer = CiderD(df='corpus')


def init_scorer(cached_tokens):
    global CiderD_scorer
    CiderD_scorer = CiderD_scorer or CiderD(df=cached_tokens)
    global Bleu_scorer
    Bleu_scorer = Bleu_scorer or Bleu(4)


def array_to_str(arr):
    out = ''
    for i in range(len(arr)):
        out += str(arr[i]) + ' '
        if arr[i] == 0:
            break
    return out.strip()
def __init__(self, path_to_cached_tokens, metric_weights):
    self._scorer = dict(
        ciderD=CiderD(df=path_to_cached_tokens),
        cider=Cider(df=path_to_cached_tokens),
        bleu=BleuSilent(4))
    self.weights = metric_weights
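# Hypothetical sketch of how the weighted scorers above might be combined into
# a single reward; `combined_score`, the (gts, res) inputs, and the weight
# keys are assumptions mirroring self._scorer, not part of the original class.
def combined_score(self, gts, res):
    total = 0.0
    for name, scorer in self._scorer.items():
        score, _ = scorer.compute_score(gts, res)
        # Bleu returns a list of scores for n = 1..4; take BLEU-4
        if isinstance(score, list):
            score = score[-1]
        total += self.weights[name] * score
    return total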
    tf.logging.info('restoring from checkpoint')
else:
    sess.run(init)

start = sess.run(models[0].step)
best_res = 0.0
history_cider = []
batch_size = FLAGS.batch_size * FLAGS.ngpu
# the number of batches (iterations) per epoch
nBatches_epoch = opts.nImgs // batch_size
vocab = vocabulary(opts)

# The CIDEr-D scorer
# TODO create the df
# the original read 'FLAGS.ached_tokens', presumably a typo for cached_tokens
CiderD_Scorer = CiderD(df=FLAGS.cached_tokens)


def score_seq(gts, seqs):
    # check that the inputs are valid
    assert gts.shape[0] % opts.nSeqs_per_img == 0
    assert len(seqs) == 2
    assert seqs[0].shape[0] == seqs[1].shape[0] == gts.shape[0]
    assert seqs[0].shape[0] % opts.nSeqs_per_img == 0

    batch_size = gts.shape[0]
    nImage = batch_size // opts.nSeqs_per_img
    gts = np.reshape(gts, [nImage, opts.nSeqs_per_img, -1])
    gts = {i: list(chain.from_iterable(convert_to_str(gts[i])))
           for i in xrange(nImage)}
    baseline_seqs, random_seqs = seqs
def get_self_critical_reward_aic(greedy_res, gen_result, gts_data, alpha,
                                 vocab, opt):
    global CiderD_scorer
    global Bleu_scorer
    global Rouge_scorer
    # 'global Meteor_scorer' was missing in the original: without it the
    # assignment below only creates a local, leaving the global undefined
    # on later calls
    global Meteor_scorer
    if CiderD_scorer is None:
        # lazily import and build the scorers from the configured paths
        path_cider = opt.path_cider
        path_idxs = opt.path_idxs
        sys.path.append(path_cider)
        from pyciderevalcap.ciderD.ciderD import CiderD
        from pyciderevalcap.bleu.bleu import Bleu
        from pyciderevalcap.rouge.rouge import Rouge
        from pyciderevalcap.meteor.meteor import Meteor
        CiderD_scorer = CiderD(df=opt.cider_idxs, path=path_idxs)
        Bleu_scorer = Bleu()
        Rouge_scorer = Rouge()
        Meteor_scorer = Meteor()

    batch_size = gen_result.size(0)  # batch_size = sample_size * seq_per_img
    seq_per_img = batch_size // len(gts_data)

    res = OrderedDict()
    gen_result = gen_result.cpu().numpy()
    greedy_res = greedy_res.cpu().numpy()
    # sampled results
    for i in range(batch_size):
        res[i] = [array_to_str_aic(gen_result[i], vocab)]
    # greedy results
    for i in range(batch_size):
        res[batch_size + i] = [array_to_str_aic(greedy_res[i], vocab)]

    gts = OrderedDict()
    for i in range(len(gts_data)):
        gts[i] = [array_to_str_aic(gts_data[i][j], vocab)
                  for j in range(len(gts_data[i]))]

    res = [{'image_id': i, 'caption': res[i]} for i in range(2 * batch_size)]
    gts = {i: gts[i % batch_size // seq_per_img]
           for i in range(2 * batch_size)}

    if opt.rl_metric == 'CIDEr':
        _, scores = CiderD_scorer.compute_score(gts, res)
    elif opt.rl_metric == 'ROUGE_L':
        _, scores = Rouge_scorer.compute_score(gts, res)
    elif opt.rl_metric == 'Bleu_4':
        _, scores = Bleu_scorer.compute_score(gts, res)
        _ = _[-1]
        scores = np.array(scores[-1])
    elif opt.rl_metric == 'AVG':
        d_, d_scores = CiderD_scorer.compute_score(gts, res)
        b_, b_scores = Bleu_scorer.compute_score(gts, res)
        r_, r_scores = Rouge_scorer.compute_score(gts, res)
        b_ = b_[-1]
        b_scores = np.array(b_scores[-1])
        _ = (d_ + b_ + r_) / 3
        scores = (d_scores + b_scores + r_scores) / 3
    elif opt.rl_metric == 'Meteor':
        _, scores = Meteor_scorer.compute_score(gts, res)

    sample_mean = np.mean(scores[:batch_size])
    greedy_mean = np.mean(scores[batch_size:])
    print('scores: {:.3f} sample:{:.3f} greedy:{:.3f}'.format(
        _, sample_mean, greedy_mean))

    # self-critical advantage: sampled score minus the (scaled) greedy baseline
    scores = scores[:batch_size] - scores[batch_size:] * alpha
    # batch_size * seq_length
    rewards = np.repeat(scores[:, np.newaxis], gen_result.shape[1], 1)
    return rewards, sample_mean, greedy_mean
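# Minimal numeric illustration of the self-critical reward above (assumed
# values, alpha = 1.0): sampled scores minus the greedy baseline, broadcast
# over timesteps.
import numpy as np
scores = np.array([0.8, 1.1, 0.5, 0.9])  # first half: samples; second: greedy
batch_size, seq_length = 2, 3
adv = scores[:batch_size] - scores[batch_size:] * 1.0   # [0.3, 0.2]
rewards = np.repeat(adv[:, np.newaxis], seq_length, 1)  # shape (2, 3)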