def __init__(self, model, opt):
    """Keep references to the model and options and select the loss criterion.

    Args:
        model: the model this wrapper holds; stored as ``self.model``.
        opt: options object; ``opt.label_smoothing`` is read to choose the
            criterion and the object itself is stored as ``self.opt``.
    """
    super(LossWrapper, self).__init__()
    self.opt = opt
    self.model = model
    # A positive smoothing value selects the label-smoothing criterion;
    # anything else falls back to the plain language-model criterion.
    wants_smoothing = opt.label_smoothing > 0
    self.crit = (
        utils.LabelSmoothing(smoothing=opt.label_smoothing)
        if wants_smoothing
        else utils.LanguageModelCriterion()
    )
def __init__(self, model, opt):
    """Store the model/options and build the supervised, RL and PPO criteria.

    Args:
        model: the model this wrapper holds; stored as ``self.model``.
        opt: options object; ``opt.label_smoothing`` selects the supervised
            criterion and the object itself is stored as ``self.opt``.
    """
    super(LossWrapper, self).__init__()
    self.opt = opt
    self.model = model
    if opt.label_smoothing > 0:
        self.crit = utils.LabelSmoothing(smoothing=opt.label_smoothing)
    else:
        self.crit = utils.LanguageModelCriterion()
    self.rl_crit = utils.RewardCriterion()
    # Added for PPO, 9 Sep 2019.
    self.ppo_crit = utils.PPORewardCriterion()
    # Zero-initialised (50, 16) tensor kept on the instance for PPO.
    # NOTE(review): the 50 x 16 shape is hard-coded; presumably it must match
    # the shape of the sampled log-probabilities -- confirm against the caller.
    # The original unconditionally moved this to 'cuda', which raises on a
    # CPU-only host; fall back to CPU there and keep CUDA whenever available.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    self.old_sample_logprobs = torch.zeros(50, 16, device=device)
def __init__(self, model, opt):
    """Store the model/options and build the captioning and attention losses.

    Args:
        model: the model this wrapper holds; stored as ``self.model``.
        opt: options object. ``opt.label_smoothing`` selects the supervised
            criterion; when ``opt.att_supervise`` is truthy,
            ``opt.att_sup_crit`` must be one of ``'KL'``, ``'NLL'`` or
            ``'ExtendNLL'``.

    Raises:
        NotImplementedError: if attention supervision is enabled with an
            ``opt.att_sup_crit`` value other than the three listed above.
    """
    super(LossWrapper, self).__init__()
    self.opt = opt
    self.model = model
    if opt.label_smoothing > 0:
        self.crit = utils.LabelSmoothing(smoothing=opt.label_smoothing)
    else:
        self.crit = utils.LanguageModelCriterion()
    self.rl_crit = utils.RewardCriterion()
    if opt.att_supervise:
        # Only the criterion matching the configured name is instantiated,
        # and each one lives under its own attribute name.
        if opt.att_sup_crit == 'KL':
            self.kl_crit = nn.KLDivLoss(reduction='batchmean')
        elif opt.att_sup_crit == 'NLL':
            self.nll = nn.NLLLoss()
        elif opt.att_sup_crit == 'ExtendNLL':
            self.extendnll = utils.ExtendNLLCrit()
        else:
            # Name the offending value so a misconfiguration is diagnosable.
            raise NotImplementedError(
                "Unsupported att_sup_crit {!r}; expected 'KL', 'NLL' or "
                "'ExtendNLL'".format(opt.att_sup_crit)
            )
    # NOTE(review): the original indentation was lost, so it is unclear
    # whether this assignment belonged inside the ``att_supervise`` branch.
    # Setting it unconditionally is a superset of either reading -- confirm.
    self.min_value = 1e-8
def __init__(self, model, opt):
    """Store the model/options and build the criteria and optional VSE model.

    Args:
        model: the model this wrapper holds; stored as ``self.model``.
        opt: options object. ``opt.label_smoothing`` selects the supervised
            criterion, ``opt`` is passed to ``utils.StructureLosses`` and
            ``VSEFCModel``, and ``opt.vse_model`` decides whether a VSE model
            is created.
    """
    super(LossWrapper, self).__init__()
    self.opt = opt
    self.model = model
    if opt.label_smoothing > 0:
        self.crit = utils.LabelSmoothing(smoothing=opt.label_smoothing)
    else:
        self.crit = utils.LanguageModelCriterion()
    self.rl_crit = utils.RewardCriterion()
    self.struc_crit = utils.StructureLosses(opt)
    # The comparison is against the string 'None', not the None object.
    if opt.vse_model != 'None':
        self.vse = VSEFCModel(opt)
        # Turn off gradient tracking for every VSE parameter.
        for p in self.vse.parameters():
            p.requires_grad = False
        self.retrieval_reward_weight = opt.retrieval_reward_weight
        # NOTE(review): in the flattened source the following call appears
        # after a '#'; it is kept commented out here. If it was meant to be
        # live, the VSE weights are never loaded -- confirm against history.
        # self.vse.load_state_dict({ k[4:]: v for k, v in torch.load(opt.initialize_retrieval).items() if 'vse.' in k })
    # NOTE(review): this overwrites the value taken from ``opt`` above, so the
    # retrieval reward weight always ends up 0. The original nesting level of
    # this line is unknown -- confirm that the override is intentional.
    self.retrieval_reward_weight = 0
def train(opt):
    """Train the caption decoder, evaluating and checkpointing periodically.

    Restores bookkeeping through ``load_info(opt)``, builds the decoder, the
    criteria and the optimizer, then loops: fetch a ``'train'`` batch, compute
    either the supervised loss (word loss plus 0.15 times the tag loss) or the
    self-critical reward loss, and take one optimizer step. Validation and
    checkpointing happen every ``opt.save_checkpoint_every`` iterations, or at
    each epoch end when that option is ``-1``.

    Args:
        opt: options namespace. It is mutated in place (``use_att``,
            ``use_fc``, ``vocab_size``, ``seq_length``, ``current_lr`` and,
            once scheduled sampling starts, ``ss_prob``).

    Returns:
        None. The loop exits once ``epoch > opt.max_epochs`` unless
        ``opt.max_epochs`` is ``-1``.

    Raises:
        AssertionError: if ``opt.reduce_on_plateau`` is set and evaluation did
            not produce a ``'CIDEr'`` score.
    """
    # Deal with feature things before anything
    opt.use_att = utils.if_use_att(opt.caption_model)
    opt.use_fc = utils.if_use_fc(opt.caption_model)

    loader = DataLoader(opt)
    opt.vocab_size = loader.vocab_size
    opt.seq_length = loader.seq_length

    infos = load_info(opt)
    iteration = infos.get('iter', 0)
    epoch = infos.get('epoch', 0)
    val_result_history = infos.get('val_result_history', {})
    loader.iterators = infos.get('iterators', loader.iterators)
    loader.split_ix = infos.get('split_ix', loader.split_ix)
    # Always bind best_val_score: previously it was only assigned when
    # load_best_score == 1, which raised UnboundLocalError at the first
    # evaluation otherwise.
    best_val_score = None
    if opt.load_best_score == 1:
        best_val_score = infos.get('best_val_score', None)

    # Define and load model, optimizer, critics
    decoder = setup(opt).train().cuda()
    if opt.label_smoothing > 0:
        crit = utils.LabelSmoothing(smoothing=opt.label_smoothing).cuda()
    else:
        crit = utils.LanguageModelCriterion().cuda()
    rl_crit = utils.RewardCriterion().cuda()

    optimizer = utils.build_optimizer(decoder.parameters(), opt)
    if opt.reduce_on_plateau:
        optimizer = utils.ReduceLROnPlateau(optimizer, factor=0.5, patience=3)

    models = {'decoder': decoder}
    optimizers = {'decoder': optimizer}
    save_nets_structure(models, opt)
    load_checkpoint(models, optimizers, opt)
    print('opt', opt)

    epoch_done = True
    sc_flag = False
    while True:
        if epoch_done:
            # Assign the learning rate
            if epoch > opt.learning_rate_decay_start and opt.learning_rate_decay_start >= 0:
                frac = (epoch - opt.learning_rate_decay_start) // opt.learning_rate_decay_every
                decay_factor = opt.learning_rate_decay_rate ** frac
                opt.current_lr = opt.learning_rate * decay_factor
            else:
                opt.current_lr = opt.learning_rate
            utils.set_lr(optimizer, opt.current_lr)

            # Assign the scheduled sampling prob
            if epoch > opt.scheduled_sampling_start and opt.scheduled_sampling_start >= 0:
                frac = (epoch - opt.scheduled_sampling_start) // opt.scheduled_sampling_increase_every
                opt.ss_prob = min(opt.scheduled_sampling_increase_prob * frac,
                                  opt.scheduled_sampling_max_prob)
                decoder.ss_prob = opt.ss_prob

            # If start self critical training
            if opt.self_critical_after != -1 and epoch >= opt.self_critical_after:
                sc_flag = True
                init_scorer(opt.cached_tokens)
            else:
                sc_flag = False

            epoch_done = False

        # 1. fetch a batch of data from train split
        data = loader.get_batch('train')
        tmp = [
            data['fc_feats'], data['att_feats'], data['labels'], data['tags'],
            data['masks'], data['att_masks'], data['verbs']
        ]
        # None entries are passed through; everything else is moved to the GPU.
        tmp = [_ if _ is None else torch.from_numpy(_).cuda() for _ in tmp]
        fc_feats, att_feats, labels, tags, masks, att_masks, weak_relas = tmp
        vrg_data = {}
        for key in data['vrg_data']:
            value = data['vrg_data'][key]
            vrg_data[key] = value if value is None else torch.from_numpy(value).cuda()

        # 2. Forward model and compute loss
        torch.cuda.synchronize()
        optimizer.zero_grad()
        if not sc_flag:
            out = decoder(vrg_data, fc_feats, att_feats, labels, weak_relas, att_masks)
            # The first position of labels/tags/masks is dropped before scoring.
            loss_words = crit(out[0], labels[:, 1:], masks[:, 1:])
            loss_tags = crit(out[1], tags[:, 1:], masks[:, 1:])
            loss = loss_words + loss_tags * 0.15
        else:
            gen_result, sample_logprobs, core_args = decoder(
                vrg_data, fc_feats, att_feats, weak_relas, att_masks,
                opt={'sample_max': 0, 'return_core_args': True},
                mode='sample')
            reward = get_self_critical_reward(
                decoder, core_args, vrg_data, fc_feats, att_feats, weak_relas,
                att_masks, data, gen_result, opt)
            loss = rl_crit(sample_logprobs, gen_result.data,
                           torch.from_numpy(reward).float().cuda())

        # 3. Update model
        loss.backward()
        utils.clip_gradient(optimizer, opt.grad_clip)
        optimizer.step()
        train_loss = loss.item()
        torch.cuda.synchronize()

        # Update the iteration and epoch
        iteration += 1

        # Write the training loss summary
        if iteration % opt.log_loss_every == 0:
            logger.info("{} ({}), loss: {:.3f}".format(iteration, epoch, train_loss))
            tb.add_values('loss', {'train': train_loss}, iteration)

        if data['bounds']['wrapped']:
            epoch += 1
            epoch_done = True

        # Make evaluation and save checkpoint
        if (opt.save_checkpoint_every > 0 and iteration % opt.save_checkpoint_every == 0) \
                or (opt.save_checkpoint_every == -1 and epoch_done):
            # eval model
            eval_kwargs = {
                'split': 'val',
                'dataset': opt.input_json,
                'expand_features': False
            }
            eval_kwargs.update(vars(opt))
            predictions, lang_stats = eval_utils.eval_split(decoder, loader, eval_kwargs)

            if opt.reduce_on_plateau:
                # lang_stats may be None (it is checked for None below); test
                # for that first so the failure is the intended assertion
                # rather than a TypeError from ``in None``.
                assert lang_stats is not None and 'CIDEr' in lang_stats, \
                    'Error: cider should be in eval list'
                optimizer.scheduler_step(-lang_stats['CIDEr'])

            # log val results
            if lang_stats is not None:
                logger.info("Scores: {}".format(lang_stats))
                tb.add_values('scores', lang_stats, epoch)
            val_result_history[epoch] = {
                'lang_stats': lang_stats,
                'predictions': predictions
            }

            # Save model if is improving on validation result
            current_score = 0 if lang_stats is None else lang_stats['CIDEr']
            best_flag = False
            if best_val_score is None or current_score > best_val_score:
                best_val_score = current_score
                best_flag = True

            # Dump miscellaneous information
            infos['iter'] = iteration
            infos['epoch'] = epoch
            infos['iterators'] = loader.iterators
            infos['split_ix'] = loader.split_ix
            infos['best_val_score'] = best_val_score
            infos['opt'] = opt
            infos['vocab'] = loader.get_vocab()
            infos['val_result_history'] = val_result_history
            save_checkpoint(models, optimizers, infos, best_flag, opt)

        # Stop if reaching max epochs
        if epoch > opt.max_epochs and opt.max_epochs != -1:
            break
def train(opt):
    """Train the captioning model, logging, evaluating and saving as it goes.

    Optionally resumes from ``opt.start_from`` (infos/histories pickles and
    ``optimizer.pth``), wraps the model in ``torch.nn.DataParallel``, then
    loops: fetch a ``'train'`` batch, compute the supervised loss or the
    self-critical reward loss, and step the optimizer. Every
    ``opt.losses_log_every`` iterations summaries are written; every
    ``opt.save_checkpoint_every`` iterations the model is evaluated on
    ``'val'`` and the model, optimizer, infos and histories are saved under
    ``opt.checkpoint_path``.

    Args:
        opt: options namespace. It is mutated in place (``use_att``,
            ``vocab_size``, ``seq_length``, ``current_lr`` and, once scheduled
            sampling starts, ``ss_prob``).

    Returns:
        None. The loop exits once ``epoch >= opt.max_epochs`` unless
        ``opt.max_epochs`` is ``-1``.

    Raises:
        AssertionError: if a resumed run disagrees with the saved options on
            any of ``caption_model``, ``rnn_type``, ``rnn_size`` or
            ``num_layers``, or if ``opt.noamopt`` is used with a
            non-transformer ``caption_model``.
    """
    # Deal with feature things before anything
    opt.use_att = utils.if_use_att(opt.caption_model)
    loader = DataLoader(opt)
    opt.vocab_size = loader.vocab_size
    opt.seq_length = loader.seq_length

    # ``tb`` may be falsy, in which case no summary writer is created.
    tb_summary_writer = tb and tb.SummaryWriter(opt.checkpoint_path)

    infos = {}
    histories = {}
    if opt.start_from is not None:
        # open old infos and check if models are compatible
        # NOTE: unpickling can execute arbitrary code; only resume from a
        # trusted checkpoint directory.
        # The pickles are written with 'wb' below, so read them back in binary
        # mode (text mode fails on Python 3).
        infos_path = os.path.join(opt.start_from, 'infos_'+opt.id+'.pkl')
        with open(infos_path, 'rb') as f:
            infos = cPickle.load(f)
        saved_model_opt = infos['opt']
        need_be_same = ["caption_model", "rnn_type", "rnn_size", "num_layers"]
        for checkme in need_be_same:
            assert vars(saved_model_opt)[checkme] == vars(opt)[checkme], \
                "Command line argument and saved model disagree on '%s' " % checkme

        histories_path = os.path.join(opt.start_from, 'histories_'+opt.id+'.pkl')
        if os.path.isfile(histories_path):
            with open(histories_path, 'rb') as f:
                histories = cPickle.load(f)

    iteration = infos.get('iter', 0)
    epoch = infos.get('epoch', 0)

    val_result_history = histories.get('val_result_history', {})
    loss_history = histories.get('loss_history', {})
    lr_history = histories.get('lr_history', {})
    ss_prob_history = histories.get('ss_prob_history', {})

    loader.iterators = infos.get('iterators', loader.iterators)
    loader.split_ix = infos.get('split_ix', loader.split_ix)
    # Always bind best_val_score: previously it was only assigned when
    # load_best_score == 1, which raised UnboundLocalError at the first
    # evaluation otherwise.
    best_val_score = None
    if opt.load_best_score == 1:
        best_val_score = infos.get('best_val_score', None)

    model = models.setup(opt).cuda()
    dp_model = torch.nn.DataParallel(model)

    epoch_done = True
    # Assure in training mode
    dp_model.train()

    if opt.label_smoothing > 0:
        crit = utils.LabelSmoothing(smoothing=opt.label_smoothing)
    else:
        crit = utils.LanguageModelCriterion()
    rl_crit = utils.RewardCriterion()

    if opt.noamopt:
        assert opt.caption_model == 'transformer' or opt.caption_model == 'relation_transformer', \
            'noamopt can only work with transformer'
        optimizer = utils.get_std_opt(model, factor=opt.noamopt_factor, warmup=opt.noamopt_warmup)
        # Continue the schedule from the restored iteration count.
        optimizer._step = iteration
    elif opt.reduce_on_plateau:
        optimizer = utils.build_optimizer(model.parameters(), opt)
        optimizer = utils.ReduceLROnPlateau(optimizer, factor=0.5, patience=3)
    else:
        optimizer = utils.build_optimizer(model.parameters(), opt)
    # Load the optimizer
    if vars(opt).get('start_from', None) is not None \
            and os.path.isfile(os.path.join(opt.start_from, "optimizer.pth")):
        optimizer.load_state_dict(torch.load(os.path.join(opt.start_from, 'optimizer.pth')))

    while True:
        if epoch_done:
            # The manual epoch-based decay is skipped when noamopt or
            # reduce_on_plateau is in charge of the learning rate.
            if not opt.noamopt and not opt.reduce_on_plateau:
                # Assign the learning rate
                if epoch > opt.learning_rate_decay_start and opt.learning_rate_decay_start >= 0:
                    frac = (epoch - opt.learning_rate_decay_start) // opt.learning_rate_decay_every
                    decay_factor = opt.learning_rate_decay_rate ** frac
                    opt.current_lr = opt.learning_rate * decay_factor
                else:
                    opt.current_lr = opt.learning_rate
                utils.set_lr(optimizer, opt.current_lr)  # set the decayed rate
            # Assign the scheduled sampling prob
            if epoch > opt.scheduled_sampling_start and opt.scheduled_sampling_start >= 0:
                frac = (epoch - opt.scheduled_sampling_start) // opt.scheduled_sampling_increase_every
                opt.ss_prob = min(opt.scheduled_sampling_increase_prob * frac,
                                  opt.scheduled_sampling_max_prob)
                model.ss_prob = opt.ss_prob

            # If start self critical training
            if opt.self_critical_after != -1 and epoch >= opt.self_critical_after:
                sc_flag = True
                init_scorer(opt.cached_tokens)
            else:
                sc_flag = False

            epoch_done = False

        start = time.time()
        # Load data from train split (0)
        data = loader.get_batch('train')
        print('Read data:', time.time() - start)

        torch.cuda.synchronize()
        start = time.time()

        tmp = [data['fc_feats'], data['att_feats'], data['labels'], data['masks'], data['att_masks']]
        # None entries are passed through; everything else is moved to the GPU.
        tmp = [_ if _ is None else torch.from_numpy(_).cuda() for _ in tmp]
        fc_feats, att_feats, labels, masks, att_masks = tmp
        if opt.use_box:
            boxes = data['boxes'] if data['boxes'] is None else torch.from_numpy(data['boxes']).cuda()

        optimizer.zero_grad()
        if not sc_flag:
            # The first position of labels/masks is dropped before scoring.
            if opt.use_box:
                loss = crit(dp_model(fc_feats, att_feats, boxes, labels, att_masks),
                            labels[:,1:], masks[:,1:])
            else:
                loss = crit(dp_model(fc_feats, att_feats, labels, att_masks),
                            labels[:,1:], masks[:,1:])
        else:
            if opt.use_box:
                gen_result, sample_logprobs = dp_model(
                    fc_feats, att_feats, boxes, att_masks, opt={'sample_max':0}, mode='sample')
                reward = get_self_critical_reward(
                    dp_model, fc_feats, att_feats, boxes, att_masks, data, gen_result, opt)
            else:
                gen_result, sample_logprobs = dp_model(
                    fc_feats, att_feats, att_masks, opt={'sample_max':0}, mode='sample')
                reward = get_self_critical_reward(
                    dp_model, fc_feats, att_feats, None, att_masks, data, gen_result, opt)

            loss = rl_crit(sample_logprobs, gen_result.data, torch.from_numpy(reward).float().cuda())

        loss.backward()
        utils.clip_gradient(optimizer, opt.grad_clip)
        optimizer.step()
        train_loss = loss.item()
        torch.cuda.synchronize()
        end = time.time()
        if not sc_flag:
            print("iter {} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}" \
                .format(iteration, epoch, train_loss, end - start))
        else:
            print("iter {} (epoch {}), avg_reward = {:.3f}, time/batch = {:.3f}" \
                .format(iteration, epoch, np.mean(reward[:,0]), end - start))

        # Update the iteration and epoch
        iteration += 1
        if data['bounds']['wrapped']:
            epoch += 1
            epoch_done = True

        # Write the training loss summary
        if iteration % opt.losses_log_every == 0:
            add_summary_value(tb_summary_writer, 'train_loss', train_loss, iteration)
            if opt.noamopt:
                opt.current_lr = optimizer.rate()
            elif opt.reduce_on_plateau:
                opt.current_lr = optimizer.current_lr
            add_summary_value(tb_summary_writer, 'learning_rate', opt.current_lr, iteration)
            add_summary_value(tb_summary_writer, 'scheduled_sampling_prob', model.ss_prob, iteration)
            if sc_flag:
                add_summary_value(tb_summary_writer, 'avg_reward', np.mean(reward[:,0]), iteration)

            loss_history[iteration] = train_loss if not sc_flag else np.mean(reward[:,0])
            lr_history[iteration] = opt.current_lr
            ss_prob_history[iteration] = model.ss_prob

        # make evaluation on validation set, and save model
        if iteration % opt.save_checkpoint_every == 0:
            # eval model
            eval_kwargs = {'split': 'val',
                           'dataset': opt.input_json,
                           'use_box': opt.use_box}
            eval_kwargs.update(vars(opt))
            val_loss, predictions, lang_stats = eval_utils.eval_split(dp_model, crit, loader, eval_kwargs)

            if opt.reduce_on_plateau:
                # lang_stats can be falsy (it is truth-tested below); check
                # that first so ``in`` is never applied to None, and fall back
                # to the validation loss in that case.
                if lang_stats and 'CIDEr' in lang_stats:
                    optimizer.scheduler_step(-lang_stats['CIDEr'])
                else:
                    optimizer.scheduler_step(val_loss)

            # Write validation result into summary
            add_summary_value(tb_summary_writer, 'validation loss', val_loss, iteration)
            if lang_stats:
                for k, v in lang_stats.items():
                    add_summary_value(tb_summary_writer, k, v, iteration)
            val_result_history[iteration] = {'loss': val_loss, 'lang_stats': lang_stats, 'predictions': predictions}

            # Save model if is improving on validation result
            if opt.language_eval == 1:
                current_score = lang_stats['CIDEr']
            else:
                # Higher is better, so a lower validation loss scores higher.
                current_score = - val_loss

            best_flag = False
            if best_val_score is None or current_score > best_val_score:
                best_val_score = current_score
                best_flag = True

            # The latest model and optimizer state are saved on every
            # evaluation, regardless of whether the score improved.
            if not os.path.isdir(opt.checkpoint_path):
                os.makedirs(opt.checkpoint_path)
            checkpoint_path = os.path.join(opt.checkpoint_path, 'model.pth')
            torch.save(model.state_dict(), checkpoint_path)
            print("model saved to {}".format(checkpoint_path))
            optimizer_path = os.path.join(opt.checkpoint_path, 'optimizer.pth')
            torch.save(optimizer.state_dict(), optimizer_path)

            # Dump miscellaneous information
            infos['iter'] = iteration
            infos['epoch'] = epoch
            infos['iterators'] = loader.iterators
            infos['split_ix'] = loader.split_ix
            infos['best_val_score'] = best_val_score
            infos['opt'] = opt
            infos['vocab'] = loader.get_vocab()

            histories['val_result_history'] = val_result_history
            histories['loss_history'] = loss_history
            histories['lr_history'] = lr_history
            histories['ss_prob_history'] = ss_prob_history
            with open(os.path.join(opt.checkpoint_path, 'infos_'+opt.id+'.pkl'), 'wb') as f:
                cPickle.dump(infos, f)
            with open(os.path.join(opt.checkpoint_path, 'histories_'+opt.id+'.pkl'), 'wb') as f:
                cPickle.dump(histories, f)

            if best_flag:
                checkpoint_path = os.path.join(opt.checkpoint_path, 'model-best.pth')
                torch.save(model.state_dict(), checkpoint_path)
                print("model saved to {}".format(checkpoint_path))
                with open(os.path.join(opt.checkpoint_path, 'infos_'+opt.id+'-best.pkl'), 'wb') as f:
                    cPickle.dump(infos, f)

        # Stop if reaching max epochs
        if epoch >= opt.max_epochs and opt.max_epochs != -1:
            break