def predict(test_data_loader, model, opt):
    """Run greedy (beam size 1) decoding over ``test_data_loader``.

    The delimiter passed to ``evaluate_beam_search`` is ``pykp.io.SEP_WORD``
    when ``opt.delimiter_type`` is 0 and ``pykp.io.EOS_WORD`` otherwise.

    Raises:
        ValueError: if ``opt.sampling`` is truthy (sampling is not supported).
    """
    delimiter_word = (pykp.io.SEP_WORD if opt.delimiter_type == 0
                      else pykp.io.EOS_WORD)

    generator_kwargs = dict(
        bos_idx=opt.word2idx[pykp.io.BOS_WORD],
        eos_idx=opt.word2idx[pykp.io.EOS_WORD],
        pad_idx=opt.word2idx[pykp.io.PAD_WORD],
        peos_idx=opt.word2idx[pykp.io.PEOS_WORD],
        beam_size=1,
        max_sequence_length=opt.max_length,
        copy_attn=opt.copy_attention,
        coverage_attn=opt.coverage_attn,
        review_attn=opt.review_attn,
        cuda=opt.gpuid > -1,
    )
    # The generator is built before the sampling check, so an unsupported
    # configuration still goes through generator construction first.
    generator = SequenceGenerator(model, **generator_kwargs)

    # An earlier dispatch on opt.one2many / opt.one2many_mode to
    # prediction_by_sampling is disabled; only beam search is wired up.
    if opt.sampling:
        raise ValueError("Not support yet!")
    evaluate_beam_search(generator, test_data_loader, opt, delimiter_word)
def predict(test_data_loader, model, ntm_model, opt):
    """Run beam-search decoding with a topic-aware sequence generator.

    ``ntm_model``, ``opt.use_topic_represent`` and ``opt.topic_type`` are
    forwarded positionally to ``SequenceGenerator``; all remaining decoding
    options are read from ``opt`` and passed by keyword.  The delimiter is
    ``pykp.io.SEP_WORD`` when ``opt.delimiter_type`` is 0, else
    ``pykp.io.EOS_WORD``.
    """
    delimiter_word = (pykp.io.SEP_WORD if opt.delimiter_type == 0
                      else pykp.io.EOS_WORD)

    decoding_options = dict(
        bos_idx=opt.word2idx[pykp.io.BOS_WORD],
        eos_idx=opt.word2idx[pykp.io.EOS_WORD],
        pad_idx=opt.word2idx[pykp.io.PAD_WORD],
        beam_size=opt.beam_size,
        max_sequence_length=opt.max_length,
        copy_attn=opt.copy_attention,
        coverage_attn=opt.coverage_attn,
        review_attn=opt.review_attn,
        length_penalty_factor=opt.length_penalty_factor,
        coverage_penalty_factor=opt.coverage_penalty_factor,
        length_penalty=opt.length_penalty,
        coverage_penalty=opt.coverage_penalty,
        cuda=opt.gpuid > -1,
        n_best=opt.n_best,
        block_ngram_repeat=opt.block_ngram_repeat,
        ignore_when_blocking=opt.ignore_when_blocking,
    )
    generator = SequenceGenerator(model, ntm_model, opt.use_topic_represent,
                                  opt.topic_type, **decoding_options)

    evaluate_beam_search(generator, test_data_loader, opt, delimiter_word)
def main():
    """Parse command-line options, prepare output folders and run prediction.

    Builds the experiment name from the parsed options, creates
    ``opt.exp_path`` and ``opt.pred_path`` if missing, loads the test data
    and model, and evaluates with beam search.  Any exception raised while
    loading or evaluating is logged with its traceback and swallowed.
    """
    # Collect every option group on a single parser.
    parser = argparse.ArgumentParser(
        description='predict.py',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    config.preprocess_opts(parser)
    config.model_opts(parser)
    config.train_opts(parser)
    config.predict_opts(parser)
    config.transformer_opts(parser)
    opt = parser.parse_args()

    if opt.seed > 0:
        torch.manual_seed(opt.seed)

    # Fall back to device 0 when CUDA is present and no GPU id was given.
    if torch.cuda.is_available() and not opt.gpuid:
        opt.gpuid = 0

    # Experiment name: "predict.<exp>[.copy][.bi-directional|.uni-directional]"
    opt.exp = 'predict.' + opt.exp
    if getattr(opt, 'copy_model', False):
        opt.exp += '.copy'
    if hasattr(opt, 'bidirectional'):
        opt.exp += '.bi-directional' if opt.bidirectional else '.uni-directional'

    # Fill the experiment name and time mark into the path templates.
    if opt.exp_path.find('%s') > 0:
        name_and_time = (opt.exp, opt.timemark)
        opt.exp_path = opt.exp_path % name_and_time
        opt.pred_path = opt.pred_path % name_and_time

    for folder in (opt.exp_path, opt.pred_path):
        if not os.path.exists(folder):
            os.makedirs(folder)

    logger = config.init_logging(logger_name=None,
                                 log_file=opt.exp_path + '/output.log',
                                 stdout=True)

    try:
        # NOTE: the checkpoint path is hard-coded here and overrides whatever
        # was supplied on the command line.
        opt.train_from = 'model/kp20k.ml.copy.bi-directional.20180908-054257/kp20k.ml.copy.bi-directional.epoch=9.batch=2932.model'

        test_data_loader, _word2id, _id2word, _vocab = load_data_vocab(
            opt, load_train=False)
        model = init_model(opt)
        generator = SequenceGenerator(
            model,
            opt,
            eos_id=opt.word2id[pykp.io.EOS_WORD],
            beam_size=opt.beam_size,
            max_sequence_length=opt.max_sent_length,
        )

        result_path = opt.pred_path + '/[epoch=%d,batch=%d,total_batch=%d]test_result.csv' % (0, 0, 0)
        evaluate_beam_search(generator, test_data_loader, opt,
                             title='predict', save_path=result_path)
    except Exception:
        logger.exception("message")
def evaluate_per_epoch(model, eval_dataloader, opt, epoch):
    """Beam-search evaluate ``model`` on ``eval_dataloader`` for one epoch.

    Results are saved under ``opt.pred_path + '/epoch=<epoch>'``.
    """
    eos_id = opt.word2id[pykp.io.EOS_WORD]
    generator = SequenceGenerator(model,
                                  opt,
                                  eos_id=eos_id,
                                  beam_size=opt.beam_size,
                                  max_sequence_length=opt.max_sent_length)

    save_path = opt.pred_path + '/epoch=%s' % (epoch)
    evaluate_beam_search(generator, eval_dataloader, opt,
                         title='predict', save_path=save_path)
def main():
    """Evaluate a trained model on every configured test set.

    Options come from ``config.init_opt``; all parameters are written to the
    log before evaluation.  Each test set named in ``opt.test_dataset_names``
    is evaluated with beam search and its predictions are saved under
    ``opt.pred_path + '/<name>_test_result/'``.  Exceptions raised during
    loading or evaluation are logged with a traceback and swallowed.
    """
    opt = config.init_opt(description='predict.py')
    logger = config.init_logging('predict',
                                 opt.exp_path + '/output.log',
                                 redirect_to_stdout=False)

    logger.info('EXP_PATH : ' + opt.exp_path)
    logger.info('Parameters:')
    for key, value in opt.__dict__.items():
        logger.info('%s : %s' % (key, str(value)))

    logger.info(
        '====================== Checking GPU Availability ========================='
    )
    if not torch.cuda.is_available():
        logger.info('Running on CPU!')
    else:
        # Normalise a single device id to a list.
        if isinstance(opt.device_ids, int):
            opt.device_ids = [opt.device_ids]
        device_label = 'MULTIPLE GPUs' if len(opt.device_ids) > 1 else '1 GPU'
        logger.info('Running on %s! devices=%s' % (device_label, str(opt.device_ids)))

    try:
        test_data_loaders, _word2id, _id2word, _vocab = load_vocab_and_testsets(opt)
        model = init_model(opt)
        generator = SequenceGenerator(
            model,
            eos_id=opt.word2id[pykp.io.EOS_WORD],
            beam_size=opt.beam_size,
            max_sequence_length=opt.max_sent_length)

        named_loaders = zip(opt.test_dataset_names, test_data_loaders)
        for testset_name, test_data_loader in named_loaders:
            logger.info('Evaluating %s' % testset_name)
            evaluate_beam_search(
                generator,
                test_data_loader,
                opt,
                title='test_%s' % testset_name,
                predict_save_path=opt.pred_path + '/%s_test_result/' % (testset_name))
    except Exception as e:
        logger.error(e, exc_info=True)
def evaluate_per_epoch(model, eval_dataloader, opt):
    """Beam-search evaluate ``model`` on ``eval_dataloader``.

    The epoch / batch / total-batch counters used in the output path are
    parsed from the file name of the checkpoint in ``opt.train_from``.

    Args:
        model: model handed to ``SequenceGenerator``.
        eval_dataloader: data loader to evaluate on.
        opt: options object; reads ``word2id``, ``beam_size``,
            ``max_sent_length``, ``train_from`` and ``pred_path``.

    Raises:
        ValueError: if the checkpoint file name does not contain exactly four
            groups of digits (the tuple unpacking below fails).
    """
    generator = SequenceGenerator(
        model,
        opt,
        eos_id=opt.word2id[pykp.io.EOS_WORD],
        beam_size=opt.beam_size,
        max_sequence_length=opt.max_sent_length,
    )

    # The checkpoint file name is expected to hold four digit groups; the
    # first one is ignored and the remaining three are epoch, batch and
    # total_batch, in that order.
    model_path = opt.train_from.split('/')[-1]
    _, epoch, batch, total_batch = re.findall(r'\d+', model_path)

    # Evaluate on the loader passed in.  The previous code referenced
    # ``test_data_loader``, which is not defined in this function, and left
    # the ``eval_dataloader`` parameter unused.
    evaluate_beam_search(
        generator,
        eval_dataloader,
        opt,
        title='predict',
        save_path=opt.pred_path + '/epoch=%s,batch=%s,total_batch=%s' % (epoch, batch, total_batch))
def predict(test_data_loader, model, opt):
    """Run beam-search decoding over ``test_data_loader``.

    All decoding options (beam size, penalties, n-gram blocking, ...) are
    read from ``opt``.  The delimiter passed to ``evaluate_beam_search`` is
    ``pykp.io.SEP_WORD`` when ``opt.delimiter_type`` is 0 and
    ``pykp.io.EOS_WORD`` otherwise.

    Raises:
        ValueError: if ``opt.sampling`` is truthy (sampling is not supported).
    """
    delimiter_word = (pykp.io.SEP_WORD if opt.delimiter_type == 0
                      else pykp.io.EOS_WORD)

    decoding_options = dict(
        bos_idx=opt.word2idx[pykp.io.BOS_WORD],
        eos_idx=opt.word2idx[pykp.io.EOS_WORD],
        pad_idx=opt.word2idx[pykp.io.PAD_WORD],
        beam_size=opt.beam_size,
        max_sequence_length=opt.max_length,
        copy_attn=opt.copy_attention,
        coverage_attn=opt.coverage_attn,
        review_attn=opt.review_attn,
        include_attn_dist=opt.include_attn_dist,
        length_penalty_factor=opt.length_penalty_factor,
        coverage_penalty_factor=opt.coverage_penalty_factor,
        length_penalty=opt.length_penalty,
        coverage_penalty=opt.coverage_penalty,
        cuda=opt.gpuid > -1,
        n_best=opt.n_best,
        block_ngram_repeat=opt.block_ngram_repeat,
        ignore_when_blocking=opt.ignore_when_blocking,
        peos_idx=opt.word2idx[pykp.io.PEOS_WORD],
    )
    # The generator is built before the sampling check, so an unsupported
    # configuration still goes through generator construction first.
    generator = SequenceGenerator(model, **decoding_options)

    # An earlier dispatch on opt.one2many / opt.one2many_mode to
    # prediction_by_sampling is disabled; only beam search is wired up.
    if opt.sampling:
        raise ValueError("Not support yet!")
    evaluate_beam_search(generator, test_data_loader, opt, delimiter_word)
def train_model(model, optimizer, criterion, train_data_loader, valid_data_loader, test_data_loader, opt):
    """Train ``model`` with a maximum-likelihood loss and periodic evaluation.

    Every ``opt.run_valid_every`` batches the model is evaluated with beam
    search on the validation and test loaders, the learning curve is
    plotted, a checkpoint may be written, and early stopping is checked
    against the first score in ``opt.report_score_names`` (higher is better).

    Args:
        model: network to train; called as
            ``model.forward(src, trg, src_ext, oov_lists)``.
        optimizer: optimizer stepped once per batch.
        criterion: loss called with flattened log-probabilities and targets.
        train_data_loader: yields ``(one2many_batch, one2one_batch)`` pairs.
        valid_data_loader: loader passed to ``evaluate_beam_search`` ('valid').
        test_data_loader: loader passed to ``evaluate_beam_search`` ('test').
        opt: options object.  ``opt.gpuid`` is normalised to a list and
            ``opt.start_epoch`` is overwritten when a saved state is loaded.

    Returns:
        None.
    """
    generator = SequenceGenerator(model,
                                  eos_id=opt.word2id[pykp.io.EOS_WORD],
                                  beam_size=opt.beam_size,
                                  max_sequence_length=opt.max_sent_length
                                  )

    logging.info('====================== Checking GPU Availability =========================')
    if torch.cuda.is_available():
        if isinstance(opt.gpuid, int):
            opt.gpuid = [opt.gpuid]
        logging.info('Running on GPU! devices=%s' % str(opt.gpuid))
    else:
        logging.info('Running on CPU!')

    logging.info('====================== Start Training =========================')

    checkpoint_names = []
    train_history_losses = []
    valid_history_losses = []
    test_history_losses = []
    best_loss = 0.0  # best validation score so far (an f-score, so higher is better)
    stop_increasing = 0

    train_losses = []
    total_batch = 0
    early_stop_flag = False

    # Resume the bookkeeping state saved next to the checkpoint, if any.
    if opt.train_from:
        state_path = opt.train_from.replace('.model', '.state')
        logging.info('Loading training state from: %s' % state_path)
        if os.path.exists(state_path):
            # Use a context manager so the file handle is closed after loading.
            with open(state_path, 'rb') as state_file:
                (epoch, total_batch, best_loss, stop_increasing, checkpoint_names,
                 train_history_losses, valid_history_losses,
                 test_history_losses) = torch.load(state_file)
            opt.start_epoch = epoch

    for epoch in range(opt.start_epoch, opt.epochs):
        if early_stop_flag:
            break

        progbar = Progbar(title='Training', target=len(train_data_loader),
                          batch_size=train_data_loader.batch_size,
                          total_examples=len(train_data_loader.dataset))

        for batch_i, batch in enumerate(train_data_loader):
            model.train()
            batch_i += 1  # for the aesthetics of printing
            total_batch += 1
            one2many_batch, one2one_batch = batch
            src, trg, trg_target, trg_copy_target, src_ext, oov_lists = one2one_batch
            max_oov_number = max([len(oov) for oov in oov_lists])

            print("src size - ",src.size())
            print("target size - ",trg.size())

            if torch.cuda.is_available():
                src = src.cuda()
                trg = trg.cuda()
                trg_target = trg_target.cuda()
                trg_copy_target = trg_copy_target.cuda()
                src_ext = src_ext.cuda()

            optimizer.zero_grad()

            # ---- Training with Maximum Likelihood (word-level error) ----
            decoder_log_probs, _, _ = model.forward(src, trg, src_ext, oov_lists)

            # The loss is computed on the decoder's log-probabilities; with a
            # copy model the output space is extended by the batch's largest
            # OOV count.
            if not opt.copy_model:
                ml_loss = criterion(
                    decoder_log_probs.contiguous().view(-1, opt.vocab_size),
                    trg_target.contiguous().view(-1)
                )
            else:
                ml_loss = criterion(
                    decoder_log_probs.contiguous().view(-1, opt.vocab_size + max_oov_number),
                    trg_copy_target.contiguous().view(-1)
                )

            # ---- Training with Reinforcement Learning (instance-level reward f-score) ----
            # NOTE(review): nothing computed in this section is used later in
            # this function; it is kept so the side effects of the original
            # code are preserved.
            src_list, trg_list, _, trg_copy_target_list, src_oov_map_list, oov_list, src_str_list, trg_str_list = one2many_batch

            if torch.cuda.is_available():
                src_list = src_list.cuda()
                src_oov_map_list = src_oov_map_list.cuda()
            rl_loss = get_loss_rl()

            start_time = time.time()
            ml_loss.backward()
            print("--backward- %s seconds ---" % (time.time() - start_time))

            if opt.max_grad_norm > 0:
                # NOTE(review): clip_grad_norm and loss.data[0] are the
                # spellings used by this file; confirm they match the
                # installed PyTorch version.
                pre_norm = torch.nn.utils.clip_grad_norm(model.parameters(), opt.max_grad_norm)
                after_norm = (sum([p.grad.data.norm(2) ** 2 for p in model.parameters() if p.grad is not None])) ** (1.0 / 2)
                logging.info('clip grad (%f -> %f)' % (pre_norm, after_norm))

            optimizer.step()

            train_losses.append(ml_loss.data[0])

            progbar.update(epoch, batch_i, [('train_loss', ml_loss.data[0]), ('PPL', ml_loss.data[0])])

            # ---- Periodic report: greedy predictions on a few sampled examples ----
            if batch_i > 1 and batch_i % opt.report_every == 0:
                logging.info('====================== %d =========================' % (batch_i))

                logging.info('Epoch : %d Minibatch : %d, Loss=%.5f' % (epoch, batch_i, np.mean(ml_loss.data[0])))
                sampled_size = 2
                logging.info('Printing predictions on %d sampled examples by greedy search' % sampled_size)

                # .cpu() is a no-op for tensors already on the CPU, so one
                # code path serves both the CUDA and the CPU case.
                src = src.data.cpu().numpy()
                decoder_log_probs = decoder_log_probs.data.cpu().numpy()
                max_words_pred = decoder_log_probs.argmax(axis=-1)
                trg_target = trg_target.data.cpu().numpy()
                trg_copy_target = trg_copy_target.data.cpu().numpy()

                # randint's upper bound is exclusive, so high=len(trg) draws
                # from the same range as the deprecated
                # random_integers(low=0, high=len(trg) - 1).
                sampled_trg_idx = np.random.randint(low=0, high=len(trg), size=sampled_size)
                src = src[sampled_trg_idx]
                oov_lists = [oov_lists[i] for i in sampled_trg_idx]
                max_words_pred = [max_words_pred[i] for i in sampled_trg_idx]
                decoder_log_probs = decoder_log_probs[sampled_trg_idx]
                if not opt.copy_model:
                    # use the real target (the starting <BOS> has been removed)
                    trg_target = [trg_target[i] for i in sampled_trg_idx]
                else:
                    # the copy target also contains OOV ground-truth ids
                    trg_target = [trg_copy_target[i] for i in sampled_trg_idx]

                for i, (src_wi, pred_wi, trg_i, oov_i) in enumerate(zip(src, max_words_pred, trg_target, oov_lists)):
                    nll_prob = -np.sum([decoder_log_probs[i][step][pred_wi[step]] for step in range(len(trg_i))])
                    # ids >= vocab_size index into this example's OOV list
                    find_copy = np.any([x >= opt.vocab_size for x in src_wi])
                    has_copy = np.any([x >= opt.vocab_size for x in trg_i])

                    sentence_source = [opt.id2word[x] if x < opt.vocab_size else oov_i[x-opt.vocab_size] for x in src_wi]
                    sentence_pred = [opt.id2word[x] if x < opt.vocab_size else oov_i[x-opt.vocab_size] for x in pred_wi]
                    sentence_real = [opt.id2word[x] if x < opt.vocab_size else oov_i[x-opt.vocab_size] for x in trg_i]

                    # cut each sentence at its first padding token
                    sentence_source = sentence_source[:sentence_source.index('<pad>')] if '<pad>' in sentence_source else sentence_source
                    sentence_pred = sentence_pred[:sentence_pred.index('<pad>')] if '<pad>' in sentence_pred else sentence_pred
                    sentence_real = sentence_real[:sentence_real.index('<pad>')] if '<pad>' in sentence_real else sentence_real

                    logging.info('==================================================')
                    logging.info('Source: %s ' % (' '.join(sentence_source)))
                    logging.info('\t\tPred : %s (%.4f)' % (' '.join(sentence_pred), nll_prob) + (' [FIND COPY]' if find_copy else ''))
                    logging.info('\t\tReal : %s ' % (' '.join(sentence_real)) + (' [HAS COPY]' + str(trg_i) if has_copy else ''))

            # ---- Periodic validation, plotting, checkpointing, early stop ----
            if total_batch > 1 and total_batch % opt.run_valid_every == 0:
                logging.info('*' * 50)
                logging.info('Run validing and testing @Epoch=%d,#(Total batch)=%d' % (epoch, total_batch))
                valid_score_dict = evaluate_beam_search(generator, valid_data_loader, opt, title='valid', epoch=epoch, save_path=opt.exp_path + '/epoch%d_batch%d_total_batch%d' % (epoch, batch_i, total_batch))
                test_score_dict = evaluate_beam_search(generator, test_data_loader, opt, title='test', epoch=epoch, save_path=opt.exp_path + '/epoch%d_batch%d_total_batch%d' % (epoch, batch_i, total_batch))

                checkpoint_names.append('epoch=%d-batch=%d-total_batch=%d' % (epoch, batch_i, total_batch))
                train_history_losses.append(copy.copy(train_losses))
                valid_history_losses.append(valid_score_dict)
                test_history_losses.append(test_score_dict)
                train_losses = []

                scores = [train_history_losses]
                curve_names = ['Training Error']
                scores += [[result_dict[name] for result_dict in valid_history_losses] for name in opt.report_score_names]
                curve_names += ['Valid-'+name for name in opt.report_score_names]
                scores += [[result_dict[name] for result_dict in test_history_losses] for name in opt.report_score_names]
                curve_names += ['Test-'+name for name in opt.report_score_names]

                scores = [np.asarray(s) for s in scores]
                # Plot the learning curve
                plot_learning_curve(scores=scores,
                                    curve_names=curve_names,
                                    checkpoint_names=checkpoint_names,
                                    title='Training Validation & Test',
                                    save_path=opt.exp_path + '/[epoch=%d,batch=%d,total_batch=%d]train_valid_test_curve.png' % (epoch, batch_i, total_batch))

                # Early-stop check: has the validation score increased?
                valid_loss = np.average(valid_history_losses[-1][opt.report_score_names[0]])
                is_best_loss = valid_loss > best_loss
                rate_of_change = float(valid_loss - best_loss) / float(best_loss) if float(best_loss) > 0 else 0.0

                # The counter follows is_best_loss directly.  Deriving it
                # from rate_of_change miscounted the first validation
                # (best_loss == 0 forces the rate to 0) as "no improvement"
                # even when the score went up.
                if is_best_loss:
                    stop_increasing = 0
                    logging.info('Validation: update best loss (%.4f --> %.4f), rate of change (ROC)=%.2f' % (
                        best_loss, valid_loss, rate_of_change * 100))
                else:
                    stop_increasing += 1
                    logging.info('Validation: best loss is not updated for %d times (%.4f --> %.4f), rate of change (ROC)=%.2f' % (
                        stop_increasing, best_loss, valid_loss, rate_of_change * 100))

                best_loss = max(valid_loss, best_loss)

                # Save on the regular schedule or whenever validation improved.
                if total_batch > 1 and (total_batch % opt.save_model_every == 0 or is_best_loss):
                    checkpoint_prefix = os.path.join(opt.save_path, '%s.epoch=%d.batch=%d.total_batch=%d' % (opt.exp, epoch, batch_i, total_batch))
                    # NOTE(review): the logged name carries an ".error=" suffix
                    # that the files written below do not have.
                    logging.info('Saving checkpoint to: %s' % os.path.join(opt.save_path, '%s.epoch=%d.batch=%d.total_batch=%d.error=%f' % (opt.exp, epoch, batch_i, total_batch, valid_loss) + '.model'))
                    # Context managers make sure both files are flushed and closed.
                    with open(checkpoint_prefix + '.model', 'wb') as model_file:
                        torch.save(model.state_dict(), model_file)
                    with open(checkpoint_prefix + '.state', 'wb') as state_file:
                        torch.save(
                            (epoch, total_batch, best_loss, stop_increasing, checkpoint_names,
                             train_history_losses, valid_history_losses, test_history_losses),
                            state_file
                        )

                if stop_increasing >= opt.early_stop_tolerance:
                    logging.info('Have not increased for %d epoches, early stop training' % stop_increasing)
                    early_stop_flag = True
                    break
                logging.info('*' * 50)
def train_model(model, optimizer_ml, optimizer_rl, criterion, train_data_loader, valid_data_loader, test_data_loader, opt):
    """Train ``model`` with maximum likelihood and/or reinforcement learning.

    Per batch, ``train_ml`` runs when ``opt.train_ml`` is set and
    ``train_rl`` runs when ``opt.train_rl`` is set and the epoch has reached
    ``opt.rl_start_epoch``.  Validation happens at the last batch of each
    epoch when ``opt.run_valid_every == -1``, otherwise every
    ``opt.run_valid_every`` total batches; it evaluates with beam search,
    plots the learning curve, may write a checkpoint and checks early
    stopping against the first score in ``opt.report_score_names`` (higher
    is better).

    Args:
        model: network to train.
        optimizer_ml: optimizer handed to ``train_ml``.
        optimizer_rl: optimizer handed to ``train_rl``.
        criterion: loss handed to ``train_ml``.
        train_data_loader: yields ``(one2many_batch, one2one_batch)`` pairs.
        valid_data_loader: loader used for validation.
        test_data_loader: loader used for testing.
        opt: options object.  ``opt.gpuid`` is normalised to a list.

    Returns:
        None.
    """
    generator = SequenceGenerator(model,
                                  eos_id=opt.word2id[pykp.io.EOS_WORD],
                                  beam_size=opt.beam_size,
                                  max_sequence_length=opt.max_sent_length
                                  )

    logging.info('====================== Checking GPU Availability =========================')
    if torch.cuda.is_available():
        if isinstance(opt.gpuid, int):
            opt.gpuid = [opt.gpuid]
        logging.info('Running on GPU! devices=%s' % str(opt.gpuid))
    else:
        logging.info('Running on CPU!')

    logging.info('====================== Start Training =========================')

    checkpoint_names = []
    train_ml_history_losses = []
    train_rl_history_losses = []
    valid_history_losses = []
    test_history_losses = []
    best_loss = 0.0  # best validation score so far (an f-score, so higher is better)
    stop_increasing = 0

    train_ml_losses = []
    train_rl_losses = []
    total_batch = -1  # incremented before use, so the first batch is number 0
    early_stop_flag = False

    if opt.train_rl:
        reward_cache = RewardCache(2000)

    # Resuming from a saved state is switched off on purpose; the intended
    # condition is kept in the trailing comment.
    if False:  # opt.train_from:
        state_path = opt.train_from.replace('.model', '.state')
        logging.info('Loading training state from: %s' % state_path)
        if os.path.exists(state_path):
            with open(state_path, 'rb') as state_file:
                (epoch, total_batch, best_loss, stop_increasing, checkpoint_names,
                 train_ml_history_losses, train_rl_history_losses,
                 valid_history_losses, test_history_losses) = torch.load(state_file)
            opt.start_epoch = epoch

    for epoch in range(opt.start_epoch, opt.epochs):
        if early_stop_flag:
            break

        progbar = Progbar(logger=logging, title='Training', target=len(train_data_loader),
                          batch_size=train_data_loader.batch_size,
                          total_examples=len(train_data_loader.dataset.examples))

        for batch_i, batch in enumerate(train_data_loader):
            model.train()
            total_batch += 1
            one2many_batch, one2one_batch = batch
            report_loss = []

            # ---- Maximum-likelihood step ----
            if opt.train_ml:
                loss_ml, decoder_log_probs = train_ml(one2one_batch, model, optimizer_ml, criterion, opt)
                train_ml_losses.append(loss_ml)
                report_loss.append(('train_ml_loss', loss_ml))
                report_loss.append(('PPL', loss_ml))

                # Brief report (needs the ML outputs, hence nested here)
                if batch_i % opt.report_every == 0:
                    brief_report(epoch, batch_i, one2one_batch, loss_ml, decoder_log_probs, opt)

            # ---- Reinforcement-learning step ----
            # RL is skipped before opt.rl_start_epoch so that a reasonable
            # model exists first; a 0.0 placeholder is recorded instead.
            if opt.train_rl:
                if epoch >= opt.rl_start_epoch:
                    loss_rl = train_rl(one2many_batch, model, optimizer_rl, generator, opt, reward_cache)
                else:
                    loss_rl = 0.0
                train_rl_losses.append(loss_rl)
                report_loss.append(('train_rl_loss', loss_rl))

            progbar.update(epoch, batch_i, report_loss)

            # ---- Validate and save checkpoint ----
            validate_at_epoch_end = opt.run_valid_every == -1 and batch_i == len(train_data_loader) - 1
            validate_on_schedule = opt.run_valid_every > -1 and total_batch > 1 and total_batch % opt.run_valid_every == 0
            if validate_at_epoch_end or validate_on_schedule:
                logging.info('*' * 50)
                logging.info('Run validing and testing @Epoch=%d,#(Total batch)=%d' % (epoch, total_batch))
                valid_score_dict = evaluate_beam_search(generator, valid_data_loader, opt, title='Validating, epoch=%d, batch=%d, total_batch=%d' % (epoch, batch_i, total_batch), epoch=epoch, save_path=opt.pred_path + '/epoch%d_batch%d_total_batch%d' % (epoch, batch_i, total_batch))
                test_score_dict = evaluate_beam_search(generator, test_data_loader, opt, title='Testing, epoch=%d, batch=%d, total_batch=%d' % (epoch, batch_i, total_batch), epoch=epoch, save_path=opt.pred_path + '/epoch%d_batch%d_total_batch%d' % (epoch, batch_i, total_batch))

                checkpoint_names.append('epoch=%d-batch=%d-total_batch=%d' % (epoch, batch_i, total_batch))

                curve_names = []
                scores = []
                if opt.train_ml:
                    train_ml_history_losses.append(copy.copy(train_ml_losses))
                    scores += [train_ml_history_losses]
                    curve_names += ['Training ML Error']
                    train_ml_losses = []

                if opt.train_rl:
                    train_rl_history_losses.append(copy.copy(train_rl_losses))
                    scores += [train_rl_history_losses]
                    curve_names += ['Training RL Reward']
                    train_rl_losses = []

                valid_history_losses.append(valid_score_dict)
                test_history_losses.append(test_score_dict)

                scores += [[result_dict[name] for result_dict in valid_history_losses] for name in opt.report_score_names]
                curve_names += ['Valid-' + name for name in opt.report_score_names]
                scores += [[result_dict[name] for result_dict in test_history_losses] for name in opt.report_score_names]
                curve_names += ['Test-' + name for name in opt.report_score_names]

                scores = [np.asarray(s) for s in scores]
                # Plot the learning curve
                plot_learning_curve(scores=scores,
                                    curve_names=curve_names,
                                    checkpoint_names=checkpoint_names,
                                    title='Training Validation & Test',
                                    save_path=opt.exp_path + '/[epoch=%d,batch=%d,total_batch=%d]train_valid_test_curve.png' % (epoch, batch_i, total_batch))

                # Early-stop check: has the validation score increased?
                valid_loss = np.average(valid_history_losses[-1][opt.report_score_names[0]])
                is_best_loss = valid_loss > best_loss
                rate_of_change = float(valid_loss - best_loss) / float(best_loss) if float(best_loss) > 0 else 0.0

                # The counter follows is_best_loss directly.  Deriving it
                # from rate_of_change miscounted the first validation
                # (best_loss == 0 forces the rate to 0) as "no improvement"
                # even when the score went up.
                if is_best_loss:
                    stop_increasing = 0
                    logging.info('Validation: update best loss (%.4f --> %.4f), rate of change (ROC)=%.2f' % (
                        best_loss, valid_loss, rate_of_change * 100))
                else:
                    stop_increasing += 1
                    logging.info('Validation: best loss is not updated for %d times (%.4f --> %.4f), rate of change (ROC)=%.2f' % (
                        stop_increasing, best_loss, valid_loss, rate_of_change * 100))

                best_loss = max(valid_loss, best_loss)

                # Save on the regular schedule or whenever validation improved.
                if total_batch > 1 and (total_batch % opt.save_model_every == 0 or is_best_loss):
                    checkpoint_prefix = os.path.join(opt.model_path, '%s.epoch=%d.batch=%d.total_batch=%d' % (opt.exp, epoch, batch_i, total_batch))
                    # NOTE(review): the logged name carries an ".error=" suffix
                    # that the files written below do not have.
                    logging.info('Saving checkpoint to: %s' % os.path.join(opt.model_path, '%s.epoch=%d.batch=%d.total_batch=%d.error=%f' % (opt.exp, epoch, batch_i, total_batch, valid_loss) + '.model'))
                    # Context managers make sure both files are flushed and closed.
                    with open(checkpoint_prefix + '.model', 'wb') as model_file:
                        torch.save(model.state_dict(), model_file)
                    with open(checkpoint_prefix + '.state', 'wb') as state_file:
                        torch.save(
                            (epoch, total_batch, best_loss, stop_increasing, checkpoint_names,
                             train_ml_history_losses, train_rl_history_losses,
                             valid_history_losses, test_history_losses),
                            state_file
                        )

                if stop_increasing >= opt.early_stop_tolerance:
                    logging.info('Have not increased for %d epoches, early stop training' % stop_increasing)
                    early_stop_flag = True
                    break
                logging.info('*' * 50)
def train_model(model, optimizer, criterion, train_data_loader,
                valid_data_loader, test_data_loader, opt):
    """Train ``model`` with a maximum-likelihood loss and periodic evaluation.

    Every ``opt.run_valid_every`` batches the model is evaluated with beam
    search on the validation and test loaders, the learning curve is
    plotted, a checkpoint may be written, and early stopping is checked
    against the first score in ``opt.report_score_names`` (higher is better).

    Args:
        model: network to train; called as
            ``model.forward(src, trg, src_ext, oov_lists)``.
        optimizer: optimizer stepped once per batch.
        criterion: loss called with flattened log-probabilities and targets.
        train_data_loader: yields ``(one2many_batch, one2one_batch)`` pairs.
        valid_data_loader: loader passed to ``evaluate_beam_search`` ('valid').
        test_data_loader: loader passed to ``evaluate_beam_search`` ('test').
        opt: options object.  ``opt.gpuid`` is normalised to a list and
            ``opt.start_epoch`` is overwritten when a saved state is loaded.

    Returns:
        None.
    """
    generator = SequenceGenerator(model,
                                  eos_id=opt.word2id[pykp.io.EOS_WORD],
                                  beam_size=opt.beam_size,
                                  max_sequence_length=opt.max_sent_length)

    logging.info(
        '====================== Checking GPU Availability ========================='
    )
    if torch.cuda.is_available():
        if isinstance(opt.gpuid, int):
            opt.gpuid = [opt.gpuid]
        logging.info('Running on GPU! devices=%s' % str(opt.gpuid))
    else:
        logging.info('Running on CPU!')

    logging.info(
        '====================== Start Training =========================')

    checkpoint_names = []
    train_history_losses = []
    valid_history_losses = []
    test_history_losses = []
    best_loss = 0.0  # best validation score so far (an f-score, so higher is better)
    stop_increasing = 0

    train_losses = []
    total_batch = 0
    early_stop_flag = False

    # Resume the bookkeeping state saved next to the checkpoint, if any.
    if opt.train_from:
        state_path = opt.train_from.replace('.model', '.state')
        logging.info('Loading training state from: %s' % state_path)
        if os.path.exists(state_path):
            # Use a context manager so the file handle is closed after loading.
            with open(state_path, 'rb') as state_file:
                (epoch, total_batch, best_loss, stop_increasing,
                 checkpoint_names, train_history_losses,
                 valid_history_losses,
                 test_history_losses) = torch.load(state_file)
            opt.start_epoch = epoch

    for epoch in range(opt.start_epoch, opt.epochs):
        if early_stop_flag:
            break

        progbar = Progbar(title='Training',
                          target=len(train_data_loader),
                          batch_size=train_data_loader.batch_size,
                          total_examples=len(train_data_loader.dataset))

        for batch_i, batch in enumerate(train_data_loader):
            model.train()
            batch_i += 1  # for the aesthetics of printing
            total_batch += 1
            one2many_batch, one2one_batch = batch
            src, trg, trg_target, trg_copy_target, src_ext, oov_lists = one2one_batch
            max_oov_number = max([len(oov) for oov in oov_lists])

            print("src size - ", src.size())
            print("target size - ", trg.size())

            if torch.cuda.is_available():
                src = src.cuda()
                trg = trg.cuda()
                trg_target = trg_target.cuda()
                trg_copy_target = trg_copy_target.cuda()
                src_ext = src_ext.cuda()

            optimizer.zero_grad()

            # ---- Training with Maximum Likelihood (word-level error) ----
            decoder_log_probs, _, _ = model.forward(src, trg, src_ext,
                                                    oov_lists)

            # The loss is computed on the decoder's log-probabilities; with a
            # copy model the output space is extended by the batch's largest
            # OOV count.
            if not opt.copy_model:
                ml_loss = criterion(
                    decoder_log_probs.contiguous().view(-1, opt.vocab_size),
                    trg_target.contiguous().view(-1))
            else:
                ml_loss = criterion(
                    decoder_log_probs.contiguous().view(
                        -1, opt.vocab_size + max_oov_number),
                    trg_copy_target.contiguous().view(-1))

            # ---- Training with Reinforcement Learning (instance-level reward f-score) ----
            # NOTE(review): nothing computed in this section is used later in
            # this function; it is kept so the side effects of the original
            # code are preserved.
            src_list, trg_list, _, trg_copy_target_list, src_oov_map_list, oov_list, src_str_list, trg_str_list = one2many_batch

            if torch.cuda.is_available():
                src_list = src_list.cuda()
                src_oov_map_list = src_oov_map_list.cuda()
            rl_loss = get_loss_rl()

            start_time = time.time()
            ml_loss.backward()
            print("--backward- %s seconds ---" % (time.time() - start_time))

            if opt.max_grad_norm > 0:
                # NOTE(review): clip_grad_norm and loss.data[0] are the
                # spellings used by this file; confirm they match the
                # installed PyTorch version.
                pre_norm = torch.nn.utils.clip_grad_norm(
                    model.parameters(), opt.max_grad_norm)
                after_norm = (sum([
                    p.grad.data.norm(2)**2 for p in model.parameters()
                    if p.grad is not None
                ]))**(1.0 / 2)
                logging.info('clip grad (%f -> %f)' % (pre_norm, after_norm))

            optimizer.step()

            train_losses.append(ml_loss.data[0])

            progbar.update(epoch, batch_i, [('train_loss', ml_loss.data[0]),
                                            ('PPL', ml_loss.data[0])])

            # ---- Periodic report: greedy predictions on a few sampled examples ----
            if batch_i > 1 and batch_i % opt.report_every == 0:
                logging.info(
                    '====================== %d ========================='
                    % (batch_i))

                logging.info('Epoch : %d Minibatch : %d, Loss=%.5f' %
                             (epoch, batch_i, np.mean(ml_loss.data[0])))
                sampled_size = 2
                logging.info(
                    'Printing predictions on %d sampled examples by greedy search'
                    % sampled_size)

                # .cpu() is a no-op for tensors already on the CPU, so one
                # code path serves both the CUDA and the CPU case.
                src = src.data.cpu().numpy()
                decoder_log_probs = decoder_log_probs.data.cpu().numpy()
                max_words_pred = decoder_log_probs.argmax(axis=-1)
                trg_target = trg_target.data.cpu().numpy()
                trg_copy_target = trg_copy_target.data.cpu().numpy()

                # randint's upper bound is exclusive, so high=len(trg) draws
                # from the same range as the deprecated
                # random_integers(low=0, high=len(trg) - 1).
                sampled_trg_idx = np.random.randint(low=0,
                                                    high=len(trg),
                                                    size=sampled_size)
                src = src[sampled_trg_idx]
                oov_lists = [oov_lists[i] for i in sampled_trg_idx]
                max_words_pred = [max_words_pred[i] for i in sampled_trg_idx]
                decoder_log_probs = decoder_log_probs[sampled_trg_idx]
                if not opt.copy_model:
                    # use the real target (the starting <BOS> has been removed)
                    trg_target = [trg_target[i] for i in sampled_trg_idx]
                else:
                    # the copy target also contains OOV ground-truth ids
                    trg_target = [trg_copy_target[i] for i in sampled_trg_idx]

                for i, (src_wi, pred_wi, trg_i, oov_i) in enumerate(
                        zip(src, max_words_pred, trg_target, oov_lists)):
                    nll_prob = -np.sum([
                        decoder_log_probs[i][step][pred_wi[step]]
                        for step in range(len(trg_i))
                    ])
                    # ids >= vocab_size index into this example's OOV list
                    find_copy = np.any([x >= opt.vocab_size for x in src_wi])
                    has_copy = np.any([x >= opt.vocab_size for x in trg_i])

                    sentence_source = [
                        opt.id2word[x]
                        if x < opt.vocab_size else oov_i[x - opt.vocab_size]
                        for x in src_wi
                    ]
                    sentence_pred = [
                        opt.id2word[x]
                        if x < opt.vocab_size else oov_i[x - opt.vocab_size]
                        for x in pred_wi
                    ]
                    sentence_real = [
                        opt.id2word[x]
                        if x < opt.vocab_size else oov_i[x - opt.vocab_size]
                        for x in trg_i
                    ]

                    # cut each sentence at its first padding token
                    sentence_source = sentence_source[:sentence_source.index(
                        '<pad>'
                    )] if '<pad>' in sentence_source else sentence_source
                    sentence_pred = sentence_pred[:sentence_pred.index(
                        '<pad>'
                    )] if '<pad>' in sentence_pred else sentence_pred
                    sentence_real = sentence_real[:sentence_real.index(
                        '<pad>'
                    )] if '<pad>' in sentence_real else sentence_real

                    logging.info(
                        '==================================================')
                    logging.info('Source: %s ' % (' '.join(sentence_source)))
                    logging.info('\t\tPred : %s (%.4f)' %
                                 (' '.join(sentence_pred), nll_prob) +
                                 (' [FIND COPY]' if find_copy else ''))
                    logging.info('\t\tReal : %s ' % (' '.join(sentence_real)) +
                                 (' [HAS COPY]' +
                                  str(trg_i) if has_copy else ''))

            # ---- Periodic validation, plotting, checkpointing, early stop ----
            if total_batch > 1 and total_batch % opt.run_valid_every == 0:
                logging.info('*' * 50)
                logging.info(
                    'Run validing and testing @Epoch=%d,#(Total batch)=%d' %
                    (epoch, total_batch))
                valid_score_dict = evaluate_beam_search(
                    generator,
                    valid_data_loader,
                    opt,
                    title='valid',
                    epoch=epoch,
                    save_path=opt.exp_path +
                    '/epoch%d_batch%d_total_batch%d' %
                    (epoch, batch_i, total_batch))
                test_score_dict = evaluate_beam_search(
                    generator,
                    test_data_loader,
                    opt,
                    title='test',
                    epoch=epoch,
                    save_path=opt.exp_path +
                    '/epoch%d_batch%d_total_batch%d' %
                    (epoch, batch_i, total_batch))

                checkpoint_names.append('epoch=%d-batch=%d-total_batch=%d' %
                                        (epoch, batch_i, total_batch))
                train_history_losses.append(copy.copy(train_losses))
                valid_history_losses.append(valid_score_dict)
                test_history_losses.append(test_score_dict)
                train_losses = []

                scores = [train_history_losses]
                curve_names = ['Training Error']
                scores += [[
                    result_dict[name] for result_dict in valid_history_losses
                ] for name in opt.report_score_names]
                curve_names += [
                    'Valid-' + name for name in opt.report_score_names
                ]
                scores += [[
                    result_dict[name] for result_dict in test_history_losses
                ] for name in opt.report_score_names]
                curve_names += [
                    'Test-' + name for name in opt.report_score_names
                ]

                scores = [np.asarray(s) for s in scores]
                # Plot the learning curve
                plot_learning_curve(
                    scores=scores,
                    curve_names=curve_names,
                    checkpoint_names=checkpoint_names,
                    title='Training Validation & Test',
                    save_path=opt.exp_path +
                    '/[epoch=%d,batch=%d,total_batch=%d]train_valid_test_curve.png'
                    % (epoch, batch_i, total_batch))

                # Early-stop check: has the validation score increased?
                valid_loss = np.average(
                    valid_history_losses[-1][opt.report_score_names[0]])
                is_best_loss = valid_loss > best_loss
                rate_of_change = float(valid_loss - best_loss) / float(
                    best_loss) if float(best_loss) > 0 else 0.0

                # The counter follows is_best_loss directly.  Deriving it
                # from rate_of_change miscounted the first validation
                # (best_loss == 0 forces the rate to 0) as "no improvement"
                # even when the score went up.
                if is_best_loss:
                    stop_increasing = 0
                    logging.info(
                        'Validation: update best loss (%.4f --> %.4f), rate of change (ROC)=%.2f'
                        % (best_loss, valid_loss, rate_of_change * 100))
                else:
                    stop_increasing += 1
                    logging.info(
                        'Validation: best loss is not updated for %d times (%.4f --> %.4f), rate of change (ROC)=%.2f'
                        % (stop_increasing, best_loss, valid_loss,
                           rate_of_change * 100))

                best_loss = max(valid_loss, best_loss)

                # Save on the regular schedule or whenever validation improved.
                if total_batch > 1 and (
                        total_batch % opt.save_model_every == 0
                        or is_best_loss):
                    checkpoint_prefix = os.path.join(
                        opt.save_path,
                        '%s.epoch=%d.batch=%d.total_batch=%d' %
                        (opt.exp, epoch, batch_i, total_batch))
                    # NOTE(review): the logged name carries an ".error=" suffix
                    # that the files written below do not have.
                    logging.info('Saving checkpoint to: %s' % os.path.join(
                        opt.save_path,
                        '%s.epoch=%d.batch=%d.total_batch=%d.error=%f' %
                        (opt.exp, epoch, batch_i, total_batch, valid_loss) +
                        '.model'))
                    # Context managers make sure both files are flushed and closed.
                    with open(checkpoint_prefix + '.model', 'wb') as model_file:
                        torch.save(model.state_dict(), model_file)
                    with open(checkpoint_prefix + '.state', 'wb') as state_file:
                        torch.save((epoch, total_batch, best_loss,
                                    stop_increasing, checkpoint_names,
                                    train_history_losses,
                                    valid_history_losses,
                                    test_history_losses), state_file)

                if stop_increasing >= opt.early_stop_tolerance:
                    logging.info(
                        'Have not increased for %d epoches, early stop training'
                        % stop_increasing)
                    early_stop_flag = True
                    break
                logging.info('*' * 50)
def main():
    """Run beam-search prediction over the test set from command-line options.

    Steps, in order:
      1. Register the preprocess/model/train/predict option groups from
         ``config`` on an argparse parser and parse the command line.
      2. Seed torch when ``opt.seed`` is positive, and set ``opt.gpuid`` to 0
         when CUDA is available and ``opt.gpuid`` is falsy.
      3. Build the experiment name
         (``predict.<exp>[.copy][.bi-directional|.uni-directional]``) and,
         when ``opt.exp_path`` contains a ``%s`` placeholder, fill the
         experiment name and time mark into ``opt.exp_path`` and
         ``opt.pred_path``.
      4. Create both directories, open the log at ``<exp_path>/output.log``
         and log every parsed option.
      5. Load the data, build the model and a ``SequenceGenerator`` and pass
         them, with the test loader, to ``evaluate_beam_search``.

    Any exception raised in step 5 is logged with its traceback and is not
    re-raised.
    """
    # load settings for training
    parser = argparse.ArgumentParser(
        description='predict.py',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    config.preprocess_opts(parser)
    config.model_opts(parser)
    config.train_opts(parser)
    config.predict_opts(parser)
    opt = parser.parse_args()

    if opt.seed > 0:
        torch.manual_seed(opt.seed)

    print(opt.gpuid)
    if torch.cuda.is_available() and not opt.gpuid:
        opt.gpuid = 0

    opt.exp = 'predict.' + opt.exp
    if hasattr(opt, 'copy_model') and opt.copy_model:
        opt.exp += '.copy'

    if hasattr(opt, 'bidirectional'):
        if opt.bidirectional:
            opt.exp += '.bi-directional'
        else:
            opt.exp += '.uni-directional'

    # fill time into the name
    # Membership test instead of `.find('%s') > 0`: the latter is False when
    # the template starts with the placeholder (index 0), which left the
    # paths unformatted.
    if '%s' in opt.exp_path:
        opt.exp_path = opt.exp_path % (opt.exp, opt.timemark)
        opt.pred_path = opt.pred_path % (opt.exp, opt.timemark)

    # exist_ok avoids the check-then-create race of os.path.exists().
    os.makedirs(opt.exp_path, exist_ok=True)
    os.makedirs(opt.pred_path, exist_ok=True)

    # Named `logger` so the stdlib `logging` module name is not shadowed.
    logger = config.init_logging('train', opt.exp_path + '/output.log')

    logger.info('Parameters:')
    for key, value in opt.__dict__.items():
        logger.info('%s : %s' % (key, str(value)))

    try:
        # Only the test loader is used here; the other returned values are
        # unpacked to keep the six-element return shape explicit.
        (_train_data_loader, _valid_data_loader, test_data_loader,
         _word2id, _id2word, _vocab) = load_data_vocab(opt, load_train=False)
        model = init_model(opt)

        generator = SequenceGenerator(
            model,
            eos_id=opt.word2id[pykp.io.EOS_WORD],
            beam_size=opt.beam_size,
            max_sequence_length=opt.max_sent_length)

        evaluate_beam_search(
            generator,
            test_data_loader,
            opt,
            title='predict',
            save_path=opt.pred_path +
            '/[epoch=%d,batch=%d,total_batch=%d]test_result.csv' % (0, 0, 0))
    except Exception:
        # Top-level boundary: record the traceback instead of crashing.
        logger.exception("message")
def main():
    """Evaluate a saved model with beam search on every configured test set.

    Options come from ``config.init_opt`` and are then overwritten with the
    hard-coded experiment settings below (data/vocab paths, checkpoint,
    encoder type and per-encoder hyper-parameters). After logging the
    options and the device choice, the function loads the vocabulary and
    test sets, builds the model and a ``SequenceGenerator``, and calls
    ``evaluate_beam_search`` once per test set.

    Any exception raised while loading, building or evaluating is logged
    with its traceback and is not re-raised.
    """
    opt = config.init_opt(description='predict.py')

    # Hard-coded experiment settings; these replace the values from init_opt.
    opt.data = 'data3/kp20k/kp20k'
    opt.vocab = 'data3/kp20k/kp20k.vocab.pt'
    # Alternative checkpoint kept for reference:
    # 'exp/kp20k.ml.copy.20181129-193506/model/kp20k.ml.copy.epoch=1.batch=20000.total_batch=20000.model'
    opt.train_from = 'exp/kp20k.ml.copy.20181128-153121/model/kp20k.ml.copy.epoch=2.batch=15495.total_batch=38000.model'
    opt.useGpu = 0
    opt.encoder_type = 'rnn'
    opt.useCLF = False

    # NOTE(review): the flattened source does not show which `if` the final
    # `else` belongs to; it is treated here as the fallback of the
    # encoder-type chain (the RNN settings) — confirm against the original.
    if opt.encoder_type.startswith('transformer'):
        opt.batch_size = 32
        opt.d_inner = 2048
        opt.enc_n_layers = 4
        opt.dec_n_layers = 2
        opt.n_head = 8
        opt.d_k = 64
        opt.d_v = 64
        opt.d_model = 512
        opt.word_vec_size = 512
        opt.run_valid_every = 5000000
        opt.save_model_every = 20000
        opt.decode_old = True
    elif opt.encoder_type.startswith('bert'):
        opt.useOnlyTwo = False
        opt.avgHidden = True
        opt.useZeroDecodeHidden = False
        opt.useSameEmbeding = False
        opt.batch_size = 10
        opt.max_sent_length = 10
        opt.run_valid_every = 20000
        opt.decode_old = False
        opt.beam_search_batch_size = 10
        opt.bert_model = 'bert-base-uncased'
        opt.tokenizer = BertTokenizer.from_pretrained(opt.bert_model)
        if opt.encoder_type == 'bert_low':
            opt.copy_attention = False
    else:
        opt.enc_layers = 2
        opt.bidirectional = True
        opt.decode_old = True

    log_file = opt.exp_path + '/output.log'
    logger = config.init_logging('predict', log_file,
                                 redirect_to_stdout=False)

    logger.info('EXP_PATH : ' + opt.exp_path)
    logger.info('Parameters:')
    for option_name, option_value in opt.__dict__.items():
        logger.info('%s : %s' % (option_name, str(option_value)))

    logger.info(
        '====================== Checking GPU Availability ========================='
    )
    if not (torch.cuda.is_available() and opt.useGpu):
        logger.info('Running on CPU!')
    else:
        # Normalise a single device id to a list so len() works below.
        if isinstance(opt.gpuid, int):
            opt.gpuid = [opt.gpuid]
        device_label = 'MULTIPLE GPUs' if len(opt.gpuid) > 1 else '1 GPU'
        logger.info('Running on %s! devices=%s' %
                    (device_label, str(opt.gpuid)))

    try:
        test_data_loaders, word2id, id2word, vocab = load_vocab_and_testsets(
            opt)
        model = init_model(opt)
        if torch.cuda.is_available() and opt.useGpu:
            model.cuda()

        # Second positional argument of SequenceGenerator depends on the
        # encoder type.
        if opt.encoder_type == 'transformer':
            generator_size = opt.word_vec_size
        else:
            generator_size = opt.vocab_size

        generator = SequenceGenerator(
            model,
            generator_size,
            eos_id=opt.word2id[pykp.io.EOS_WORD],
            beam_size=opt.beam_size,
            max_sequence_length=opt.max_sent_length,
            useGpu=opt.useGpu)

        named_loaders = zip(opt.test_dataset_names, test_data_loaders)
        for testset_name, test_data_loader in named_loaders:
            logger.info('Evaluating %s' % testset_name)
            result_dir = opt.pred_path + '/%s_test_result/' % (testset_name)
            evaluate_beam_search(generator,
                                 test_data_loader,
                                 opt,
                                 title='test_%s' % testset_name,
                                 predict_save_path=result_dir)
    except Exception as e:
        logger.error(e, exc_info=True)