def report_training(self, step, num_steps, learning_rate, report_stats, multigpu=False):
    """
    Batch-level training progress report hook.

    Args:
        step(int): current step count.
        num_steps(int): total number of batches.
        learning_rate(float): current learning rate.
        report_stats(Statistics): statistics accumulated since the last report.
        multigpu(bool): when True, the statistics are passed through
            `Statistics.all_gather_stats` before being reported.

    Returns:
        report_stats(Statistics): a fresh Statistics instance when a report
        was emitted on this step, otherwise the instance that was passed in.

    Raises:
        ValueError: if `self.start_time` is negative.
    """
    if self.start_time < 0:
        raise ValueError("""ReportMgr needs to be started (set 'start_time' or use 'start()'""")

    # Not a reporting step: hand the accumulator back untouched.
    if step % self.report_every != 0:
        return report_stats

    gathered = Statistics.all_gather_stats(report_stats) if multigpu else report_stats
    self._report_training(step, num_steps, learning_rate, gathered)
    self.progress_step += 1
    return Statistics()
def validate(self, valid_iter, step=0):
    """
    Run the model over the validation data and accumulate loss statistics.

    Args:
        valid_iter: validation data iterator yielding batches.
        step(int): step count forwarded to `self._report_step`.

    Returns:
        :obj:`nmt.Statistics`: validation loss statistics
    """
    # Put the model in evaluation mode before scoring.
    self.model.eval()
    valid_stats = Statistics()

    with torch.no_grad():
        for batch in valid_iter:
            src, labels, segs = batch.src, batch.labels, batch.segs
            clss, mask, mask_cls = batch.clss, batch.mask, batch.mask_cls

            # The model returns its own mask, which replaces the batch one.
            sent_scores, mask = self.model(src, segs, clss, mask, mask_cls)

            elementwise = self.loss(sent_scores, labels.float())
            masked_sum = (elementwise * mask.float()).sum()
            valid_stats.update(
                Statistics(float(masked_sum.cpu().data.numpy()), len(labels)))

        self._report_step(0, step, valid_stats=valid_stats)
        return valid_stats
def _gradient_accumulation(self, true_batchs, normalization, total_stats, report_stats):
    """
    Run forward/backward over the collected batches and step the optimizer.

    With `grad_accum_count == 1` gradients are zeroed and the optimizer is
    stepped for every batch; with `grad_accum_count > 1` gradients are zeroed
    once up front and a single optimizer step follows the last batch.

    Args:
        true_batchs(list): batches to process.
        normalization(int): count recorded with every per-batch Statistics.
        total_stats(Statistics): running statistics, updated in place.
        report_stats(Statistics): reporting statistics, updated in place.
    """
    step_every_batch = self.grad_accum_count == 1
    step_after_all = self.grad_accum_count > 1

    if step_after_all:
        self.model.zero_grad()

    for batch in true_batchs:
        if step_every_batch:
            self.model.zero_grad()

        src, labels, segs = batch.src, batch.labels, batch.segs
        clss, mask, mask_cls = batch.clss, batch.mask, batch.mask_cls

        sent_scores, mask = self.model(src, segs, clss, mask, mask_cls)

        elementwise = self.loss(sent_scores, labels.float())
        loss = (elementwise * mask.float()).sum()
        (loss / loss.numel()).backward()

        batch_stats = Statistics(float(loss.cpu().data.numpy()), normalization)
        total_stats.update(batch_stats)
        report_stats.update(batch_stats)

        if step_every_batch:
            self.optim.step()

    if step_after_all:
        self.optim.step()
def train(self, train_iter_fct, train_steps, valid_iter_fct=None, valid_steps=-1):
    """
    Main training loop.

    Batches assigned to this rank are collected until `grad_accum_count` of
    them are available, then one accumulation/optimizer round is run. The
    training iterator is rebuilt each time it is exhausted, until
    `train_steps` steps have been taken.

    Args:
        train_iter_fct(function): returns a fresh training iterator.
        train_steps(int): last step number to run.
        valid_iter_fct(function): not used by this implementation.
        valid_steps(int): not used by this implementation.

    Returns:
        Statistics: statistics accumulated over the whole run.
    """
    logger.info('Start training...')

    step = self.optim._step + 1
    pending = []
    accum = 0
    normalization = 0
    train_iter = train_iter_fct()

    total_stats = Statistics()
    report_stats = Statistics()
    self._start_report_manager(start_time=total_stats.start_time)

    while step <= train_steps:
        for idx, batch in enumerate(train_iter):
            # Skip batches that belong to another rank.
            if self.n_gpu != 0 and idx % self.n_gpu != self.gpu_rank:
                continue

            pending.append(batch)
            normalization += batch.batch_size
            accum += 1
            if accum != self.grad_accum_count:
                continue

            self._gradient_accumulation(
                pending, normalization, total_stats, report_stats)
            report_stats = self._maybe_report_training(
                step, train_steps, self.optim.learning_rate, report_stats)

            pending = []
            accum = 0
            normalization = 0
            if step % self.save_checkpoint_steps == 0 and self.gpu_rank == 0:
                self._save(step)

            step += 1
            if step > train_steps:
                break
        train_iter = train_iter_fct()

    return total_stats
def iter_test(self, test_iter, step, sum_sent_count=3):
    """
    Iterative test: sentences are selected by `self.model.infer_sentences`,
    which picks the next sentence given the ones already selected.

    Candidate and gold summaries are written next to `self.args.result_path`
    and ROUGE / precision figures are logged.

    Args:
        test_iter: test data iterator yielding batches.
        step(int): step number used in output file names and log lines;
            precision and full ROUGE reports are skipped when it is -1.
        sum_sent_count(int): number of sentences requested from
            `infer_sentences`.

    Returns:
        Statistics: the statistics object that was handed to `infer_sentences`.
    """
    self.model.eval()
    stats = Statistics()

    base_name = os.path.basename(self.args.result_path)
    # A result_path without a directory part yields '' here. Fall back to the
    # current directory so makedirs('') cannot raise and the '%s/%s' paths
    # below do not resolve to the filesystem root.
    base_dir = os.path.dirname(self.args.result_path) or '.'
    # exist_ok avoids the race between an existence check and the creation.
    os.makedirs(base_dir, exist_ok=True)

    can_path = '%s/%s_step%d_itereval.candidate' % (base_dir, base_name, step)
    gold_path = '%s/%s_step%d_itereval.gold' % (base_dir, base_name, step)

    all_pred_ids, all_gold_ids, all_doc_ids = [], [], []
    all_gold_texts, all_pred_texts = [], []

    with torch.no_grad():
        for batch in test_iter:
            doc_ids = batch.doc_id
            # Negative entries in label_seq are dropped (treated as padding).
            oracle_ids = [{j for j in seq if j > -1}
                          for seq in batch.label_seq.tolist()]

            sel_sent_idxs, sel_sent_masks = self.model.infer_sentences(
                batch, sum_sent_count, stats=stats)
            sel_sent_idxs = sel_sent_idxs.tolist()
            all_pred_ids.extend(sel_sent_idxs)

            for i in range(batch.batch_size):
                # Keep only the selections whose mask entry is set.
                _pred = '<q>'.join([batch.src_str[i][idx].strip()
                                    for j, idx in enumerate(sel_sent_idxs[i])
                                    if sel_sent_masks[i][j]])
                all_pred_texts.append(_pred)
                all_gold_texts.append(batch.tgt_str[i])
                all_gold_ids.append(oracle_ids[i])
                all_doc_ids.append(doc_ids[i])

    macro_precision, micro_precision = self._output_predicted_summaries(
        all_doc_ids, all_pred_ids, all_gold_ids,
        all_pred_texts, all_gold_texts, can_path, gold_path)
    rouge1_arr, rouge2_arr = du.cal_rouge_score(all_pred_texts, all_gold_texts)
    rouge_1, rouge_2 = du.aggregate_rouge(rouge1_arr, rouge2_arr)
    logger.info('[PERF]At step %d: rouge1:%.2f rouge2:%.2f' % (
        step, rouge_1 * 100, rouge_2 * 100))

    if step != -1 and self.args.report_precision:
        macro_arr = ["P@%s:%.2f%%" % (i + 1, macro_precision[i] * 100) for i in range(3)]
        micro_arr = ["P@%s:%.2f%%" % (i + 1, micro_precision[i] * 100) for i in range(3)]
        logger.info('[PERF]MacroPrecision at step %d: %s' % (step, '\t'.join(macro_arr)))
        logger.info('[PERF]MicroPrecision at step %d: %s' % (step, '\t'.join(micro_arr)))

    if step != -1 and self.args.report_rouge:
        rouge_str, detail_rouge = test_rouge(
            self.args.temp_dir, can_path, gold_path, all_doc_ids, show_all=True)
        logger.info('[PERF]Rouges at step %d: %s \n' % (step, rouge_str))
        result_path = '%s_step%d_itereval.rouge' % (self.args.result_path, step)
        if detail_rouge is not None:
            du.output_rouge_file(result_path, rouge1_arr, rouge2_arr,
                                 detail_rouge, all_doc_ids)

    self._report_step(0, step, valid_stats=stats)
    return stats
def _report_training(self, step, num_steps, learning_rate, report_stats):
    """
    See base class method `ReportMgrBase.report_training`.

    Emits the statistics through `report_stats.output` and returns a fresh
    Statistics instance for the next reporting window.
    """
    report_stats.output(step, num_steps, learning_rate, self.start_time)
    return Statistics()
def _report_training(self, step, num_steps, learning_rate, report_stats):
    """
    See base class method `ReportMgrBase.report_training`.

    Emits the statistics through `report_stats.output`, forwards them to
    `self.maybe_log_tensorboard`, and returns a fresh Statistics instance
    for the next reporting window.
    """
    report_stats.output(step, num_steps, learning_rate, self.start_time)

    # Tensorboard progress is indexed by self.progress_step, not by `step`.
    self.maybe_log_tensorboard(
        report_stats, "progress", learning_rate, self.progress_step)

    return Statistics()
def _maybe_gather_stats(self, stat):
    """
    Gather statistics across processes when running on more than one GPU.

    Args:
        stat(:obj:onmt.utils.Statistics): a Statistics object to gather,
            or None.

    Returns:
        stat: the result of `Statistics.all_gather_stats(stat)` when `stat`
        is not None and `self.n_gpu > 1`; otherwise `stat` unchanged
        (including None).
    """
    if stat is None or self.n_gpu <= 1:
        return stat
    return Statistics.all_gather_stats(stat)
def train(self, train_iter_fct, train_steps):
    """
    Main training loop.

    Batches are pulled one at a time from the training iterator. Once
    `grad_accum_count` batches are collected, one accumulation/optimizer
    round is run, the statistics are reported through `_report_training`,
    and a checkpoint is saved on rank 0 every `save_checkpoint_steps` steps.

    The iterator is rebuilt only when it is exhausted. Rebuilding it on
    every loop turn would restart each data pass after a single batch.

    Args:
        train_iter_fct(function): returns a fresh training iterator.
        train_steps(int): last step number to run.

    Returns:
        Statistics: statistics accumulated over the whole run.

    Raises:
        ValueError: if a freshly built iterator yields no batch at all.
    """
    step = self.optim._step + 1
    true_batchs = []
    accum = 0
    normalization = 0
    train_iter = train_iter_fct()

    total_stats = Statistics()
    report_stats = Statistics()
    self._start_report_manager(start_time=total_stats.start_time)

    while step <= train_steps:
        try:
            batch = next(train_iter)
        except StopIteration:
            # End of a pass over the data: start the next one.
            train_iter = train_iter_fct()
            try:
                batch = next(train_iter)
            except StopIteration:
                # An empty data source would otherwise spin forever.
                raise ValueError('train_iter_fct() returned an empty iterator')

        true_batchs.append(batch)
        normalization += batch.batch_size
        accum += 1
        if accum != self.grad_accum_count:
            continue

        self._gradient_accumulation(
            true_batchs, normalization, total_stats, report_stats)
        report_stats = self._report_training(
            step, train_steps, self.optim.learning_rate, report_stats)

        true_batchs = []
        accum = 0
        normalization = 0
        if step % self.save_checkpoint_steps == 0 and self.gpu_rank == 0:
            self._save(step)

        step += 1

    return total_stats
def _gradient_accumulation(self, true_batchs, normalization, total_stats, report_stats):
    """
    Run forward/backward over the collected batches and step the optimizer,
    all-reducing gradients first when more than one GPU is in use.

    With `grad_accum_count == 1` the optimizer is stepped after every batch;
    with `grad_accum_count > 1` it is stepped once after the last batch.

    Args:
        true_batchs(list): batches to process.
        normalization(int): count recorded with every per-batch Statistics.
        total_stats(Statistics): running statistics, updated in place.
        report_stats(Statistics): reporting statistics, updated in place.
    """
    def _sync_and_step():
        # Multi-GPU gradient gather, then parameter update.
        if self.n_gpu > 1:
            grads = [p.grad.data for p in self.model.parameters()
                     if p.requires_grad and p.grad is not None]
            distributed.all_reduce_and_rescale_tensors(grads, float(1))
        self.optim.step()

    step_every_batch = self.grad_accum_count == 1
    step_after_all = self.grad_accum_count > 1

    if step_after_all:
        self.model.zero_grad()

    for batch in true_batchs:
        if step_every_batch:
            self.model.zero_grad()

        src, labels, segs = batch.src, batch.labels, batch.segs
        clss, mask, mask_cls = batch.clss, batch.mask, batch.mask_cls

        sent_scores, mask = self.model(src, segs, clss, mask, mask_cls)

        elementwise = self.loss(sent_scores, labels.float())
        loss = (elementwise * mask.float()).sum()
        (loss / loss.numel()).backward()

        batch_stats = Statistics(float(loss.cpu().data.numpy()), normalization)
        total_stats.update(batch_stats)
        report_stats.update(batch_stats)

        if step_every_batch:
            _sync_and_step()

    # In case of multi-step gradient accumulation,
    # update only after all accumulated batches.
    if step_after_all:
        _sync_and_step()
def validate(self, valid_iter):
    """
    Run the model over the validation data and accumulate loss statistics.

    When `self.args.structured` is set, the model's first output is indexed
    with [-1] and clamped to [1e-5, 1 - 1e-5] before the loss is computed;
    otherwise the first output is used directly as the sentence scores.

    Args:
        valid_iter: validation data iterator yielding batches.

    Returns:
        Statistics: validation loss statistics.
    """
    # Put the model in evaluation mode before scoring.
    self.model.eval()
    valid_stats = Statistics()

    with torch.no_grad():
        for batch in valid_iter:
            src = batch.src
            src_lengths = batch.src_length
            labels = batch.labels

            if self.args.structured:
                roots, mask = self.model(src, labels, src_lengths)
                scores = torch.clamp(roots[-1], 1e-5, 1 - 1e-5)
            else:
                scores, mask = self.model(src, labels, src_lengths)

            elementwise = self.loss(scores, labels)
            masked_sum = (elementwise * mask.float()).sum()
            valid_stats.update(
                Statistics(float(masked_sum.cpu().data.numpy()), len(labels)))

        return valid_stats
def _gradient_accumulation(self, true_batchs, normalization, total_stats, report_stats):
    """
    Run forward/backward over the collected batches and step the optimizer.

    When `self.args.structured` is set, the loss is the mean over the model's
    `roots` outputs of the masked, summed loss on each clamped root;
    otherwise it is the masked, summed loss on the sentence scores.

    Args:
        true_batchs(list): batches to process.
        normalization(int): count recorded with every per-batch Statistics.
        total_stats(Statistics): running statistics, updated in place.
        report_stats(Statistics): reporting statistics, updated in place.
    """
    if self.grad_accum_count > 1:
        self.model.zero_grad()

    for batch in true_batchs:
        src = batch.src
        src_lengths = batch.src_length
        labels = batch.labels

        if self.grad_accum_count == 1:
            self.model.zero_grad()

        if self.args.structured:
            roots, mask = self.model(src, labels, src_lengths)
            loss = 0
            for root in roots:
                clamped = torch.clamp(root, 1e-5, 1 - 1e-5)
                root_loss = (self.loss(clamped, labels) * mask.float()).sum()
                loss += root_loss
            loss = loss / len(roots)
        else:
            sent_scores, mask = self.model(src, labels, src_lengths)
            loss = (self.loss(sent_scores, labels) * mask.float()).sum()

        # Both branches back-propagate the same expression.
        (loss / loss.numel()).backward()

        batch_stats = Statistics(float(loss.cpu().data.numpy()), normalization)
        total_stats.update(batch_stats)
        report_stats.update(batch_stats)

        # Per-batch parameter update when not accumulating.
        if self.grad_accum_count == 1:
            self.optim.step()

    # In case of multi-step gradient accumulation,
    # update only after all accumulated batches.
    if self.grad_accum_count > 1:
        self.optim.step()
def test(self, test_iter, step, cal_lead=False, cal_oracle=False):
    """
    Select summary sentences for every test document and write candidate and
    gold summaries to `<result_path>_step<step>.candidate` / `.gold`.

    Args:
        test_iter: test data iterator yielding batches.
        step(int): step number used in file names; ROUGE is only computed
            when it is not -1 and `self.args.report_rouge` is set.
        cal_lead(bool): rank sentences in document order instead of by model
            score.
        cal_oracle(bool): use the sentences whose label equals 1.

    Returns:
        :obj:`nmt.Statistics`: loss statistics (only updated when the model
        is used, i.e. neither `cal_lead` nor `cal_oracle`).
    """
    def _get_ngrams(n, text):
        # Set of all contiguous n-grams (as tuples) of the token list.
        return {tuple(text[start:start + n])
                for start in range(len(text) - n + 1)}

    def _block_tri(c, p):
        # True when candidate `c` shares a trigram with any sentence in `p`.
        tri_c = _get_ngrams(3, c.split())
        return any(tri_c & _get_ngrams(3, s.split()) for s in p)

    if not cal_lead and not cal_oracle:
        self.model.eval()
    stats = Statistics()

    can_path = '%s_step%d.candidate' % (self.args.result_path, step)
    gold_path = '%s_step%d.gold' % (self.args.result_path, step)
    with open(can_path, 'w') as save_pred, \
            open(gold_path, 'w') as save_gold, \
            torch.no_grad():
        for batch in test_iter:
            src, labels, segs = batch.src, batch.labels, batch.segs
            clss, mask, mask_cls = batch.clss, batch.mask, batch.mask_cls

            gold, pred = [], []

            if cal_lead:
                selected_ids = [list(range(batch.clss.size(1)))] * batch.batch_size
            elif cal_oracle:
                selected_ids = [[j for j in range(batch.clss.size(1)) if labels[i][j] == 1]
                                for i in range(batch.batch_size)]
            else:
                sent_scores, mask = self.model(src, segs, clss, mask, mask_cls)

                elementwise = self.loss(sent_scores, labels.float())
                loss = (elementwise * mask.float()).sum()
                stats.update(Statistics(float(loss.cpu().data.numpy()), len(labels)))

                sent_scores = sent_scores + mask.float()
                sent_scores = sent_scores.cpu().data.numpy()
                # Highest score first.
                selected_ids = np.argsort(-sent_scores, 1)

            for doc_idx, ranking in enumerate(selected_ids):
                sentences = batch.src_str[doc_idx]
                if len(sentences) == 0:
                    continue

                picked = []
                for j in ranking[:len(sentences)]:
                    if j >= len(sentences):
                        continue
                    candidate = sentences[j].strip()
                    if not self.args.block_trigram or not _block_tri(candidate, picked):
                        picked.append(candidate)

                    # Stop at three sentences unless in oracle / recall mode.
                    if (not cal_oracle) and (not self.args.recall_eval) and len(picked) == 3:
                        break

                summary = '<q>'.join(picked)
                summary = summary + " original txt: " + " ".join(sentences)
                if self.args.recall_eval:
                    summary = ' '.join(summary.split()[:len(batch.tgt_str[doc_idx].split())])

                pred.append(summary)
                gold.append(batch.tgt_str[doc_idx])

            for reference in gold:
                save_gold.write(reference.strip() + '\n')
            for summary in pred:
                save_pred.write(summary.strip() + '\n')

    if step != -1 and self.args.report_rouge:
        rouges = test_rouge(self.args.temp_dir, can_path, gold_path)
        logger.info('Rouges at step %d \n%s' % (step, rouge_results_to_str(rouges)))
    self._report_step(0, step, valid_stats=stats)

    return stats
def train(self, train_iter_fct, train_steps, valid_iter_fct=None, valid_steps=-1):
    """
    The main training loop: iterates over the training data for
    `self.args.max_epoch` epochs, validates every `save_checkpoint_steps`
    steps on rank 0, and keeps only the `self.args.save_model_count`
    checkpoints with the lowest validation loss.

    Args:
        train_iter_fct(function): a function that returns the train
            iterator. e.g. something like
            train_iter_fct = lambda: generator(*args, **kwargs)
        train_steps(int): step number after which the inner batch loop is
            left (see the NOTE at the `break` below).
        valid_iter_fct(function): not used by this implementation; the
            validation loader is built inline from `self.args`.
        valid_steps(int): not used by this implementation.

    Return:
        tuple: (total_stats, neg_valid_loss), where neg_valid_loss is the
        heap of (-validation_loss, checkpoint_path) entries that were kept.
    """
    logger.info('Start training...')

    # step =  self.optim._step + 1
    step = self.optim._step + 1
    true_batchs = []
    accum = 0
    normalization = 0
    neg_valid_loss = [] # min-heap: the smallest value sits at index 0
    heapq.heapify(neg_valid_loss) # negated losses are stored, so index 0 is the WORST kept checkpoint

    total_stats = Statistics()
    report_stats = Statistics()
    self._start_report_manager(start_time=total_stats.start_time)
    #select_counts = np.random.choice(range(3), train_steps + 1)
    cur_epoch = 0
    train_iter = train_iter_fct()
    #logger.info('Current Epoch:%d' % cur_epoch)
    #logger.info('maxEpoch:%d' % self.args.max_epoch)

    #while step <= train_steps:
    while cur_epoch < self.args.max_epoch:
        reduce_counter = 0
        logger.info('Current Epoch:%d' % cur_epoch)
        for i, batch in enumerate(train_iter):
            # Only batches assigned to this rank are processed.
            if self.n_gpu == 0 or (i % self.n_gpu == self.gpu_rank):
                # from batch.labels, add selected sent index to batch
                # after teacher forcing, use model selected sentences
                # or infer scores of batch and get selected sent index
                # then add selected sent index to the batch

                true_batchs.append(batch)
                #normalization += batch.batch_size ##loss normalized wrong
                normalization = batch.batch_size ##loss recorded correspond to each minibatch
                accum += 1
                if accum == self.grad_accum_count:
                    reduce_counter += 1
                    if self.n_gpu > 1:
                        normalization = sum(distributed
                                            .all_gather_list
                                            (normalization))

                    self._gradient_accumulation(
                        true_batchs, normalization, total_stats,
                        report_stats)

                    report_stats = self._maybe_report_training(
                        step, train_steps,
                        self.optim.learning_rate,
                        report_stats)

                    # Reset the accumulation window.
                    true_batchs = []
                    accum = 0
                    normalization = 0
                    if (step % self.save_checkpoint_steps == 0 and self.gpu_rank == 0):
                        # A fresh, unshuffled validation loader is built for every checkpoint.
                        valid_iter =data_loader.Dataloader(self.args, load_dataset(self.args, 'valid', shuffle=False),
                                                           self.args.batch_size * 10, self.device,
                                                           shuffle=False, is_test=True)
                        #batch_size train: 3000, test: 60000
                        stats = self.validate(valid_iter, step, self.args.valid_by_rouge)
                        self.model.train() # back to training
                        cur_valid_loss = stats.xent()
                        # NOTE(review): presumably the path that self._save(step) writes - confirm, os.remove below relies on it.
                        checkpoint_path = os.path.join(self.args.model_path, 'model_step_%d.pt' % step)
                        if len(neg_valid_loss) < self.args.save_model_count:
                            # Still room: keep this checkpoint unconditionally.
                            self._save(step)
                            heapq.heappush(neg_valid_loss, (-cur_valid_loss, checkpoint_path))
                        else:
                            # Full: keep it only if it beats the worst kept checkpoint.
                            if -cur_valid_loss > neg_valid_loss[0][0]:
                                heapq.heappush(neg_valid_loss, (-cur_valid_loss, checkpoint_path))
                                # The pop returns the previous worst entry; its file is deleted.
                                worse_loss, worse_model = heapq.heappop(neg_valid_loss)
                                os.remove(worse_model)
                                self._save(step)
                            #else do not save it
                        logger.info('step_%d:%s' % (step, str(neg_valid_loss)))

                    step += 1
                    # NOTE(review): this only leaves the batch loop; the epoch loop keeps running until max_epoch - confirm intended.
                    if step > train_steps:
                        break
        cur_epoch += 1
        train_iter = train_iter_fct()

    return total_stats, neg_valid_loss
def train(self, train_iter_fct, train_steps, valid_iter_fct=None, valid_steps=-1):
    """
    The main training loop.

    Batches assigned to this rank are collected until `grad_accum_count` of
    them are available, then one accumulation/optimizer round is run. With
    more than one GPU the normalization count is summed across processes
    first. The training iterator is rebuilt each time it is exhausted,
    until `train_steps` steps have been taken.

    Args:
        train_iter_fct(function): a function that returns the train
            iterator. e.g. something like
            train_iter_fct = lambda: generator(*args, **kwargs)
        train_steps(int): last step number to run.
        valid_iter_fct(function): not used by this implementation.
        valid_steps(int): not used by this implementation.

    Return:
        Statistics: statistics accumulated over the whole run.
    """
    logger.info('Start training...')

    step = self.optim._step + 1
    pending = []
    accum = 0
    normalization = 0
    train_iter = train_iter_fct()

    total_stats = Statistics()
    report_stats = Statistics()
    self._start_report_manager(start_time=total_stats.start_time)

    while step <= train_steps:
        for idx, batch in enumerate(train_iter):
            # Skip batches that belong to another rank.
            if self.n_gpu != 0 and idx % self.n_gpu != self.gpu_rank:
                continue

            pending.append(batch)
            normalization += batch.batch_size
            accum += 1
            if accum != self.grad_accum_count:
                continue

            if self.n_gpu > 1:
                gathered = distributed.all_gather_list(normalization)
                normalization = sum(gathered)

            self._gradient_accumulation(
                pending, normalization, total_stats, report_stats)
            report_stats = self._maybe_report_training(
                step, train_steps, self.optim.learning_rate, report_stats)

            pending = []
            accum = 0
            normalization = 0
            if step % self.save_checkpoint_steps == 0 and self.gpu_rank == 0:
                self._save(step)

            step += 1
            if step > train_steps:
                break
        train_iter = train_iter_fct()

    return total_stats
def validate(self, valid_iter, step=0, valid_by_rouge=False):
    """
    Validate the model, either by loss or by (negated) ROUGE.

    Args:
        valid_iter: validation data iterator yielding batches.
        step(int): step count forwarded to `self._report_step`.
        valid_by_rouge(bool): when True, sentences are selected with
            `self.model.infer_sentences` and the negated sum of
            `du.cal_rouge_doc` scores is recorded as the loss; otherwise
            the loss chosen by `self.args.model_name` / `self.args.loss`
            is computed.

    Returns:
        :obj:`nmt.Statistics`: validation loss statistics
    """
    # Set model in validating mode.
    self.model.eval()
    stats = Statistics()

    with torch.no_grad():
        for batch in valid_iter:
            src, labels, segs = batch.src, batch.labels, batch.segs
            clss, mask, mask_cls = batch.clss, batch.mask, batch.mask_cls
            group_idxs = batch.groups
            soft_labels = batch.soft_labels
            candi_masks = batch.candi_masks

            if valid_by_rouge:
                src_str, tgt_str = batch.src_str, batch.tgt_str
                # Negative ROUGE is used as the loss so that lower is better.
                sel_sent_idxs, sel_sent_masks = self.model.infer_sentences(batch, 3)
                sel_sent_idxs = sel_sent_idxs.tolist()
                total_rouge = 0.
                for i, doc_sel in enumerate(sel_sent_idxs):
                    total_rouge += du.cal_rouge_doc(
                        src_str[i], tgt_str[i], doc_sel, sel_sent_masks[i])
                loss = -total_rouge
            else:
                # Both model variants consume the selections shipped with the
                # batch. Previously the 'seq' branch referenced
                # sel_sent_idxs / sel_sent_masks / pair_masks without ever
                # defining them, which raised NameError.
                sel_sent_idxs, sel_sent_masks = batch.sel_sent_idxs, batch.sel_sent_masks

                if self.args.model_name == 'seq':
                    # Same call signature as the 'seq' branch of training.
                    sent_scores, _ = self.model(src, mask, segs, clss, mask_cls, group_idxs,
                                                sel_sent_idxs=sel_sent_idxs,
                                                sel_sent_masks=sel_sent_masks,
                                                candi_sent_masks=candi_masks)
                    # batch, seq_len, sent_count -> batch*seq_len, sent_count
                    pred = sent_scores.contiguous().view(-1, sent_scores.size(2))
                    gold = batch.label_seq.contiguous().view(-1)
                    if self.args.use_rouge_label:
                        soft_labels = soft_labels.contiguous().view(-1, soft_labels.size(2))
                        log_prb = F.log_softmax(pred, dim=1)
                        non_pad_mask = gold.ne(-1)  # -1 is the padding value
                        sent_mask = mask_cls.unsqueeze(1).expand(-1, sent_scores.size(1), -1)
                        sent_mask = sent_mask.contiguous().view(-1, sent_scores.size(2))
                        loss = -((soft_labels * log_prb) * sent_mask.float()).sum(dim=1)
                        loss = loss.masked_select(non_pad_mask).sum()  # average later
                    else:
                        loss = F.cross_entropy(pred, gold, ignore_index=-1, reduction='sum')
                else:
                    sent_scores, _ = self.model(src, mask, segs, clss, mask_cls, group_idxs,
                                                sel_sent_idxs=sel_sent_idxs,
                                                sel_sent_masks=sel_sent_masks,
                                                candi_sent_masks=candi_masks,
                                                is_test=True,
                                                sel_sent_hit_map=batch.hit_map)
                    if self.args.use_rouge_label:
                        labels = soft_labels
                    if self.args.loss == "bce":
                        # pointwise
                        loss = self.bce_logits_loss(sent_scores, labels.float())
                    elif self.args.loss == "wsoftmax":
                        # weighted average
                        loss = -self.logsoftmax(sent_scores) * labels.float()
                    else:
                        # Rows with a non-zero label sum are rescaled to sum to 1.
                        sum_labels = labels.sum(dim=-1).unsqueeze(-1).expand_as(labels)
                        labels = torch.where(sum_labels == 0, labels, labels / sum_labels)
                        loss = -self.logsoftmax(sent_scores) * labels.float()
                    # batch_size, max_sent_count -> scalar
                    loss = (loss * candi_masks.float()).sum()

                loss = float(loss.cpu().data.numpy())

            batch_stats = Statistics(loss, len(labels))
            stats.update(batch_stats)

        self._report_step(0, step, valid_stats=stats)
        return stats
def test(self, test_iter, step, cal_lead=False, cal_oracle=False):
    """
    Select summary sentences for every test document in a single scoring
    pass, write candidate / gold files through
    `self._output_predicted_summaries`, and log ROUGE and precision.

    Args:
        test_iter: test data iterator yielding batches.
        step(int): step number used in file names and log lines; precision
            and full ROUGE reports are skipped when it is -1.
        cal_lead(bool): rank sentences in document order instead of by model
            score.
        cal_oracle(bool): use the sentences whose label equals 1.

    Returns:
        :obj:`nmt.Statistics`: loss statistics (only updated when the model
        is used, i.e. neither `cal_lead` nor `cal_oracle`).
    """
    def _get_ngrams(n, text):
        # Set of all contiguous n-grams (as tuples) of the token list.
        ngram_set = set()
        text_length = len(text)
        max_index_ngram_start = text_length - n
        for i in range(max_index_ngram_start + 1):
            ngram_set.add(tuple(text[i:i + n]))
        return ngram_set

    def _block_tri(c, p):
        # True when candidate `c` shares a trigram with any sentence in `p`.
        tri_c = _get_ngrams(3, c.split())
        for s in p:
            tri_s = _get_ngrams(3, s.split())
            if len(tri_c.intersection(tri_s)) > 0:
                return True
        return False

    if not cal_lead and not cal_oracle:
        self.model.eval()
    stats = Statistics()

    base_dir = os.path.dirname(self.args.result_path)
    # dirname is '' when result_path has no directory part; makedirs('')
    # would raise, so only create a directory when there is one to create.
    # exist_ok avoids the race between an existence check and the creation.
    if base_dir:
        os.makedirs(base_dir, exist_ok=True)

    can_path = '%s_step%d_initial.candidate' % (self.args.result_path, step)
    gold_path = '%s_step%d_initial.gold' % (self.args.result_path, step)

    all_pred_ids, all_gold_ids, all_doc_ids = [], [], []
    all_gold_texts, all_pred_texts = [], []

    with torch.no_grad():
        for batch in test_iter:
            src = batch.src
            labels = batch.labels
            segs = batch.segs
            clss = batch.clss
            mask = batch.mask
            mask_cls = batch.mask_cls
            doc_ids = batch.doc_id
            group_idxs = batch.groups
            # Negative entries in label_seq are dropped (treated as padding).
            oracle_ids = [{j for j in seq if j > -1}
                          for seq in batch.label_seq.tolist()]

            if cal_lead:
                selected_ids = [list(range(batch.clss.size(1)))] * batch.batch_size
            elif cal_oracle:
                selected_ids = [[j for j in range(batch.clss.size(1)) if labels[i][j] == 1]
                                for i in range(batch.batch_size)]
            else:
                # selected sentences in candi_masks can be set to 0
                sent_scores, mask = self.model(src, mask, segs, clss, mask_cls, group_idxs,
                                               candi_sent_masks=mask_cls, is_test=True)
                # batch_size, max_sent_count
                loss = -self.logsoftmax(sent_scores) * labels.float()
                loss = (loss * mask.float()).sum()
                batch_stats = Statistics(float(loss.cpu().data.numpy()), len(labels))
                stats.update(batch_stats)

                # Masked-out positions can never be ranked first.
                sent_scores[mask == False] = float('-inf')
                sent_scores = sent_scores.cpu().data.numpy()
                # Highest score first.
                selected_ids = np.argsort(-sent_scores, 1)

            for i, idx in enumerate(selected_ids):
                _pred = []
                _pred_ids = []
                if len(batch.src_str[i]) == 0:
                    continue
                for j in selected_ids[i][:len(batch.src_str[i])]:
                    if j >= len(batch.src_str[i]):
                        continue
                    candidate = batch.src_str[i][j].strip()
                    if self.args.block_trigram:
                        if not _block_tri(candidate, _pred):
                            _pred.append(candidate)
                            _pred_ids.append(j)
                    else:
                        _pred.append(candidate)
                        _pred_ids.append(j)

                    # Stop at three sentences unless in oracle / recall mode.
                    if (not cal_oracle) and (not self.args.recall_eval) and len(_pred) == 3:
                        break

                _pred = '<q>'.join(_pred)
                if self.args.recall_eval:
                    _pred = ' '.join(_pred.split()[:len(batch.tgt_str[i].split())])

                all_pred_texts.append(_pred)
                all_pred_ids.append(_pred_ids)
                all_gold_texts.append(batch.tgt_str[i])
                all_gold_ids.append(oracle_ids[i])
                all_doc_ids.append(doc_ids[i])

    macro_precision, micro_precision = self._output_predicted_summaries(
        all_doc_ids, all_pred_ids, all_gold_ids,
        all_pred_texts, all_gold_texts, can_path, gold_path)
    rouge1_arr, rouge2_arr = du.cal_rouge_score(all_pred_texts, all_gold_texts)
    rouge_1, rouge_2 = du.aggregate_rouge(rouge1_arr, rouge2_arr)
    logger.info('[PERF]At step %d: rouge1:%.2f rouge2:%.2f' % (
        step, rouge_1 * 100, rouge_2 * 100))

    if step != -1 and self.args.report_precision:
        macro_arr = ["P@%s:%.2f%%" % (i + 1, macro_precision[i] * 100) for i in range(3)]
        micro_arr = ["P@%s:%.2f%%" % (i + 1, micro_precision[i] * 100) for i in range(3)]
        logger.info('[PERF]MacroPrecision at step %d: %s' % (step, '\t'.join(macro_arr)))
        logger.info('[PERF]MicroPrecision at step %d: %s' % (step, '\t'.join(micro_arr)))

    if step != -1 and self.args.report_rouge:
        rouge_str, detail_rouge = test_rouge(
            self.args.temp_dir, can_path, gold_path, all_doc_ids, show_all=True)
        logger.info('[PERF]Rouges at step %d: %s \n' % (step, rouge_str))
        result_path = '%s_step%d_initial.rouge' % (self.args.result_path, step)
        if detail_rouge is not None:
            du.output_rouge_file(result_path, rouge1_arr, rouge2_arr,
                                 detail_rouge, all_doc_ids)

    self._report_step(0, step, valid_stats=stats)
    return stats
def predict(self, test_iter, step, result_file='predicted_titles.csv', cal_lead=False, cal_oracle=False):
    """
    Select sentences for every document and write them, together with the
    joined source text, to a CSV file with columns `abstract` and `title`.

    Args:
        test_iter: prediction data iterator yielding batches.
        step(int): step number used in the `.results` file name.
        result_file(str): path of the CSV file to write.
        cal_lead(bool): rank sentences in document order instead of by model
            score.
        cal_oracle(bool): use the sentences whose label equals 1.

    Returns:
        :obj:`nmt.Statistics`: loss statistics (only updated when the model
        is used, i.e. neither `cal_lead` nor `cal_oracle`).
    """
    def _get_ngrams(n, text):
        # Set of all contiguous n-grams (as tuples) of the token list.
        return {tuple(text[start:start + n])
                for start in range(len(text) - n + 1)}

    def _block_tri(c, p):
        # True when candidate `c` shares a trigram with any sentence in `p`.
        tri_c = _get_ngrams(3, c.split())
        return any(tri_c & _get_ngrams(3, s.split()) for s in p)

    if not cal_lead and not cal_oracle:
        self.model.eval()
    stats = Statistics()

    can_path = '%s_step%d.results' % (self.args.result_path, step)
    pred = []
    source = []
    # The .results file is opened (and thereby created/truncated) but nothing
    # is written to it here; the output goes to `result_file`.
    with open(can_path, 'w') as save_pred:
        with torch.no_grad():
            for batch in test_iter:
                src, labels, segs = batch.src, batch.labels, batch.segs
                clss, mask, mask_cls = batch.clss, batch.mask, batch.mask_cls

                if cal_lead:
                    selected_ids = [list(range(batch.clss.size(1)))] * batch.batch_size
                elif cal_oracle:
                    selected_ids = [[j for j in range(batch.clss.size(1)) if labels[i][j] == 1]
                                    for i in range(batch.batch_size)]
                else:
                    sent_scores, mask = self.model(src, segs, clss, mask, mask_cls)

                    elementwise = self.loss(sent_scores, labels.float())
                    loss = (elementwise * mask.float()).sum()
                    stats.update(Statistics(float(loss.cpu().data.numpy()), len(labels)))

                    sent_scores = sent_scores + mask.float()
                    sent_scores = sent_scores.cpu().data.numpy()
                    # Highest score first.
                    selected_ids = np.argsort(-sent_scores, 1)

                for doc_idx, ranking in enumerate(selected_ids):
                    sentences = batch.src_str[doc_idx]
                    if len(sentences) == 0:
                        continue

                    picked = []
                    for j in ranking[:len(sentences)]:
                        if j >= len(sentences):
                            continue
                        candidate = sentences[j].strip()
                        if not self.args.block_trigram or not _block_tri(candidate, picked):
                            picked.append(candidate)

                        # Stop at three sentences unless in oracle / recall mode.
                        if (not cal_oracle) and (not self.args.recall_eval) and len(picked) == 3:
                            break

                    title = '<q>'.join(picked)
                    if self.args.recall_eval:
                        title = ' '.join(title.split()[:len(batch.tgt_str[doc_idx].split())])

                    pred.append(title)
                    source.append(''.join(sentences))

        submission_df = pd.DataFrame({'abstract': source, 'title': pred})
        submission_df.to_csv(result_file, index=False)

    return stats
def summary(self, test_iter, step, cal_lead=False, cal_oracle=False):
    """
    Select summary sentences for the given data, print predictions and gold
    texts, and return the predictions.

    Args:
        test_iter: data iterator yielding batches.
        step(int): accepted for interface parity; not used here.
        cal_lead(bool): rank sentences in document order instead of by model
            score.
        cal_oracle(bool): use the sentences whose label equals 1.

    Returns:
        list: predicted summaries of the last batch processed (the lists are
        re-created for every batch).
    """
    def _get_ngrams(n, text):
        # Set of all contiguous n-grams (as tuples) of the token list.
        return {tuple(text[start:start + n])
                for start in range(len(text) - n + 1)}

    def _block_tri(c, p):
        # True when candidate `c` shares a trigram with any sentence in `p`.
        tri_c = _get_ngrams(3, c.split())
        return any(tri_c & _get_ngrams(3, s.split()) for s in p)

    if not cal_lead and not cal_oracle:
        self.model.eval()
    stats = Statistics()

    with torch.no_grad():
        for batch in test_iter:
            src, labels, segs = batch.src, batch.labels, batch.segs
            clss, mask, mask_cls = batch.clss, batch.mask, batch.mask_cls

            gold, pred = [], []

            if cal_lead:
                selected_ids = [list(range(batch.clss.size(1)))] * batch.batch_size
            elif cal_oracle:
                selected_ids = [[j for j in range(batch.clss.size(1)) if labels[i][j] == 1]
                                for i in range(batch.batch_size)]
            else:
                sent_scores, mask = self.model(src, segs, clss, mask, mask_cls)

                elementwise = self.loss(sent_scores, labels.float())
                loss = (elementwise * mask.float()).sum()
                stats.update(Statistics(float(loss.cpu().data.numpy()), len(labels)))

                sent_scores = sent_scores + mask.float()
                sent_scores = sent_scores.cpu().data.numpy()
                # Highest score first.
                selected_ids = np.argsort(-sent_scores, 1)

            for doc_idx, ranking in enumerate(selected_ids):
                sentences = batch.src_str[doc_idx]
                if len(sentences) == 0:
                    continue

                picked = []
                for j in ranking[:len(sentences)]:
                    if j >= len(sentences):
                        continue
                    candidate = sentences[j].strip()
                    if not self.args.block_trigram or not _block_tri(candidate, picked):
                        picked.append(candidate)

                    # Stop at three sentences unless in oracle / recall mode.
                    if (not cal_oracle) and (not self.args.recall_eval) and len(picked) == 3:
                        break

                text = '<q>'.join(picked)
                if self.args.recall_eval:
                    text = ' '.join(text.split()[:len(batch.tgt_str[doc_idx].split())])

                pred.append(text)
                gold.append(batch.tgt_str[doc_idx])

    print(' '.join(pred), ' '.join(gold))
    return pred
def _gradient_accumulation(self, true_batchs, normalization, total_stats, report_stats):
    """
    Run forward/backward over the collected batches and step the optimizer,
    all-reducing gradients first when more than one GPU is in use.

    The loss depends on `self.args.model_name`: for 'seq' it is a summed
    cross-entropy (or soft-label log-likelihood when
    `self.args.use_rouge_label` is set) over the flattened selection steps;
    otherwise it is the `self.args.loss` variant ("bce" or "wsoftmax")
    masked by `candi_masks` and summed.

    Args:
        true_batchs(list): batches to process; each must carry
            `sel_sent_idxs` and `sel_sent_masks`.
        normalization(int): count recorded with every per-batch Statistics.
        total_stats(Statistics): running statistics, updated in place.
        report_stats(Statistics): reporting statistics, updated in place.
    """
    def _sync_and_step():
        # Multi-GPU gradient gather, then parameter update.
        if self.n_gpu > 1:
            grads = [p.grad.data for p in self.model.parameters()
                     if p.requires_grad and p.grad is not None]
            distributed.all_reduce_and_rescale_tensors(grads, float(1))
        self.optim.step()

    if self.grad_accum_count > 1:
        self.model.zero_grad()

    for batch in true_batchs:
        if self.grad_accum_count == 1:
            self.model.zero_grad()

        src, labels, segs = batch.src, batch.labels, batch.segs
        clss, mask, mask_cls = batch.clss, batch.mask, batch.mask_cls
        group_idxs = batch.groups
        # Batches need to provide these two attributes.
        sel_sent_idxs = batch.sel_sent_idxs
        sel_sent_masks = batch.sel_sent_masks
        candi_masks = batch.candi_masks
        src_str, tgt_str = batch.src_str, batch.tgt_str
        soft_labels = batch.soft_labels

        if self.args.model_name == 'seq':
            sent_scores, _ = self.model(src, mask, segs, clss, mask_cls, group_idxs,
                                        sel_sent_idxs=sel_sent_idxs,
                                        sel_sent_masks=sel_sent_masks,
                                        candi_sent_masks=candi_masks)
            # batch, seq_len, sent_count -> batch*seq_len, sent_count
            n_sents = sent_scores.size(2)
            pred = sent_scores.contiguous().view(-1, n_sents)
            gold = batch.label_seq.contiguous().view(-1)
            if self.args.use_rouge_label:
                soft_labels = soft_labels.contiguous().view(-1, soft_labels.size(2))
                log_prb = F.log_softmax(pred, dim=1)
                non_pad_mask = gold.ne(-1)  # -1 is the padding value
                sent_mask = mask_cls.unsqueeze(1).expand(-1, sent_scores.size(1), -1)
                sent_mask = sent_mask.contiguous().view(-1, n_sents)
                per_step = -((soft_labels * log_prb) * sent_mask.float()).sum(dim=1)
                loss = per_step.masked_select(non_pad_mask).sum()  # average later
            else:
                loss = F.cross_entropy(pred, gold, ignore_index=-1, reduction='sum')
        else:
            sent_scores, _ = self.model(src, mask, segs, clss, mask_cls, group_idxs,
                                        sel_sent_idxs=sel_sent_idxs,
                                        sel_sent_masks=sel_sent_masks,
                                        candi_sent_masks=candi_masks,
                                        sel_sent_hit_map=batch.hit_map)
            if self.args.use_rouge_label:
                labels = soft_labels
            if self.args.loss == "bce":
                # pointwise
                loss = self.bce_logits_loss(sent_scores, labels.float())
            elif self.args.loss == "wsoftmax":
                # batch_size, max_sent_count
                loss = -self.logsoftmax(sent_scores) * labels.float()
            # NOTE(review): any other self.args.loss value leaves `loss`
            # unbound at this point - confirm only these two are supported.
            loss = (loss * candi_masks.float()).sum()

        (loss / loss.numel()).backward()

        batch_stats = Statistics(float(loss.cpu().data.numpy()), normalization)
        total_stats.update(batch_stats)
        report_stats.update(batch_stats)

        # Per-batch parameter update when not accumulating.
        if self.grad_accum_count == 1:
            _sync_and_step()

    # In case of multi-step gradient accumulation,
    # update only after all accumulated batches.
    if self.grad_accum_count > 1:
        _sync_and_step()