def on_backward_begin(self, loss):
    """Accumulate the raw loss and periodically push the running average to fitlog.

    Logging happens every ``self._log_loss_every`` steps; the accumulator is
    reset after each emission. Disabled when ``_log_loss_every`` is not positive.
    """
    if self._log_loss_every <= 0:
        return
    self._avg_loss += loss.item()
    if self.step % self._log_loss_every == 0:
        # Scale by update_every so the reported value reflects loss per optimizer update.
        averaged = self._avg_loss / self._log_loss_every * self.update_every
        fitlog.add_loss(averaged, name='loss', step=self.step, epoch=self.epoch)
        self._avg_loss = 0
def fitlog_add_loss(value, epoch, name):
    """Log *value* to fitlog under *name*, advancing a per-name step counter.

    The counter lives in the module-level ``fitlog_loss_step`` dict and starts
    at 1 for the first call with a given name.
    """
    step = fitlog_loss_step.get(name)
    if step is None:
        step = 0
    step += 1
    fitlog_loss_step[name] = step
    fitlog.add_loss(value=value, step=step, epoch=epoch, name=name)
def train(step, model, data_loader, optim, device):
    """Run one training epoch and report perplexity/accuracy to stdout and fitlog.

    ``step`` is the epoch index used as the fitlog step; per-batch stats come
    from ``train_step`` as (loss, token_count, correct_count).
    """
    model.train()
    seen, hits = 0, 0
    batch_losses = []
    bar = tqdm(total=len(data_loader))
    for src, trg in data_loader:
        batch_loss, batch_seen, batch_hits = train_step(src, trg, model, optim, device)
        seen += batch_seen
        hits += batch_hits
        batch_losses.append(batch_loss)
        bar.update(1)
        bar.set_postfix({'accuracy': batch_hits / batch_seen, "loss": batch_loss, "ppl": math.exp(batch_loss)})
    bar.close()
    epoch_loss = mean(batch_losses)
    print("training epoch {} || ppl {} ||accuracy {} || loss {}".format(
        step, math.exp(epoch_loss), (hits / seen), epoch_loss))
    fitlog.add_loss(math.exp(epoch_loss), step=step, name='train')
    fitlog.add_metric(hits / seen, step=step, name='train accuracy')
def eval(step, model, data_loader, best_loss, device):
    """Evaluate one epoch; checkpoint the model when the mean loss improves.

    Parameters:
        step: epoch index, used as the fitlog step.
        model: the model to evaluate (switched to eval mode here).
        data_loader: iterable of (src, trg) batches.
        best_loss: best (lowest) mean loss seen so far.
        device: device passed through to ``eval_step``.
    Returns:
        The updated best mean loss.

    NOTE(review): the name shadows the builtin ``eval``; kept unchanged for
    caller compatibility. Assumes ``eval_step`` disables gradients itself —
    TODO confirm, otherwise wrap the loop in ``torch.no_grad()``.
    """
    model.eval()
    total = 0
    correct = 0
    losses = []
    pbtr = tqdm(total=len(data_loader))
    for src, trg in data_loader:
        loss, to, cor = eval_step(src, trg, model, device)
        total += to
        correct += cor
        losses.append(loss)
        pbtr.update(1)
        # pbtr.set_postfix({'accuracy':cor/to,"loss":loss,"ppl":math.exp(loss)})
    pbtr.close()
    # Compute epoch statistics once instead of re-deriving mean(losses) five times.
    avg_loss = mean(losses)
    ppl = math.exp(avg_loss)
    accuracy = correct / total
    fitlog.add_loss(ppl, step=step, name='eval')
    fitlog.add_metric(accuracy, step=step, name='eval accuracy')
    if best_loss > avg_loss:
        # Assumes ./model/ already exists — TODO confirm (torch.save does not create it).
        torch.save(model.state_dict(), './model/best_model.pkl')
        best_loss = avg_loss
        print("saving to best_model.pkl")
    print("eval epoch {} || ppl {} ||accuracy {} || loss {} ||best loss {}".format(
        step, ppl, accuracy, avg_loss, best_loss))
    return best_loss
def forward(
    self,
    input_ids=None,
    attention_mask=None,
    token_type_ids=None,
    position_ids=None,
    head_mask=None,
    inputs_embeds=None,
    labels=None,
    output_attentions=None,
    output_hidden_states=None,
    return_dict=None,
    Training=False,
    do_cvae=0,  # >0: also train the CVAE branch this step (was: "do CVAE a few times first")
    global_step=0,
    args=None
):
    r"""
    Sequence-classification forward pass: RoBERTa encoder, then a CVAE-based
    augmentation/projection stage, then a classifier head. When ``do_cvae > 0``
    the CVAE reconstruction losses (positive and negative condition) are added
    to the classification loss.

    labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size,)`, `optional`):
        Labels for computing the sequence classification/regression loss.
        Indices should be in :obj:`[0, ..., config.num_labels - 1]`.
        If :obj:`config.num_labels == 1` a regression loss is computed (Mean-Square loss),
        If :obj:`config.num_labels > 1` a classification loss is computed (Cross-Entropy).
    """
    return_dict = return_dict if return_dict is not None else self.config.use_return_dict
    outputs = self.roberta(
        input_ids,
        attention_mask=attention_mask,
        token_type_ids=token_type_ids,
        position_ids=position_ids,
        head_mask=head_mask,
        inputs_embeds=inputs_embeds,
        output_attentions=output_attentions,
        output_hidden_states=output_hidden_states,
        return_dict=return_dict,
    )
    sequence_output = outputs[0]
    # --- modified code starts here (was: "modify code here") ---
    sequence_output = self.laynorm(sequence_output)
    # data augmentation
    sequence_output = self._add_attention(sequence_output, attention_mask)
    # logger.info('seq.shape: ' + str(sequence_output.shape))
    if do_cvae > 0:
        # train the CVAE
        out, mu, logvar = self.cvae(x=sequence_output, y=labels, Training=True, device=args.device)
        # original data + condition
        y_c = self.cvae.to_categrical(labels, device=args.device)
        # y_c = y_c.unsqueeze(1)
        # concatenate the input sample with the one-hot vector of label y
        con = nn.Dropout(args.cvae_dropout)(sequence_output) + y_c
        y_n = self.cvae.to_categrical_neg(labels, device=args.device)
        # y_n = y_n.unsqueeze(1)
        neg_con = nn.Dropout(args.cvae_dropout)(sequence_output) + y_n
        # update: 4/26
        # sim = Similarity(1000)
        # pos_sim = sim(out.unsqueeze(2), con.unsqueeze(2))
        # neg_sim = sim(out.unsqueeze(2), neg_con.unsqueeze(2))  # [8, 256]
        # cos_sim = torch.cat([pos_sim, neg_sim], 2)
        # pos_sim = pos_sim.expand(-1, -1, 2)
        # logger.info(str(pos_sim.shape))
        # logger.info(str(cos_sim.shape))
        # version1: CVAE loss on the sequence
        cvae_loss = CVAEModel.loss_function(recon_x=out, x=con, mu=mu, logvar=logvar)
        cvae_loss_neg = CVAEModel.loss_function(recon_x=out, x=neg_con, mu=mu, logvar=logvar)
        # version2: Cos_sim loss
        # print(con.shape)
        # sim_contrast_loss = CVAEModel.loss_function(recon_x=pos_sim, x=neg_sim, mu=mu, logvar=logvar)
        # logger.info(str(sim_contrast_loss))
        sequence_output = self.cvae(sequence_output)
        sequence_output = self.projector(sequence_output)
    else:
        # training finished: use the trained encoder only
        sequence_output = self.cvae(sequence_output)
        sequence_output = self.projector(sequence_output)
        pass
    sequence_output = self.laynorm(sequence_output)
    logits = self.classifier(sequence_output)
    # logger.info('label: ' + str(labels) + str(labels.shape))
    loss = None
    if labels is not None:
        if self.num_labels == 1:
            # We are doing regression
            loss_fct = MSELoss()
            loss = loss_fct(logits.view(-1), labels.view(-1))
        else:
            loss_fct = CrossEntropyLoss()
            # [8, 2], [8]
            loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
        # multi-task? — combine classification loss with CVAE losses
        # NOTE(review): indentation reconstructed from a flattened source; this block is
        # placed at the `labels is not None` level so it applies to both branches — confirm.
        if do_cvae > 0:
            # print(loss, cvae_loss, cvae_loss_neg)
            loss = loss + args.cvae_beta * cvae_loss - args.cvae_theta * cvae_loss_neg
            # loss = loss + args.cvae_theta * sim_contrast_loss
            if global_step % args.logging_steps == 0:
                # fitlog.add_loss(sim_contrast_loss, name='sim loss', step=global_step)
                fitlog.add_loss(cvae_loss*1000%1000, name = 'positive_loss', step=global_step)
                fitlog.add_loss(cvae_loss_neg*1000%1000, name = 'negative_loss', step=global_step)
                fitlog.add_loss(cvae_loss - cvae_loss_neg, name = 'pos - neg', step=global_step)
    if not return_dict:
        output = (logits,) + outputs[2:]
        return ((loss,) + output) if loss is not None else output
    return SequenceClassifierOutput(
        loss=loss,
        logits=logits,
        hidden_states=outputs.hidden_states,
        attentions=outputs.attentions,
    )
def fit(self):
    """Full training loop (MXNet/gluon): iterate epochs over ``self.train_iter``,
    log the running loss to fitlog at a widening interval, validate each epoch
    when ``self.val`` is set, and checkpoint the best and last models."""
    last_miou = .0  # record the best validation mIoU
    loss_step = 0  # step count
    for epoch in range(self.conf.epochs):
        train_loss = .0
        start = time.time()
        for i, (data, target) in enumerate(self.train_iter):
            # Split the batch across the configured devices (data parallelism).
            gpu_datas = split_and_load(data, ctx_list=self.ctx)
            gpu_targets = split_and_load(target, ctx_list=self.ctx)
            with autograd.record():
                loss_gpu = [
                    self.criterion(*self.net(gpu_data), gpu_target)
                    for gpu_data, gpu_target in zip(gpu_datas, gpu_targets)
                ]
            for loss in loss_gpu:
                autograd.backward(loss)
            self.trainer.step(self.conf.bs_train)
            nd.waitall()  # synchronize so .asscalar() below reads finished values
            loss_temp = .0
            for losses in loss_gpu:
                loss_temp += losses.sum().asscalar()
            train_loss += (loss_temp / self.conf.bs_train)
            # log every n batch
            # add loss to draw curve, train_loss <class numpy.float64>
            # Interval widens after 5000 logged points to keep the curve light.
            interval = 5 if loss_step < 5000 else 50
            if (i % interval == 0) or (i + 1 == len(self.train_iter)):
                fitlog.add_loss(name='loss', value=round(train_loss / (i + 1), 5), step=loss_step)
                loss_step += 1
                self.logger.info(
                    "Epoch %d, batch %d, training loss %.5f." % (epoch, i, train_loss / (i + 1)))
        # log each epoch
        self.logger.info(
            ">>>>>> Epoch %d complete, time cost: %.1f sec. <<<<<<" % (epoch, time.time() - start))
        # validation each epoch
        if self.val:
            pixel_acc, mean_iou = self._validation()
            self.logger.info(
                "Epoch %d validation, PixelAccuracy: %.4f, mIoU: %.4f." % (epoch, pixel_acc, mean_iou))
            fitlog.add_metric(value=mean_iou, step=epoch, name='mIoU')
            fitlog.add_metric(value=pixel_acc, step=epoch, name='PA')
            if mean_iou > last_miou:
                f_name = self._save_model(tag='best')
                self.logger.info(
                    "Epoch %d mIoU: %.4f > %.4f(previous), save model: %s" % (epoch, mean_iou, last_miou, f_name))
                last_miou = mean_iou
    # save the final-epoch params
    f_name = self._save_model(tag='last')
    self.logger.info(">>>>>> Training complete, save model: %s. <<<<<<" % f_name)
    # record
    fitlog.add_best_metric(value=round(last_miou, 4), name='mIoU')
    fitlog.add_other(value=self.id, name='record_id')
    fitlog.add_other(value=self.num_train, name='train')
    fitlog.add_other(value=self.num_val, name='val')
def on_backward_begin(self, loss):
    """Forward the current batch loss directly to fitlog, tagged with step and epoch."""
    value = loss.item()
    fitlog.add_loss(value, name='loss', step=self.step, epoch=self.epoch)
def train(C, logger, train_data, valid_data, loss_func, generator, n_rel_typs, run_name="0", test_data=None):
    """Train a relation-extraction model for ``C.epoch_numb`` epochs.

    Validates after each epoch, pickles the model whenever the selected
    validation metric (``C.valid_metric``: micro/macro F1 or their product)
    improves, optionally reloads the best checkpoint, and evaluates on
    ``test_data`` if given. Returns ``(model, best_metric)``.
    """
    (batch_numb, device), (model, optimizer, scheduler) = before_train(C, logger, train_data, n_rel_typs)
    # ----- iterate each epoch -----
    best_epoch = -1
    best_metric = -1
    for epoch_id in range(C.epoch_numb):
        pbar = tqdm(range(batch_numb), ncols=70)
        avg_loss = 0
        for batch_id in pbar:
            # ----- get data -----
            data = train_data[batch_id * C.batch_size:(batch_id + 1) * C.batch_size]
            sents, ents, anss, data_ent = get_data_from_batch(data, device=device)
            loss, pred = update_batch(C, logger, model, optimizer, scheduler, loss_func, sents, ents, anss, data_ent)
            avg_loss += float(loss)
            # Global step = epoch * batches-per-epoch + batch index.
            fitlog.add_loss(value=float(loss), step=epoch_id * batch_numb + batch_id,
                            name="({0})train loss".format(run_name))
            pbar.set_description_str("(Train)Epoch %d" % (epoch_id))
            pbar.set_postfix_str("loss = %.4f (avg = %.4f)" % (float(loss), avg_loss / (batch_id + 1)))
        logger.log("Epoch %d ended. avg_loss = %.4f" % (epoch_id, avg_loss / batch_numb))
        micro_f1, macro_f1, test_loss = test(
            C, logger, valid_data, model, loss_func, generator, "valid", epoch_id, run_name,
        )
        if C.valid_metric in ["macro*micro", "micro*macro"]:
            metric = macro_f1 * micro_f1
        elif C.valid_metric == "macro":
            metric = macro_f1
        elif C.valid_metric == "micro":
            metric = micro_f1
        else:
            assert False  # unknown C.valid_metric — NOTE: stripped under -O
        if best_metric < metric:
            best_epoch = epoch_id
            best_metric = metric
            # Checkpoint the whole model via pickle (HACK: pickle.load of model
            # files is unsafe on untrusted input — trusted local file here).
            with open(C.tmp_file_name + ".model" + "." + str(run_name), "wb") as fil:
                pickle.dump(model, fil)
            # fitlog.add_best_metric(best_macro_f1 , name = "({0})macro f1".format(ensemble_id))
        # test() presumably leaves the model in eval mode; switch back for the next epoch.
        model = model.train()
    if not C.no_valid:
        # reload best model
        with open(C.tmp_file_name + ".model" + "." + str(run_name), "rb") as fil:
            model = pickle.load(fil)  # load best valid model
        logger.log("reloaded best model at epoch %d" % best_epoch)
    if test_data is not None:
        final_micro_f1, final_macro_f1, final_test_loss = test(
            C, logger, test_data, model, loss_func, generator, "test", epoch_id, run_name,
        )
    return model, best_metric
# NOTE(review): mid-script chunk — `total`, `num`, `model`, `loss_function`,
# `optimizer`, and the data loaders are defined earlier in the file (not visible
# here); indentation reconstructed from a flattened source — confirm against
# the original layout.

# ---- training pass over train_loader ----
correct = 0
losses = []
for i, (data, label) in enumerate(train_loader):
    data = data.cuda()
    label = label.cuda()
    predict = model(data)
    loss = loss_function(predict, label)
    predict_label = torch.argmax(predict, 1)
    correct += (predict_label == label).cpu().sum().item()
    total += label.size()[0]
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()
    losses.append(loss.item())
# fitlog.add_metric(list(mean(losses)),step=num,name='train_loss')
fitlog.add_loss(mean(losses), step=num, name='train_loss')
fitlog.add_metric({"train": {"acc": correct / total}}, step=num)
print("current_epoch_loss:{}".format(str(mean(losses))))
print("current_epoch_accuracy:{}".format(correct / total))

# ---- evaluation pass over test_loader (no backward/optimizer step) ----
# reset the counters before evaluating
total = 0
correct = 0
losses = []
for i, (data, label) in enumerate(test_loader):
    data = data.cuda()
    label = label.cuda()
    predict = model(data)
    loss = loss_function(predict, label)
    predict_label = torch.argmax(predict, 1)
    correct += (predict_label == label).cpu().sum().item()
    total += label.size()[0]
    losses.append(loss.item())