def train(epoch):
    """Run one training epoch of the 3D segmentation model and log epoch averages."""
    loss_ave = utils.RunningAverage()
    dice_ave = utils.RunningAverage()
    for batch_idx, (input, target) in tqdm.tqdm(enumerate(trainloader),
                                                total=len(trainloader)):
        # Fix: `async=True` is a SyntaxError on modern Python (`async` became a
        # keyword); the replacement kwarg is `non_blocking` with the same
        # overlapped host->device copy semantics.
        input, target = input, target.cuda(non_blocking=True)
        if not args.parallel:
            input = input.cuda()
        seg = seg_model.forward(input, fully=True)
        # Upsample logits to the label volume (D, H, W) before computing the loss.
        seg = F.interpolate(seg,
                            size=(target.size(1), target.size(2), target.size(3)),
                            mode="trilinear", align_corners=True)
        loss = loss_criterion(seg, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        dice = evaluation_metric(seg, target)
        loss_ave.update(loss.item(), input.size(0))
        dice_ave.update(dice, input.size(0))
    # Epoch-level summary (writer is indexed by epoch).
    log_str = 'TRAIN-------Epoch: %d | Loss: %.5f | Dice: %.5f' % (
        epoch, loss_ave.avg, dice_ave.avg)
    print(log_str)
    writer.add_scalar('train_loss', loss_ave.avg, epoch)
    writer.add_scalar('train_dice', dice_ave.avg, epoch)
def train(model, dataLoader, scheduler, optimizer, steps, opt):
    """Run one training pass (`steps` batches) over `dataLoader`.

    Each batch is a dict; keys containing 'num' hold tensors that are moved
    to the GPU when `opt.use_gpu` is set. Progress is printed in place on a
    single console line.
    """
    model.train()
    lossAll = utils.RunningAverage()
    Acc = utils.RunningAverage()
    for it, data in enumerate(dataLoader):
        # Only the numeric ('num*') fields are tensors eligible for the GPU.
        for key in data.keys():
            if 'num' in key and opt.use_gpu:
                data[key] = data[key].cuda()
        batch_logit, batch_pred = model(data)
        # NOTE: 'cross_entopy_loss' is the model's own (misspelled) method name.
        loss = model.cross_entopy_loss(batch_logit, data['num:label_id'])
        optimizer.zero_grad()
        loss.backward()
        # Clip gradients to guard against exploding updates.
        torch.nn.utils.clip_grad_norm_(parameters=model.parameters(),
                                       max_norm=opt.clip_grad)
        optimizer.step()
        acc = model.accuracy(batch_pred, data['num:label_id'])
        lossAll.update(loss.item())
        Acc.update(acc.item())
        # '\r' (no newline) overwrites the same console line every step.
        sys.stdout.write(
            '[Train] step: {}/{} | loss: {:.6f} acc:{:.3f}%'.format(it + 1,
                                                                    steps,
                                                                    lossAll(),
                                                                    Acc()) + '\r')
        sys.stdout.flush()
    print()
    # Per-epoch learning-rate schedule update.
    scheduler.step()
def train(model, dataLoader, scheduler, optimizer, steps, opt):
    """Train `model` for one pass over `dataLoader`.

    The model returns (loss, batch_pred); accuracy is the fraction of
    predictions equal to class id 0, reported as a percentage.
    """
    model.train()
    lossAll = utils.RunningAverage()
    acc_score = utils.RunningAverage()
    for it, data in enumerate(dataLoader):
        # Best-effort move of every tensor field to the GPU; non-tensor
        # entries are simply left as-is. Fix: a bare `except:` also swallows
        # KeyboardInterrupt/SystemExit — catch Exception only.
        for key in data.keys():
            try:
                data[key] = data[key].cuda()
            except Exception:
                pass
        loss, batch_pred = model(data)
        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_nor_(parameters=model.parameters(),
                                      max_norm=opt.clip_grad) if False else \
            torch.nn.utils.clip_grad_norm_(parameters=model.parameters(),
                                           max_norm=opt.clip_grad)
        optimizer.step()
        lossAll.update(loss.item())
        # NOTE(review): accuracy counts predictions equal to label id 0 —
        # presumably the target class; confirm against the dataset encoding.
        acc = torch.sum(batch_pred == 0).item() / len(batch_pred)
        acc_score.update(acc * 100)
        sys.stdout.write(
            '[Train] step: {}/{} | loss: {:.6f}/{:.6f} acc:{:.3f}/acc:{:.3f}%'.
            format(it + 1, steps, lossAll(), loss.item(), acc_score(),
                   acc * 100) + '\r')
        sys.stdout.flush()
    print()
    scheduler.step()
def validate(epoch, model, device, dataloader, criterion, args, writer):
    """ Test loop, print metrics. Returns (avg loss, avg accuracy). """
    progbar = tqdm(total=len(dataloader), desc='Val')
    loss_record = utils.RunningAverage()
    acc_record = utils.RunningAverage()
    model.eval()
    with torch.no_grad():
        # Fix: iterate the raw dataloader — wrapping it in tqdm() here as
        # well rendered a second, duplicate progress bar next to `progbar`.
        for batch_idx, (data, label, _, _) in enumerate(dataloader):
            data, label = data.to(device), label.to(device)
            output = model(data)
            loss = criterion(output, label)
            # measure accuracy and record loss
            acc = utils.compute_acc(output, label)
            acc_record.update(100 * acc[0].item() / data.size(0))
            loss_record.update(loss.item())
            progbar.set_description('Val (loss=%.4f)' % (loss_record()))
            progbar.update(1)
    writer.add_scalar('validation/Loss_epoch', loss_record(), epoch)
    writer.add_scalar('validation/Acc_epoch', acc_record(), epoch)
    return loss_record(), acc_record()
def train(model, data_iterator, optimizer, scheduler, params):
    """One training pass of `params.train_steps` batches for the translator.

    Returns (average loss, average precision) over the pass.
    """
    model.train()
    scheduler.step()
    precision_avg = utils.RunningAverage()
    loss_avg = utils.RunningAverage()
    progress = trange(params.train_steps, desc="Train: ")
    for _ in progress:
        # fetch the next training batch
        src, src_pos, tgt, tgt_pos = next(data_iterator)
        predictions = model(src, src_pos, tgt, tgt_pos)
        # gold sequence drops the first token of each target
        reference = tgt[:, 1:]
        loss, precision = cal_translator_performance(predictions, reference)
        if params.n_gpu > 1 and params.multi_gpu:
            loss = loss.mean()  # average the per-GPU losses
        # clear previous gradients, backprop, clip, then update
        model.zero_grad()
        loss.backward()
        nn.utils.clip_grad_norm_(parameters=model.parameters(),
                                 max_norm=params.clip_grad)
        optimizer.step()
        loss_avg.update(loss.item())
        precision_avg.update(precision)
        progress.set_postfix(loss='{:05.3f}'.format(loss_avg()),
                             precision='{:05.3f}'.format(precision_avg()))
    return loss_avg(), precision_avg()
def evaluate(model, loss_fn, val_dataloader, params, epoch):
    """Evaluate detector losses (focal + smooth-L1) over the validation set.

    Returns the running average of the combined loss.
    """
    model.eval()
    total_avg = utils.RunningAverage()
    focal_avg = utils.RunningAverage()
    l1_avg = utils.RunningAverage()
    with torch.no_grad():
        for img_batch, labels_batch, regression_batch in val_dataloader:
            img_batch = img_batch.to(device)
            labels_batch = labels_batch.to(device)
            regression_batch = regression_batch.to(device)
            cls_pred, reg_pred, _ = model(img_batch)
            # focal loss is rescaled so both terms are comparable
            cls_loss = loss_fn['focal'](labels_batch, cls_pred) * config['loss_ratio_FL2L1']
            reg_loss = loss_fn['smooth_l1'](regression_batch, reg_pred)
            combined = cls_loss + reg_loss
            # fold plain floats into the running averages
            total_avg.update(combined.detach().item())
            focal_avg.update(cls_loss.detach().item())
            l1_avg.update(reg_loss.detach().item())
            del img_batch, labels_batch, regression_batch
    logging.info("total_loss:{:05.3f} FL_loss:{:05.3f} L1_loss:{:05.3f}".format(
        total_avg(), focal_avg(), l1_avg()))
    result = total_avg()
    del total_avg, focal_avg, l1_avg
    return result
def train(train_loader, model, model_T, optimizer, criterion, criterion_T, accuracy, args):
    """One distillation epoch: the student `model` learns from hard labels and
    from the frozen teacher `model_T`; returns a dict of averaged metrics."""
    model.train()
    model_T.eval()  # teacher only provides targets, it is never updated here
    loss_avg = utils.RunningAverage()
    loss_true_avg = utils.RunningAverage()
    loss_teacher_avg = utils.RunningAverage()
    accTop1_avg = utils.RunningAverage()
    end = time.time()
    with tqdm(total=len(train_loader)) as progress:
        for train_batch, labels_batch in train_loader:
            train_batch = train_batch.to(device)
            labels_batch = labels_batch.to(device)
            output_batch = model(train_batch)
            with torch.no_grad():
                teacher_outputs = model_T(train_batch)
            loss_true = criterion(output_batch, labels_batch)
            loss_teacher = criterion_T(output_batch, teacher_outputs)
            # total = hard-label loss + alpha-weighted distillation loss
            loss = loss_true + args.alpha * loss_teacher
            metrics = accuracy(output_batch, labels_batch)
            accTop1_avg.update(metrics[0].item())
            loss_true_avg.update(loss_true.item())
            loss_teacher_avg.update(loss_teacher.item())
            loss_avg.update(loss.item())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            progress.update()
    train_metrics = {
        'train_loss': loss_avg.value(),
        'train_true_loss': loss_true_avg.value(),
        'train_teacher_loss': loss_teacher_avg.value(),
        'train_accTop1': accTop1_avg.value(),
        'time': time.time() - end,
    }
    logging.info("- Train metrics: " + " ; ".join(
        "{}: {:05.3f}".format(k, v) for k, v in train_metrics.items()))
    return train_metrics
def train(epoch, model, device, dataloader, optimizer, scheduler, criterion, experiment_dir, writer):
    """ Train loop, predict rotations. Returns (loss_record, acc_record). """
    global iter_cnt
    # Fix: the debug leftover `total=10` made the bar report >1000% progress
    # on a full epoch, and wrapping the dataloader in a second tqdm() drew a
    # duplicate bar; one bar covering the whole dataloader is intended.
    progbar = tqdm(total=len(dataloader), desc='Train')
    loss_record = utils.RunningAverage()
    acc_record = utils.RunningAverage()
    correct = 0
    total = 0
    save_path = experiment_dir + '/'
    os.makedirs(save_path, exist_ok=True)
    model.train()
    for batch_idx, (data, label, _, _) in enumerate(dataloader):
        data, label = data.to(device), label.to(device)
        output = model(data)
        loss = criterion(output, label)
        # running accuracy over everything seen so far this epoch
        confidence, predicted = output.max(1)
        correct += predicted.eq(label).sum().item()
        total += label.size(0)
        acc = correct / total
        acc_record.update(100 * acc)
        loss_record.update(loss.item())
        writer.add_scalar('train/Loss_batch', loss.item(), iter_cnt)
        writer.add_scalar('train/Acc_batch', acc, iter_cnt)
        iter_cnt += 1
        # compute gradient and do optimizer step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        progbar.set_description('Train (loss=%.4f)' % (loss_record()))
        progbar.update(1)
    if scheduler:
        scheduler.step()
    LR = optimizer.param_groups[0]['lr']
    writer.add_scalar('train/Loss_epoch', loss_record(), epoch)
    writer.add_scalar('train/Acc_epoch', acc_record(), epoch)
    logging.info('Train Epoch: {} LR: {:.4f} Avg Loss: {:.4f}; Avg Acc: {:.4f}'.format(
        epoch, LR, loss_record(), acc_record()))
    # NOTE: returns the averager objects themselves (callers call them).
    return loss_record, acc_record
def train(model, optimizer, loss_fn, dataloader, params, epoch):
    """Train the model on `num_steps` batches

    Args:
        model: (torch.nn.Module) the neural network
        optimizer: (torch.optim) optimizer for parameters of model
        loss_fn: a function that takes batch_output and batch_labels and computes the loss for the batch
        dataloader: (DataLoader) a torch.utils.data.DataLoader object that fetches training data
        metrics: (dict) a dictionary of functions that compute a metric using the output and labels of each batch
        params: (Params) hyperparameters
        num_steps: (int) number of batches to train on, each of size params.batch_size
    """
    loss_TOTAL = utils.RunningAverage()
    loss_FL = utils.RunningAverage()
    loss_L1 = utils.RunningAverage()
    model.train()
    # Keep the feature extractor frozen for the first `frozen_epochs` epochs.
    if epoch < params.frozen_epochs:
        model.train_extractor(False)
    else:
        model.train_extractor(True)
    with tqdm(total=len(dataloader)) as t:
        for i, (img_batch, labels_batch, regression_batch) in enumerate(dataloader):
            img_batch, labels_batch, regression_batch = img_batch.to(device), labels_batch.to(device), regression_batch.to(device)
            classification_pred, regression_pred, _ = model(img_batch)
            # Focal (classification) loss is rescaled relative to smooth-L1.
            loss_cls = loss_fn['focal'](labels_batch, classification_pred) * config['loss_ratio_FL2L1']
            loss_reg = loss_fn['smooth_l1'](regression_batch, regression_pred)
            loss_all = loss_cls + loss_reg
            # Detach to plain floats for bookkeeping only.
            loss_cls_detach = loss_cls.detach().item()
            loss_reg_detach = loss_reg.detach().item()
            loss_all_detach = loss_all.detach().item()
            # clear previous gradients, compute gradients of all variables wrt loss
            optimizer.zero_grad()
            loss_all.backward()
            # The norm is computed over all gradients together
            clip_grad_norm_(model.parameters(), 0.5)
            # performs updates using calculated gradients
            optimizer.step()
            # update the average loss
            loss_TOTAL.update(loss_all_detach)
            loss_FL.update(loss_cls_detach)
            loss_L1.update(loss_reg_detach)
            del img_batch, labels_batch, regression_batch
            t.set_postfix(total_loss='{:05.3f}'.format(loss_all_detach),
                          FL_loss='{:05.3f}'.format(loss_cls_detach),
                          L1_loss='{:05.3f}'.format(loss_reg_detach))
            t.update()
    logging.info("total_loss:{:05.3f} FL_loss:{:05.3f} L1_loss:{:05.3f}".format(loss_TOTAL(), loss_FL(), loss_L1()))
    del loss_TOTAL, loss_FL, loss_L1
def train(train_loader, model, optimizer, criterion, accuracy, args):
    """Train `model` for one epoch; returns averaged loss / top-1 / top-5."""
    model.train()
    loss_avg = utils.RunningAverage()
    accTop1_avg = utils.RunningAverage()
    accTop5_avg = utils.RunningAverage()
    end = time.time()
    with tqdm(total=len(train_loader)) as progress:
        for train_batch, labels_batch in train_loader:
            # async host->device copies
            train_batch = train_batch.cuda(non_blocking=True)
            labels_batch = labels_batch.cuda(non_blocking=True)
            output_batch = model(train_batch)
            loss = criterion(output_batch, labels_batch)
            # clear previous gradients, backprop, then apply the update
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # top-1 / top-5 accuracy bookkeeping
            metrics = accuracy(output_batch, labels_batch, topk=(1, 5))
            accTop1_avg.update(metrics[0].item())
            accTop5_avg.update(metrics[1].item())
            loss_avg.update(loss.item())
            progress.update()
    train_metrics = {
        'train_loss': loss_avg.value(),
        'train_accTop1': accTop1_avg.value(),
        'train_accTop5': accTop5_avg.value(),
        'time': time.time() - end,
    }
    logging.info("- Train metrics: " + " ; ".join(
        "{}: {:05.3f}".format(k, v) for k, v in train_metrics.items()))
    return train_metrics
def evaluate(args, model, eval_dataloader, params): model.eval() # 记录平均损失 loss_avg = utils.RunningAverage() # tag to id tag2idx = {tag: idx for idx, tag in enumerate(params.tag_list)} # init y_true = [] y_pred = [] # get data for batch in tqdm(eval_dataloader, unit='Batch'): # fetch the next training batch batch = tuple(t.to(params.device) for t in batch) input_ids, input_mask, segment_id, cate = batch # inference with torch.no_grad(): # get loss loss = model(input_ids, token_type_ids=segment_id, attention_mask=input_mask, cate=cate) if params.n_gpu > 1 and args.multi_gpu: loss = loss.mean() # mean() to average on multi-gpu. # update the average loss loss_avg.update(loss.item()) # (bs, tag_size) cls_pre = model(input_ids=input_ids, attention_mask=input_mask, token_type_ids=segment_id) # gold label gold = cate.to('cpu').numpy() # (bs, tag_size) # predict pred = cls_pre.detach().cpu().numpy() # TODO: 规则获取pred pred_threshold = np.where(pred > params.threshold, 1, 0) for idx, p in enumerate(pred_threshold): # 如果有NaN类 if p[tag2idx[STR2IO['NaN']]] == 1: # 如果NaN类概率最大 if np.argmax(pred[idx]) == tag2idx[STR2IO['NaN']]: pred_threshold[idx] = 0 pred_threshold[idx][tag2idx[STR2IO['NaN']]] = 1 else: pred_threshold[idx][tag2idx[STR2IO['NaN']]] = 0 # 如果没有类别,选最大的 if 1 not in p: pred_threshold[idx][np.argmax(pred[idx])] = 1 y_true.append(gold) y_pred.append(pred_threshold) # metrics y_pred = np.concatenate(y_pred, axis=0) y_true = np.concatenate(y_true, axis=0) f1 = f1_score(y_true=y_true, y_pred=y_pred, average='micro') acc = accuracy_score(y_true=y_true, y_pred=y_pred) # f1, acc metrics = {'loss': loss_avg(), 'f1': f1, 'acc': acc} metrics_str = "; ".join("{}: {:05.2f}".format(k, v) for k, v in metrics.items()) logging.info("- {} metrics: ".format('Val') + metrics_str) return metrics
def interAct(model, data_iterator, params, mark='Interactive', verbose=False):
    """Predict tags for the next interactive batch and return its entities.

    Pulls one batch from `data_iterator`, runs the token classifier, maps
    argmax label indices back to tag strings and extracts entity spans.
    """
    # set model to evaluation mode
    model.eval()
    idx2tag = params.idx2tag
    pred_tags = []
    # Fix: removed unused locals `true_tags` and `loss_avg` (never read).
    batch_data, batch_token_starts = next(data_iterator)
    # padding positions are 0 -> mask them out
    batch_masks = batch_data.gt(0)
    # shape: (batch_size, max_len, num_labels)
    batch_output = model((batch_data, batch_token_starts), token_type_ids=None,
                         attention_mask=batch_masks)[0]
    batch_output = batch_output.detach().cpu().numpy()
    pred_tags.extend([[idx2tag.get(idx) for idx in indices]
                      for indices in np.argmax(batch_output, axis=2)])
    return get_entities(pred_tags)
def train(model, data_iterator, optimizer, scheduler, params): """Train the model on `steps` batches""" # set model to training mode model.train() scheduler.step() # a running average object for loss loss_avg = utils.RunningAverage() # Use tqdm for progress bar t = trange(params.train_steps, desc="Train: ") for _ in t: # fetch the next training batch sources, source_pos, targets, target_pos, negatives, negative_pos, negative_encoders = next(data_iterator) source_encodes, target_encodes, negative_encodes = model(sources, source_pos, targets, target_pos, negatives, negative_pos, negative_encoders) loss = cal_triplet_margin_loss(source_encodes, target_encodes, negative_encodes, params.margin) if params.n_gpu > 1 and params.multi_gpu: loss = loss.mean() # mean() to average on multi-gpu # clear previous gradients, compute gradients of all variables wrt loss model.zero_grad() loss.backward() # gradient clipping nn.utils.clip_grad_norm_(parameters=model.parameters(), max_norm=params.clip_grad) # performs updates using calculated gradients optimizer.step() # update the average loss loss_avg.update(loss.item()) t.set_postfix(loss='{:05.3f}'.format(loss_avg())) return loss_avg()
def train(model, data_loader, optimizer, loss_fn, params, device):
    """Train the model for num of epochs

    Args:
        model: The 3D UNet (torch.nn.Module)
        data_loader: Object to fetch the training data (torch.utils.data.DataLoader)
        optimizer: Optimizer object for weight updates (torch.optim)
        loss_fn: Loss function
        params: hyperparameters; `params.cuda` toggles the device transfer
        device: The gpu device, if available ('cuda')
    """
    model.train()
    loss_avg = utils.RunningAverage()
    for i, (input_batch, target_batch) in enumerate(data_loader):
        if params.cuda:
            input_batch, target_batch = input_batch.to(device),\
                target_batch.to(device)
        output_batch = model(input_batch)
        # NOTE(review): the loss compares the output against the *input*
        # (reconstruction-style objective); confirm `target_batch` is
        # intentionally unused here.
        loss = loss_fn(output_batch, input_batch)
        optimizer.zero_grad()
        # Fix: `loss_temp` was an undefined name (NameError on the first
        # batch); the computed `loss` is what must be backpropagated.
        loss.backward()
        optimizer.step()
        # update the average loss
        loss_avg.update(loss.item())
def evaluate_model(model, loader, loss_fn, device, header, isDebug=False):
    """Evaluate `model` on `loader`; returns (model, avg_loss, error).

    `metric_watcher` accumulates (batch-weighted loss, correct count,
    sample count); its `calculate()` converts them to averages before they
    are read back. When `isDebug` is set, only the first batch is processed.
    """
    model.eval()
    metric_watcher = utils.RunningAverage()
    for idx, (input_batch, target_batch) in enumerate(loader):
        input_batch, target_batch = input_batch.to(device), target_batch.to(
            device)
        # forward
        with torch.no_grad():
            output_batch = model.forward(input_batch)
            loss_batch = loss_fn(output_batch, target_batch)
            _, prediction_batch = output_batch.max(1)
            correct_batch = prediction_batch.eq(target_batch).sum().item()
        # loss is weighted by batch size so the average is per-sample
        metric_watcher.update(loss_batch.item() * input_batch.size(0),
                              correct_batch, input_batch.size(0))
        # if isDebug=True: # break loop
        if isDebug:
            break
    metric_watcher.calculate()
    avg_loss, accuracy, error, data_points = metric_watcher()
    logging.info(
        "{}: \tloss: {:.5f} \taccuracy: {:.1f}% \terror: {:.1f}% \tdata: {}".
        format(header, avg_loss, accuracy * 100, error * 100, data_points))
    return model, avg_loss, error
def train(model, dataloader, optimizer, loss_fn, metric, params):
    """One training epoch; returns (average loss, metric over all outputs)."""
    model.train()
    loss_avg = utils.RunningAverage()
    all_outputs = []
    all_targets = []
    with tqdm(total=len(dataloader)) as progress:
        for X_batch, y_batch in dataloader:
            X_batch = X_batch.to(params.device)
            y_batch = y_batch.to(params.device)
            preds = model(X_batch)
            loss = loss_fn(preds, y_batch)
            # clear previous gradients, backprop, apply the update
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            loss_avg.update(loss.item())
            # accumulate on the CPU so the metric sees the whole epoch at once
            all_targets.append(y_batch.data.cpu().numpy())
            all_outputs.append(preds.data.cpu().numpy())
            progress.set_postfix(loss='{:05.3f}'.format(loss_avg()))
            progress.update()
    stacked_outputs = np.concatenate(all_outputs, axis=0)
    stacked_targets = np.concatenate(all_targets, axis=0)
    metric_score = metric(stacked_outputs, stacked_targets)
    return loss_avg(), metric_score
def make_ravgs(self, n_pulse):
    """ Instantiate running averages """
    # One score averager plus one intensity averager per pulse; all share
    # the same window length (self.n), trace length (self.ts_len) and
    # smoothing factor (self.alpha).
    self.n_pulse = n_pulse
    self.ravg_score = utils.RunningAverage(self.n, self.ts_len,
                                           alpha=self.alpha)
    self.ravg_int = []
    for ii in range(n_pulse):
        self.ravg_int.append(
            utils.RunningAverage(self.n, self.ts_len, alpha=self.alpha))
    # Stripchart layout: 1 row, one column per pulse averager.
    self.stripchartsView.make_stripchartsData((1, len(self.ravg_int)))
    # flag read elsewhere to know the averagers exist
    self._ravg_ready = True
    print('Stripchart 1: fit score\nStripchart 2: {} pulses intensities'.
          format(self.n_pulse))
    return
def train_epoch(model, data_iterator, optimizer, scheduler, params): """Train the model on `steps` batches""" # set model to training mode model.train() # a running average object for loss loss_avg = utils.RunningAverage() # Use tqdm for progress bar one_epoch = trange(params.train_steps) for batch in one_epoch: # fetch the next training batch batch_data, batch_token_starts, batch_tags = next(data_iterator) batch_masks = batch_data.gt(0) # get padding mask # compute model output and loss loss = model((batch_data, batch_token_starts), token_type_ids=None, attention_mask=batch_masks, labels=batch_tags)[0] # clear previous gradients, compute gradients of all variables wrt loss model.zero_grad() loss.backward() # gradient clipping nn.utils.clip_grad_norm_(parameters=model.parameters(), max_norm=params.clip_grad) # performs updates using calculated gradients optimizer.step() scheduler.step() # update the average loss loss_avg.update(loss.item()) one_epoch.set_postfix(loss='{:05.3f}'.format(loss_avg()))
def evaluate_epoch(network, loader, loss_function, header, device):
    """
    Method that evaluates network with valid or test loader
    :param network: model to evaluate
    :param loader: loader that returns valid or test images / labels
    :param loss_function: loss_function
    :param header: header to log
    :param device: cpu or cuda
    :return: model, average_loss, valid or test_error
    """
    network.eval()
    watcher = utils.RunningAverage()
    for batch_images, batch_labels in loader:
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)
        with torch.no_grad():
            logits = network.forward(batch_images)
            batch_loss = loss_function(logits, batch_labels)
            _, batch_predictions = logits.max(1)
            n_correct = batch_predictions.eq(batch_labels).sum().item()
        # weight loss by batch size so the watcher averages per sample
        watcher.update(batch_loss.item() * batch_images.size(0),
                       n_correct, batch_images.size(0))
    watcher.calculate()
    avg_loss, accuracy, error, data_points = watcher()
    logging.info(
        "{}: \tloss: {:.5f} \taccuracy: {:.1f}% \terror: {:.1f}% \tdata: {}".
        format(header, avg_loss, accuracy * 100, error * 100, data_points))
    return network, avg_loss, error
def train(model, optimizer, scheduler, loss_fn, dataloader, epoch):
    """Train the graph model for one epoch; a radius graph is rebuilt per
    batch in (eta, phi) space, and the scheduler is stepped on the epoch's
    mean loss (presumably a ReduceLROnPlateau-style scheduler — it takes the
    metric as an argument)."""
    model.train()
    loss_avg_arr = []
    loss_avg = utils.RunningAverage()
    with tqdm(total=len(dataloader)) as t:
        for data in dataloader:
            optimizer.zero_grad()
            data = data.to('cuda')
            # first 8 node features treated as continuous, the rest as
            # categorical ids (cast to long for embedding lookup)
            x_cont = data.x[:,:8]
            x_cat = data.x[:,8:].long()
            # phi via atan2 of features 1 and 0 — presumably (py, px) or
            # (sin phi, cos phi) components; TODO confirm feature layout
            phi = torch.atan2(data.x[:,1], data.x[:,0])
            etaphi = torch.cat([data.x[:,3][:,None], phi[:,None]], dim=1)
            # NB: there is a problem right now for comparing hits at the +/- pi boundary
            edge_index = radius_graph(etaphi, r=deltaR, batch=data.batch, loop=True, max_num_neighbors=255)
            result = model(x_cont, x_cat, edge_index, data.batch)
            loss = loss_fn(result, data.x, data.y, data.batch)
            loss.backward()
            optimizer.step()
            # update the average loss
            loss_avg_arr.append(loss.item())
            loss_avg.update(loss.item())
            t.set_postfix(loss='{:05.3f}'.format(loss_avg()))
            t.update()
    # scheduler consumes the epoch's mean loss
    scheduler.step(np.mean(loss_avg_arr))
    print('Training epoch: {:02d}, MSE: {:.4f}'.format(epoch, np.mean(loss_avg_arr)))
def train(model, optimizer, loss_fn, dataloader, metrics, params):
    """Train the model on `num_steps` batches

    Args:
        model: (torch.nn.Module) the neural network
        optimizer: (torch.optim) optimizer for parameters of model
        loss_fn: loss function taking (output_batch, labels_batch)
        dataloader: (DataLoader) fetches training batches
        metrics: (dict) metric-name -> fn(outputs, labels) on numpy arrays
        params: (Params) hyperparameters
    """
    # set model to training mode
    model.train()
    # summary for current training loop and a running average object for loss
    summ = []
    loss_avg = utils.RunningAverage()
    # Use tqdm for progress bar
    with tqdm(total=len(dataloader)) as t:
        for i, (train_batch, labels_batch) in enumerate(dataloader):
            # move to GPU if available
            if params.cuda:
                train_batch, labels_batch = train_batch.cuda(non_blocking=True), labels_batch.cuda(non_blocking=True)
            # Fix: dropped the obsolete `Variable(...)` wrappers — tensors
            # have carried autograd state directly since PyTorch 0.4.
            output_batch = model(train_batch)
            loss = loss_fn(output_batch, labels_batch)
            # clear previous gradients, compute gradients of all variables wrt loss
            optimizer.zero_grad()
            loss.backward()
            # performs updates using calculated gradients
            optimizer.step()
            # Evaluate summaries only once in a while
            if i % params.save_summary_steps == 0:
                # extract data from tensors, move to cpu, convert to numpy arrays
                output_batch = output_batch.data.cpu().numpy()
                labels_batch = labels_batch.data.cpu().numpy()
                # compute all metrics on this batch
                summary_batch = {metric: metrics[metric](output_batch, labels_batch) for metric in metrics}
                # Fix: store a plain float via .item() instead of the
                # deprecated `loss.data` 0-d tensor (keeps np.mean simple).
                summary_batch['loss'] = loss.item()
                summ.append(summary_batch)
            # update the average loss with a float, not a tensor
            loss_avg.update(loss.item())
            t.set_postfix(loss='{:05.3f}'.format(loss_avg()))
            t.update()
    # compute mean of all metrics in summary
    metrics_mean = {metric: np.mean([x[metric] for x in summ]) for metric in summ[0]}
    metrics_string = " ; ".join("{}: {:05.3f}".format(k, v) for k, v in metrics_mean.items())
    logging.info("- Train metrics: " + metrics_string)
def evaluate(model, data_iterator, params, mark='Eval', verbose=False): """Evaluate the model on `steps` batches.""" # set model to evaluation mode model.eval() idx2tag = params.idx2tag true_tags = [] pred_tags = [] # a running average object for loss loss_avg = utils.RunningAverage() one_epoch = trange(params.eval_steps) for step, batch in zip(one_epoch, data_iterator): # fetch the next evaluation batch input_ids, label_ids, attention_mask, sentence_ids, label_mask = batch with torch.no_grad(): loss, logits, labels = model(input_ids, token_type_ids=sentence_ids, attention_mask=attention_mask, labels=label_ids, label_masks=label_mask) if params.n_gpu > 1 and params.multi_gpu: loss = loss.mean() loss_avg.update(loss.item()) batch_output = torch.argmax(F.log_softmax(logits, dim=2), dim=2) batch_output = batch_output.detach().cpu().numpy() batch_tags = labels.to('cpu').numpy() batch_true_tags = [[ idx2tag.get(idx) for idx in indices[np.where(indices != -1)] ] for indices in batch_tags] batch_pred_tags = [[ idx2tag.get(idx) for idx in indices[np.where(batch_tags[i] != -1)] ] for i, indices in enumerate(batch_output)] true_tags.extend(batch_true_tags) pred_tags.extend(batch_pred_tags) one_epoch.set_postfix(eval_loss='{:05.3f}'.format(loss_avg())) assert len(pred_tags) == len(true_tags) # logging loss, f1 and report metrics = {} f1 = f1_score(true_tags, pred_tags) metrics['loss'] = loss_avg() metrics['f1'] = f1 metrics_str = "; ".join("{}: {:05.4f}".format(k, v) for k, v in metrics.items()) logging.info("- {} metrics: ".format(mark) + metrics_str) if verbose: report = classification_report(true_tags, pred_tags) logging.info(report) return metrics
def train(model, optimizer, loss_fn, data_iterator, metrics, params, num_steps):
    """Train the model on `num_steps` batches

    Args:
        model: (torch.nn.Module) the neural network
        optimizer: (torch.optim) optimizer for parameters of model
        loss_fn: a function that takes batch_output and batch_labels and computes the loss for the batch
        data_iterator: (generator) a generator that generates batches of data and labels
        metrics: (dict) a dictionary of functions that compute a metric using the output and labels of each batch
        params: (Params) hyperparameters
        num_steps: (int) number of batches to train on, each of size params.batch_size
    """
    # set model to training mode
    model.train()
    # summary for current training loop and a running average object for loss
    summ = []
    loss_avg = utils.RunningAverage()
    # Use tqdm for progress bar
    t = trange(num_steps)
    for i in t:
        # fetch the next training batch
        train_batch, labels_batch = next(data_iterator)
        # compute model output and loss
        output_batch = model(train_batch)
        loss = loss_fn(output_batch, labels_batch)
        # clear previous gradients, compute gradients of all variables wrt loss
        optimizer.zero_grad()
        loss.backward()
        # performs updates using calculated gradients
        optimizer.step()
        # Evaluate summaries only once in a while
        if i % params.save_summary_steps == 0:
            # extract data from torch Variable, move to cpu, convert to numpy arrays
            output_batch = output_batch.data.cpu().numpy()
            labels_batch = labels_batch.data.cpu().numpy()
            # compute all metrics on this batch
            summary_batch = {metric: metrics[metric](output_batch, labels_batch) for metric in metrics}
            summary_batch['loss'] = loss.item()
            summ.append(summary_batch)
        # update the average loss
        loss_avg.update(loss.item())
        t.set_postfix(loss='{:05.3f}'.format(loss_avg()))
    # compute mean of all metrics in summary
    metrics_mean = {metric: np.mean([x[metric] for x in summ]) for metric in summ[0]}
    metrics_string = " ; ".join("{}: {:05.3f}".format(k, v) for k, v in metrics_mean.items())
    logging.info("- Train metrics: " + metrics_string)
def evaluate(model, loss_fn, dataloader, metrics, params):
    """Evaluate the model over the whole dataloader.

    Args:
        model: (torch.nn.Module) the neural network
        loss_fn: computes the loss from (output_batch, labels_batch)
        dataloader: (DataLoader) fetches evaluation data
        metrics: (dict) metric functions (currently unused here; kept for
            signature compatibility with the training loop)
        params: (Params) hyperparameters; `params.cuda` selects the device

    Returns:
        (average loss, ndarray of sigmoid outputs concatenated over batches)
    """
    # set model to evaluation mode
    model.eval()
    preds = []
    loss_avg = utils.RunningAverage()
    # Fix: hoisted out of the loop — the device never changes between batches.
    device = torch.device('cuda') if params.cuda else torch.device('cpu')
    # compute metrics over the dataset
    for data_batch, labels_batch in dataloader:
        data_batch = data_batch.to(device)
        labels_batch = labels_batch.to(device)
        # compute model output and loss
        with torch.no_grad():
            output_batch = model(data_batch)
            loss = loss_fn(output_batch, labels_batch)
        # store per-batch sigmoid outputs on the CPU
        preds.append(output_batch.sigmoid().detach().to(torch.device('cpu')).numpy())
        # update the average loss
        loss_avg.update(loss.item())
    logging.info("- Eval metrics: loss: {:05.3f}".format(loss_avg()))
    predictions = np.concatenate(preds)
    return loss_avg(), predictions
def train(model, optimizer, loss_fn, dataloader, metrics, params, epoch):
    """Run one training epoch with periodic metric snapshots.

    Every `params.save_summary_steps` batches the raw outputs are binarised
    at zero (output > 0) before the metric functions see them; the epoch
    mean of those snapshots is logged at the end.
    """
    summ = []
    loss_avg = utils.RunningAverage()
    model.train()
    with tqdm(total=len(dataloader)) as progress:
        for step, (train_batch, labels_batch) in enumerate(dataloader):
            train_batch, labels_batch = train_batch.cuda(), labels_batch.cuda()
            output_batch = model(train_batch)
            loss = loss_fn(output_batch, labels_batch)
            # clear previous gradients, backprop, then apply the update
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if (step + 1) % params.save_summary_steps == 0:
                # move to cpu/numpy for the metric functions
                outputs_np = output_batch.data.cpu().numpy()
                labels_np = labels_batch.data.cpu().numpy()
                snapshot = {
                    name: fn(np.array(outputs_np > 0, dtype=np.float32), labels_np)
                    for name, fn in metrics.items()
                }
                snapshot['loss'] = loss.item()
                summ.append(snapshot)
            loss_avg.update(loss.item())
            progress.set_postfix(loss='{:05.3f}'.format(loss_avg()))
            progress.update()
    metrics_mean = {name: np.mean([s[name] for s in summ]) for name in summ[0]}
    metrics_string = " ; ".join("{}: {:05.3f}".format(k, v)
                                for k, v in metrics_mean.items())
    logging.info("- Train metrics: %s" % metrics_string)
def allied_train(model, optimizer, loss_fn, data_iterator, metrics, params,
                 num_steps, train_target=False):
    """Train `model` for `num_steps` batches with gradient accumulation.

    The model has two heads: a target prediction and an ICD-code prediction.
    When `train_target` is True the objective is the sum of both head losses;
    otherwise only the ICD head is trained.

    Args:
        model: (torch.nn.Module) returns (output_batch, icd_batch)
        optimizer: (torch.optim) optimizer for the model parameters
        loss_fn: callable(output, labels) -> scalar loss
        data_iterator: yields
            (train_batch_w2v, train_batch_sp, labels_batch, icd_labels, ids)
        metrics: (dict) name -> fn(output, labels) on numpy arrays
        params: (Params) uses `grad_acc` (accumulation steps) and
            `save_summary_steps`
        num_steps: (int) number of batches to train on
        train_target: (bool) include the target-head loss in the objective

    Returns:
        Running average of the (grad_acc-normalized) training loss.
    """
    # set model to training mode
    model.train()

    # summary for current training loop and a running average object for loss
    summ = []
    loss_avg = utils.RunningAverage()

    # Use tqdm for progress bar
    t = trange(num_steps)
    running_auc = utils.OutputAUC()
    running_auc_icds = utils.MetricsICD()
    for i in t:
        # fetch the next training batch (train_batch_sp and ids are unused here)
        train_batch_w2v, train_batch_sp, labels_batch, icd_labels, ids = next(data_iterator)

        output_batch, icd_batch = model(train_batch_w2v)
        if train_target:
            loss = loss_fn(output_batch, labels_batch) + loss_fn(icd_batch, icd_labels)
        else:
            loss = loss_fn(icd_batch, icd_labels)
        # Normalize so accumulated gradients average over the micro-batches
        loss = loss / params.grad_acc

        running_auc.update(labels_batch.data.cpu().numpy(),
                           output_batch.data.cpu().numpy())
        running_auc_icds.update(icd_labels.data.cpu().numpy(),
                                icd_batch.data.cpu().numpy())

        # compute gradients of all variables wrt loss
        loss.backward()

        # BUGFIX: step after every `grad_acc` backward passes. The previous
        # condition `i % grad_acc == 0` stepped at i == 0, i.e. after a single
        # micro-batch, making the first update under-accumulated.
        if (i + 1) % params.grad_acc == 0:
            # performs updates using calculated gradients
            optimizer.step()
            # clear previous gradients
            optimizer.zero_grad()

        # Evaluate summaries only once in a while
        if i % params.save_summary_steps == 0:
            # move to cpu, convert to numpy arrays (without clobbering the
            # tensors, which the progress/AUC trackers may still reference)
            output_np = output_batch.data.cpu().numpy()
            labels_np = labels_batch.data.cpu().numpy()

            # compute all metrics on this batch; note that 'loss' here is
            # the grad_acc-normalized value
            summary_batch = {metric: metrics[metric](output_np, labels_np)
                             for metric in metrics}
            summary_batch['loss'] = float(loss.data.item())
            summ.append(summary_batch)

        # update the average loss
        loss_avg.update(float(loss.data.item()))
        t.set_postfix(loss='{:05.3f}'.format(loss_avg()))

    # compute mean of all metrics in summary
    metrics_mean = {metric: np.mean([x[metric] for x in summ])
                    for metric in summ[0]}
    metrics_string = " ; ".join("{}: {:05.3f}".format(k, v)
                                for k, v in metrics_mean.items())
    logging.info("- Train metrics: " + metrics_string)
    logging.info('Train AUC: ' + str(running_auc()))
    logging.info('Train ICD AUC: ' + str(running_auc_icds()))
    return loss_avg()
def evaluate(test_loader, model, model_T, criterion, criterion_T, accuracy, args):
    """Run one evaluation pass over `test_loader` and return test metrics.

    `model_T`, `criterion_T` and `args` stay in the signature for caller
    compatibility; only the plain criterion loss and top-1 accuracy are
    computed here.

    Returns:
        dict with 'test_loss', 'test_accTop1' and elapsed 'time' (seconds).
    """
    # both student and teacher go to eval mode
    model.eval()
    model_T.eval()

    loss_meter = utils.RunningAverage()
    top1_meter = utils.RunningAverage()
    start = time.time()

    with torch.no_grad():
        for test_batch, labels_batch in test_loader:
            test_batch = test_batch.to(device)
            labels_batch = labels_batch.to(device)

            # compute model output and loss
            output_batch = model(test_batch)
            loss = criterion(output_batch, labels_batch)

            # update running loss and top-1 accuracy
            batch_metrics = accuracy(output_batch, labels_batch)
            top1_meter.update(batch_metrics[0].item())
            loss_meter.update(loss.item())

    test_metrics = {
        'test_loss': loss_meter.value(),
        'test_accTop1': top1_meter.value(),
        'time': time.time() - start,
    }
    metrics_string = " ; ".join("{}: {:05.3f}".format(k, v)
                                for k, v in test_metrics.items())
    logging.info("- Test metrics: " + metrics_string)
    return test_metrics
def evaluate(model, data_iterator, params, mark='Eval', verbose=True):
    """Evaluate the model on all batches of `data_iterator`.

    Args:
        model: network; called with `labels` it returns the loss, without
            labels it returns decoded tag-index sequences
        data_iterator: yields (input_ids, input_mask, labels) batches
        params: (Params) uses `device` and `tags`
        mark: (str) label used in the log line
        verbose: (bool) also log a full classification report

    Returns:
        dict with 'loss', 'f1' and 'accuracy'.
    """
    # set model to evaluation mode
    model.eval()

    # id2tag dict
    idx2tag = {idx: tag for idx, tag in enumerate(params.tags)}

    true_tags = []
    pred_tags = []

    # a running average object for loss
    loss_avg = utils.RunningAverage()

    # FIX: no gradients are needed anywhere during evaluation; the previous
    # code computed the loss outside no_grad, needlessly building the
    # autograd graph for every batch.
    with torch.no_grad():
        for input_ids, input_mask, labels in data_iterator:
            # to device
            input_ids = input_ids.to(params.device)
            input_mask = input_mask.to(params.device)
            labels = labels.to(params.device)
            batch_size, max_len = labels.size()

            # forward with labels -> loss, normalized per sample
            loss = model(input_ids, attention_mask=input_mask.bool(), labels=labels)
            loss /= batch_size
            # update the average loss
            loss_avg.update(loss.item())

            # forward without labels -> decoded tag indices
            batch_output = model(input_ids, attention_mask=input_mask.bool())

            # recover the true (unpadded) length of each label sequence
            real_batch_tags = []
            for i in range(batch_size):
                real_len = int(input_mask[i].sum())
                real_batch_tags.append(labels[i][:real_len].to('cpu').numpy())  # List[int]

            pred_tags.extend([idx2tag.get(idx) for indices in batch_output for idx in indices])
            true_tags.extend([idx2tag.get(idx) for indices in real_batch_tags for idx in indices])

    # sanity check
    assert len(pred_tags) == len(true_tags), 'len(pred_tags) is not equal to len(true_tags)!'

    # logging loss, f1 and report
    metrics = {}
    metrics['loss'] = loss_avg()
    metrics['f1'] = f1_score(true_tags, pred_tags)
    metrics['accuracy'] = accuracy_score(true_tags, pred_tags)
    metrics_str = "; ".join("{}: {:05.2f}".format(k, v) for k, v in metrics.items())
    logging.info("- {} metrics: ".format(mark) + metrics_str)

    # f1 classification report
    if verbose:
        report = classification_report(true_tags, pred_tags)
        logging.info(report)
    return metrics
def train(model, optimizer, loss_fn, dataloader, params):
    """Train the VAE for one epoch over `dataloader`.

    Args:
        model: (torch.nn.Module) VAE returning (reconstruction, mu, logvar)
        optimizer: (torch.optim) optimizer for parameters of model
        loss_fn: callable(recon_batch, train_batch, mu, logvar) -> scalar loss
        dataloader: (DataLoader) fetches (input, _) training batches
        params: (Params) hyperparameters; uses `cuda` and `save_summary_steps`
    """
    # set model to training mode
    model.train()

    # summary for current training loop and a running average object for loss
    summ = []
    loss_avg = utils.RunningAverage()

    with tqdm(disable=False) as t:
        for ii, (train_batch, _) in enumerate(dataloader):
            # move to GPU if available
            if params.cuda:
                train_batch = train_batch.cuda(non_blocking=True)
            # NOTE: the deprecated torch.autograd.Variable wrapper was removed;
            # tensors carry autograd state directly since PyTorch 0.4.

            # compute model output and loss
            recon_batch, mu, logvar = model(train_batch)
            loss = loss_fn(recon_batch, train_batch, mu, logvar)

            # clear previous gradients, compute gradients of all variables wrt loss
            optimizer.zero_grad()
            loss.backward()

            # performs updates using calculated gradients
            optimizer.step()

            # Evaluate summaries only once in a while
            if ii % params.save_summary_steps == 0:
                summary_batch = {'loss': loss.item()}
                summ.append(summary_batch)

            # update the average loss
            loss_avg.update(loss.item())

            t.set_postfix(loss='{:05.3f}'.format(loss_avg()))
            t.update()

    # summed batch losses normalized by dataset size, not summary count;
    # guard against an empty dataloader leaving `summ` empty
    if summ:
        metrics_mean = {metric: np.sum([x[metric] for x in summ]) / len(dataloader.dataset)
                        for metric in summ[0]}
    else:
        metrics_mean = {}
    metrics_string = " ; ".join("{}: {:05.3f}".format(k, v)
                                for k, v in metrics_mean.items())
    logging.info("- Train metrics: " + metrics_string)
def evaluate(model, data_iterator, params, mark='Test', verbose=False):
    """Evaluate the model on `params.eval_steps` batches.

    Args:
        model: network; with `labels` the first return element is the loss,
            without labels it is the logits (batch_size, max_len, num_labels)
        data_iterator: yields (batch_data, batch_tags) pairs
        params: (Params) uses `eval_steps`, `idx2tag`, `n_gpu`, `multi_gpu`
        mark: (str) label used in the log line
        verbose: (bool) also log a full classification report

    Returns:
        dict with 'loss' and 'f1'.
    """
    # set model to evaluation mode
    model.eval()

    idx2tag = params.idx2tag
    true_tags = []
    pred_tags = []

    # a running average object for loss
    loss_avg = utils.RunningAverage()

    # FIX: evaluation needs no autograd graph; the previous code built one on
    # both forward passes of every batch, wasting memory.
    with torch.no_grad():
        for _ in range(params.eval_steps):
            # fetch the next evaluation batch
            batch_data, batch_tags = next(data_iterator)
            # padding ids are 0, so non-zero positions form the attention mask
            batch_masks = batch_data.gt(0)

            # first forward pass (with labels) yields the loss ...
            loss = model(batch_data, token_type_ids=None,
                         attention_mask=batch_masks, labels=batch_tags)
            # ... second (without labels) yields the logits
            batch_output = model(batch_data, token_type_ids=None,
                                 attention_mask=batch_masks)

            loss = loss[0]
            batch_output = batch_output[0]

            if params.n_gpu > 1 and params.multi_gpu:
                # average the per-GPU losses under DataParallel
                loss = loss.mean()
            loss_avg.update(loss.item())

            batch_output = batch_output.detach().cpu().numpy()
            batch_tags = batch_tags.to('cpu').numpy()

            pred_tags.extend([idx2tag.get(idx)
                              for indices in np.argmax(batch_output, axis=2)
                              for idx in indices])
            true_tags.extend([idx2tag.get(idx)
                              for indices in batch_tags
                              for idx in indices])

    assert len(pred_tags) == len(true_tags)

    # logging loss, f1 and report
    metrics = {}
    metrics['loss'] = loss_avg()
    metrics['f1'] = f1_score(true_tags, pred_tags)
    metrics_str = "; ".join("{}: {:05.2f}".format(k, v) for k, v in metrics.items())
    logging.info("- {} metrics: ".format(mark) + metrics_str)
    if verbose:
        report = classification_report(true_tags, pred_tags)
        logging.info(report)
    return metrics