def get_criterion(config):
    if config.loss == 'focal':
        return L.FocalLoss()
    elif config.loss == 'ce':
        return torch.nn.CrossEntropyLoss()
    return None
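# A minimal usage sketch for get_criterion (not part of the original snippet): it assumes
# `L` is pytorch_toolbelt.losses and that `config` only needs a `loss` attribute, so a
# SimpleNamespace stands in for the real config object here.
import types

import torch
from pytorch_toolbelt import losses as L

config = types.SimpleNamespace(loss='focal')
criterion = get_criterion(config)            # L.FocalLoss()
logits = torch.randn(8, 11)                  # batch of 8 samples, 11 classes
targets = torch.randint(0, 11, (8,))
print(criterion(logits, targets))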
def define_losses(
    num_classes: int,
    use_focal: bool = False,
    loss_weights: Optional[List[float]] = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
) -> Tuple[torch.nn.Module, torch.nn.Module]:
    assert num_classes in {7, 8}
    if num_classes == 7:
        # Per-class positive weights, broadcast to (1, C, 1, 1) for pixel-wise BCE.
        loss_f = torch.nn.BCEWithLogitsLoss(
            pos_weight=torch.tensor(loss_weights).to(
                torch_config.device).reshape(1, num_classes, 1, 1))
        loss_d = DiceLoss(torch_config.device)
    else:
        loss_f = losses.FocalLoss() if use_focal else torch.nn.CrossEntropyLoss()
        loss_d = DiceLoss(torch_config.device)
    return loss_f, loss_d
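# A sketch (not from the original code) of how the returned pair might be combined into a
# single training loss. It assumes segmentation-style logits of shape (B, C, H, W), that
# DiceLoss accepts (logits, targets) like the focal/CE loss does, and that the 0.5
# weighting is purely illustrative.
loss_f, loss_d = define_losses(num_classes=8, use_focal=True)
logits = torch.randn(2, 8, 64, 64, device=torch_config.device)
targets = torch.randint(0, 8, (2, 64, 64), device=torch_config.device)
total_loss = loss_f(logits, targets) + 0.5 * loss_d(logits, targets)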
test_preds_iter = np.zeros((2000000, 11))

it = 0
for it in range(1):
    device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
    model = Seq2SeqRnn(input_size=trainval.shape[1], seq_len=4000, hidden_size=64,
                       output_size=11, num_layers=2, hidden_layers=[64, 64, 64],
                       bidirectional=True).to(device)

    number_of_epochs = 150
    early_stopping = EarlyStopping(
        patience=20, is_maximize=True,
        checkpoint_path="./models/gru_clean_checkpoint_fold_{}_iter_{}.pt".format(index, it))
    criterion = L.FocalLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    scheduler = torch.optim.lr_scheduler.OneCycleLR(
        optimizer=optimizer, pct_start=0.1, div_factor=1e3, max_lr=1e-3,
        epochs=number_of_epochs, steps_per_epoch=len(train_dataloader))

    avg_train_losses, avg_valid_losses = [], []

    for epoch in range(number_of_epochs):
        start_time = time.time()
        print("Epoch : {}".format(epoch))
        # get_lr() is deprecated in newer PyTorch; get_last_lr() is the preferred accessor.
        print("learning_rate: {:0.9f}".format(scheduler.get_lr()[0]))
# Imports for the tests below (assumption: `L` refers to pytorch_toolbelt.losses).
import pytest
import torch
from pytorch_toolbelt import losses as L


@torch.no_grad()
def test_soft_bce_loss():
    criterion = L.SoftBCEWithLogitsLoss(smooth_factor=0.1, ignore_index=-100)

    # Ideal case
    y_pred = torch.tensor([-9, 9, 1, 9, -9]).float()
    y_true = torch.tensor([0, 1, -100, 1, 0]).long()

    loss = criterion(y_pred, y_true)
    print(loss)


@pytest.mark.parametrize(
    "criterion",
    [
        L.BiTemperedLogisticLoss(t1=1, t2=0.8),
        L.FocalCosineLoss(),
        L.FocalLoss(),
        L.SoftF1Loss(),
        L.SoftCrossEntropyLoss(),
    ],
)
def test_classification_losses(criterion):
    # Ideal case
    y_pred = torch.tensor([[+9, -9, -9, -9],
                           [-9, +9, -9, -9],
                           [-9, -9, +9, -9],
                           [-9, -9, -9, +9]]).float()
    y_true = torch.tensor([0, 1, 2, 3]).long()

    loss = criterion(y_pred, y_true)
    print(loss)
def validate(valid_loader, model, device):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    scores = AverageMeter()
    sent_count = AverageMeter()

    seq_len = model.cfg.seq_len
    model_type = model.cfg.model_type

    # switch to evaluation mode
    model.eval()

    start = end = time.time()
    predictions = []
    groundtruth = []

    for step, (cate_x, cont_x, mask, y, label) in enumerate(valid_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        cate_x = cate_x.to(device)
        cont_x, mask, y = cont_x.to(device), mask.to(device), y.to(device)
        label = label.to(device)
        batch_size = cont_x.size(0)

        # compute loss
        with torch.no_grad():
            pred = model(cate_x, cont_x, mask)

        # record loss
        if model_type == 'seq2seq':
            pred_ = pred.view(-1, pred.shape[-1])
            y_ = y.view(-1)
            loss = L.FocalLoss(ignore_index=-1)(pred_, y_)
            predictions.append(pred[:, seq_len // 2, :].detach().cpu().numpy().argmax(1))
            groundtruth.append(label.detach().cpu().numpy())
        else:
            loss = L.FocalLoss(ignore_index=-1)(pred, label)
            predictions.append(pred.detach().cpu().numpy().argmax(1))
            groundtruth.append(label.detach().cpu().numpy())

        losses.update(loss.item(), batch_size)

        if CFG.gradient_accumulation_steps > 1:
            loss = loss / CFG.gradient_accumulation_steps

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        sent_count.update(batch_size)

        if step % CFG.print_freq == 0 or step == (len(valid_loader) - 1):
            # record accuracy
            score = f1_score(np.concatenate(predictions), np.concatenate(groundtruth),
                             labels=list(range(11)), average='macro')
            scores.update(score, batch_size)

            print('TEST: [{0}/{1}] '
                  'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  'Score: {score.val:.4f}({score.avg:.4f}) '
                  'sent/s {sent_s:.0f} '
                  .format(
                      step, len(valid_loader),
                      batch_time=batch_time, data_time=data_time,
                      loss=losses, score=scores,
                      remain=timeSince(start, float(step + 1) / len(valid_loader)),
                      sent_s=sent_count.avg / batch_time.avg))

    predictions = np.concatenate(predictions)
    groundtruth = np.concatenate(groundtruth)

    # scoring
    score = f1_score(predictions, groundtruth, labels=list(range(11)), average='macro')

    return losses.avg, score, predictions, groundtruth
def train(train_loader, model, optimizer, epoch, scheduler, device):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    scores = AverageMeter()
    sent_count = AverageMeter()

    seq_len = model.cfg.seq_len
    model_type = model.cfg.model_type

    # switch to train mode
    model.train()

    train_preds, train_true = [], []
    start = end = time.time()
    global_step = 0

    for step, (cate_x, cont_x, mask, y, label) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        cate_x = cate_x.to(device)
        cont_x, mask, y = cont_x.to(device), mask.to(device), y.to(device)
        label = label.to(device)
        batch_size = cont_x.size(0)

        # compute loss
        pred = model(cate_x, cont_x, mask)

        if model_type == 'seq2seq':
            pred_ = pred.view(-1, pred.shape[-1])
            y_ = y.view(-1)
            loss = L.FocalLoss(ignore_index=-1)(pred_, y_)
            train_true.append(label.detach().cpu().numpy())
            train_preds.append(pred[:, seq_len // 2, :].detach().cpu().numpy().argmax(1))
        else:
            loss = L.FocalLoss(ignore_index=-1)(pred, label)
            train_true.append(label.detach().cpu().numpy())
            train_preds.append(pred.detach().cpu().numpy().argmax(1))

        # record loss
        losses.update(loss.item(), batch_size)

        if CFG.gradient_accumulation_steps > 1:
            loss = loss / CFG.gradient_accumulation_steps

        loss.backward()
        grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), CFG.max_grad_norm)

        if (step + 1) % CFG.gradient_accumulation_steps == 0:
            scheduler.step()
            optimizer.step()
            optimizer.zero_grad()
            global_step += 1

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        sent_count.update(batch_size)

        if step % CFG.print_freq == 0 or step == (len(train_loader) - 1):
            # record accuracy
            score = f1_score(np.concatenate(train_preds), np.concatenate(train_true),
                             labels=list(range(11)), average='macro')
            scores.update(score, batch_size)

            print('Epoch: [{0}][{1}/{2}] '
                  'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  'Score: {score.val:.4f}({score.avg:.4f}) '
                  'Grad: {grad_norm:.4f} '
                  'LR: {lr:.6f} '
                  'sent/s {sent_s:.0f} '
                  .format(
                      epoch, step, len(train_loader),
                      batch_time=batch_time, data_time=data_time,
                      loss=losses, score=scores,
                      remain=timeSince(start, float(step + 1) / len(train_loader)),
                      grad_norm=grad_norm,
                      lr=scheduler.get_lr()[0],
                      #lr=scheduler.optimizer.param_groups[0]['lr'],
                      sent_s=sent_count.avg / batch_time.avg))

    return losses.avg, scores.avg
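# A sketch (not from the original code) of how train() and validate() above might be wired
# together. n_epochs, train_loader, valid_loader, model, optimizer, scheduler and device are
# assumed to be defined elsewhere, and the best-score checkpointing is purely illustrative.
best_score = -1.0
for epoch in range(n_epochs):
    train_loss, train_score = train(train_loader, model, optimizer, epoch, scheduler, device)
    valid_loss, valid_score, preds, gts = validate(valid_loader, model, device)
    if valid_score > best_score:
        best_score = valid_score
        torch.save(model.state_dict(), "best_model.pt")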
def validate(valid_loader, model, device):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    scores = AverageMeter()
    sent_count = AverageMeter()

    # switch to evaluation mode
    model.eval()

    val_preds, val_true = torch.Tensor([]).to(device), torch.LongTensor([]).to(device)
    start = end = time.time()

    for step, (cont_x, mask, y) in enumerate(valid_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        cont_x, mask, y = cont_x.to(device), mask.to(device), y.to(device)
        batch_size = cont_x.size(0)

        # compute loss
        with torch.no_grad():
            #pred = model(cont_x, mask)
            pred = model(cont_x)

        # record loss
        pred_ = pred.view(-1, pred.shape[-1])
        y_ = y.view(-1)
        loss = L.FocalLoss(ignore_index=-1)(pred_, y_)
        losses.update(loss.item(), batch_size)

        # record accuracy
        val_true = torch.cat([val_true, y_.long()], 0)
        val_preds = torch.cat([val_preds, pred_], 0)

        if CFG.gradient_accumulation_steps > 1:
            loss = loss / CFG.gradient_accumulation_steps

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        sent_count.update(batch_size)

        if step % CFG.print_freq == 0 or step == (len(valid_loader) - 1):
            print('TEST: [{0}/{1}] '
                  'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  #'Score: {score.val:.4f}({score.avg:.4f}) '
                  'sent/s {sent_s:.0f} '.format(
                      step, len(valid_loader),
                      batch_time=batch_time, data_time=data_time,
                      loss=losses,
                      #score=scores,
                      remain=timeSince(start, float(step + 1) / len(valid_loader)),
                      sent_s=sent_count.avg / batch_time.avg))

    # scoring
    predictions = val_preds.cpu().detach().numpy().argmax(1)
    groundtruth = val_true.cpu().detach().numpy()
    score = f1_score(predictions, groundtruth, labels=list(range(11)), average='macro')

    return losses.avg, score, predictions, groundtruth
def train(train_loader, model, optimizer, epoch, scheduler, device):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    scores = AverageMeter()
    sent_count = AverageMeter()

    # switch to train mode
    model.train()

    train_preds, train_true = torch.Tensor([]).to(device), torch.LongTensor([]).to(device)
    start = end = time.time()
    global_step = 0

    for step, (cont_x, mask, y) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        cont_x, mask, y = cont_x.to(device), mask.to(device), y.to(device)
        batch_size = cont_x.size(0)

        # compute loss
        #pred = model(cont_x, mask)
        pred = model(cont_x)

        pred_ = pred.view(-1, pred.shape[-1])
        y_ = y.view(-1)
        loss = L.FocalLoss(ignore_index=-1)(pred_, y_)

        # record loss
        losses.update(loss.item(), batch_size)
        """
        train_true = torch.cat([train_true, y_.long()], 0)
        train_preds = torch.cat([train_preds, pred_], 0)
        """

        if CFG.gradient_accumulation_steps > 1:
            loss = loss / CFG.gradient_accumulation_steps

        if USE_APEX:
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
        else:
            loss.backward()

        grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), CFG.max_grad_norm)

        if (step + 1) % CFG.gradient_accumulation_steps == 0:
            scheduler.step()
            optimizer.step()
            optimizer.zero_grad()
            global_step += 1

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        sent_count.update(batch_size)

        if step % CFG.print_freq == 0 or step == (len(train_loader) - 1):
            print('Epoch: [{0}][{1}/{2}]'.format(epoch, step, len(train_loader)))

        """
        if step % CFG.print_freq == 0 or step == (len(train_loader)-1):
            # record accuracy
            score = f1_score(train_true.cpu().detach().numpy(),
                             train_preds.cpu().detach().numpy().argmax(1),
                             labels=list(range(11)), average='macro')
            scores.update(score, batch_size)

            print('Epoch: [{0}][{1}/{2}] '
                  'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  'Score: {score.val:.4f}({score.avg:.4f}) '
                  'Grad: {grad_norm:.4f} '
                  'LR: {lr:.6f} '
                  'sent/s {sent_s:.0f} '
                  .format(
                      epoch, step, len(train_loader),
                      batch_time=batch_time, data_time=data_time,
                      loss=losses, score=scores,
                      remain=timeSince(start, float(step+1)/len(train_loader)),
                      grad_norm=grad_norm,
                      lr=scheduler.get_lr()[0],
                      #lr=scheduler.optimizer.param_groups[0]['lr'],
                      sent_s=sent_count.avg/batch_time.avg))
        """

    #return losses.avg, scores.avg
    return losses.avg, 0