Example #1
def get_criterion(config):
    if config.loss == 'focal':
        criterion = L.FocalLoss()
        return criterion
    elif config.loss == 'ce':
        return torch.nn.CrossEntropyLoss()
    return None
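For context: in these snippets `L` appears to be an alias for a third-party loss collection (the class names match pytorch_toolbelt.losses). A minimal usage sketch, assuming that alias and a simple attribute-style config object:

from types import SimpleNamespace

import pytorch_toolbelt.losses as L  # assumption: the `L` alias used throughout

config = SimpleNamespace(loss='focal')  # hypothetical config object
criterion = get_criterion(config)       # -> L.FocalLoss()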
Example #2
def define_losses(
    num_classes: int,
    use_focal: bool = False,
    loss_weights: Optional[List[float]] = None
) -> Tuple[torch.nn.Module, torch.nn.Module]:

    assert num_classes in {7, 8}
    if loss_weights is None:
        loss_weights = [1.0] * num_classes  # avoid a mutable default argument
    if num_classes == 7:
        loss_f = torch.nn.BCEWithLogitsLoss(
            pos_weight=torch.tensor(loss_weights).to(
                torch_config.device).reshape(1, num_classes, 1, 1))
        loss_d = DiceLoss(torch_config.device)
    else:
        if use_focal:
            loss_f = losses.FocalLoss()
        else:
            loss_f = torch.nn.CrossEntropyLoss()

        loss_d = DiceLoss(torch_config.device)

    return loss_f, loss_d
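A sketch of how the returned pair might be combined; the tensor shapes and the equal weighting are illustrative, and the call signature of the project's own DiceLoss is an assumption, since its definition is not shown:

loss_f, loss_d = define_losses(num_classes=8, use_focal=True)

pred = torch.randn(2, 8, 64, 64, requires_grad=True)  # hypothetical logits (N, C, H, W)
target = torch.randint(0, 8, (2, 64, 64))             # hypothetical labels (N, H, W)

# Assumes DiceLoss is callable as loss(pred, target), mirroring the focal loss.
loss = loss_f(pred, target) + loss_d(pred, target)    # equal weighting, illustrative
loss.backward()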
Example #3
test_preds_iter = np.zeros((2000000, 11))
it = 0

for it in range(1):
    device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
    model = Seq2SeqRnn(input_size=trainval.shape[1],
                       seq_len=4000,
                       hidden_size=64,
                       output_size=11,
                       num_layers=2,
                       hidden_layers=[64, 64, 64],
                       bidirectional=True).to(device)

    number_of_epochs = 150
    # `index` is the CV fold id from the enclosing loop
    early_stopping = EarlyStopping(patience=20, is_maximize=True,
                                   checkpoint_path="./models/gru_clean_checkpoint_fold_{}_iter_{}.pt".format(index, it))
    criterion = L.FocalLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer=optimizer,
                                                    pct_start=0.1,
                                                    div_factor=1e3,
                                                    max_lr=1e-3,
                                                    epochs=number_of_epochs,
                                                    steps_per_epoch=len(train_dataloader))
    avg_train_losses, avg_valid_losses = [], []

    for epoch in range(number_of_epochs):
        start_time = time.time()

        print("Epoch : {}".format(epoch))
        print("learning_rate: {:0.9f}".format(scheduler.get_last_lr()[0]))

Example #4
@torch.no_grad()
def test_soft_bce_loss():
    criterion = L.SoftBCEWithLogitsLoss(smooth_factor=0.1, ignore_index=-100)

    # Ideal case
    y_pred = torch.tensor([-9, 9, 1, 9, -9]).float()
    y_true = torch.tensor([0, 1, -100, 1, 0]).long()

    loss = criterion(y_pred, y_true)
    print(loss)


@pytest.mark.parametrize(
    "criterion",
    [
        L.BiTemperedLogisticLoss(t1=1, t2=0.8),
        L.FocalCosineLoss(),
        L.FocalLoss(),
        L.SoftF1Loss(),
        L.SoftCrossEntropyLoss(),
    ],
)
def test_classification_losses(criterion):
    # Ideal case
    y_pred = torch.tensor([[+9, -9, -9, -9], [-9, +9, -9, -9], [-9, -9, +9, -9], [-9, -9, -9, +9]]).float()
    y_true = torch.tensor([0, 1, 2, 3]).long()

    loss = criterion(y_pred, y_true)
    print(loss)
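Both tests print the loss rather than asserting on its value, so they are best run with pytest's -s flag to keep stdout visible, e.g. pytest -s -k "soft_bce or classification_losses".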
Example #5
def validate(valid_loader, model, device):
    
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    scores = AverageMeter()
    sent_count = AverageMeter()
    seq_len = model.cfg.seq_len
    model_type = model.cfg.model_type
    
    # switch to evaluation mode
    model.eval()

    start = end = time.time()
    
    predictions = []
    groundtruth = []
    for step, (cate_x, cont_x, mask, y, label) in enumerate(valid_loader):
        # measure data loading time
        data_time.update(time.time() - end)        
        
        cate_x = cate_x.to(device)
        cont_x, mask, y = cont_x.to(device), mask.to(device), y.to(device)
        label = label.to(device)
        batch_size = cont_x.size(0)
        
        # compute loss
        with torch.no_grad():        
            pred = model(cate_x, cont_x, mask)        

            # record loss
            if model_type == 'seq2seq':
                pred_ = pred.view(-1, pred.shape[-1])
                y_ = y.view(-1)
                loss = L.FocalLoss(ignore_index=-1)(pred_, y_)
                predictions.append(pred[:, seq_len // 2, :].detach().cpu().numpy().argmax(1))
                groundtruth.append(label.detach().cpu().numpy())
            else:
                loss = L.FocalLoss(ignore_index=-1)(pred, label)
                predictions.append(pred.detach().cpu().numpy().argmax(1))
                groundtruth.append(label.detach().cpu().numpy())

            losses.update(loss.item(), batch_size)
        
        if CFG.gradient_accumulation_steps > 1:
            loss = loss / CFG.gradient_accumulation_steps    

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        sent_count.update(batch_size)

        if step % CFG.print_freq == 0 or step == (len(valid_loader)-1):
            # record accuracy
            score = f1_score(np.concatenate(groundtruth),  # f1_score expects y_true first
                             np.concatenate(predictions), labels=list(range(11)), average='macro')
            scores.update(score, batch_size)

            print('TEST: [{0}/{1}] '
                  'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  'Score: {score.val:.4f}({score.avg:.4f}) '
                  'sent/s {sent_s:.0f} '
                  .format(
                   step, len(valid_loader), batch_time=batch_time,                   
                   data_time=data_time, loss=losses,
                   score=scores,
                   remain=timeSince(start, float(step+1)/len(valid_loader)),
                   sent_s=sent_count.avg/batch_time.avg
                   ))

    predictions = np.concatenate(predictions)
    groundtruth = np.concatenate(groundtruth)
        
    # scoring
    score = f1_score(groundtruth, predictions, labels=list(range(11)), average='macro')  # y_true first
    
    return losses.avg, score, predictions, groundtruth
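Note that L.FocalLoss(ignore_index=-1) is rebuilt on every batch above; a small refactor (a sketch, not from the source) constructs it once before the loop with identical results:

criterion = L.FocalLoss(ignore_index=-1)  # construct once, outside the batch loop
for step, (cate_x, cont_x, mask, y, label) in enumerate(valid_loader):
    ...  # forward pass as above
    loss = criterion(pred_, y_)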
Example #6
def train(train_loader, model, optimizer, epoch, scheduler, device):
    
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    scores = AverageMeter()
    sent_count = AverageMeter()
    seq_len = model.cfg.seq_len
    model_type = model.cfg.model_type

    # switch to train mode
    model.train()
    train_preds, train_true = [], []

    start = end = time.time()
    global_step = 0
    
    for step, (cate_x, cont_x, mask, y, label) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        
        cate_x = cate_x.to(device)
        cont_x, mask, y = cont_x.to(device), mask.to(device), y.to(device)
        label = label.to(device)
        batch_size = cont_x.size(0)        
        
        # compute loss
        pred = model(cate_x, cont_x, mask)

        if model_type == 'seq2seq':
            pred_ = pred.view(-1, pred.shape[-1])
            y_ = y.view(-1)
            loss = L.FocalLoss(ignore_index=-1)(pred_, y_)
            train_true.append(label.detach().cpu().numpy())
            train_preds.append(pred[:, seq_len // 2, :].detach().cpu().numpy().argmax(1))
        else:
            loss = L.FocalLoss(ignore_index=-1)(pred, label)
            train_true.append(label.detach().cpu().numpy())
            train_preds.append(pred.detach().cpu().numpy().argmax(1))

        # record loss
        losses.update(loss.item(), batch_size)
        
        if CFG.gradient_accumulation_steps > 1:
            loss = loss / CFG.gradient_accumulation_steps
        
        loss.backward()
        grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), CFG.max_grad_norm)
        
        if (step + 1) % CFG.gradient_accumulation_steps == 0:
            optimizer.step()
            scheduler.step()  # step the scheduler after the optimizer (PyTorch >= 1.1)
            optimizer.zero_grad()
            global_step += 1

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        sent_count.update(batch_size)

        if step % CFG.print_freq == 0 or step == (len(train_loader)-1):
            # record accuracy
            score = f1_score(np.concatenate(train_true),  # f1_score expects y_true first
                             np.concatenate(train_preds), labels=list(range(11)), average='macro')
            scores.update(score, batch_size)
            
            print('Epoch: [{0}][{1}/{2}] '
                  'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  'Score: {score.val:.4f}({score.avg:.4f}) '
                  'Grad: {grad_norm:.4f}  '
                  'LR: {lr:.6f}  '
                  'sent/s {sent_s:.0f} '
                  .format(
                   epoch, step, len(train_loader), batch_time=batch_time,                   
                   data_time=data_time, loss=losses,
                   score=scores,
                   remain=timeSince(start, float(step+1)/len(train_loader)),
                   grad_norm=grad_norm,
                   lr=scheduler.get_last_lr()[0],
                   #lr=scheduler.optimizer.param_groups[0]['lr'],
                   sent_s=sent_count.avg/batch_time.avg
                   ))

    return losses.avg, scores.avg
Example #7
def validate(valid_loader, model, device):

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    scores = AverageMeter()
    sent_count = AverageMeter()

    # switch to evaluation mode
    model.eval()
    val_preds, val_true = torch.Tensor([]).to(device), torch.LongTensor(
        []).to(device)

    start = end = time.time()

    for step, (cont_x, mask, y) in enumerate(valid_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        cont_x, mask, y = cont_x.to(device), mask.to(device), y.to(device)
        batch_size = cont_x.size(0)

        # compute loss
        with torch.no_grad():
            #pred = model(cont_x, mask)
            pred = model(cont_x)

            # record loss
            pred_ = pred.view(-1, pred.shape[-1])
            y_ = y.view(-1)
            loss = L.FocalLoss(ignore_index=-1)(pred_, y_)

            losses.update(loss.item(), batch_size)

        # record accuracy
        val_true = torch.cat([val_true, y_.long()], 0)
        val_preds = torch.cat([val_preds, pred_], 0)

        if CFG.gradient_accumulation_steps > 1:
            loss = loss / CFG.gradient_accumulation_steps

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        sent_count.update(batch_size)

        if step % CFG.print_freq == 0 or step == (len(valid_loader) - 1):
            print('TEST: [{0}/{1}] '
                  'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  #'Score: {score.val:.4f}({score.avg:.4f}) '
                  'sent/s {sent_s:.0f} '.format(
                      step,
                      len(valid_loader),
                      batch_time=batch_time,
                      data_time=data_time,
                      loss=losses,
                      #score=scores,
                      remain=timeSince(start,
                                       float(step + 1) / len(valid_loader)),
                      sent_s=sent_count.avg / batch_time.avg))

    # scoring
    predictions = val_preds.cpu().detach().numpy().argmax(1)
    groundtruth = val_true.cpu().detach().numpy()
    score = f1_score(groundtruth,  # f1_score expects y_true first
                     predictions,
                     labels=list(range(11)),
                     average='macro')

    return losses.avg, score, predictions, groundtruth
Example #8
def train(train_loader, model, optimizer, epoch, scheduler, device):

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    scores = AverageMeter()
    sent_count = AverageMeter()

    # switch to train mode
    model.train()
    train_preds, train_true = torch.Tensor([]).to(device), torch.LongTensor(
        []).to(device)

    start = end = time.time()
    global_step = 0

    for step, (cont_x, mask, y) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        cont_x, mask, y = cont_x.to(device), mask.to(device), y.to(device)
        batch_size = cont_x.size(0)

        # compute loss
        #pred = model(cont_x, mask)
        pred = model(cont_x)

        pred_ = pred.view(-1, pred.shape[-1])
        y_ = y.view(-1)
        loss = L.FocalLoss(ignore_index=-1)(pred_, y_)

        # record loss
        losses.update(loss.item(), batch_size)
        """
        train_true = torch.cat([train_true, y_.long()], 0)
        train_preds = torch.cat([train_preds, pred_], 0)
        """

        if CFG.gradient_accumulation_steps > 1:
            loss = loss / CFG.gradient_accumulation_steps
        if USE_APEX:
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
        else:
            loss.backward()

        grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(),
                                                   CFG.max_grad_norm)

        if (step + 1) % CFG.gradient_accumulation_steps == 0:
            scheduler.step()
            optimizer.step()
            optimizer.zero_grad()
            global_step += 1

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        sent_count.update(batch_size)

        if step % CFG.print_freq == 0 or step == (len(train_loader) - 1):
            print('Epoch: [{0}][{1}/{2}]'.format(epoch, step,
                                                 len(train_loader)))
        """
        if step % CFG.print_freq == 0 or step == (len(train_loader)-1):
            # record accuracy
            score = f1_score(train_true.cpu().detach().numpy(), train_preds.cpu().detach().numpy().argmax(1), labels=list(range(11)), average='macro')
            scores.update(score, batch_size)

            print('Epoch: [{0}][{1}/{2}] '
                  'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  'Score: {score.val:.4f}({score.avg:.4f}) '
                  'Grad: {grad_norm:.4f}  '
                  'LR: {lr:.6f}  '
                  'sent/s {sent_s:.0f} '
                  .format(
                   epoch, step, len(train_loader), batch_time=batch_time,
                   data_time=data_time, loss=losses,
                   score=scores,
                   remain=timeSince(start, float(step+1)/len(train_loader)),
                   grad_norm=grad_norm,
                   lr=scheduler.get_lr()[0],
                   #lr=scheduler.optimizer.param_groups[0]['lr'],
                   sent_s=sent_count.avg/batch_time.avg
                   ))
        """
    #return losses.avg, scores.avg
    return losses.avg, 0
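The USE_APEX branch above relies on NVIDIA's external apex package; on PyTorch 1.6+ the built-in torch.cuda.amp covers the same use case. A minimal sketch of the equivalent training step, reusing the names from Example #8:

scaler = torch.cuda.amp.GradScaler()

# Inside the batch loop:
with torch.cuda.amp.autocast():            # mixed-precision forward pass
    pred = model(cont_x)
    loss = L.FocalLoss(ignore_index=-1)(pred.view(-1, pred.shape[-1]), y.view(-1))

scaler.scale(loss).backward()              # scale to avoid fp16 gradient underflow
scaler.step(optimizer)                     # unscales gradients, then optimizer.step()
scaler.update()
optimizer.zero_grad()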