def train_one_epoch(model, train_loader, criterion, optimizer, device, accumulation_steps, total_step, n_labels, base_lr, steps_upd_logging=500, gamma=None): model.train() optimizer.zero_grad() total_loss = 0.0 train_losses = [] for step, (features, targets) in enumerate(train_loader): features = trim_tensors(features) features, targets = features.to(device), targets.to(device) logits = model(features, attention_mask=features > 0, labels=None) if n_labels == 1: loss = criterion(logits.view(-1, 1), targets.view(-1, 1)) else: loss = criterion(logits, targets) with amp.scale_loss(loss, optimizer) as scaled_loss: scaled_loss.backward() if (step + 1) % accumulation_steps == 0: # Wait for several backward steps optimizer.step() # Now we can do an optimizer step optimizer.zero_grad() if gamma is not None and step == int(total_step / 2): for param_group in optimizer.param_groups: param_group['lr'] = base_lr * gamma total_loss += loss.item() if (step + 1) % steps_upd_logging == 0: train_losses.append(total_loss / (step + 1)) LOGGER.info(f'Train loss on step {step + 1} was {round(total_loss / (step + 1), 5)}') return total_loss / (step + 1), train_losses
def validate(model, valid_loader, criterion, device, n_labels): for param in model.parameters(): param.requires_grad = False model.eval() test_loss = 0.0 oof_pred = [] with torch.no_grad(): for step, (features, targets) in enumerate(valid_loader): features = trim_tensors(features) features, targets = features.to(device), targets.to(device) logits = model(features, attention_mask=features > 0, labels=None) if n_labels == 1: loss = criterion(logits.view(-1, 1), targets.view(-1, 1)) else: loss = criterion(logits, targets) test_loss += loss.item() oof_pred.append(torch.sigmoid(logits)) oof_pred = torch.cat(oof_pred).float().cpu().numpy() for param in model.parameters(): param.requires_grad = True LOGGER.info(f'Mean val loss: {round(test_loss / (step + 1), 5)}') return test_loss / (step + 1), oof_pred
def inference(models, test_loader, device, n_labels): test_pred = [] with torch.no_grad(): for features in test_loader: features = trim_tensors(features[0]) features = features.to(device) for i, model in enumerate(models): if i == 0: logits = torch.sigmoid( model(features, attention_mask=features > 0, labels=None)) else: logits += torch.sigmoid( model(features, attention_mask=features > 0, labels=None)) test_pred.append(logits / len(models)) test_pred = torch.cat(test_pred).float().cpu().numpy() return test_pred