# Assumes a module-level `criterion` (e.g. nn.BCELoss) and an `iou_loss`
# helper (sketched below); `tqdm` comes from the tqdm package.
def train(network, optimizer, dataloader, device):
    alpha = 0.6  # weight of the BCE term; the IoU term gets (1 - alpha)
    network.train()
    loss_per_epoch, loss_bce_per_epoch, loss_iou_per_epoch = 0, 0, 0
    process = tqdm(enumerate(dataloader))
    for i, data in process:
        optimizer.zero_grad()
        img, gt = data
        img = img.to(device)
        gt = gt.to(device)
        pred = network(img)
        bceloss = criterion(pred, gt)
        iouloss = iou_loss(pred, gt)
        loss = alpha * bceloss + (1 - alpha) * iouloss
        loss.backward()
        optimizer.step()
        # .item() already returns a detached Python float
        loss_per_epoch += loss.item()
        loss_bce_per_epoch += bceloss.item()
        loss_iou_per_epoch += iouloss.item()
        # process.set_description('BCELoss:{},IoULoss:{}'.format(
        #     bceloss.item(), iouloss.item()))
    loss_dict = dict(loss=loss_per_epoch / len(dataloader),
                     loss_bce=loss_bce_per_epoch / len(dataloader),
                     loss_iou=loss_iou_per_epoch / len(dataloader))
    return loss_dict
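# `iou_loss` is not defined in this snippet. A minimal sketch of a soft
# (differentiable) IoU loss, assuming `pred` already holds probabilities in
# [0, 1] and `gt` is a binary mask of the same NCHW shape; the repo's actual
# helper may differ.
def iou_loss(pred, gt, eps=1e-6):
    # soft intersection and union, summed over channel and spatial dims
    inter = (pred * gt).sum(dim=(1, 2, 3))
    union = (pred + gt - pred * gt).sum(dim=(1, 2, 3))
    return (1 - (inter + eps) / (union + eps)).mean()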
# Assumes numpy as np, torch.nn.functional as F, and the AverageMeter /
# accuracy_v2 helpers (sketched below).
def eval_model(args, model, device, test_loader):
    """
    Evaluation function for supervised learning. Metrics are balanced (macro)
    and unbalanced (micro) top-1 accuracies.
    :param args: experiment configuration dict
    :param model: network to evaluate
    :param device: torch device
    :param test_loader: evaluation DataLoader
    :return: average loss, micro top-1 accuracy, macro accuracy, per-class accuracies
    """
    model.eval()
    test_loss = AverageMeter()
    top1 = AverageMeter()
    # init vars for per-class metrics
    correct = np.zeros((args['learn']['num_classes'],))
    total = np.zeros((args['learn']['num_classes'],))
    criterion = torch.nn.CrossEntropyLoss()
    with torch.no_grad():
        for batch_idx, (examples, labels, index) in enumerate(test_loader):
            # index contains the clip ID, i.e. all TF patches loaded from the
            # same clip share this ID (and the label)
            examples, labels, index = examples.to(device), labels.to(device), index.to(device)
            idx_batch = torch.unique(index)
            counter = 0
            for i in range(len(idx_batch)):
                # average softmax predictions over patches to get each clip prediction
                num_examples = len(torch.nonzero(index == idx_batch[i]))
                output = model(examples[counter:counter + num_examples].unsqueeze(1))  # logits per patch
                loss = criterion(output, labels[counter:counter + num_examples])
                output = F.softmax(output, dim=1)  # class probabilities per patch
                output2 = torch.mean(output, dim=0)  # class probabilities per clip
                pred = output2.argmax()  # index of the max probability
                gt_label = labels[counter:counter + num_examples][0]
                # update counts for correct (0 or 1) and total (1)
                correct[gt_label] += pred.eq(gt_label.view_as(pred)).sum().item()
                total[gt_label] += 1
                # compute unbalanced (micro) accuracy
                prec1, prec5 = accuracy_v2(output2.unsqueeze(0),
                                           labels[counter:counter + num_examples][0],
                                           top=[1, 5])
                test_loss.update(loss.item(), num_examples)
                top1.update(prec1.item(), num_examples)
                counter += num_examples
    # after the full evaluation set, finalize per-class accuracy and compute
    # the balanced (macro) accuracy
    acc_per_class = 100 * (correct / total)
    balanced_accuracy = acc_per_class.mean()
    return test_loss.avg, top1.avg, balanced_accuracy, acc_per_class
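# `AverageMeter` is used above but defined elsewhere. The usual definition
# (as in the PyTorch ImageNet example) keeps a count-weighted running average:
class AverageMeter:
    def __init__(self):
        self.val, self.avg, self.sum, self.count = 0.0, 0.0, 0.0, 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count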
def train_CrossEntropy(args, model, device, train_loader, optimizer, epoch, num_classes):
    batch_time = AverageMeter()
    train_loss = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    # switch to train mode
    model.train()
    end = time.time()
    criterion = nn.CrossEntropyLoss()
    counter = 1
    for examples, labels, index in train_loader:
        examples, labels, index = examples.to(device), labels.to(device), index.to(device)
        # compute output & loss
        outputs = model(examples)
        loss = criterion(outputs, labels)
        # compute accuracy & update meters
        prec1, prec5 = accuracy_v2(outputs, labels, top=[1, 5])
        train_loss.update(loss.item(), examples.size(0))
        top1.update(prec1.item(), examples.size(0))
        top5.update(prec5.item(), examples.size(0))
        # compute gradient and do optimizer step
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        if counter % 15 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}, Accuracy: {:.0f}%, Learning rate: {:.6f}'.format(
                epoch, counter * len(examples), len(train_loader.dataset),
                100. * counter / len(train_loader), loss.item(), prec1,
                optimizer.param_groups[0]['lr']))
        counter += 1
    return train_loss.avg, top5.avg, top1.avg, batch_time.sum
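# `accuracy_v2` is also external to these snippets. A hypothetical top-k
# accuracy helper consistent with how it is called above (logits of shape
# [N, C], integer labels, percentages returned per k in `top`):
def accuracy_v2(output, target, top=(1,)):
    maxk = max(top)
    _, pred = output.topk(maxk, dim=1)      # [N, maxk] predicted class indices
    correct = pred.eq(target.view(-1, 1))   # broadcast labels against top-k
    return [correct[:, :k].any(dim=1).float().mean() * 100 for k in top]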
# Trains a single model, or every model in the module-level `all_models`
# list; `dataloader`, `device`, `optimizers` (a dict keyed by model name),
# `criterion`, and `save_boundary_labels` come from the enclosing module.
def train(epoch, model=None):
    models_to_train = all_models if model is None else [model]
    for i, (x, y) in enumerate(dataloader):
        y = y.unsqueeze(-1).float()
        x = x.to(device)
        y = y.to(device)
        for model in models_to_train:
            optimizer = optimizers[model.name]
            optimizer.zero_grad()
            y_pred = model(x)
            loss = criterion(y_pred, y)
            loss.backward()
            optimizer.step()
            if i % 100 == 0:
                print('Epoch: [{}], Iteration: [{}], Model: [{}], Loss: [{}]'.format(
                    epoch, i, model.name, loss.item()))
            if i % 4 == 0:
                save_boundary_labels(model, epoch=epoch, iteration=i // 4)
            if i % 1000 == 0 and model.name.startswith('residual_v2'):
                print('multiplier: {}'.format(model.multiplier))
    return i
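# The `optimizers` dict above is assumed to map each model's name to its own
# optimizer instance. A hypothetical setup (the choice of Adam and the lr
# are illustrative, not from the original code):
optimizers = {m.name: torch.optim.Adam(m.parameters(), lr=1e-3) for m in all_models}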
op_params.append(argument)
optimizer = optim.Adam(op_params)
scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=int(2 * len(data_loader_train)))
# NVIDIA apex mixed precision; "O2" casts most ops to fp16
model, optimizer = amp.initialize(model, optimizer, opt_level="O2", verbosity=0)
logs, val_logs = [], []
best_val_loss, ea_patience = 1e10, 0
for epoch in range(max_epochs):
    losses = []
    model.train()
    tk0 = tqdm(data_loader_train)
    torch.backends.cudnn.benchmark = True
    for step, batch in enumerate(tk0):
        inputs, labels = batch["image"].cuda().float(), batch["labels"].cuda().float()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        losses.append(loss.item())
        # scale the loss so fp16 gradients do not underflow
        with amp.scale_loss(loss, optimizer) as scaled_loss:
            scaled_loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        scheduler.step()  # cosine schedule advances every batch
        tk0.set_postfix({'loss': np.nanmean(losses)})
    logs.append(np.nanmean(losses))
    torch.backends.cudnn.benchmark = False
    tk0 = tqdm(data_loader_val)
    val_losses = []
    model.eval()
    with torch.no_grad():
        for step, batch in enumerate(tk0):
            # validation pass mirrors the training loop, without gradient updates
            inputs, labels = batch["image"].cuda().float(), batch["labels"].cuda().float()
            outputs = model(inputs)
            val_losses.append(criterion(outputs, labels).item())
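# apex.amp has since been deprecated in favor of torch.cuda.amp. A sketch of
# the same training step using the native API instead (a swapped-in
# equivalent, not this snippet's own code):
scaler = torch.cuda.amp.GradScaler()
for step, batch in enumerate(data_loader_train):
    inputs = batch["image"].cuda().float()
    labels = batch["labels"].cuda().float()
    with torch.cuda.amp.autocast():
        loss = criterion(model(inputs), labels)
    scaler.scale(loss).backward()  # scaled backward, like amp.scale_loss
    scaler.step(optimizer)         # unscales gradients, then steps
    scaler.update()
    optimizer.zero_grad()
    scheduler.step()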
                             device='cuda').cuda()
loss_func_c = nn.MSELoss()
loss_func_a = utils.UnbalanceBCELoss(p=params['unblc'])
optimizer = torch.optim.Adam(model.parameters(), lr=params['LR'])
Records_train = utils.Records(params['EPOCH'])
Records_test = utils.Records(params['EPOCH'])
print('training started at {}.'.format(
    datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')))
for epoch in range(params['EPOCH']):
    loss_act_train, loss_ch_train, record_train = train_loop(
        model, optimizer, trainloader, loss_func_a, loss_func_c, params['nvar'])
    criterion_train = utils.criterion(record_train)
    loss_act_test, loss_ch_test, record_test = test_loop(
        model, testloader, loss_func_a, loss_func_c, params['nvar'])
    criterion_test = utils.criterion(record_test)
    Records_train.record(loss_act_train, loss_ch_train, criterion_train, epoch)
    Records_test.record(loss_act_test, loss_ch_test, criterion_test, epoch)
    if epoch % 50 == 0:
        print('Epoch {} at {}'.format(epoch, datetime.datetime.now()))
torch.save(model.state_dict(), 'trained_models/{}.pkl'.format(starttime))
print('Model saved as {}.pkl'.format(starttime))
Records_train.plot('{}-train'.format(starttime))
Records_test.plot('{}-test'.format(starttime))
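# `utils.UnbalanceBCELoss` is project-specific and not shown here. Judging by
# the name and the p=params['unblc'] argument, it is presumably a binary
# cross-entropy that up-weights the rare positive class. A hypothetical
# sketch (assumes `pred` holds probabilities and the torch/nn imports above):
class UnbalanceBCELoss(nn.Module):
    def __init__(self, p):
        super().__init__()
        self.p = p  # extra weight applied to positive targets

    def forward(self, pred, target):
        # weight is p where target == 1, and 1 where target == 0
        weight = target * (self.p - 1.0) + 1.0
        return nn.functional.binary_cross_entropy(pred, target, weight=weight)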