def train(model, train_set, test_set, save, valid_set, n_epochs, canal):
    # Data loaders
    train_loader = DataLoader(train_set, batch_size=cfg.train_batch_size, shuffle=True,
                              pin_memory=(torch.cuda.is_available()), num_workers=cfg.num_workers)
    test_loader = DataLoader(test_set, batch_size=cfg.test_batch_size, shuffle=False,
                             pin_memory=(torch.cuda.is_available()), num_workers=cfg.num_workers)
    if valid_set is None:
        valid_loader = None
    else:
        valid_loader = DataLoader(valid_set, batch_size=cfg.batch_size, shuffle=False,
                                  pin_memory=(torch.cuda.is_available()), num_workers=cfg.num_workers)

    # Model on cuda
    model = to_device(model)

    # Wrap model for multi-GPUs, if necessary
    model_wrapper = model
    if torch.cuda.is_available() and torch.cuda.device_count() > 1:
        model_wrapper = torch.nn.DataParallel(model).cuda()

    # Optimizer
    optimizer = torch.optim.Adam(model_wrapper.parameters(), lr=cfg.lr)
    # optimizer = torch.optim.SGD(model_wrapper.parameters(), lr=cfg.lr, momentum=0.9)
    # optimizer = adabound.AdaBound(model_wrapper.parameters(), lr=1e-3, final_lr=0.1)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=cfg.milestones, gamma=cfg.gamma)

    # Start log
    logs = ['loss', 'iou', 'dice'] + ['iou{}'.format(i) for i in range(6)] + ['dice{}'.format(i) for i in range(6)]
    train_logs = ['train_' + log for log in logs]
    test_logs = ['test_' + log for log in logs]
    log_dict = OrderedDict.fromkeys(train_logs + test_logs, 0)
    with open(os.path.join(save, 'logs.csv'), 'w') as f:
        f.write('epoch,')
        for key in log_dict.keys():
            f.write(key + ',')
        f.write('\n')
    writer = SummaryWriter(log_dir=os.path.join(save, 'canal_{}'.format(canal), 'Tensorboard_Results'))

    # Train model
    best_dice = 0
    for epoch in range(n_epochs):
        os.makedirs(os.path.join(cfg.save, 'canal_{}'.format(canal)), exist_ok=True)
        train_meters = train_epoch(model=model_wrapper, loader=train_loader, optimizer=optimizer,
                                   epoch=epoch, n_epochs=n_epochs, writer=writer)
        # if (epoch + 1) % 5 == 0:
        test_meters = test_epoch(model=model_wrapper, loader=test_loader, epoch=epoch,
                                 is_test=True, writer=writer)
        scheduler.step()

        # Log results
        for i, key in enumerate(train_logs):
            log_dict[key] = train_meters[i]
        for i, key in enumerate(test_logs):
            log_dict[key] = test_meters[i]
        log_results(save, epoch, log_dict, writer=writer)
        if cfg.save_all:
            torch.save(model.state_dict(), os.path.join(save, 'canal_{}'.format(canal), 'model.dat'))
        if log_dict['test_dice'] > best_dice:
            torch.save(model.state_dict(), os.path.join(save, 'canal_{}'.format(canal), 'model.dat'))
            best_dice = log_dict['test_dice']
            print('New best dice: %.4f' % log_dict['test_dice'])
            # print(2. * intersection / union)
        else:
            print('Current best dice: %.4f' % best_dice)
            # print(2. * intersection / union)

    writer.close()
    with open(os.path.join(save, 'canal_{}'.format(canal), 'logs.csv'), 'a') as f:
        f.write(',,,,best dice,%0.5f\n' % (best_dice))

    # Final test of the best model on test set
    print('best dice: ', best_dice)
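# This and the following train() variants all rely on the same surrounding module
# context. The imports below are only a sketch of what that context is assumed to
# be: os, OrderedDict, torch, DataLoader and SummaryWriter are standard or torch
# symbols, while cfg, to_device, train_epoch, test_epoch, log_results and
# DataParallelWithCallback are project-specific names taken from the snippets
# themselves, so the modules they are imported from here are assumptions.
import os
from collections import OrderedDict

import torch
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter   # or: from tensorboardX import SummaryWriter

# from config import cfg                                # assumed project config object
# from utils import to_device, log_results              # assumed project helpers
# from epochs import train_epoch, test_epoch            # assumed per-epoch loops
# from sync_batchnorm import DataParallelWithCallback   # assumed sync-BN DataParallel wrapper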
def train(model, train_set, test_set, save, valid_set, n_epochs):
    """
    Main training function
    """
    # Dataloaders
    train_loader = DataLoader(
        train_set, batch_size=cfg.batch_size, shuffle=True,
        pin_memory=(torch.cuda.is_available()), num_workers=cfg.num_workers
    )
    test_loader = DataLoader(
        test_set, batch_size=cfg.batch_size, shuffle=False,
        pin_memory=(torch.cuda.is_available()), num_workers=cfg.num_workers
    )
    if valid_set is None:
        valid_loader = None
    else:
        valid_loader = DataLoader(
            valid_set, batch_size=cfg.batch_size, shuffle=False,
            pin_memory=(torch.cuda.is_available()), num_workers=cfg.num_workers
        )

    # Model on cuda
    model = to_device(model)

    # Wrap model for multi-GPUs, if necessary
    model_wrapper = model
    if torch.cuda.is_available() and torch.cuda.device_count() > 1:
        if cfg.use_syncbn:
            print("Using sync-bn")
            model_wrapper = DataParallelWithCallback(model).cuda()
        else:
            model_wrapper = torch.nn.DataParallel(model).cuda()

    # optimizer and scheduler
    optimizer = torch.optim.Adam(model_wrapper.parameters(), lr=cfg.lr)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=cfg.milestones, gamma=cfg.gamma)

    # Start logging
    logs = ["loss", "iou", "dice", "iou0", "iou1", "dice0", "dice1", "dice_global"]
    train_logs = ["train_" + log for log in logs]
    test_logs = ["test_" + log for log in logs]
    log_dict = OrderedDict.fromkeys(train_logs + test_logs, 0)
    with open(os.path.join(save, "logs.csv"), "w") as f:
        f.write("epoch,")
        for key in log_dict.keys():
            f.write(key + ",")
        f.write("\n")
    with open(os.path.join(save, "loss_logs.csv"), "w") as f:
        f.write("iter,train_loss,\n")
    writer = SummaryWriter(log_dir=os.path.join(save, "Tensorboard_Results"))

    # train and test the model
    best_dice_global = 0
    global iteration
    iteration = 0
    for epoch in range(n_epochs):
        os.makedirs(os.path.join(cfg.save, "epoch_{}".format(epoch)))
        print("learning rate: ", scheduler.get_lr())
        # train epoch
        train_meters = train_epoch(
            model=model_wrapper, loader=train_loader, optimizer=optimizer,
            epoch=epoch, n_epochs=n_epochs, writer=writer
        )
        # test epoch
        test_meters = test_epoch(model=model_wrapper, loader=test_loader, epoch=epoch, is_test=True, writer=writer)
        scheduler.step()

        # Log results
        for i, key in enumerate(train_logs):
            log_dict[key] = train_meters[i]
        for i, key in enumerate(test_logs):
            log_dict[key] = test_meters[i]
        log_results(save, epoch, log_dict, writer=writer)

        # save model checkpoint
        if cfg.save_all:
            torch.save(model.state_dict(), os.path.join(save, "epoch_{}".format(epoch), "model.dat"))
        if log_dict["test_dice_global"] > best_dice_global:
            torch.save(model.state_dict(), os.path.join(save, "model.dat"))
            best_dice_global = log_dict["test_dice_global"]
            print("New best global dice: %.4f" % log_dict["test_dice_global"])
        else:
            print("Current best global dice: %.4f" % best_dice_global)

    # end
    writer.close()
    with open(os.path.join(save, "logs.csv"), "a") as f:
        f.write(",,,,best global dice,%0.5f\n" % (best_dice_global))
    print("best global dice: ", best_dice_global)
def train(model, train_set, test_set, save, valid_set, n_epochs):
    # Data loaders
    train_loader = DataLoader(train_set, batch_size=cfg.batch_size, shuffle=True,
                              pin_memory=(torch.cuda.is_available()), num_workers=cfg.num_workers)
    test_loader = DataLoader(test_set, batch_size=cfg.batch_size, shuffle=False,
                             pin_memory=(torch.cuda.is_available()), num_workers=cfg.num_workers)
    if valid_set is None:
        valid_loader = None
    else:
        valid_loader = DataLoader(valid_set, batch_size=cfg.batch_size, shuffle=False,
                                  pin_memory=(torch.cuda.is_available()), num_workers=cfg.num_workers)

    # Model on cuda
    model = to_device(model)

    # Wrap model for multi-GPUs, if necessary
    model_wrapper = model
    if torch.cuda.is_available() and torch.cuda.device_count() > 1:
        model_wrapper = torch.nn.DataParallel(model).cuda()

    # Optimizer
    # optimizer = torch.optim.SGD(model_wrapper.parameters(), lr=cfg.lr, weight_decay=cfg.wd, momentum=cfg.momentum)
    optimizer = torch.optim.Adam(model_wrapper.parameters(), lr=cfg.lr)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=cfg.milestones, gamma=cfg.gamma)

    # Start log
    # 'iou' is included so that log_dict['test_iou'] below exists (otherwise it raises
    # a KeyError); this assumes train_epoch/test_epoch also return an iou meter.
    logs = ['loss', 'acc', 'iou']
    train_logs = ['train_' + log for log in logs]
    test_logs = ['test_' + log for log in logs]
    log_dict = OrderedDict.fromkeys(train_logs + test_logs, 0)
    with open(os.path.join(save, 'logs.csv'), 'w') as f:
        f.write('epoch,')
        for key in log_dict.keys():
            f.write(key + ',')
        f.write('\n')
    writer = SummaryWriter(log_dir=os.path.join(save, 'Tensorboard_Results'))

    # Train model
    best_iou = 0
    for epoch in range(n_epochs):
        os.makedirs(os.path.join(cfg.save, 'epoch_{}'.format(epoch)))
        train_meters = train_epoch(model=model_wrapper, loader=train_loader, optimizer=optimizer,
                                   epoch=epoch, n_epochs=n_epochs, writer=writer)
        test_meters = test_epoch(model=model_wrapper, loader=test_loader, epoch=epoch,
                                 is_test=True, writer=writer)
        scheduler.step()

        # Log results
        for i, key in enumerate(train_logs):
            log_dict[key] = train_meters[i]
        for i, key in enumerate(test_logs):
            log_dict[key] = test_meters[i]
        log_results(save, epoch, log_dict, writer=writer)
        if cfg.save_all:
            torch.save(model.state_dict(), os.path.join(save, 'epoch_{}'.format(epoch), 'model.dat'))
        if log_dict['test_iou'] > best_iou:
            torch.save(model.state_dict(), os.path.join(save, 'model.dat'))
            best_iou = log_dict['test_iou']
            print('New best iou: %.4f' % log_dict['test_iou'])

    writer.close()
    with open(os.path.join(save, 'logs.csv'), 'a') as f:
        f.write(',,,,best iou,%0.5f\n' % (best_iou))

    # Final test of the best model on test set
    print('best iou: ', best_iou)
def train(model, train_set, test_set, save, valid_set, n_epochs):
    '''
    Main training function
    '''
    # Dataloaders
    train_loader = DataLoader(train_set, batch_size=cfg.batch_size, shuffle=True,
                              pin_memory=(torch.cuda.is_available()), num_workers=cfg.num_workers)
    test_loader = DataLoader(test_set, batch_size=cfg.batch_size, shuffle=False,
                             pin_memory=(torch.cuda.is_available()), num_workers=cfg.num_workers)
    # modified
    if valid_set is None:
        valid_loader = None
    else:
        valid_loader = DataLoader(valid_set, batch_size=cfg.batch_size, shuffle=False,
                                  pin_memory=(torch.cuda.is_available()), num_workers=cfg.num_workers)

    # Model on cuda
    model = to_device(model)

    # Wrap model for multi-GPUs, if necessary
    model_wrapper = model
    print('num_of_cuda:', torch.cuda.device_count())
    if torch.cuda.is_available() and torch.cuda.device_count() > 1:
        print('multi-gpus')
        if cfg.use_syncbn:
            print('Using sync-bn')
            model_wrapper = DataParallelWithCallback(model).cuda()
        else:
            model_wrapper = torch.nn.DataParallel(model).cuda()

    # optimizer and scheduler
    optimizer = torch.optim.Adam(model_wrapper.parameters(), lr=cfg.lr)
    # scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=cfg.milestones,
    #                                                  gamma=cfg.gamma)
    scheduler = torch.optim.lr_scheduler.OneCycleLR(
        optimizer, max_lr=cfg.max_lr, epochs=n_epochs, steps_per_epoch=len(train_loader))

    # Start logging
    logs = ['loss', 'acc', 'acc0', 'acc1']
    train_logs = ['train_' + log for log in logs] + ['train_auc']
    valid_logs = ['valid_' + log for log in logs] + ['valid_auc', 'valid_auc_pat']
    test_logs = ['test_' + log for log in logs] + ['test_auc', 'test_auc_pat']
    log_dict = OrderedDict.fromkeys(train_logs + valid_logs + test_logs, 0)
    with open(os.path.join(save, 'logs.csv'), 'w') as f:
        f.write('epoch,')
        for key in log_dict.keys():
            f.write(key + ',')
        f.write('\n')
    with open(os.path.join(save, 'loss_logs.csv'), 'w') as f:
        f.write('iter,train_loss,\n')
    writer = SummaryWriter(log_dir=os.path.join(save, 'Tensorboard_Results'))

    # train and test the model
    best_auc = 0
    global iteration
    iteration = 0
    for epoch in range(n_epochs):
        os.makedirs(os.path.join(cfg.save, 'epoch_{}'.format(epoch)))
        print('learning rate: ', scheduler.get_lr())
        # train epoch
        train_meters = train_epoch(model=model_wrapper, loader=train_loader, optimizer=optimizer,
                                   scheduler=scheduler, epoch=epoch, n_epochs=n_epochs, writer=writer)
        # valid epoch
        valid_meters = test_epoch(model=model_wrapper, loader=valid_loader, epoch=epoch,
                                  is_test=False, writer=writer)
        # test epoch
        test_meters = test_epoch(model=model_wrapper, loader=test_loader, epoch=epoch,
                                 is_test=True, writer=writer)
        # scheduler.step()

        # Log results
        for i, key in enumerate(train_logs):
            log_dict[key] = train_meters[i]
        for i, key in enumerate(valid_logs):
            log_dict[key] = valid_meters[i]
        for i, key in enumerate(test_logs):
            log_dict[key] = test_meters[i]
        log_results(save, epoch, log_dict, writer=writer)

        # save model checkpoint
        if cfg.save_all:
            torch.save(model.state_dict(), os.path.join(save, 'epoch_{}'.format(epoch), 'model.dat'))
        if log_dict['valid_auc'] > best_auc:
            torch.save(model.state_dict(), os.path.join(save, 'model.dat'))
            best_auc = log_dict['valid_auc']
            print('New best auc: %.4f' % log_dict['valid_auc'])
        else:
            print('Current best auc: %.4f' % best_auc)

    # end
    writer.close()
    with open(os.path.join(save, 'logs.csv'), 'a') as f:
        f.write(',,,,best auc,%0.5f\n' % (best_auc))
    print('best auc: ', best_auc)
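# In the variant above the OneCycleLR scheduler is built with
# steps_per_epoch=len(train_loader), handed to train_epoch, and the per-epoch
# scheduler.step() call is commented out, which implies the scheduler is stepped
# once per batch inside train_epoch. train_epoch itself is not shown; the function
# below is only a sketch of that per-batch stepping pattern, and every name in it
# (including the criterion argument) is an assumption, not the project's code.
def train_epoch_onecycle_sketch(model, loader, optimizer, scheduler, criterion):
    model.train()
    for inputs, targets in loader:
        optimizer.zero_grad()
        loss = criterion(model(inputs), targets)
        loss.backward()
        optimizer.step()
        scheduler.step()  # OneCycleLR expects exactly one step per optimizer update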
def train(model, test_set, save, valid_set, n_epochs):
    '''
    Main training function
    '''
    # Dataloaders
    test_loader = DataLoader(test_set, batch_size=cfg.batch_size, shuffle=False,
                             pin_memory=(torch.cuda.is_available()), num_workers=cfg.num_workers)
    if valid_set is None:
        valid_loader = None
    else:
        valid_loader = DataLoader(valid_set, batch_size=cfg.batch_size, shuffle=False,
                                  pin_memory=(torch.cuda.is_available()), num_workers=cfg.num_workers)

    # Model on cuda
    model = to_device(model)

    # Wrap model for multi-GPUs, if necessary
    model_wrapper = model
    if torch.cuda.is_available() and torch.cuda.device_count() > 1:
        print('multi-gpus')
        if cfg.use_syncbn:
            print('Using sync-bn')
            model_wrapper = DataParallelWithCallback(model).cuda()
        else:
            model_wrapper = torch.nn.DataParallel(model).cuda()

    # Start logging
    logs = ['loss', 'acc', 'acc0', 'acc1']
    test_logs = ['test_' + log for log in logs] + ['test_auc', 'test_auc_pat']
    log_dict = OrderedDict.fromkeys(test_logs, 0)
    with open(os.path.join(save, 'logs.csv'), 'w') as f:
        f.write('epoch,')
        for key in log_dict.keys():
            f.write(key + ',')
        f.write('\n')
    with open(os.path.join(save, 'loss_logs.csv'), 'w') as f:
        f.write('iter,train_loss,\n')
    writer = SummaryWriter(log_dir=os.path.join(save, 'Tensorboard_Results'))

    # train and test the model
    best_auc = 0
    global iteration
    iteration = 0
    for epoch in range(1):
        os.makedirs(os.path.join(cfg.save, 'epoch_{}'.format(epoch)))
        # test epoch
        test_meters = test_epoch(model=model_wrapper, loader=test_loader, epoch=epoch,
                                 is_test=True, writer=writer)

        # Log results
        for i, key in enumerate(test_logs):
            log_dict[key] = test_meters[i]
        log_results(save, epoch, log_dict, writer=writer)

        # save model checkpoint
        if log_dict['test_auc'] > best_auc:
            torch.save(model.state_dict(), os.path.join(save, 'model.dat'))
            best_auc = log_dict['test_auc']
            print('New best auc: %.4f' % log_dict['test_auc'])
        else:
            print('Current best auc: %.4f' % best_auc)

    # end
    writer.close()
    with open(os.path.join(save, 'logs.csv'), 'a') as f:
        f.write(',,,,best auc,%0.5f\n' % (best_auc))
    print('best auc: ', best_auc)
def train(model, test_set, save, n_epochs):
    '''
    Main training function
    '''
    # Dataloaders
    test_loader = DataLoader(test_set, batch_size=cfg.batch_size, shuffle=False,
                             pin_memory=(torch.cuda.is_available()), num_workers=cfg.num_workers)

    # Model on cuda
    model = to_device(model)

    # Wrap model for multi-GPUs, if necessary
    model_wrapper = model
    print('num_of_cuda:', torch.cuda.device_count())
    if torch.cuda.is_available() and torch.cuda.device_count() > 1:
        print('multi-gpus')
        if cfg.use_syncbn:
            print('Using sync-bn')
            model_wrapper = DataParallelWithCallback(model).cuda()
        else:
            model_wrapper = torch.nn.DataParallel(model).cuda()

    # optimizer and scheduler
    optimizer = torch.optim.Adam(model_wrapper.parameters(), lr=cfg.lr)
    # scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=cfg.milestones, gamma=cfg.gamma)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=cfg.factor,
                                                           patience=cfg.patience, min_lr=cfg.min_lr, eps=cfg.eps)
    # scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=cfg.max_lr, epochs=n_epochs,
    #                                                 steps_per_epoch=len(train_loader),
    #                                                 div_factor=cfg.div_factor, final_div_factor=cfg.final_div_factor)

    # Start logging
    logs = ['loss', 'acc', 'acc0', 'acc1']
    test_logs = ['test_' + log for log in logs] + ['test_auc', 'test_auc_pat']
    log_dict = OrderedDict.fromkeys(test_logs, 0)
    with open(os.path.join(save, 'logs.csv'), 'w') as f:
        f.write('epoch,')
        for key in log_dict.keys():
            f.write(key + ',')
        f.write('\n')
    with open(os.path.join(save, 'loss_logs.csv'), 'w') as f:
        f.write('iter,train_loss,\n')
    writer = SummaryWriter(log_dir=os.path.join(save, 'Tensorboard_Results'))

    # train and test the model
    best_auc = 0
    global iteration
    iteration = 0
    for epoch in range(1):
        print('learning rate: ', optimizer.state_dict()['param_groups'][0]['lr'])
        # test epoch
        test_meters = test_epoch(model=model_wrapper, loader=test_loader, epoch=epoch,
                                 is_test=True, writer=writer)

        # Log results
        for i, key in enumerate(test_logs):
            log_dict[key] = test_meters[i]
        log_results(save, epoch, log_dict, writer=writer)

        # save model checkpoint
        # if cfg.save_all:

    # end
    writer.close()
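# The last variant constructs a ReduceLROnPlateau scheduler but never steps it,
# since the loop only runs a single evaluation pass. For reference, the function
# below is a minimal, self-contained sketch of how that scheduler is normally
# driven in a full training loop; the stand-in validation loss and all names here
# are assumptions, not part of the project's code.
import torch


def _plateau_scheduler_demo():
    model = torch.nn.Linear(4, 1)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.5, patience=2)
    for epoch in range(10):
        valid_loss = 1.0 / (epoch + 1)   # stand-in for a real validation metric
        scheduler.step(valid_loss)       # LR is reduced only when the monitored metric plateaus
        print(epoch, optimizer.param_groups[0]['lr'])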