def train(netG, netD, noise_module, optimizerD, optimizerG, trainloader,
          valloader, device, batch_size, file, epoch, alpha):
    """
    Train, evaluate and save the model.

    args:
        netG: nn.Module -> the generator
        netD: nn.Module -> the discriminator
        noise_module: nn.Module -> the module which computes the noise filter
        optimizerD: torch.optim -> the optimizer for the discriminator
        optimizerG: torch.optim -> the optimizer for the generator
        trainloader: torch.utils.data.DataLoader -> the train dataset
        valloader: torch.utils.data.DataLoader -> the validation dataset
        device: torch.device -> the device for the tensors
        batch_size: int -> the batch size (first dimension of the noise filter)
        file: Path -> the path where results are saved
        epoch: int -> the number of epochs
        alpha: float -> the weight of the supervision term
    """
    print(file)
    save_net(file, netG, netD)
    sauvegarde_init(file, "train")
    sauvegarde_init(file, "eval")
    netG.train()
    netD.train()
    cpt = 0
    dTrue = deque(maxlen=1000)
    dFalse = deque(maxlen=1000)
    mse_train = deque(maxlen=1000)
    mse_val = deque(maxlen=1000)
    turn = True
    filtre_size = (batch_size, 3, 64, 64)

    bar_epoch = tqdm(range(epoch))
    bar_data = tqdm(range(len(trainloader)))

    for e in bar_epoch:
        for i, (ref, y) in zip(bar_data, trainloader):
            real_label = torch.FloatTensor(ref.size(0)).fill_(.9).to(device)
            fake_label = torch.FloatTensor(ref.size(0)).fill_(.1).to(device)
            ref = ref.to(device)
            y = y.to(device)

            if i % 3 in [0, 1]:
                ################
                # train D
                ################
                optimizerD.zero_grad()

                # with real labels
                outputTrue = netD(y)
                lossDT = F.binary_cross_entropy_with_logits(outputTrue, real_label)

                # with fake labels
                x_hat = netG(y).detach()
                filtreD, noiseD = noise_module.forward(filtre_size, device)
                y_hat = x_hat * filtreD + noiseD
                outputFalse = netD(y_hat)
                lossDF = F.binary_cross_entropy_with_logits(outputFalse, fake_label)

                (lossDF + lossDT).backward()
                optimizerD.step()

                dTrue.append(torch.sigmoid(outputTrue).data.mean())
                dFalse.append(torch.sigmoid(outputFalse).data.mean())
                bar_epoch.set_postfix({
                    "D(x)": np.array(dTrue).mean(),
                    "D(G(x))": np.array(dFalse).mean()
                })
            else:
                #############
                # train G
                #############
                optimizerG.zero_grad()

                # improve G with the discriminator
                filtreG, noiseG = noise_module.forward(filtre_size, device)
                x_hat = netG(y)
                y_hat = filtreG * x_hat + noiseG
                outputDbruit = netD(y_hat)
                lossBruit = F.binary_cross_entropy_with_logits(outputDbruit, real_label)

                # improve G with MSE
                x_tilde = netG(y_hat)
                y_tilde = filtreG * x_tilde.detach() + noiseG
                lossSupervise = F.mse_loss(y_hat, y_tilde)

                (alpha * lossSupervise + lossBruit).backward()
                optimizerG.step()

                mse_train.append(F.mse_loss(x_hat, ref).data)
                bar_data.set_postfix({"qual": np.array(mse_train).mean()})

            sauvegarde(file, "train",
                       np.array(dTrue).mean(),
                       np.array(dFalse).mean(),
                       np.array(mse_train).mean())

            if i % 250 == 1:
                cpt += 1
                netG.eval()
                bar_test = tqdm(range(len(valloader)))
                for j, (ref_eval, y_eval) in zip(bar_test, valloader):
                    if turn:
                        save_xb = y.to(device)
                        print_img(save_xb, 'image_de_base_bruit', file)
                        print_img(ref_eval, 'image_de_base_sans_bruit', file)
                        turn = False
                    y_eval = y_eval.to(device)
                    img_gen = netG(y_eval).detach().cpu()
                    mse_val.append(F.mse_loss(ref_eval, img_gen))
                sauvegarde(file, "eval", np.array(mse_val).mean())
                printG(save_xb, cpt, netG, file)
                netG.train()

            if i % 400 == 0 and i > 0:
                for g in optimizerD.param_groups:
                    g['lr'] *= 0.995
                for g in optimizerG.param_groups:
                    g['lr'] *= 0.995

        save_model(netG, netD, optimizerG, optimizerD, e, './log/net')
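# Added example: the discriminator update above uses one-sided label smoothing
# (real targets filled with 0.9, fake targets with 0.1) together with
# binary_cross_entropy_with_logits. Below is a minimal, self-contained sketch of
# that pattern; the logits and batch size are dummies, not taken from the code above.
import torch
import torch.nn.functional as F

logits_real = torch.randn(8)        # discriminator logits on real samples
logits_fake = torch.randn(8)        # discriminator logits on generated samples
real_label = torch.full((8,), 0.9)  # smoothed "real" target
fake_label = torch.full((8,), 0.1)  # smoothed "fake" target
loss_d = (F.binary_cross_entropy_with_logits(logits_real, real_label)
          + F.binary_cross_entropy_with_logits(logits_fake, fake_label))
print(loss_d.item())                # in train() this summed loss is what .backward() is called on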
def train(self):
    # print('a ' + str(self.config['domain']))
    # print('domainloss ' + str(self.config['domainloss']))
    # print('fadomainloss ' + str(self.config['fadomainloss']))
    # print('spatial ' + str(self.config['ori']))
    for key in self.config.keys():
        print(key + '\t' + str(self.config[key]))

    # self.featureExactor = torch.nn.DataParallel(self.featureExactor)
    # self.featureExactor1 = torch.nn.DataParallel(self.featureExactor1)
    # self.classfier = torch.nn.DataParallel(self.classfier)
    # self.domain[0] = torch.nn.DataParallel(self.domain[0])
    # self.domain[1] = torch.nn.DataParallel(self.domain[1])

    training_statistic = []
    testing_s_statistic = []
    testing_t_statistic = []
    max_acc = 0
    i = 0

    for e in tqdm(range(0, self.EPOCHS)):
        self._lambda = (e + 1) / self.EPOCHS
        # _lambda = 0.0
        res = self.train_perEpoch(e + 1)

        tqdm_result = '###EPOCH {}: Class: {:.6f}, domain: {:.6f}, cls-inv: {:.6f}, f_Loss: {:.6f}, c_Loss: {:.6f}, d_Loss: {:.6f}'.format(
            e + 1,
            sum(row['classification_loss'] / row['total_steps'] for row in res),
            # sum(row['coral_loss'] / row['total_steps'] for row in res),
            # sum(row['mmd_loss'] / row['total_steps'] for row in res),
            sum(row['domain_loss'] / row['total_steps'] for row in res),
            sum(row['class_loss'] / row['total_steps'] for row in res),
            sum(row['f_loss'] / row['total_steps'] for row in res),
            sum(row['c_loss'] / row['total_steps'] for row in res),
            sum(row['d_loss'] / row['total_steps'] for row in res),
        )
        tqdm.write(tqdm_result)
        training_statistic.append(res)

        test_source = self.test(self.source_loader_test, e)
        test_target = self.test(self.target_loader, e)
        test_target_test = self.test(self.target_loader_test, e)
        testing_s_statistic.append(test_source)
        testing_t_statistic.append(test_target)

        tqdm_result = '###Test Source: Epoch: {}, avg_loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)'.format(
            e + 1,
            test_source['average_loss'],
            test_source['correct'],
            test_source['total'],
            test_source['accuracy'],
        )
        tqdm.write(tqdm_result)

        tqdm_result = '###Test Target: Epoch: {}, avg_loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)'.format(
            e + 1,
            test_target['average_loss'],
            test_target['correct'],
            test_target['total'],
            test_target['accuracy'],
        )
        tqdm.write(tqdm_result)

        tqdm_result = '###Test Target test: Epoch: {}, avg_loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)'.format(
            e + 1,
            test_target_test['average_loss'],
            test_target_test['correct'],
            test_target_test['total'],
            test_target_test['accuracy'],
        )
        tqdm.write(tqdm_result)

        if test_target_test['accuracy'] > max_acc:
            max_acc = test_target_test['accuracy']
            i = e
            root = ('/unsullied/sharefs/wangyimu/data/results/fine-grained/semi-supervised/ourdataset/'
                    + self.config['source'] + '_' + self.config['target'])
            if not os.path.exists(root):
                os.makedirs(root)
            root = root + '/best/' + '{:.4f}'.format(max_acc) + '/'
            if not os.path.exists(root):
                os.makedirs(root)
            tqdm.write(utils.save_net(self.classfier, root + '/classifier_checkpoint.tar'))
            tqdm.write(utils.save_net(self.featureExactor, root + '/featureExactor_checkpoint.tar'))
            tqdm.write(utils.save_net(self.featureExactor1, root + '/featureExactor1_checkpoint.tar'))
            tqdm.write(utils.save_net(self.domain[0], root + '/domainDiscriminator0_checkpoint.tar'))
            tqdm.write(utils.save_net(self.domain[1], root + '/domainDiscriminator1_checkpoint.tar'))

        bestnow = '###Epoch: {}, Accuracy: {:.2f}'.format(i, max_acc)
        tqdm.write(bestnow)

    for key in self.config.keys():
        print(key + '\t' + str(self.config[key]))
    print(bestnow)

    tqdm.write(utils.save_net(self.classfier, root + '/classifier_final.tar'))
    tqdm.write(utils.save_net(self.featureExactor, root + '/featureExactor_final.tar'))
    tqdm.write(utils.save_net(self.featureExactor1, root + '/featureExactor1_final.tar'))
    tqdm.write(utils.save_net(self.domain[0], root + '/domainDiscriminator0_final.tar'))
    tqdm.write(utils.save_net(self.domain[1], root + '/domainDiscriminator1_final.tar'))

    print(utils.save(training_statistic, root + '/training_statistic.pkl'))
    print(utils.save(testing_s_statistic, root + '/testing_s_statistic.pkl'))
    print(utils.save(testing_t_statistic, root + '/testing_t_statistic.pkl'))
                     device=device)
    t_correct = test(model, data_loader.target_test_loader, epoch=epoch,
                     mode="testing", device=device)
    if t_correct > correct:
        correct = t_correct
    print('source: {} to target: {} max correct: {} max accuracy{: .2f}%\n'.format(
        settings.source_name, settings.target_name, correct,
        100. * correct / data_loader.len_target_dataset))

fig, ax = plt.subplots(1, 2, figsize=(16, 5))
ax[0].plot(x_train, y_train, 'g', label='train')
ax[0].plot(x_test, y_test, 'r', label='val')
ax[0].set_title('Loss')
ax[0].legend()
ax[1].plot(x_train, acc_train, 'g', label='train')
ax[1].plot(x_test, acc_test, 'r', label='val')
ax[1].set_title('Accuracy')
ax[1].legend()
fig.suptitle('ResNet50 w/ DeepCORAL', fontsize=18)
fig.savefig(
    f'result_plots/coral_loss_ep{settings.epochs}_opt{args.opt}_bs{settings.batch_size}_L2{settings.l2_decay}_lr{settings.lr}.png',
    dpi=90)
utils.save_net(model, 'checkpoint.tar')
def main():
    parser = argparse.ArgumentParser()
    # general & dataset & training settings
    parser.add_argument('--k_max', type=int, default=5,
                        help='Max reconstruction iterations')
    parser.add_argument('--save_figs', type=lambda x: bool(strtobool(x)),
                        default=True, help='save pics in reconstruction')
    parser.add_argument('--img_mode', type=str, default='SimpleCT',
                        help='image-modality reconstruction: SimpleCT')
    parser.add_argument('--train_size', type=int, default=4000,
                        help='dataset size')
    parser.add_argument('--pseudo_inverse_init', type=lambda x: bool(strtobool(x)),
                        default=True, help='initialise with pseudoinverse')
    parser.add_argument('--brain', type=lambda x: bool(strtobool(x)),
                        default=False, help='test set of brain images')
    parser.add_argument('--epochs', type=int, default=150,
                        help='number of epochs to train')
    parser.add_argument('--batch_size', type=int, default=128,
                        help='input batch size for training')
    parser.add_argument('--initial_lr', type=float, default=1e-3,
                        help='initial_lr')
    parser.add_argument('--val_batch_size', type=int, default=128,
                        help='input batch size for validation')
    # forward model settings
    parser.add_argument('--size', type=int, default=128, help='image size')
    parser.add_argument('--beam_num_angle', type=int, default=30,
                        help='number of angles / projections')
    # options
    parser.add_argument('--no_cuda', type=lambda x: bool(strtobool(x)),
                        default=False, help='disables CUDA training')
    parser.add_argument('--seed', type=int, default=222, help='random seed')
    args = parser.parse_args()

    layer_utils.set_gpu_mode(True)
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)
    use_cuda = not args.no_cuda and torch.cuda.is_available()
    device = torch.device('cuda' if use_cuda else 'cpu')

    if args.img_mode is not None:
        forward_model = ForwardModel()
        half_size = args.size / 2
        space = odl.uniform_discr([-half_size, -half_size],
                                  [half_size, half_size],
                                  [args.size, args.size], dtype='float32')
        forward_model.space = space
        geometry = odl.tomo.parallel_beam_geometry(space, num_angles=args.beam_num_angle)
        forward_model.geometry = geometry
        operator = odl.tomo.RayTransform(space, geometry)
        opnorm = odl.power_method_opnorm(operator)
        forward_model.operator = odl_torch.OperatorModule((1 / opnorm) * operator)
        forward_model.adjoint = odl_torch.OperatorModule(operator.adjoint)
        pseudoinverse = odl.tomo.fbp_op(operator)
        pseudoinverse = odl_torch.OperatorModule(pseudoinverse * opnorm)
        forward_model.pseudoinverse = pseudoinverse

        geometry_specs = 'full_view_sparse_' + str(args.beam_num_angle)
        dataset_name = 'dataset' + '_' + args.img_mode + '_' + str(args.size) \
            + '_' + str(args.train_size) + '_' + geometry_specs + '_' \
            + 'brain' + '_' + str(args.brain)

    if args.img_mode == SimpleCT.__name__:
        img_mode = SimpleCT(forward_model)
        data_constructor = DatasetConstructor(img_mode,
                                              train_size=args.train_size,
                                              brain=args.brain,
                                              dataset_name=dataset_name)
        data = data_constructor.data()
    else:
        raise NotImplementedError
    dataset = DataSet(data, img_mode, args.pseudo_inverse_init)

    optim_parms = {'epochs': args.epochs,
                   'initial_lr': args.initial_lr,
                   'batch_size': args.batch_size}

    from hybrid_model import HybridModel as NeuralLearner

    # results directory
    path = os.path.dirname(__file__)
    dir_path = os.path.join(path, 'results', args.img_mode, 'MFVI',
                            str(args.train_size), geometry_specs, str(args.seed))
    if not os.path.isdir(dir_path):
        os.makedirs(dir_path)

    # print the full config
    print('===========================\n', flush=True)
    for key, val in vars(args).items():
        print('{}: {}'.format(key, val), flush=True)
    print('===========================\n', flush=True)

    blocks_history = {'model': [], 'optimizer': []}
    arch_args = {'arch': {'up': [[1, 16, 3, 1, 1], [16, 32, 3, 1, 1]],
                          'low': [[1, 16, 3, 1, 1], [16, 32, 3, 1, 1]],
                          'cm': [[64, 32, 3, 1, 1], [32, 16, 3, 1, 1]]}}

    # saving training procedure outputs
    filename = 'train_phase'
    filepath = os.path.join(dir_path, filename)
    vis = TrainVisualiser(filepath)

    start_time = time.time()
    # loop through architecture blocks
    for idx in range(1, args.k_max + 1):
        print('============== training block number: {} ============= \n'.format(idx), flush=True)

        train_tensor = dataset.construct(flag='train')
        val_tensor = dataset.construct(flag='validation')
        train_loader = DataLoader(train_tensor, batch_size=args.batch_size, shuffle=True)
        val_loader = DataLoader(val_tensor, batch_size=args.val_batch_size, shuffle=True)

        model = NeuralLearner(arch_args)
        model = model.to(device)
        model_path = os.path.join(dir_path, str(idx) + '.pt')
        if os.path.exists(model_path):
            model_loaded = True
            model.load_state_dict(torch.load(model_path))
            print('idx: {} model loaded!\npath to model:\n{}'.format(idx, model_path), flush=True)
        else:
            model_loaded = False
            model.optimise(train_loader, **optim_parms)
            save_net(model, os.path.join(dir_path, str(idx) + '.pt'))
            print('idx: {} optimisation finished!'.format(idx), flush=True)

        start = time.time()
        info = next_step_update(dataset, train_tensor, model, device, flag='train')
        end = time.time()
        print('============= {} {:.4f} ============= \n'.format('training reconstruction', end - start), flush=True)
        for key in info.keys():
            print('{}: {} \n'.format(key, info[key]), flush=True)

        start = time.time()
        info = next_step_update(dataset, val_tensor, model, device, flag='validation')
        end = time.time()
        print('============= {} {:.4f} ============= \n'.format('validation reconstruction', end - start), flush=True)
        for key in info.keys():
            print('{}: {} \n'.format(key, info[key]), flush=True)

        vis.update(dataset, flag='validation')
        blocks_history['model'].append(model)

        # reconstruction
        resonstruction_dir_path = os.path.join(dir_path, str(idx))
        if model_loaded:
            resonstruction_dir_path = os.path.join(dir_path, str(idx), 're-loaded')
        if not os.path.isdir(resonstruction_dir_path):
            os.makedirs(resonstruction_dir_path)
        get_stats(dataset, blocks_history, device, resonstruction_dir_path)

    print('--- training time: %s seconds ---' % (time.time() - start_time), flush=True)
    vis.generate()
def main():
    global args, best_prec1

    best_prec1 = 1e6

    args = parser.parse_args()
    args.original_lr = 1e-7
    args.lr = 1e-7
    args.batch_size = 1
    args.momentum = 0.95
    args.decay = 5 * 1e-4
    args.start_epoch = 0
    args.epochs = 800
    args.steps = [-1, 1, 100, 150]
    args.scales = [1, 1, 1, 1]
    args.workers = 4
    args.seed = time.time()
    args.print_freq = 30
    args.train_json = './json/mypart_B_train.json'
    args.test_json = './json/mypart_B_test.json'
    args.gpu = '0'
    args.task = 'shanghaiB'
    args.pre = 'shanghaiBcheckpoint.pth.tar'

    with open(args.train_json, 'r') as outfile:
        train_list = json.load(outfile)
    with open(args.test_json, 'r') as outfile:
        val_list = json.load(outfile)

    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    torch.cuda.manual_seed(args.seed)

    model = CSRNet()
    model = model.cuda()

    criterion = nn.MSELoss(size_average=False).cuda()
    criterion1 = myloss().cuda()
    # criterion1 = nn.L1Loss().cuda()
    # optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), args.lr)
    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.decay)

    if args.pre:
        if os.path.isfile(args.pre):
            print("=> loading checkpoint '{}'".format(args.pre))
            checkpoint = torch.load(args.pre)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(args.pre, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.pre))

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)
        train(train_list, model, criterion, criterion1, optimizer, epoch)
        prec1 = validate(val_list, model, criterion)

        is_best = prec1 < best_prec1
        best_prec1 = min(prec1, best_prec1)
        print(' * best MAE {mae:.3f} '.format(mae=best_prec1))
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'arch': args.pre,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
                'optimizer': optimizer.state_dict(),
            }, is_best, args.task)
        save_net('best.h5', model)
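# Added sketch: adjust_learning_rate is not defined in this snippet. Given
# args.steps = [-1, 1, 100, 150] and args.scales = [1, 1, 1, 1], one common
# step-wise schedule consistent with those fields looks like the function below.
# This is an illustrative assumption, not necessarily the author's implementation.
def adjust_learning_rate(optimizer, epoch):
    # start from the original lr and apply the scale of every step already reached
    args.lr = args.original_lr
    for i in range(len(args.steps)):
        scale = args.scales[i] if i < len(args.scales) else 1
        if epoch >= args.steps[i]:
            args.lr = args.lr * scale
            if epoch == args.steps[i]:
                break
        else:
            break
    for param_group in optimizer.param_groups:
        param_group['lr'] = args.lr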
                .format(
                    e + 1,
                    test_target['average_loss'],
                    test_target['correct'],
                    test_target['total'],
                    test_target['accuracy'],
                ))
        logger.scalar_summary(
            "loss/coral",
            sum(row['coral_loss'] / row['total_steps'] for row in res), e + 1)
        logger.scalar_summary(
            "loss/fc7_coral",
            sum(row['fc7coral_loss'] / row['total_steps'] for row in res), e + 1)
        logger.scalar_summary(
            "loss/source",
            sum(row['classification_loss'] / row['total_steps'] for row in res),
            e + 1)
        logger.scalar_summary(
            "loss/deco_norm",
            sum(row['deco_norm'] / row['total_steps'] for row in res), e + 1)
        logger.scalar_summary("acc/target", test_target['accuracy'], e + 1)
        logger.scalar_summary("acc/source", test_source['accuracy'], e + 1)
        logger.scalar_summary("lr", scheduler.get_lr()[0], e + 1)
        logger.scalar_summary("weights/lambda", _lambda, e + 1)

    print("It took %g seconds" % (time.time() - start))
    utils.save(training_statistic, 'training_statistic.pkl')
    utils.save(testing_s_statistic, 'testing_s_statistic.pkl')
    utils.save(testing_t_statistic, 'testing_t_statistic.pkl')
    utils.save_net(model, 'models/checkpoint_%s.tar' % name)
def train(opt, netG, netD, optim_G, optim_D):
    tensor = torch.cuda.FloatTensor
    # lossD_list = []
    # lossG_list = []
    train = ReadConcat(opt)
    trainset = DataLoader(train, batch_size=opt.batchSize, shuffle=True)

    save_img_path = os.path.join('./result', 'train')
    check_folder(save_img_path)

    for e in range(opt.epoch, opt.niter + opt.niter_decay + 1):
        for i, data in enumerate(trainset):
            # set input
            data_A = data['A']  # blurred image
            data_B = data['B']  # sharp image
            # plt.imshow(image_recovery(data_A.squeeze().numpy()))
            # plt.pause(0)
            # print(data_A.shape)
            # print(data_B.shape)
            if torch.cuda.is_available():
                data_A = data_A.cuda(opt.gpu)
                data_B = data_B.cuda(opt.gpu)

            # forward
            realA = Variable(data_A)
            fakeB = netG(realA)
            realB = Variable(data_B)

            # optimize parameters
            # optimize netD
            set_requires_grad([netD], True)
            for iter_d in range(1):
                optim_D.zero_grad()
                loss_D, _ = get_loss(tensor, netD, realA, fakeB, realB)
                loss_D.backward()
                optim_D.step()

            # optimize netG
            set_requires_grad([netD], False)
            optim_G.zero_grad()
            _, loss_G = get_loss(tensor, netD, realA, fakeB, realB)
            loss_G.backward()
            optim_G.step()

            if i % 50 == 0:
                # lossD_list.append(loss_D)
                # lossG_list.append(loss_G)
                print('{}/{}: lossD:{}, lossG:{}'.format(i, e, loss_D, loss_G))
                visul_img = torch.cat((realA, fakeB, realA), 3)
                # print(type(visul_img), visul_img.size())
                visul_img = image_recovery(visul_img)
                # print(visul_img.size)
                save_image(visul_img, os.path.join(save_img_path, 'epoch' + str(e) + '.png'))

        if e > opt.niter:
            update_lr(optim_D, opt.lr, opt.niter_decay)
            lr = update_lr(optim_G, opt.lr, opt.niter_decay)
            opt.lr = lr

        if e % opt.save_epoch_freq == 0:
            save_net(netG, opt.checkpoints_dir, 'G', e)
            save_net(netD, opt.checkpoints_dir, 'D', e)
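# Added sketch: update_lr is not defined in this snippet. A hedged guess at a
# decay step consistent with how it is called above (reduce the learning rate,
# write it into the optimizer, and return the new value so opt.lr can be updated);
# treat this as an assumption, not the original implementation.
def update_lr(optimizer, current_lr, niter_decay):
    new_lr = current_lr - current_lr / niter_decay  # one decay step toward zero
    for param_group in optimizer.param_groups:
        param_group['lr'] = new_lr
    return new_lr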
def run(path_to_net,
        label_dir,
        nii_dir,
        plotter,
        batch_size=32,
        test_split=0.3,
        random_state=666,
        epochs=8,
        learning_rate=0.0001,
        momentum=0.9,
        num_folds=5):
    """ Applies training and validation on the network """
    print('Setting started', flush=True)
    nii_filenames = np.asarray(glob.glob(nii_dir + '/*.npy'))
    print('Number of files: ', len(nii_filenames), flush=True)

    # Creating data indices
    dataset_size = len(nii_filenames)
    indices = list(range(dataset_size))
    test_indices, trainset_indices = utils.get_test_indices(indices, test_split)

    # k-fold index generator for cross-validation
    for cv_num, (train_idx, val_idx) in enumerate(
            utils.get_train_cv_indices(trainset_indices, num_folds, random_state)):
        # take from trainset_indices the k-fold generated ones
        train_indices = np.asarray(trainset_indices)[np.asarray(train_idx)]
        val_indices = np.asarray(trainset_indices)[np.asarray(val_idx)]
        print('cv cycle number: ', cv_num, flush=True)

        net = Net()
        device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
        num_GPU = torch.cuda.device_count()
        print('Device: ', device, flush=True)
        if num_GPU > 1:
            print('Let us use', num_GPU, 'GPUs!', flush=True)
            net = nn.DataParallel(net)
        net.to(device)

        # weigh the loss with the size of the classes
        # class 0: 3268
        # class 1: 60248
        weight = torch.tensor([1. / 3268., 1. / 60248.]).to(device)
        criterion = nn.CrossEntropyLoss(weight=weight)
        optimizer = optim.Adam(net.parameters(), lr=learning_rate)
        scheduler = ReduceLROnPlateau(optimizer, threshold=1e-6, patience=0, verbose=True)

        fMRI_dataset_train = dataset.fMRIDataset(label_dir, nii_dir, train_indices,
                                                 transform=dataset.ToTensor())
        fMRI_dataset_val = dataset.fMRIDataset(label_dir, nii_dir, val_indices,
                                               transform=dataset.ToTensor())
        datalengths = {
            'train': len(fMRI_dataset_train),
            'val': len(fMRI_dataset_val)
        }
        dataloaders = {
            'train': utils.get_dataloader(fMRI_dataset_train, batch_size, num_GPU),
            'val': utils.get_dataloader(fMRI_dataset_val, batch_size, num_GPU)
        }
        print('Train set length {}, Val set length {}: '.format(
            datalengths['train'], datalengths['val']))

        # Setup metrics
        running_metrics_val = metrics.BinaryClassificationMeter()
        running_metrics_train = metrics.BinaryClassificationMeter()
        val_loss_meter = metrics.averageLossMeter()
        train_loss_meter = metrics.averageLossMeter()
        # Track iteration number over epochs for the plotter
        itr = 0
        # Track lowest loss over epochs for saving the network
        lowest_loss = 100000

        for epoch in tqdm(range(epochs), desc='Epochs'):
            print('Epoch: ', epoch + 1, flush=True)
            print('Phase: train', flush=True)
            phase = 'train'
            # Set model to training mode
            net.train(True)
            # Iterate over data.
            for i, data in tqdm(enumerate(dataloaders[phase]), desc='Dataiteration_train'):
                train_pred, train_labels, train_loss = train(data, optimizer, net, criterion, device)
                running_metrics_train.update(train_pred, train_labels)
                train_loss_meter.update(train_loss, n=1)
                if (i + 1) % 10 == 0:
                    print('Number of Iteration [{}/{}]'.format(
                        i + 1, int(datalengths[phase] / batch_size)), flush=True)
                    itr += 1
                    score = running_metrics_train.get_scores()
                    for k, v in score.items():
                        plotter.plot(k, 'itr', phase, k, itr, v)
                        print(k, v, flush=True)
                    print('Loss Train', train_loss_meter.avg, flush=True)
                    plotter.plot('Loss', 'itr', phase, 'Loss Train', itr, train_loss_meter.avg)
                    utils.save_scores(running_metrics_train.get_history(), phase, cv_num)
                    utils.save_loss(train_loss_meter.get_history(), phase, cv_num)

            print('Phase: val', flush=True)
            phase = 'val'
            # Set model to validation mode
            net.train(False)
            with torch.no_grad():
                for i, data in tqdm(enumerate(dataloaders[phase]), desc='Dataiteration_val'):
                    val_pred, val_labels, val_loss = val(data, net, criterion, device)
                    running_metrics_val.update(val_pred, val_labels)
                    val_loss_meter.update(val_loss, n=1)
                    if (i + 1) % 10 == 0:
                        print('Number of Iteration [{}/{}]'.format(
                            i + 1, int(datalengths[phase] / batch_size)), flush=True)
                        utils.save_scores(running_metrics_val.get_history(), phase, cv_num)
                        utils.save_loss(val_loss_meter.get_history(), phase, cv_num)
                        if val_loss_meter.avg < lowest_loss:
                            lowest_loss = val_loss_meter.avg
                            utils.save_net(path_to_net, batch_size, epoch, cv_num,
                                           train_indices, val_indices, test_indices,
                                           net, optimizer, criterion, iter_num=i)

            # Plot validation metrics and loss at the end of the val phase
            score = running_metrics_val.get_scores()
            for k, v in score.items():
                plotter.plot(k, 'itr', phase, k, itr, v)
                print(k, v, flush=True)
            print('Loss Val', val_loss_meter.avg, flush=True)
            plotter.plot('Loss', 'itr', phase, 'Loss Val', itr, val_loss_meter.avg)
            print('Epoch [{}/{}], Train_loss: {:.4f}, Train_bacc: {:.2f}'.format(
                epoch + 1, epochs, train_loss_meter.avg, running_metrics_train.bacc),
                flush=True)
            print('Epoch [{}/{}], Val_loss: {:.4f}, Val_bacc: {:.2f}'.format(
                epoch + 1, epochs, val_loss_meter.avg, running_metrics_val.bacc),
                flush=True)
            # Call the learning rate adjustment function after every epoch
            scheduler.step(train_loss_meter.avg)

        # Save net after every cross-validation cycle
        utils.save_net(path_to_net, batch_size, epochs, cv_num, train_indices,
                       val_indices, test_indices, net, optimizer, criterion)
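# Added example: the loss weighting above uses inverse class frequencies
# (class 0: 3268 samples, class 1: 60248 samples) so the rare class contributes
# more to the loss. A minimal, self-contained sketch with dummy logits:
import torch
import torch.nn as nn

counts = torch.tensor([3268., 60248.])
weight = 1. / counts                          # rarer class gets the larger weight
criterion = nn.CrossEntropyLoss(weight=weight)
logits = torch.randn(4, 2)                    # batch of 4 samples, 2 classes
labels = torch.tensor([0, 1, 1, 1])
print(criterion(logits, labels).item())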