def val(model, val_loader, criterion, epoch, args, log_writer=False):
    global best_val_acc
    model.eval()
    val_loss = lib.Metric('val_loss')
    val_accuracy = lib.Metric('val_accuracy')

    if epoch == -1:
        epoch = args.epochs - 1

    with tqdm(total=len(val_loader),
              desc='Validate Epoch #{}'.format(epoch + 1)) as t:
        with torch.no_grad():
            for data, target in val_loader:
                if args.cuda:
                    data, target = data.cuda(), target.cuda()

                output = model(data)
                val_loss.update(criterion(output, target))
                val_accuracy.update(accuracy(output, target))
                t.update(1)

    print("\nloss: {}, accuracy: {:.2f}, best acc: {:.2f}\n".format(
        val_loss.avg.item(),
        100. * val_accuracy.avg.item(),
        100. * max(best_val_acc, val_accuracy.avg)))

    if val_accuracy.avg > best_val_acc and log_writer:
        save_model(model, None, -1, args)

    if log_writer:
        log_writer.add_scalar('val/loss', val_loss.avg, epoch)
        log_writer.add_scalar('val/accuracy', val_accuracy.avg, epoch)
        best_val_acc = max(best_val_acc, val_accuracy.avg)
        log_writer.add_scalar('val/best_acc', best_val_acc, epoch)
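# The helpers `lib.Metric` and `accuracy` used by val() are defined elsewhere in
# the project. A minimal sketch of what val() assumes they look like (a running
# average kept as a tensor, so .avg.item() works, and top-1 accuracy) -- an
# assumption for illustration, not the project's actual code:
import torch

class Metric:
    """Running-average metric; sketch of the assumed lib.Metric behavior."""
    def __init__(self, name):
        self.name = name
        self.sum = torch.tensor(0.)
        self.n = torch.tensor(0.)

    def update(self, val):
        # Detach so the metric does not keep the autograd graph alive.
        self.sum += val.detach().cpu()
        self.n += 1

    @property
    def avg(self):
        return self.sum / self.n

def accuracy(output, target):
    # Top-1 accuracy returned as a tensor, matching val()'s .item() calls.
    pred = output.argmax(dim=1, keepdim=True)
    return pred.eq(target.view_as(pred)).float().mean()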
def train_net(args):
    print("Init...")
    log_writer = tensorboardX.SummaryWriter(args.log_dir)

    # Build the loaders here; train_loader and val_loader are used below.
    train_loader, _, val_loader, _ = lib.build_dataloader(args)
    model = lib.build_model(args)
    # Sanity-check the forward pass and output shape.
    print(model.forward(torch.ones(1, 3, 224, 224)).shape)
    # print('Parameters:', sum([np.prod(p.size()) for p in model.parameters()]))
    model = torch.nn.DataParallel(model)
    optimizer = lib.build_optimizer(args, model)

    epoch = 0
    if args.resume:
        epoch = resume_model(model, optimizer, args)

    args.cuda = not args.no_cuda and torch.cuda.is_available()
    cudnn.benchmark = True

    if args.label_smoothing:
        criterion = cross_entropy_with_label_smoothing
    else:
        # criterion = nn.CrossEntropyLoss()
        criterion = nn.MSELoss()

    if args.cuda:
        model.cuda()

    print("Start training...")
    while epoch < args.epochs:
        train(model, train_loader, optimizer, criterion, epoch, log_writer, args)

        if (epoch + 1) % args.test_epochs == 0:
            val(model, val_loader, criterion, epoch, args, log_writer)

        if (epoch + 1) % args.save_epochs == 0:
            save_model(model, optimizer, epoch, args)

        epoch += 1

    save_model(model, optimizer, epoch - 1, args)
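# cross_entropy_with_label_smoothing is defined elsewhere in the project. A
# minimal sketch of one common formulation (uniform smoothing, with a
# hypothetical default eps of 0.1) consistent with how train_net() calls it:
import torch
import torch.nn.functional as F

def cross_entropy_with_label_smoothing(output, target, eps=0.1):
    # Mix the one-hot target with a uniform distribution over classes, then
    # take the expected negative log-likelihood under the smoothed target.
    log_probs = F.log_softmax(output, dim=1)
    nll = -log_probs.gather(1, target.unsqueeze(1)).squeeze(1)
    smooth = -log_probs.mean(dim=1)  # CE against the uniform distribution
    return ((1. - eps) * nll + eps * smooth).mean()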
def main():
    args = get_arguments()
    utils.make_dirs(args.save)
    train_f, val_f = utils.create_stats_files(args.save)
    name_model = args.model + "_" + args.dataset_name + "_" + utils.datestr()
    writer = SummaryWriter(log_dir='../runs/' + name_model, comment=name_model)

    best_prec1 = 100.
    start_epoch = 1
    training_generator, val_generator, full_volume, affine = medical_loaders.generate_datasets(
        args, path='.././datasets')
    model, optimizer = medzoo.create_model(args)
    # We want to train for labels 0 to 8 (9 classes).
    criterion = DiceLoss(classes=11, skip_index_after=args.classes)

    if args.cuda:
        torch.cuda.manual_seed(seed)
        model = model.cuda()
        print("Model transferred to GPU...")

    print("START TRAINING...")
    for epoch in range(start_epoch, args.nEpochs + 1):
        train_stats = train.train_dice(args, epoch, model, training_generator,
                                       optimizer, criterion, train_f, writer)
        val_stats = train.test_dice(args, epoch, model, val_generator,
                                    criterion, val_f, writer)
        utils.write_train_val_score(writer, epoch, train_stats, val_stats)

        # TODO - check memory issues
        # if epoch % 5 == 0:
        #     utils.visualize_no_overlap(args, full_volume, affine, model, epoch, DIM, writer)

        utils.save_model(model, args, val_stats[0], epoch, best_prec1)

    train_f.close()
    val_f.close()
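# DiceLoss comes from the surrounding project. A minimal sketch of a soft
# multi-class Dice loss exposing the same two knobs, under the assumption that
# skip_index_after excludes trailing label channels from the average -- an
# illustrative sketch, not the project's implementation:
import torch
import torch.nn as nn
import torch.nn.functional as F

class DiceLoss(nn.Module):
    """Soft multi-class Dice loss sketch; channels >= skip_index_after are
    assumed to be unused labels and are dropped from the mean."""
    def __init__(self, classes, skip_index_after=None, eps=1e-6):
        super().__init__()
        self.classes = classes
        self.keep = skip_index_after if skip_index_after is not None else classes
        self.eps = eps

    def forward(self, output, target):
        # output: (N, C, ...) logits; target: (N, ...) integer labels.
        probs = F.softmax(output, dim=1)
        one_hot = F.one_hot(target.long(), self.classes).movedim(-1, 1).float()
        dims = tuple(range(2, probs.dim()))  # reduce over spatial dims
        inter = (probs * one_hot).sum(dims)
        union = probs.sum(dims) + one_hot.sum(dims)
        dice = (2 * inter + self.eps) / (union + self.eps)  # (N, C)
        return 1. - dice[:, :self.keep].mean()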
def main():
    args = get_arguments()
    utils.make_dirs(args.save)
    train_f, val_f = utils.create_stats_files(args.save)
    name_model = args.model + "_" + args.dataset_name + "_" + utils.datestr()
    writer = SummaryWriter(log_dir='../runs/' + name_model, comment=name_model)

    best_pred = 1.01
    samples_train = 200
    samples_val = 200
    training_generator, val_generator, full_volume, affine = medical_loaders.generate_datasets(
        args, path='.././datasets', samples_train=samples_train, samples_val=samples_val)
    model, optimizer = medzoo.create_model(args)
    criterion = medzoo.DiceLoss2D(args.classes)

    if args.cuda:
        torch.cuda.manual_seed(seed)
        model = model.cuda()

    for epoch in range(1, args.nEpochs + 1):
        train_stats = train.train_dice(args, epoch, model, training_generator,
                                       optimizer, criterion, train_f, writer)
        val_stats = train.test_dice(args, epoch, model, val_generator,
                                    criterion, val_f, writer)
        utils.write_train_val_score(writer, epoch, train_stats, val_stats)
        best_pred = utils.save_model(model=model, args=args, dice_loss=val_stats[0],
                                     epoch=epoch, best_pred_loss=best_pred)

    train_f.close()
    val_f.close()
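# In this variant utils.save_model returns the updated best loss (the call
# above reassigns best_pred). A minimal sketch of what that contract implies,
# with hypothetical checkpoint file names:
import os
import torch

def save_model(model, args, dice_loss, epoch, best_pred_loss):
    # Sketch: checkpoint every epoch, keep a separate copy of the best by
    # validation Dice loss, and hand the (possibly updated) best loss back.
    state = {'epoch': epoch,
             'state_dict': model.state_dict(),
             'val_dice_loss': dice_loss}
    torch.save(state, os.path.join(args.save, 'last_checkpoint.pth'))
    if dice_loss < best_pred_loss:
        best_pred_loss = dice_loss
        torch.save(state, os.path.join(args.save, 'best_checkpoint.pth'))
    return best_pred_loss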
lr_scheduler = np.logspace(math.log10(config.SIAMFC.TRAIN.LR),
                           math.log10(config.SIAMFC.TRAIN.LR_END),
                           config.SIAMFC.TRAIN.END_EPOCH)
gpu_num = torch.cuda.device_count()
model = torch.nn.DataParallel(model, device_ids=range(gpu_num)).cuda()
print('Model is using {} GPU(s)'.format(gpu_num))

for epoch in range(config.SIAMFC.TRAIN.START_EPOCH, config.SIAMFC.TRAIN.END_EPOCH):
    train_set = SiamFCDataset(config)
    train_loader = DataLoader(train_set,
                              batch_size=config.SIAMFC.TRAIN.BATCH * gpu_num,
                              num_workers=config.WORKERS,
                              pin_memory=True,
                              sampler=None)

    cur_lr = lr_scheduler[epoch]
    for param_group in optimizer.param_groups:
        param_group['lr'] = cur_lr

    model = siamfc_train(train_loader, model=model, optimizer=optimizer,
                         epoch=epoch + 1, cur_lr=cur_lr, cfg=config)

    if epoch >= 4:
        save_model(model, epoch, optimizer, config.SIAMFC.TRAIN.MODEL, config)
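# np.logspace over the log10 endpoints yields a geometric (exponential-decay)
# learning-rate schedule: the ratio between consecutive epochs is constant.
# A standalone demonstration with hypothetical endpoints (LR=1e-2 decaying to
# LR_END=1e-5 over 50 epochs):
import math
import numpy as np

lrs = np.logspace(math.log10(1e-2), math.log10(1e-5), 50)
print(lrs[0], lrs[-1])   # 0.01 ... 1e-05: endpoints are hit exactly
print(lrs[1] / lrs[0])   # constant per-epoch decay factor, ~0.87 here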
    num_workers=8,
    pin_memory=True
)

print('Starting training...')
best = 1e10
for epoch in range(start_epoch + 1, opt.num_epochs + 1):
    log_dict_train, _ = trainer.train(epoch, train_loader)
    logger.write('epoch: {} |'.format(epoch))
    for k, v in log_dict_train.items():
        logger.scalar_summary('train_{}'.format(k), v, epoch)
        logger.write('{} {:8f} | '.format(k, v))

    with torch.no_grad():
        log_dict_val, preds = trainer.val(epoch, val_loader)
    for k, v in log_dict_val.items():
        logger.scalar_summary('val_{}'.format(k), v, epoch)
        logger.write('{} {:8f} | '.format(k, v))

    if log_dict_val['loss'] < best:
        best = log_dict_val['loss']
        save_model(os.path.join(opt.save_dir, 'model_best.pth'), epoch, model)
    save_model(os.path.join(opt.save_dir, 'model_last.pth'), epoch, model, optimizer)
    logger.write('\n')

    if epoch in opt.lr_step:
        save_model(os.path.join(opt.save_dir, 'model_{}.pth'.format(epoch)),
                   epoch, model, optimizer)
        lr = opt.lr * (0.1 ** (opt.lr_step.index(epoch) + 1))
        print('Drop LR to', lr)
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
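# save_model is called above both with and without the optimizer. A minimal
# sketch consistent with those call sites; unwrapping DataParallel before
# saving is an assumption, not confirmed by the snippet:
import torch

def save_model(path, epoch, model, optimizer=None):
    # Unwrap DataParallel if present so the checkpoint loads onto a bare model.
    state_dict = model.module.state_dict() if hasattr(model, 'module') \
        else model.state_dict()
    data = {'epoch': epoch, 'state_dict': state_dict}
    if optimizer is not None:
        data['optimizer'] = optimizer.state_dict()
    torch.save(data, path)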
# Training loop.
min_loss = 10000000
for epoch in range(EPOCHs):
    print('{} / {}'.format(epoch, EPOCHs))

    stats.training.reset()
    training.train(model, stats, epoch=epoch)
    stats.training.update()

    stats.testing.reset()
    testing.test(model, stats, epoch=epoch)
    stats.testing.update()

    print(stats.testing.minloss)
    if min_loss > stats.testing.loss():
        print('Saving model. Model improved: current loss',
              stats.testing.loss(), ', min loss', min_loss)
        print('\n\n\n\n')
        save_model(MODEL_PTH, model)
        min_loss = stats.testing.loss()
    else:
        print("Model didn't improve: current loss",
              stats.testing.loss(), ', min loss', min_loss)
        print('\n\n\n\n')

# Plot the results.
plt.figure(1)
plt.semilogy(stats.training.lossLog, label='Training')
# plt.semilogy(stats.testing.lossLog, label='Testing')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
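# save_model(MODEL_PTH, model) above is project-specific. A minimal sketch
# under the assumption that it simply persists the weights of the
# best-so-far model:
import torch

def save_model(path, model):
    # Weights-only checkpoint; reload with model.load_state_dict(torch.load(path)).
    torch.save(model.state_dict(), path)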
        # sample = sample.to(device)
        # target = target.to(device)
        #
        # output = net.forward(sample)
        #
        # stats.testing.correctSamples += torch.sum(
        #     snn.predict.getClass(output) == label).data.item()
        # stats.testing.numSamples += len(label)
        #
        # loss = error.numSpikes(output, target)
        # stats.testing.lossSum += loss.cpu().data.item()
        # stats.print(epoch, i)

    # Update stats.
    stats.update()

    # Periodic checkpoint during training.
    if epoch % 100 == 0:
        save_model('autoencoder', net)

# Final checkpoint once training completes.
save_model('autoencoder', net)

# Plot the results.
plt.figure(1)
plt.semilogy(stats.training.lossLog, label='Training')
# plt.semilogy(stats.testing.lossLog, label='Testing')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

plt.figure(2)
net = net.eval()
x, y, c = trainingSet[0]
x = x.to(device)