def inference(args, model, test_save_path=None):
    from datasets.dataset_HuBMAP import HuBMAP_dataset, Generator

    db_test = HuBMAP_dataset(base_dir=args.root_path, split="test", list_dir=args.list_dir,
                             transform=transforms.Compose(
                                 [Generator(output_size=[args.img_size, args.img_size])]))
    testloader = DataLoader(db_test, batch_size=args.batch_size, shuffle=False, num_workers=1)
    logging.info("{} test iterations per epoch".format(len(testloader)))
    model.eval()
    metric_list = 0.0

    ### Add validation here
    total_test_loss = 0
    total_test_dice_loss = 0
    batch_num = 0
    label_batch_sum = 0
    ce_loss = CrossEntropyLoss()
    num_classes = args.num_classes
    dice_loss = DiceLoss(num_classes)

    for i_batch, sampled_batch in enumerate(testloader):
        print(" testing progress: {:.2f}".format(batch_num / len(testloader) * 100) + "%", end="\r")
        image_batch, label_batch = sampled_batch['image'], sampled_batch['label']
        image_batch, label_batch = image_batch.cuda(), label_batch.cuda()

        # keep the batch with the largest foreground area for later visualization
        a = np.sum(label_batch.detach().cpu().numpy())
        outputs = model(image_batch)
        if a > label_batch_sum:
            label_batch_sum = a
            np.save('test_pred.npy', outputs.detach().cpu().numpy())
            np.save('test_img.npy', image_batch.detach().cpu().numpy())
            np.save('test_label.npy', label_batch.detach().cpu().numpy())

        loss_ce = ce_loss(outputs, label_batch.long())
        loss_dice = dice_loss(outputs, label_batch, softmax=True)
        loss = 0.5 * loss_ce + 0.5 * loss_dice

        total_test_loss += loss.item()
        total_test_dice_loss += loss_dice.item()
        batch_num = batch_num + 1

    avg_test_loss = total_test_loss / batch_num
    avg_test_loss_dice = total_test_dice_loss / batch_num

    writer = SummaryWriter(test_save_path + '/log')
    writer.add_scalar('info/avg_test_loss', avg_test_loss)
    writer.add_scalar('info/avg_test_loss_dice', avg_test_loss_dice)
    logging.info('test_loss : %f, test_loss_dice: %f' % (avg_test_loss, avg_test_loss_dice))
    return "Testing Finished!"
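# `DiceLoss(num_classes)` above is imported from a utils module that is not part of this
# excerpt. The class below is a minimal sketch of a multi-class Dice loss matching the
# call `dice_loss(outputs, label_batch, softmax=True)`; its name and internals are
# assumptions for illustration, not the project's actual implementation.
import torch
import torch.nn as nn


class DiceLossSketch(nn.Module):
    def __init__(self, n_classes):
        super().__init__()
        self.n_classes = n_classes

    def _one_hot(self, target):
        # target: (N, H, W) integer labels -> (N, C, H, W) one-hot float tensor
        return torch.nn.functional.one_hot(target.long(), self.n_classes).permute(0, 3, 1, 2).float()

    def _dice_per_class(self, score, target, smooth=1e-5):
        intersect = torch.sum(score * target)
        union = torch.sum(score * score) + torch.sum(target * target)
        return 1.0 - (2.0 * intersect + smooth) / (union + smooth)

    def forward(self, inputs, target, softmax=False):
        if softmax:
            inputs = torch.softmax(inputs, dim=1)
        target = self._one_hot(target)
        loss = 0.0
        for c in range(self.n_classes):
            loss += self._dice_per_class(inputs[:, c], target[:, c])
        return loss / self.n_classes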
def __init__(self, n_channels, n_classes):
    super(Unet, self).__init__()
    self.criterion = DiceLoss()
    self.inc = InConv(n_channels, 64)
    self.down1 = Down(64, 128)
    self.down2 = Down(128, 256)
    self.down3 = Down(256, 512)
    self.down4 = Down(512, 512)
    self.up1 = Up(1024, 256)
    self.up2 = Up(512, 128)
    self.up3 = Up(256, 64)
    self.up4 = Up(128, 64)
    self.outc = OutConv(64, n_classes)
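# The forward pass for this Unet is not shown in the excerpt. A typical U-Net forward
# with skip connections that matches the channel sizes declared in __init__ would look
# like the sketch below (an assumption for illustration, not the project's code):
def forward_sketch(self, x):
    x1 = self.inc(x)       # 64 channels
    x2 = self.down1(x1)    # 128
    x3 = self.down2(x2)    # 256
    x4 = self.down3(x3)    # 512
    x5 = self.down4(x4)    # 512 (bottleneck)
    x = self.up1(x5, x4)   # concat 512 + 512 -> 1024 in, 256 out
    x = self.up2(x, x3)    # 256 + 256 -> 512 in, 128 out
    x = self.up3(x, x2)    # 128 + 128 -> 256 in, 64 out
    x = self.up4(x, x1)    # 64 + 64 -> 128 in, 64 out
    return self.outc(x)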
def get_compiled(loss={"clf": 'categorical_crossentropy', "seg": DiceLoss()},
                 optimizer='adam',
                 metrics={'clf': ['categorical_accuracy', precision, recall],
                          'seg': [precision, recall]},
                 loss_weights={"clf": 1., "seg": .2},
                 weights=None,
                 **kwargs):
    model = get_model(weights=weights, **kwargs)
    model.compile(loss=loss, optimizer=optimizer, metrics=metrics, loss_weights=loss_weights)
    return model
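# `precision` and `recall` above are custom metric callables defined elsewhere in the
# project. A minimal backend-based sketch is given below (the project's actual
# implementation and Keras import path may differ):
from tensorflow.keras import backend as K  # or `from keras import backend as K`


def precision_sketch(y_true, y_pred):
    # fraction of predicted positives that are correct
    true_pos = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    pred_pos = K.sum(K.round(K.clip(y_pred, 0, 1)))
    return true_pos / (pred_pos + K.epsilon())


def recall_sketch(y_true, y_pred):
    # fraction of actual positives that are recovered
    true_pos = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    actual_pos = K.sum(K.round(K.clip(y_true, 0, 1)))
    return true_pos / (actual_pos + K.epsilon())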
def main(args): #################### init logger ################################### log_dir = './eval' + '/{}'.format(args.dataset) + '/{}'.format(args.model) logger = get_logger(log_dir) print('RUNDIR: {}'.format(log_dir)) logger.info('{}-Eval'.format(args.model)) # setting args.save_path = log_dir args.save_images = os.path.join(args.save_path, "images") if not os.path.exists(args.save_images): os.mkdir(args.save_images) ##################### init device ################################# if args.manualSeed is None: args.manualSeed = random.randint(1, 10000) np.random.seed(args.manualSeed) torch.manual_seed(args.manualSeed) args.use_cuda = args.gpus > 0 and torch.cuda.is_available() args.device = torch.device('cuda' if args.use_cuda else 'cpu') if args.use_cuda: torch.cuda.manual_seed(args.manualSeed) cudnn.benchmark = True ####################### init dataset ########################################### val_loader = get_dataloder(args, split_flag="valid") ######################## init model ############################################ if args.model == "layer7_double_deep_ep1600_8lr4e-3": args.deepsupervision = True args.double_down_channel = True args.genotype_name = 'layer7_double_deep' genotype = eval('genotypes.%s' % args.genotype_name) model = BuildNasUnetPrune(genotype=genotype, input_c=args.in_channels, c=args.init_channels, num_classes=args.nclass, meta_node_num=args.middle_nodes, layers=9, dp=args.dropout_prob, use_sharing=args.use_sharing, double_down_channel=args.double_down_channel, aux=args.aux) args.model_path = './logs/cvc/layer7_double_deep_ep1600_8lr4e-3/model_best.pth.tar' model.load_state_dict( torch.load(args.model_path, map_location='cpu')['state_dict']) elif args.model == "alpha0_double_deep": args.deepsupervision = True args.double_down_channel = True args.genotype_name = 'alpha0_stage1_double_deep_ep200' genotype = eval('genotypes.%s' % args.genotype_name) model = BuildNasUnetPrune(genotype=genotype, input_c=args.in_channels, c=args.init_channels, num_classes=args.nclass, meta_node_num=args.middle_nodes, layers=args.layers, dp=args.dropout_prob, use_sharing=args.use_sharing, double_down_channel=args.double_down_channel, aux=args.aux) args.model_path = './logs/cvc/alpha0_8lr4e-3/model_best.pth.tar' state_dict = torch.load(args.model_path, map_location='cpu')['state_dict'] state_dict = remove_module(state_dict) model.load_state_dict(state_dict) elif args.model == "alpha0_5_double_deep": args.deepsupervision = True args.double_down_channel = True args.genotype_name = 'alpha0_5_stage1_double_deep_ep80' genotype = eval('genotypes.%s' % args.genotype_name) model = BuildNasUnetPrune(genotype=genotype, input_c=args.in_channels, c=args.init_channels, num_classes=args.nclass, meta_node_num=args.middle_nodes, layers=args.layers, dp=args.dropout_prob, use_sharing=args.use_sharing, double_down_channel=args.double_down_channel, aux=args.aux) args.model_path = './logs/cvc/alpha0_5_8lr4e-3/model_best.pth.tar' state_dict = torch.load(args.model_path, map_location='cpu')['state_dict'] state_dict = remove_module(state_dict) model.load_state_dict(state_dict) elif args.model == "alpha1_double_deep": args.deepsupervision = True args.double_down_channel = True args.genotype_name = 'alpha0_5_stage1_double_deep_ep80' genotype = eval('genotypes.%s' % args.genotype_name) model = BuildNasUnetPrune(genotype=genotype, input_c=args.in_channels, c=args.init_channels, num_classes=args.nclass, meta_node_num=args.middle_nodes, layers=args.layers, dp=args.dropout_prob, use_sharing=args.use_sharing, 
double_down_channel=args.double_down_channel, aux=args.aux) args.model_path = './logs/cvc/alpha1_8lr4e-3/model_best.pth.tar' state_dict = torch.load(args.model_path, map_location='cpu')['state_dict'] state_dict = remove_module(state_dict) model.load_state_dict(state_dict) else: raise NotImplementedError() setting = {k: v for k, v in args._get_kwargs()} logger.info(setting) logger.info(genotype) logger.info('param size = %fMB', calc_parameters_count(model)) # init loss if args.loss == 'bce': criterion = nn.BCELoss() elif args.loss == 'bcelog': criterion = nn.BCEWithLogitsLoss() elif args.loss == "dice": criterion = DiceLoss() elif args.loss == "softdice": criterion = SoftDiceLoss() elif args.loss == 'bcedice': criterion = BCEDiceLoss() else: criterion = nn.CrossEntropyLoss() if args.use_cuda: logger.info("load model and criterion to gpu !") model = model.to(args.device) criterion = criterion.to(args.device) infer(args, model, criterion, val_loader, logger, args.save_images)
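# `remove_module(state_dict)` above strips the 'module.' prefix that nn.DataParallel
# prepends to parameter names when a checkpoint is saved from a wrapped model.
# The helper is not defined in this excerpt; the sketch below is an assumption about
# its behavior, not the project's actual code.
from collections import OrderedDict


def remove_module_sketch(state_dict):
    new_state_dict = OrderedDict()
    for key, value in state_dict.items():
        # 'module.down1.conv.weight' -> 'down1.conv.weight'
        new_key = key[len('module.'):] if key.startswith('module.') else key
        new_state_dict[new_key] = value
    return new_state_dict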
import math, time
import json, os, logging

import torch
from torchmeta.utils.data import BatchMetaDataLoader

from maml import ModelAgnosticMetaLearning
from data import get_datasets
from models import Unet, ResUnet, FCN8
from utils import FocalLoss, BCEDiceFocalLoss, plot_errors, plot_accuracy, plot_iou, DiceLoss

download_data = True    # Download data to local file (won't download if already there)
bce_dice_focal = False  # If True, adjusts y_lim in error plot
augment = True          # Use data augmentation

#loss_function = torch.nn.BCEWithLogitsLoss()
loss_function = DiceLoss()
"""not working:"""
#loss_function = torch.nn.CrossEntropyLoss()
#loss_function = FocalLoss()
#loss_function = BCEDiceFocalLoss()
#bce_dice_focal = True


def main(args):
    logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)
    device = torch.device(
        'cuda' if args.use_cuda and torch.cuda.is_available() else 'cpu')

    # Create output folder
    if (args.output_folder is not None):
import torch
from torch.nn.utils import clip_grad_norm
import torch.nn.functional as F
import time
import os
import ipdb
from utils import AverageMeter, calculate_accuracy, f1_score, fuse_2d, grad_cam, show_cam_on_image, ModelOutputs
import numpy as np
from sklearn.metrics import average_precision_score, roc_auc_score
from utils import DiceLoss
from apex import amp

dice_loss = DiceLoss()


def train_epoch(epoch, data_loader, model, criterion, optimizer, opt, logger):
    print('train at epoch {}'.format(epoch))

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()

    # for tsn
    if opt.model_type == 'tsn':
        if opt.no_partialbn:
            model.module.partialBN(False)
        else:
            model.module.partialBN(True)
# else:
#     model.apply(weights_init)
####################

#### SE-Densenet 121 ####
from densenet import se_densenet121
model = se_densenet121(pretrained=False, num_channels=1, num_classes=5)
if opt.weight_dir:
    model.load_state_dict(torch.load(opt.weight_dir))
    print('weight loaded')
#########################

model = model.to(device)

# Optimization
cross_entropy = nn.BCEWithLogitsLoss().to(device)  # supervised loss
dice_loss = DiceLoss().to(device)
auc_loss = AUCLoss().to(device)
kl_divergence = nn.KLDivLoss(reduction='batchmean').to(device)  # unsupervised loss (consistency loss)

# total loss: proxy_w * (ce_w * ce_loss + d_w * d_loss) + a_w * a_loss
proxy_weight, ce_weight, d_weight, a_weight = 1.5, 0.8, 0.0, 1.0
if uda:
    supervised_weight, unsupervised_weight = 1.0, 5.0
else:
    supervised_weight, unsupervised_weight = 1.0, 0.0

optimizer = optim.SGD(model.parameters(), lr=opt.lr, momentum=0.99,
                      nesterov=True, weight_decay=5e-4)
def main(args): #args.model_list=['alpha0_double_deep_0.01','alpha0_5_double_deep_0.01','alpha1_double_deep_0.01','nodouble_deep','slim_dd','slim_double','slim_nodouble','slim_nodouble_deep'] #args.model_list=["double_deep","nodouble_deep","slim_nodouble"] #args.model_list=["slim_nodouble_deep_init32"] #args.model_list=["slim_nodouble_deep_init48"] args.model_list = [ 'alpha0_double_deep_0.01', 'alpha0_5_double_deep_0.01', 'alpha1_double_deep_0.01' ] for model_name in args.model_list: if model_name == "alpha0_double_deep_0.01": args.deepsupervision = True args.double_down_channel = True args.genotype_name = 'alpha0_stage1_double_deep_ep200' genotype = eval('genotypes.%s' % args.genotype_name) model = BuildNasUnetPrune( genotype=genotype, input_c=args.in_channels, c=args.init_channels, num_classes=args.nclass, meta_node_num=args.middle_nodes, layers=args.layers, dp=args.dropout_prob, use_sharing=args.use_sharing, double_down_channel=args.double_down_channel, aux=args.aux) args.model_path = './logs/isic2018/alpha0_double_deep_0.01/model_best.pth.tar' # kwargs = {'map_location': lambda storage, loc: storage.cuda(0)} # state_dict = torch.load(args.model_path, **kwargs) # # create new OrderedDict that does not contain `module.` # model.load_state_dict(state_dict) state_dict = torch.load(args.model_path, map_location='cpu')['state_dict'] state_dict = remove_module(state_dict) model.load_state_dict(state_dict) elif model_name == "alpha0_5_double_deep_0.01": args.deepsupervision = True args.double_down_channel = True args.genotype_name = 'alpha0_5_stage1_double_deep_ep80' genotype = eval('genotypes.%s' % args.genotype_name) model = BuildNasUnetPrune( genotype=genotype, input_c=args.in_channels, c=args.init_channels, num_classes=args.nclass, meta_node_num=args.middle_nodes, layers=args.layers, dp=args.dropout_prob, use_sharing=args.use_sharing, double_down_channel=args.double_down_channel, aux=args.aux) args.model_path = './logs/isic2018/alpha0_5_double_deep_0.01/model_best.pth.tar' state_dict = torch.load(args.model_path, map_location='cpu')['state_dict'] state_dict = remove_module(state_dict) model.load_state_dict(state_dict) #model.load_state_dict(torch.load(args.model_path, map_location='cpu')['state_dict']) elif model_name == "alpha1_double_deep_0.01": args.deepsupervision = True args.double_down_channel = True args.genotype_name = 'alpha1_stage1_double_deep_ep200' genotype = eval('genotypes.%s' % args.genotype_name) model = BuildNasUnetPrune( genotype=genotype, input_c=args.in_channels, c=args.init_channels, num_classes=args.nclass, meta_node_num=args.middle_nodes, layers=args.layers, dp=args.dropout_prob, use_sharing=args.use_sharing, double_down_channel=args.double_down_channel, aux=args.aux) args.model_path = './logs/isic2018/alpha1_double_deep_0.01/model_best.pth.tar' state_dict = torch.load(args.model_path, map_location='cpu')['state_dict'] state_dict = remove_module(state_dict) model.load_state_dict(state_dict) #model.load_state_dict(torch.load(args.model_path, map_location='cpu')['state_dict']) #################### init logger ################################### log_dir = './eval' + '/{}'.format( args.dataset) + '/{}'.format(model_name) ##################### init model ######################################## logger = get_logger(log_dir) print('RUNDIR: {}'.format(log_dir)) logger.info('{}-Eval'.format(model_name)) # setting args.save_path = log_dir args.save_images = os.path.join(args.save_path, "images") if not os.path.exists(args.save_images): os.mkdir(args.save_images) 
##################### init device ################################# if args.manualSeed is None: args.manualSeed = random.randint(1, 10000) np.random.seed(args.manualSeed) torch.manual_seed(args.manualSeed) args.use_cuda = args.gpus > 0 and torch.cuda.is_available() args.device = torch.device('cuda' if args.use_cuda else 'cpu') if args.use_cuda: torch.cuda.manual_seed(args.manualSeed) cudnn.benchmark = True ####################### init dataset ########################################### # sorted vaild datasets val_loader = get_dataloder(args, split_flag="valid") setting = {k: v for k, v in args._get_kwargs()} logger.info(setting) logger.info(genotype) logger.info('param size = %fMB', calc_parameters_count(model)) # init loss if args.loss == 'bce': criterion = nn.BCELoss() elif args.loss == 'bcelog': criterion = nn.BCEWithLogitsLoss() elif args.loss == "dice": criterion = DiceLoss() elif args.loss == "softdice": criterion = SoftDiceLoss() elif args.loss == 'bcedice': criterion = BCEDiceLoss() else: criterion = nn.CrossEntropyLoss() if args.use_cuda: logger.info("load model and criterion to gpu !") model = model.to(args.device) criterion = criterion.to(args.device) infer(args, model, criterion, val_loader, logger, args.save_images)
def main(train_args):
    backbone = ResNet()
    backbone.load_state_dict(torch.load('./weight/resnet34-333f7ec4.pth'), strict=False)
    net = Decoder34(num_classes=13, backbone=backbone).cuda()
    D = discriminator(input_channels=16).cuda()

    if len(train_args['snapshot']) == 0:
        curr_epoch = 1
        train_args['best_record'] = {'epoch': 0, 'val_loss': 1e10, 'acc': 0, 'acc_cls': 0,
                                     'mean_iu': 0, 'fwavacc': 0}
    else:
        print('training resumes from ' + train_args['snapshot'])
        net.load_state_dict(torch.load(os.path.join(ckpt_path, exp_name, train_args['snapshot'])))
        split_snapshot = train_args['snapshot'].split('_')
        curr_epoch = int(split_snapshot[1]) + 1
        train_args['best_record'] = {'epoch': int(split_snapshot[1]),
                                     'val_loss': float(split_snapshot[3]),
                                     'acc': float(split_snapshot[5]),
                                     'acc_cls': float(split_snapshot[7]),
                                     'mean_iu': float(split_snapshot[9]),
                                     'fwavacc': float(split_snapshot[11])}
    net.train()
    D.train()

    mean_std = ([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    input_transform = standard_transforms.Compose([
        standard_transforms.ToTensor(),
        standard_transforms.Normalize(*mean_std)
    ])
    target_transform = extended_transforms.MaskToTensor()
    restore_transform = standard_transforms.Compose([
        extended_transforms.DeNormalize(*mean_std),
        standard_transforms.ToPILImage(),
    ])
    visualize = standard_transforms.Compose([
        standard_transforms.Scale(400),
        standard_transforms.CenterCrop(400),
        standard_transforms.ToTensor()
    ])

    train_set = wp.Wp('train', transform=input_transform, target_transform=target_transform)
    train_loader = DataLoader(train_set, batch_size=4, num_workers=4, shuffle=True)
    # val_set = wp.Wp('val', transform=input_transform,
    #                 target_transform=target_transform)
    # XR: so val never actually gets its own split here? Why not use a separate val dataset?
    val_loader = DataLoader(train_set, batch_size=1, num_workers=4, shuffle=False)

    criterion = DiceLoss().cuda()
    criterion_D = nn.BCELoss().cuda()

    optimizer_AE = optim.Adam([
        {'params': [param for name, param in net.named_parameters() if name[-4:] == 'bias'],
         'lr': 2 * train_args['lr']},
        {'params': [param for name, param in net.named_parameters() if name[-4:] != 'bias'],
         'lr': train_args['lr'], 'weight_decay': train_args['weight_decay']}
    ], betas=(train_args['momentum'], 0.999))
    optimizer_D = optim.Adam([
        {'params': [param for name, param in D.named_parameters() if name[-4:] == 'bias'],
         'lr': 2 * train_args['lr']},
        {'params': [param for name, param in D.named_parameters() if name[-4:] != 'bias'],
         'lr': train_args['lr'], 'weight_decay': train_args['weight_decay']}
    ], betas=(train_args['momentum'], 0.999))

    if len(train_args['snapshot']) > 0:
        optimizer_AE.load_state_dict(torch.load(os.path.join(ckpt_path, exp_name,
                                                             'opt_' + train_args['snapshot'])))
        optimizer_AE.param_groups[0]['lr'] = 2 * train_args['lr']
        optimizer_AE.param_groups[1]['lr'] = train_args['lr']

    check_mkdir(ckpt_path)
    check_mkdir(os.path.join(ckpt_path, exp_name))
    open(os.path.join(ckpt_path, exp_name, str(datetime.datetime.now()) + '.txt'), 'w').write(
        str(train_args) + '\n\n')

    scheduler = ReduceLROnPlateau(optimizer_AE, 'min', patience=train_args['lr_patience'],
                                  min_lr=1e-10, verbose=True)
    for epoch in range(curr_epoch, train_args['epoch_num'] + 1):
        train(train_loader, net, D, criterion, criterion_D, optimizer_AE, optimizer_D, epoch, train_args)
        val_loss = validate(val_loader, net, criterion, optimizer_AE, epoch, train_args,
                            restore_transform, visualize)
        scheduler.step(val_loss)
def main(args): #################### init logger ################################### args.model='unet' model_weight_path='../logs/isic2018/unet_ep300/20200402-135108/model_best.pth.tar' model=get_models(args) model.load_state_dict(torch.load(model_weight_path, map_location='cpu')['state_dict']) log_dir = './models/' + args.model+'_prune_'+args.note logger = get_logger(log_dir) print('RUNDIR: {}'.format(log_dir)) logger.info('{}-L1Prune'.format(args.model)) # setting args.save_path = log_dir args.save_tbx_log = args.save_path + '/tbx_log' writer = SummaryWriter(args.save_tbx_log) if args.manualSeed is None: args.manualSeed = random.randint(1, 10000) np.random.seed(args.manualSeed) torch.manual_seed(args.manualSeed) args.use_cuda = args.gpus > 0 and torch.cuda.is_available() args.device = torch.device('cuda' if args.use_cuda else 'cpu') if args.use_cuda: torch.cuda.manual_seed(args.manualSeed) cudnn.benchmark = True setting = {k: v for k, v in args._get_kwargs()} logger.info(setting) train_loader=get_dataloder(args,split_flag="train") val_loader=get_dataloder(args,split_flag="valid") # init loss if args.loss == 'bce': criterion = nn.BCELoss() elif args.loss == 'bcelog': criterion = nn.BCEWithLogitsLoss() elif args.loss == "dice": criterion = DiceLoss() elif args.loss == "softdice": criterion = SoftDiceLoss() elif args.loss == 'bcedice': criterion = BCEDiceLoss() else: criterion = nn.CrossEntropyLoss() if args.use_cuda: logger.info("load model and criterion to gpu !") model = model.to(args.device) criterion = criterion.to(args.device) logger.info("Original trained model performance test: ") infer(args, model, criterion, val_loader,logger) # Pruning # Pruning Configuration, in paper 'PRUNING FILTERS FOR EFFICIENT CONVNETS', configure_list = [{ 'sparsity': 0.5, 'op_types': ['Conv2d'], 'op_names': ['Conv1.conv.0','Conv1.conv.3','Conv2.conv.0','Conv2.conv.3','Conv3.conv.0','Conv3.conv.3', 'Conv4.conv.0','Conv4.conv.3','Conv5.conv.0','Conv5.conv.3', 'Up5.up.1','Up_conv5.conv.0','Up_conv5.conv.3', 'Up4.up.1','Up_conv4.conv.0','Up_conv4.conv.3', 'Up3.up.1','Up_conv3.conv.0','Up_conv3.conv.3', 'Up2.up.1','Up_conv2.conv.0','Up_conv2.conv.3', ]} ] # Prune model and test accuracy without fine tuning. 
    logger.info('=' * 10 + 'Test on the pruned model before fine tune' + '=' * 10)
    pruner = L1FilterPruner(model, configure_list)
    # change the forward func (multiply by the pruning mask)
    model = pruner.compress()
    # test performance without finetuning
    logger.info("Pruning trained model performance test: ")
    infer(args, model, criterion, val_loader, logger)

    # Fine tune the pruned model for 40 epochs and test accuracy
    logger.info('=' * 10 + 'Fine tuning' + '=' * 10)
    # torch.optim.SGD(parameters, lr=args.lr, weight_decay=args.weight_decay, momentum=args.momentum)
    optimizer = torch.optim.SGD(model.parameters(), lr=args.lr,
                                weight_decay=args.weight_decay, momentum=args.momentum)
    # init scheduler
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, args.epoch)

    max_value = 0
    for epoch in range(0, args.epoch):
        # lr = adjust_learning_rate(args, optimizer, epoch)
        scheduler.step()
        logger.info('Epoch: %d lr %e', epoch, scheduler.get_lr()[0])
        # update mask
        pruner.update_epoch(epoch)
        # train
        train(args, model, criterion, train_loader, optimizer, epoch, logger)
        # val
        vmr, vms, vmp, vmf, vmjc, vmd, vmacc, vloss = infer(args, model, criterion, val_loader, logger)
        writer.add_scalar('Val/Loss', vloss, epoch)
        writer.add_scalar('Val/mAcc', vmacc, epoch)
        writer.add_scalar('Val/Recall', vmr, epoch)
        writer.add_scalar('Val/Specifi', vms, epoch)
        writer.add_scalar('Val/Precision', vmp, epoch)
        writer.add_scalar('Val/F1', vmf, epoch)
        writer.add_scalar('Val/Jc', vmjc, epoch)
        writer.add_scalar('Val/Dice', vmd, epoch)

        is_best = True if (vmjc >= max_value) else False
        max_value = max(max_value, vmjc)
        if is_best:
            pruner.export_model(model_path=os.path.join(args.save_path, "best_prune_unet.pth"),
                                mask_path=os.path.join(args.save_path, 'mask_prune_indexs.pth'))
        state = {
            'epoch': epoch,
            'optimizer': optimizer.state_dict(),
            'state_dict': model.state_dict(),
            'scheduler': scheduler.state_dict(),
        }
        logger.info("epoch:{} best:{} max_value:{}".format(epoch, is_best, max_value))
        torch.save(state, os.path.join(args.save_path, "checkpoint.pth.tar"))
    writer.close()

    # test the best_prune_unet.pth
    args.model = 'unet'
    model_weight_path = os.path.join(args.save_path, "best_prune_unet.pth")
    model = get_models(args)
    model.load_state_dict(torch.load(model_weight_path, map_location='cpu'))
    model = model.to(args.device)
    logger.info("Final saved pruned model performance test: ")
    infer(args, model, criterion, val_loader, logger)
def main(args): args.model_list = [ 'double_deep', 'double', 'nodouble', 'nodouble_deep', 'slim_dd', 'slim_double', 'slim_nodouble', 'slim_nodouble_deep' ] #args.model_list=["slim_nodouble_deep_init32"] for model_name in args.model_list: print(model_name) if model_name == "double_deep": args.deepsupervision = True args.double_down_channel = True args.genotype_name = 'stage1_layer9_110epoch_double_deep_final' genotype = eval('genotypes.%s' % args.genotype_name) model = BuildNasUnetPrune( genotype=genotype, input_c=args.in_channels, c=args.init_channels, num_classes=args.nclass, meta_node_num=args.middle_nodes, layers=args.layers, dp=args.dropout_prob, use_sharing=args.use_sharing, double_down_channel=args.double_down_channel, aux=args.aux) args.model_path = './logs/isic2018/prune_20200313-063406_32_32_ep300_double_deep/model_best.pth.tar' model.load_state_dict( torch.load(args.model_path, map_location='cpu')['state_dict']) elif model_name == 'double': args.deepsupervision = False args.double_down_channel = True args.genotype_name = 'stage1_layer9_110epoch_double_final' genotype = eval('genotypes.%s' % args.genotype_name) model = BuildNasUnetPrune( genotype=genotype, input_c=args.in_channels, c=args.init_channels, num_classes=args.nclass, meta_node_num=args.middle_nodes, layers=args.layers, dp=args.dropout_prob, use_sharing=args.use_sharing, double_down_channel=args.double_down_channel, aux=args.aux) args.model_path = './logs/isic2018/prune_20200313-063428_32_32_ep300_double/model_best.pth.tar' model.load_state_dict( torch.load(args.model_path, map_location='cpu')['state_dict']) elif model_name == 'nodouble': args.deepsupervision = False args.double_down_channel = False args.genotype_name = 'stage1_layer9_110epoch_final' genotype = eval('genotypes.%s' % args.genotype_name) model = BuildNasUnetPrune( genotype=genotype, input_c=args.in_channels, c=args.init_channels, num_classes=args.nclass, meta_node_num=args.middle_nodes, layers=args.layers, dp=args.dropout_prob, use_sharing=args.use_sharing, double_down_channel=args.double_down_channel, aux=args.aux) args.model_path = './logs/isic2018/prune_20200316-141125_nodouble_32_ep300/model_best.pth.tar' model.load_state_dict( torch.load(args.model_path, map_location='cpu')['state_dict']) elif model_name == 'nodouble_deep': args.deepsupervision = True args.double_down_channel = False args.genotype_name = 'stage1_layer9_110epoch_deep_final' genotype = eval('genotypes.%s' % args.genotype_name) model = BuildNasUnetPrune( genotype=genotype, input_c=args.in_channels, c=args.init_channels, num_classes=args.nclass, meta_node_num=args.middle_nodes, layers=args.layers, dp=args.dropout_prob, use_sharing=args.use_sharing, double_down_channel=args.double_down_channel, aux=args.aux) args.model_path = './logs/isic2018/prune_20200316-141242_nodouble_32_ep300_deep/model_best.pth.tar' model.load_state_dict( torch.load(args.model_path, map_location='cpu')['state_dict']) if model_name == "slim_dd": args.deepsupervision = True args.double_down_channel = True args.genotype_name = 'stage1_layer9_110epoch_double_deep_final' genotype = eval('genotypes.%s' % args.genotype_name) model = net_dd(genotype=genotype, input_c=args.in_channels, c=args.init_channels, num_classes=args.nclass, meta_node_num=args.middle_nodes, layers=args.layers, dp=args.dropout_prob, use_sharing=args.use_sharing, double_down_channel=args.double_down_channel, aux=args.aux) args.model_path = './logs/isic2018/dd_20200319-170442_ep300/model_best.pth.tar' model.load_state_dict( torch.load(args.model_path, 
map_location='cpu')['state_dict']) elif model_name == 'slim_double': args.deepsupervision = False args.double_down_channel = True args.genotype_name = 'stage1_layer9_110epoch_double_final' genotype = eval('genotypes.%s' % args.genotype_name) model = net_double(genotype=genotype, input_c=args.in_channels, c=args.init_channels, num_classes=args.nclass, meta_node_num=args.middle_nodes, layers=args.layers, dp=args.dropout_prob, use_sharing=args.use_sharing, double_down_channel=args.double_down_channel, aux=args.aux) args.model_path = './logs/isic2018/double_20200319-170621_ep300/model_best.pth.tar' model.load_state_dict( torch.load(args.model_path, map_location='cpu')['state_dict']) elif model_name == 'slim_nodouble': args.deepsupervision = False args.double_down_channel = False args.genotype_name = 'stage1_layer9_110epoch_final' genotype = eval('genotypes.%s' % args.genotype_name) model = net_nodouble(genotype=genotype, input_c=args.in_channels, c=args.init_channels, num_classes=args.nclass, meta_node_num=args.middle_nodes, layers=args.layers, dp=args.dropout_prob, use_sharing=args.use_sharing, double_down_channel=args.double_down_channel, aux=args.aux) args.model_path = './logs/isic2018/nodouble_20200319-210910_ep300/model_best.pth.tar' model.load_state_dict( torch.load(args.model_path, map_location='cpu')['state_dict']) elif model_name == 'slim_nodouble_deep': args.deepsupervision = True args.double_down_channel = False args.genotype_name = 'stage1_layer9_110epoch_deep_final' genotype = eval('genotypes.%s' % args.genotype_name) model = net_nodouble_deep( genotype=genotype, input_c=args.in_channels, c=args.init_channels, num_classes=args.nclass, meta_node_num=args.middle_nodes, layers=args.layers, dp=args.dropout_prob, use_sharing=args.use_sharing, double_down_channel=args.double_down_channel, aux=args.aux) args.model_path = './logs/isic2018/nodouble_deep_20200319-210600_ep300/model_best.pth.tar' model.load_state_dict( torch.load(args.model_path, map_location='cpu')['state_dict']) elif model_name == 'slim_nodouble_deep_init32': args.deepsupervision = True args.double_down_channel = False args.genotype_name = 'stage1_layer9_110epoch_deep_final' genotype = eval('genotypes.%s' % args.genotype_name) model = net_nodouble_deep( genotype=genotype, input_c=args.in_channels, c=32, num_classes=args.nclass, meta_node_num=args.middle_nodes, layers=args.layers, dp=args.dropout_prob, use_sharing=args.use_sharing, double_down_channel=args.double_down_channel, aux=args.aux) args.model_path = './logs/isic2018/nodouble_deep_ep300_init32/model_best.pth.tar' model.load_state_dict( torch.load(args.model_path, map_location='cpu')['state_dict']) #################### init logger ################################### log_dir = './eval' + '/{}'.format( args.dataset) + '/{}'.format(model_name) ##################### init model ######################################## logger = get_logger(log_dir) print('RUNDIR: {}'.format(log_dir)) logger.info('{}-Eval'.format(model_name)) # setting args.save_path = log_dir args.save_images = os.path.join(args.save_path, "images") if not os.path.exists(args.save_images): os.mkdir(args.save_images) ##################### init device ################################# if args.manualSeed is None: args.manualSeed = random.randint(1, 10000) np.random.seed(args.manualSeed) torch.manual_seed(args.manualSeed) args.use_cuda = args.gpus > 0 and torch.cuda.is_available() args.device = torch.device('cuda' if args.use_cuda else 'cpu') if args.use_cuda: torch.cuda.manual_seed(args.manualSeed) 
cudnn.benchmark = True ####################### init dataset ########################################### # sorted vaild datasets val_loader = get_dataloder(args, split_flag="valid") setting = {k: v for k, v in args._get_kwargs()} logger.info(setting) logger.info(genotype) logger.info('param size = %fMB', calc_parameters_count(model)) # init loss if args.loss == 'bce': criterion = nn.BCELoss() elif args.loss == 'bcelog': criterion = nn.BCEWithLogitsLoss() elif args.loss == "dice": criterion = DiceLoss() elif args.loss == "softdice": criterion = SoftDiceLoss() elif args.loss == 'bcedice': criterion = BCEDiceLoss() else: criterion = nn.CrossEntropyLoss() if args.use_cuda: logger.info("load model and criterion to gpu !") model = model.to(args.device) criterion = criterion.to(args.device)
def main(args): #################### init logger ################################### args.model_list=["unet","unet++",'attention_unet_v1','multires_unet','r2unet_t3'] for model_name in args.model_list: if model_name=='unet': args.model='unet' model_weight_path='./logs/unet_ep1600/cvc/20200312-143050/model_best.pth.tar' model=get_models(args) model.load_state_dict(torch.load(model_weight_path, map_location='cpu')['state_dict']) elif model_name=='unet++': args.model='unet++' args.deepsupervision=False model_weight_path='./logs/unet++_ep1600/cvc/20200312-143358/model_best.pth.tar' model=get_models(args) model.load_state_dict(torch.load(model_weight_path, map_location='cpu')['state_dict']) elif model_name == 'attention_unet_v1': args.model = 'attention_unet_v1' model_weight_path = './logs/attention_unet_v1_ep1600/cvc/20200312-143413/model_best.pth.tar' model = get_models(args) model.load_state_dict(torch.load(model_weight_path, map_location='cpu')['state_dict']) elif model_name == 'multires_unet': args.model = 'multires_unet' model_weight_path = './logs/multires_unet_ep1600_t2/20200322-194117/model_best.pth.tar' model = get_models(args) model.load_state_dict(torch.load(model_weight_path, map_location='cpu')['state_dict']) # change bn relu order elif model_name == 'multires_unet_align': args.model = 'multires_unet' model_weight_path = './logs/multires_unet_ep1600_chbnrelu/20200327-184457/model_best.pth.tar' model = get_models(args) model.load_state_dict(torch.load(model_weight_path, map_location='cpu')['state_dict']) elif model_name == 'r2unet_t3': args.model = 'r2unet' args.time_step=3 model_weight_path = './logs/r2unet_ep1600_t2/20200324-032815/model_best.pth.tar' model = get_models(args) model.load_state_dict(torch.load(model_weight_path, map_location='cpu')['state_dict']) elif model_name == 'unet_ep800dice': args.model = 'unet' model_weight_path = './logs/unet_ep800_bcedice/cvc/20200315-043021/model_best.pth.tar' model = get_models(args) model.load_state_dict(torch.load(model_weight_path, map_location='cpu')['state_dict']) elif model_name=='unet++_nodeep_ep800dice': args.model='unet++' args.deepsupervision=False model_weight_path='./logs/unet++_ep800_bcedice/cvc/20200315-043214/model_best.pth.tar' model=get_models(args) model.load_state_dict(torch.load(model_weight_path, map_location='cpu')['state_dict']) elif model_name == 'unet++_deep_ep800dice': args.model = 'unet++' args.deepsupervision = True model_weight_path = './logs/unet++_deep_ep800_bcedice/cvc/20200315-043134/model_best.pth.tar' model = get_models(args) model.load_state_dict(torch.load(model_weight_path, map_location='cpu')['state_dict']) elif model_name == 'attention_unet_v1_ep800dice': args.model = 'attention_unet_v1' args.deepsupervision=False model_weight_path = './logs/attention_unet_v1_ep800_bcedice/cvc/20200315-043300/model_best.pth.tar' model = get_models(args) model.load_state_dict(torch.load(model_weight_path, map_location='cpu')['state_dict']) elif model_name == 'multires_unet_ep800dice': args.model = 'multires_unet' args.deepsupervision=False model_weight_path = './logs/multires_unet_ep800_bcedice/cvc/20200312-173031/model_best.pth.tar' model = get_models(args) model.load_state_dict(torch.load(model_weight_path, map_location='cpu')['state_dict']) else: raise NotImplementedError() assert os.path.exists(args.save) args.model_save_path=os.path.join(args.save,model_name) logger = get_logger(args.model_save_path) args.save_images= os.path.join(args.model_save_path,"images") if not os.path.exists(args.save_images): 
os.mkdir(args.save_images) if args.manualSeed is None: args.manualSeed = random.randint(1, 10000) np.random.seed(args.manualSeed) torch.manual_seed(args.manualSeed) args.use_cuda = args.gpus > 0 and torch.cuda.is_available() args.device = torch.device('cuda' if args.use_cuda else 'cpu') if args.use_cuda: torch.cuda.manual_seed(args.manualSeed) cudnn.benchmark = True val_loader = get_dataloder(args, split_flag="valid") setting = {k: v for k, v in args._get_kwargs()} logger.info(setting) logger.info('param size = %fMB', calc_parameters_count(model)) # init loss if args.loss == 'bce': criterion = nn.BCELoss() elif args.loss == 'bcelog': criterion = nn.BCEWithLogitsLoss() elif args.loss == "dice": criterion = DiceLoss() elif args.loss == "softdice": criterion = SoftDiceLoss() elif args.loss == 'bcedice': criterion = BCEDiceLoss() else: criterion = nn.CrossEntropyLoss() if args.use_cuda: logger.info("load model and criterion to gpu !") model = model.to(args.device) criterion = criterion.to(args.device) infer(args, model, criterion, val_loader,logger,args.save_images)
def main(args): #################### init logger ################################### log_dir = './logs/' + '{}'.format(args.dataset) + '/{}_{}_{}'.format( args.model, time.strftime('%Y%m%d-%H%M%S'), args.note) logger = get_logger(log_dir) print('RUNDIR: {}'.format(log_dir)) logger.info('{}-Train'.format(args.model)) # setting args.save_path = log_dir args.save_tbx_log = args.save_path + '/tbx_log' writer = SummaryWriter(args.save_tbx_log) ##################### init device ################################# if args.manualSeed is None: args.manualSeed = random.randint(1, 10000) np.random.seed(args.manualSeed) torch.manual_seed(args.manualSeed) args.use_cuda = args.gpus > 0 and torch.cuda.is_available() args.device = torch.device('cuda' if args.use_cuda else 'cpu') if args.use_cuda: torch.cuda.manual_seed(args.manualSeed) cudnn.benchmark = True ####################### init dataset ########################################### train_loader = get_dataloder(args, split_flag="train") val_loader = get_dataloder(args, split_flag="valid") ######################## init model ############################################ # model # get the network parameters if args.model == "alpha_double_deep": args.deepsupervision = True args.double_down_channel = True args.genotype_name = 'stage1_layer9_110epoch_double_deep_final' args.alphas_model = './search_exp/Nas_Search_Unet/isic2018/deepsupervision/stage_1_model/checkpoint.pth.tar' model_alphas = torch.load( args.alphas_model, map_location=args.device)['alphas_dict']['alphas_network'] model_alphas.requires_grad = False model_alphas = F.softmax(model_alphas, dim=-1) genotype = eval('genotypes.%s' % args.genotype_name) model = BuildNasUnet(genotype=genotype, input_c=args.in_channels, c=args.init_channels, num_classes=args.nclass, meta_node_num=args.middle_nodes, layers=args.layers, dp=args.dropout_prob, use_sharing=args.use_sharing, double_down_channel=args.double_down_channel, aux=args.aux) elif args.model == "alpha_double": args.deepsupervision = False args.double_down_channel = True args.genotype_name = 'stage1_layer9_110epoch_double_final' args.alphas_model = './search_exp/Nas_Search_Unet/isic2018/nodeepsupervision/stage_1_model/checkpoint.pth.tar' model_alphas = torch.load( args.alphas_model, map_location=args.device)['alphas_dict']['alphas_network'] model_alphas.requires_grad = False model_alphas = F.softmax(model_alphas, dim=-1) genotype = eval('genotypes.%s' % args.genotype_name) model = BuildNasUnet(genotype=genotype, input_c=args.in_channels, c=args.init_channels, num_classes=args.nclass, meta_node_num=args.middle_nodes, layers=args.layers, dp=args.dropout_prob, use_sharing=args.use_sharing, double_down_channel=args.double_down_channel, aux=args.aux) elif args.model == "alpha_nodouble": args.deepsupervision = False args.double_down_channel = False args.genotype_name = 'stage1_layer9_110epoch_final' args.alphas_model = './search_exp/Nas_Search_Unet/isic2018/nodouble/stage_1_model/checkpoint.pth.tar' model_alphas = torch.load( args.alphas_model, map_location=args.device)['alphas_dict']['alphas_network'] model_alphas.requires_grad = False model_alphas = F.softmax(model_alphas, dim=-1) genotype = eval('genotypes.%s' % args.genotype_name) model = BuildNasUnet(genotype=genotype, input_c=args.in_channels, c=args.init_channels, num_classes=args.nclass, meta_node_num=args.middle_nodes, layers=args.layers, dp=args.dropout_prob, use_sharing=args.use_sharing, double_down_channel=args.double_down_channel, aux=args.aux) elif args.model == "alpha_nodouble_deep": 
args.deepsupervision = True args.double_down_channel = False args.genotype_name = 'stage1_layer9_110epoch_deep_final' args.alphas_model = './search_exp/Nas_Search_Unet/isic2018/nodouble_deep/stage_1_model/checkpoint.pth.tar' model_alphas = torch.load( args.alphas_model, map_location=args.device)['alphas_dict']['alphas_network'] model_alphas.requires_grad = False model_alphas = F.softmax(model_alphas, dim=-1) genotype = eval('genotypes.%s' % args.genotype_name) model = BuildNasUnet(genotype=genotype, input_c=args.in_channels, c=args.init_channels, num_classes=args.nclass, meta_node_num=args.middle_nodes, layers=args.layers, dp=args.dropout_prob, use_sharing=args.use_sharing, double_down_channel=args.double_down_channel, aux=args.aux) elif args.model == "double_deep": args.deepsupervision = True args.double_down_channel = True args.genotype_name = 'stage1_layer9_110epoch_double_deep_final' model_alphas = None genotype = eval('genotypes.%s' % args.genotype_name) model = BuildNasUnetPrune(genotype=genotype, input_c=args.in_channels, c=args.init_channels, num_classes=args.nclass, meta_node_num=args.middle_nodes, layers=args.layers, dp=args.dropout_prob, use_sharing=args.use_sharing, double_down_channel=args.double_down_channel, aux=args.aux) elif args.model == "double": args.deepsupervision = False args.double_down_channel = True args.genotype_name = 'stage1_layer9_110epoch_double_final' model_alphas = None genotype = eval('genotypes.%s' % args.genotype_name) model = BuildNasUnetPrune(genotype=genotype, input_c=args.in_channels, c=args.init_channels, num_classes=args.nclass, meta_node_num=args.middle_nodes, layers=args.layers, dp=args.dropout_prob, use_sharing=args.use_sharing, double_down_channel=args.double_down_channel, aux=args.aux) elif args.model == "nodouble": args.deepsupervision = False args.double_down_channel = False args.genotype_name = 'stage1_layer9_110epoch_final' model_alphas = None genotype = eval('genotypes.%s' % args.genotype_name) model = BuildNasUnetPrune(genotype=genotype, input_c=args.in_channels, c=args.init_channels, num_classes=args.nclass, meta_node_num=args.middle_nodes, layers=args.layers, dp=args.dropout_prob, use_sharing=args.use_sharing, double_down_channel=args.double_down_channel, aux=args.aux) elif args.model == "nodouble_deep": args.deepsupervision = True args.double_down_channel = False args.genotype_name = 'stage1_layer9_110epoch_deep_final' model_alphas = None genotype = eval('genotypes.%s' % args.genotype_name) model = BuildNasUnetPrune(genotype=genotype, input_c=args.in_channels, c=args.init_channels, num_classes=args.nclass, meta_node_num=args.middle_nodes, layers=args.layers, dp=args.dropout_prob, use_sharing=args.use_sharing, double_down_channel=args.double_down_channel, aux=args.aux) elif args.model == "alpha1_stage1_double_deep_ep80": args.deepsupervision = True args.double_down_channel = True args.genotype_name = 'alpha1_stage1_double_deep_ep80' model_alphas = None genotype = eval('genotypes.%s' % args.genotype_name) model = BuildNasUnetPrune(genotype=genotype, input_c=args.in_channels, c=args.init_channels, num_classes=args.nclass, meta_node_num=args.middle_nodes, layers=args.layers, dp=args.dropout_prob, use_sharing=args.use_sharing, double_down_channel=args.double_down_channel, aux=args.aux) elif args.model == "alpha0_stage1_double_deep_ep80": args.deepsupervision = True args.double_down_channel = True args.genotype_name = 'alpha0_stage1_double_deep_ep80' model_alphas = None genotype = eval('genotypes.%s' % args.genotype_name) model = 
BuildNasUnetPrune(genotype=genotype, input_c=args.in_channels, c=args.init_channels, num_classes=args.nclass, meta_node_num=args.middle_nodes, layers=args.layers, dp=args.dropout_prob, use_sharing=args.use_sharing, double_down_channel=args.double_down_channel, aux=args.aux) elif args.model == "alpha0_5_stage1_double_deep_ep80": args.deepsupervision = True args.double_down_channel = True args.genotype_name = 'alpha0_5_stage1_double_deep_ep80' model_alphas = None genotype = eval('genotypes.%s' % args.genotype_name) model = BuildNasUnetPrune(genotype=genotype, input_c=args.in_channels, c=args.init_channels, num_classes=args.nclass, meta_node_num=args.middle_nodes, layers=args.layers, dp=args.dropout_prob, use_sharing=args.use_sharing, double_down_channel=args.double_down_channel, aux=args.aux) elif args.model == "alpha0_5_stage1_double_nodeep_ep80": args.deepsupervision = False args.double_down_channel = True args.genotype_name = 'alpha0_5_stage1_double_nodeep_ep80' model_alphas = None genotype = eval('genotypes.%s' % args.genotype_name) model = BuildNasUnetPrune(genotype=genotype, input_c=args.in_channels, c=args.init_channels, num_classes=args.nclass, meta_node_num=args.middle_nodes, layers=args.layers, dp=args.dropout_prob, use_sharing=args.use_sharing, double_down_channel=args.double_down_channel, aux=args.aux) elif args.model == "alpha0_5_stage1_nodouble_deep_ep80": args.deepsupervision = True args.double_down_channel = False args.genotype_name = 'alpha0_5_stage1_nodouble_deep_ep80' model_alphas = None genotype = eval('genotypes.%s' % args.genotype_name) model = BuildNasUnetPrune(genotype=genotype, input_c=args.in_channels, c=args.init_channels, num_classes=args.nclass, meta_node_num=args.middle_nodes, layers=args.layers, dp=args.dropout_prob, use_sharing=args.use_sharing, double_down_channel=args.double_down_channel, aux=args.aux) elif args.model == "alpha0_5_stage1_nodouble_nodeep_ep80": args.deepsupervision = False args.double_down_channel = False args.genotype_name = 'alpha0_5_stage1_nodouble_nodeep_ep80' model_alphas = None genotype = eval('genotypes.%s' % args.genotype_name) model = BuildNasUnetPrune(genotype=genotype, input_c=args.in_channels, c=args.init_channels, num_classes=args.nclass, meta_node_num=args.middle_nodes, layers=args.layers, dp=args.dropout_prob, use_sharing=args.use_sharing, double_down_channel=args.double_down_channel, aux=args.aux) # cvc trans elif args.model == "layer7_double_deep": args.deepsupervision = True args.double_down_channel = True args.genotype_name = 'layer7_double_deep' model_alphas = None genotype = eval('genotypes.%s' % args.genotype_name) model = BuildNasUnetPrune(genotype=genotype, input_c=args.in_channels, c=args.init_channels, num_classes=args.nclass, meta_node_num=args.middle_nodes, layers=args.layers, dp=args.dropout_prob, use_sharing=args.use_sharing, double_down_channel=args.double_down_channel, aux=args.aux) # chaos trans elif args.model == "stage0_double_deep_ep80_newim": args.deepsupervision = True args.double_down_channel = True args.genotype_name = 'stage0_double_deep_ep80_newim' model_alphas = None genotype = eval('genotypes.%s' % args.genotype_name) model = BuildNasUnetPrune(genotype=genotype, input_c=args.in_channels, c=args.init_channels, num_classes=args.nclass, meta_node_num=args.middle_nodes, layers=args.layers, dp=args.dropout_prob, use_sharing=args.use_sharing, double_down_channel=args.double_down_channel, aux=args.aux) if torch.cuda.device_count() > 1 and args.use_cuda: logger.info('use: %d gpus', 
                    torch.cuda.device_count())
        model = nn.DataParallel(model)

    setting = {k: v for k, v in args._get_kwargs()}
    logger.info(setting)
    logger.info(genotype)
    logger.info(model_alphas)
    flop, param = get_model_complexity_info(model, (3, 256, 256), as_strings=True,
                                            print_per_layer_stat=False)
    print("GFLOPs: {}".format(flop))
    print("Params: {}".format(param))

    # init loss
    if args.loss == 'bce':
        criterion = nn.BCELoss()
    elif args.loss == 'bcelog':
        criterion = nn.BCEWithLogitsLoss()
    elif args.loss == "dice":
        criterion = DiceLoss()
    elif args.loss == "softdice":
        criterion = SoftDiceLoss()
    elif args.loss == 'bcedice':
        criterion = BCEDiceLoss()
    else:
        criterion = nn.CrossEntropyLoss()

    if args.use_cuda:
        logger.info("load model and criterion to gpu !")
        model = model.to(args.device)
        criterion = criterion.to(args.device)

    # init optimizer
    optimizer = torch.optim.SGD(model.parameters(), lr=args.lr,
                                weight_decay=args.weight_decay, momentum=args.momentum)
    # init scheduler
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, args.epoch)
    # scheduler = torch.optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=30, gamma=0.1, last_epoch=-1)

    ############################### check resume #########################
    start_epoch = 0
    if args.resume is not None:
        if os.path.isfile(args.resume):
            logger.info("Loading model and optimizer from checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, map_location=args.device)
            start_epoch = checkpoint['epoch']
            optimizer.load_state_dict(checkpoint['optimizer'])
            model.load_state_dict(checkpoint['state_dict'])
            scheduler.load_state_dict(checkpoint['scheduler'])
        else:
            raise FileNotFoundError("No checkpoint found at '{}'".format(args.resume))

    #################################### train and val ########################
    max_value = 0
    for epoch in range(start_epoch, args.epoch):
        # lr = adjust_learning_rate(args, optimizer, epoch)
        scheduler.step()
        # train
        if args.deepsupervision:
            mean_loss, value1, value2 = train(args, model_alphas, model, criterion,
                                              train_loader, optimizer)
            mr, ms, mp, mf, mjc, md, macc = value1
            logger.info("Epoch:{} Train_Loss:{:.3f} Acc:{:.3f} Dice:{:.3f} Jc:{:.3f}".format(
                epoch, mean_loss, macc, md, mjc))
            writer.add_scalar('Train/dDice', md, epoch)
        else:
            mean_loss, value1 = train(args, model_alphas, model, criterion,
                                      train_loader, optimizer)
            mr, ms, mp, mf, mjc, md, macc = value1
            logger.info("Epoch:{} Train_Loss:{:.3f} Acc:{:.3f} Dice:{:.3f} Jc:{:.3f}".format(
                epoch, mean_loss, macc, md, mjc))
        # write
        writer.add_scalar('Train/Loss', mean_loss, epoch)

        # val
        if args.deepsupervision:
            vmean_loss, valuev1, valuev2 = infer(args, model_alphas, model, criterion, val_loader)
            vmr, vms, vmp, vmf, vmjc, vmd, vmacc = valuev1
            logger.info("Epoch:{} Val_Loss:{:.3f} Acc:{:.3f} Dice:{:.3f} Jc:{:.3f}".format(
                epoch, vmean_loss, vmacc, vmd, vmjc))
        else:
            vmean_loss, valuev1 = infer(args, model_alphas, model, criterion, val_loader)
            vmr, vms, vmp, vmf, vmjc, vmd, vmacc = valuev1
            logger.info("Epoch:{} Val_Loss:{:.3f} Acc:{:.3f} Dice:{:.3f} Jc:{:.3f}".format(
                epoch, vmean_loss, vmacc, vmd, vmjc))

        is_best = True if vmjc >= max_value else False
        max_value = max(max_value, vmjc)
        writer.add_scalar('Val/Loss', vmean_loss, epoch)
        state = {
            'epoch': epoch,
            'optimizer': optimizer.state_dict(),
            'state_dict': model.state_dict(),
            'scheduler': scheduler.state_dict(),
        }
        logger.info("epoch:{} best:{} max_value:{}".format(epoch, is_best, max_value))
        if not is_best:
            torch.save(state, os.path.join(args.save_path, "checkpoint.pth.tar"))
        else:
            torch.save(state, os.path.join(args.save_path, "checkpoint.pth.tar"))
            torch.save(state, os.path.join(args.save_path, "model_best.pth.tar"))
    writer.close()
def main(args): data_path = '/home/birgit/MA/Code/torchmeta/gitlab/data' with open(args.config, 'r') as f: config = json.load(f) if args.folder is not None: config['folder'] = args.folder if args.num_steps > 0: config['num_steps'] = args.num_steps if args.num_batches > 0: config['num_batches'] = args.num_batches device = torch.device( 'cuda' if args.use_cuda and torch.cuda.is_available() else 'cpu') loss_function = DiceLoss() dataset = 'pascal5i' fold = config['fold'] steps = config['num_adaption_steps'] padding = 1 if 'feature_scale' in config.keys(): model = Unet(feature_scale=config['feature_scale'], padding=padding) else: model = Unet(feature_scale=4, padding=padding) # get datasets and load into meta learning format meta_train_dataset, meta_val_dataset, meta_test_dataset = get_datasets( dataset, data_path, config['num_ways'], config['num_shots'], config['num_shots_test'], fold=fold, download=False, augment=False) meta_val_dataloader = BatchMetaDataLoader(meta_val_dataset, batch_size=config['batch_size'], shuffle=True, num_workers=args.num_workers, pin_memory=True) print('num shots = ', config['num_shots']) print(f'Using device: {device}') with open(config['model_path'], 'rb') as f: model.load_state_dict(torch.load(f, map_location=device)) metalearner = ModelAgnosticMetaLearning(model, first_order=config['first_order'], num_adaptation_steps=steps, step_size=config['step_size'], loss_function=loss_function, device=device) results = metalearner.evaluate(meta_val_dataloader, max_batches=config['num_batches'], verbose=args.verbose, desc='Test', is_test=True) if dataset == 'pascal5i': labels = [ 'aeroplane', 'bike', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'dining table', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor' ] accuracies = [ value for _, value in results['mean_acc_per_label'].items() ] ious = [value for _, value in results['mean_iou_per_label'].items()] val_ious = [x for x in ious if x > 0.0] val_accs = [x for x in accuracies if x > 0.0] y_pos = np.arange(len(labels)) fig, (ax1, ax2) = plt.subplots(1, 2) ax1.barh(y_pos, accuracies, align='center', alpha=0.5) ax1.set_yticks(y_pos) ax1.set_yticklabels(labels) ax1.set_xlabel('acc') ax1.set_xlim(0, 1) ax1.set_title('Accuracies per label') ax2.barh(y_pos, ious, align='center', alpha=0.5) ax2.set_yticks(y_pos) ax2.set_yticklabels(labels) ax2.set_xlabel('iou') ax2.set_xlim(0, 1) ax2.set_title('IoU scores per label') plt.grid(True) plt.show() # Save results dirname = os.path.dirname(config['model_path']) with open(os.path.join(dirname, 'test_results.json'), 'w') as f: json.dump(results, f)
def main(args): #################### init logger ################################### # args.model_list=["unet","unet++_deep","unet++_nodeep",'attention_unet_v1','multires_unet','r2unet_t3', # 'unet_ep800dice','unet++_deep_ep800dice','unet++_nodeep_ep800dice','attention_unet_v1_ep800dice','multires_unet_ep800dice' # ] args.model_list = ['unet', 'unet++', "attention_unet", "multires_unet"] for model_name in args.model_list: if model_name == 'unet': args.model = 'unet' model_weight_path = './logs/chaos/unet_ep150_v2/20200403-134703/checkpoint.pth.tar' model = get_models(args) model.load_state_dict( torch.load(model_weight_path, map_location='cpu')['state_dict']) elif model_name == 'unet++': args.model = 'unet++' args.deepsupervision = False model_weight_path = './logs/chaos/unet++_ep150_v2/20200403-135401/checkpoint.pth.tar' model = get_models(args) model.load_state_dict( torch.load(model_weight_path, map_location='cpu')['state_dict']) # elif model_name == 'unet++_deep': # args.model = 'unet++' # args.deepsupervision = True # model_weight_path = './logs/unet++_deep_ep1600/cvc/20200312-143345/model_best.pth.tar' # model = get_models(args) # model.load_state_dict(torch.load(model_weight_path, map_location='cpu')['state_dict']) elif model_name == 'attention_unet': args.model = 'attention_unet_v1' args.deepsupervision = False model_weight_path = './logs/chaos/attention_unet_v1_ep150_v2/20200403-135445/checkpoint.pth.tar' model = get_models(args) model.load_state_dict( torch.load(model_weight_path, map_location='cpu')['state_dict']) elif model_name == 'multires_unet': args.model = 'multires_unet' args.deepsupervision = False model_weight_path = './logs/chaos/multires_unet_ep150_v2/20200403-135549/checkpoint.pth.tar' model = get_models(args) model.load_state_dict( torch.load(model_weight_path, map_location='cpu')['state_dict']) else: raise NotImplementedError() assert os.path.exists(args.save) args.model_save_path = os.path.join(args.save, model_name) logger = get_logger(args.model_save_path) args.save_images = os.path.join(args.model_save_path, "images") if not os.path.exists(args.save_images): os.mkdir(args.save_images) if args.manualSeed is None: args.manualSeed = random.randint(1, 10000) np.random.seed(args.manualSeed) torch.manual_seed(args.manualSeed) args.use_cuda = args.gpus > 0 and torch.cuda.is_available() args.device = torch.device('cuda' if args.use_cuda else 'cpu') if args.use_cuda: torch.cuda.manual_seed(args.manualSeed) cudnn.benchmark = True val_loader = get_dataloder(args, split_flag="valid") setting = {k: v for k, v in args._get_kwargs()} logger.info(setting) logger.info('param size = %fMB', calc_parameters_count(model)) # init loss if args.loss == 'bce': criterion = nn.BCELoss() elif args.loss == 'bcelog': criterion = nn.BCEWithLogitsLoss() elif args.loss == "dice": criterion = DiceLoss() elif args.loss == "softdice": criterion = SoftDiceLoss() elif args.loss == 'bcedice': criterion = BCEDiceLoss() else: criterion = nn.CrossEntropyLoss() if args.use_cuda: logger.info("load model and criterion to gpu !") model = model.to(args.device) criterion = criterion.to(args.device) infer(args, model, criterion, val_loader, logger, args.save_images)
def main(args): #################### init logger ################################### #args.model_list=["unet","unet++_deep",'attention_unet_v1','multires_unet', 'r2unet_t3'] args.model_list = [ "unet", "unet++_deep", 'unet++_nodeep', "attention_unet_v1", "multires_unet", "r2unet" ] for model_name in args.model_list: # if model_name=='unet': # args.model='unet' # model_weight_path='./logs/isic/logs_coslr/unet/isic2018/20200229-035150/checkpoint.pth.tar' # model=get_models(args) # model.load_state_dict(torch.load(model_weight_path, map_location='cpu')['state_dict']) # elif model_name=='unet++_deep': # args.model='unet++' # args.deepsupervision=True # model_weight_path='./logs/isic/logs_coslr/unet++/isic2018/20200229-035514/checkpoint.pth.tar' # model=get_models(args) # model.load_state_dict(torch.load(model_weight_path, map_location='cpu')['state_dict']) # elif model_name == 'unet++_nodeep': # args.model = 'unet++' # args.deepsupervision = False # model_weight_path = '/checkpoint.pth.tar' # model = get_models(args) # model.load_state_dict(torch.load(model_weight_path, map_location='cpu')['state_dict']) # elif model_name == 'attention_unet_v1': # args.model = 'attention_unet_v1' # model_weight_path = './logs/isic/logs_coslr/attention_unet_v1/isic2018/20200302-190718/checkpoint.pth.tar' # args.deepsupervision=False # model = get_models(args) # model.load_state_dict(torch.load(model_weight_path, map_location='cpu')['state_dict']) # # elif model_name == 'multires_unet': # args.model = 'multires_unet' # model_weight_path = './logs/isic/logs_coslr/multires_unet/isic2018/20200229-035734/checkpoint.pth.tar' # model = get_models(args) # model.load_state_dict(torch.load(model_weight_path, map_location='cpu')['state_dict']) # # elif model_name == 'r2unet_t3': # args.model = 'r2unet' # args.time_step=3 # model_weight_path = './logs/isic/logs_coslr/r2unet/isic2018/20200302-190808/checkpoint.pth.tar' # model = get_models(args) # model.load_state_dict(torch.load(model_weight_path, map_location='cpu')['state_dict']) # ep300 baseline if model_name == 'unet': args.model = 'unet' model_weight_path = './logs/isic2018/unet_ep300/20200402-135108/model_best.pth.tar' model = get_models(args) model.load_state_dict( torch.load(model_weight_path, map_location='cpu')['state_dict']) elif model_name == 'unet++_deep': args.model = 'unet++' args.deepsupervision = True model_weight_path = './logs/isic2018/unet++_ep300_deep/20200402-135243/model_best.pth.tar' model = get_models(args) model.load_state_dict( torch.load(model_weight_path, map_location='cpu')['state_dict']) elif model_name == 'unet++_nodeep': args.model = 'unet++' args.deepsupervision = False model_weight_path = './logs/isic2018/unet++_ep300/20200402-135317/model_best.pth.tar' model = get_models(args) model.load_state_dict( torch.load(model_weight_path, map_location='cpu')['state_dict']) elif model_name == 'attention_unet_v1': args.model = 'attention_unet_v1' args.deepsupervision = False model_weight_path = './logs/isic2018/attention_unet_v1_ep300/20200413-160808//model_best.pth.tar' model = get_models(args) model.load_state_dict( torch.load(model_weight_path, map_location='cpu')['state_dict']) elif model_name == 'multires_unet': args.model = 'multires_unet' args.deepsupervision = False model_weight_path = './logs/isic2018/attention_unet_v1_ep300/20200413-160808//model_best.pth.tar' model = get_models(args) model.load_state_dict( torch.load(model_weight_path, map_location='cpu')['state_dict']) elif model_name == 'r2unet': args.model = 'r2unet' 
args.deepsupervision = False model_weight_path = './logs/isic2018/attention_unet_v1_ep300/20200413-160808//model_best.pth.tar' model = get_models(args) model.load_state_dict( torch.load(model_weight_path, map_location='cpu')['state_dict']) # elif model_name == 'attention_unet_v1': # args.model = 'attention_unet_v1' # model_weight_path = './logs/isic/logs_coslr/attention_unet_v1/isic2018/20200302-190718/checkpoint.pth.tar' # args.deepsupervision=False # model = get_models(args) # model.load_state_dict(torch.load(model_weight_path, map_location='cpu')['state_dict']) # # elif model_name == 'multires_unet': # args.model = 'multires_unet' # model_weight_path = './logs/isic/logs_coslr/multires_unet/isic2018/20200229-035734/checkpoint.pth.tar' # model = get_models(args) # model.load_state_dict(torch.load(model_weight_path, map_location='cpu')['state_dict']) # # elif model_name == 'r2unet_t3': # args.model = 'r2unet' # args.time_step=3 # model_weight_path = './logs/isic/logs_coslr/r2unet/isic2018/20200302-190808/checkpoint.pth.tar' # model = get_models(args) # model.load_state_dict(torch.load(model_weight_path, map_location='cpu')['state_dict']) else: raise NotImplementedError() assert os.path.exists(args.save) args.model_save_path = os.path.join(args.save, model_name) logger = get_logger(args.model_save_path) args.save_images = os.path.join(args.model_save_path, "images") if not os.path.exists(args.save_images): os.mkdir(args.save_images) if args.manualSeed is None: args.manualSeed = random.randint(1, 10000) np.random.seed(args.manualSeed) torch.manual_seed(args.manualSeed) args.use_cuda = args.gpus > 0 and torch.cuda.is_available() args.device = torch.device('cuda' if args.use_cuda else 'cpu') if args.use_cuda: torch.cuda.manual_seed(args.manualSeed) cudnn.benchmark = True val_loader = get_dataloder(args, split_flag="valid") setting = {k: v for k, v in args._get_kwargs()} logger.info(setting) logger.info('param size = %fMB', calc_parameters_count(model)) # init loss if args.loss == 'bce': criterion = nn.BCELoss() elif args.loss == 'bcelog': criterion = nn.BCEWithLogitsLoss() elif args.loss == "dice": criterion = DiceLoss() elif args.loss == "softdice": criterion = SoftDiceLoss() elif args.loss == 'bcedice': criterion = BCEDiceLoss() else: criterion = nn.CrossEntropyLoss() if args.use_cuda: logger.info("load model and criterion to gpu !") model = model.to(args.device) criterion = criterion.to(args.device) infer(args, model, criterion, val_loader, logger, args.save_images)
def main(args): #################### init logger ################################### log_dir = './logs/' + '{}'.format(args.dataset) + '/{}_{}_{}'.format(args.model,args.note,time.strftime('%Y%m%d-%H%M%S')) logger = get_logger(log_dir) print('RUNDIR: {}'.format(log_dir)) logger.info('{}-Train'.format(args.model)) # setting args.save_path = log_dir args.save_tbx_log = args.save_path + '/tbx_log' writer = SummaryWriter(args.save_tbx_log) ##################### init device ################################# if args.manualSeed is None: args.manualSeed = random.randint(1, 10000) np.random.seed(args.manualSeed) torch.manual_seed(args.manualSeed) args.use_cuda = args.gpus > 0 and torch.cuda.is_available() args.device = torch.device('cuda' if args.use_cuda else 'cpu') if args.use_cuda: torch.cuda.manual_seed(args.manualSeed) cudnn.benchmark = True ####################### init dataset ########################################### train_loader = get_dataloder(args, split_flag="train") val_loader = get_dataloder(args, split_flag="valid") ############init model ########################### if args.model == "layer7_double_deep": args.deepsupervision = True args.double_down_channel = True args.genotype_name = 'layer7_double_deep' model_alphas = None genotype = eval('genotypes.%s' % args.genotype_name) model = BuildNasUnetPrune( genotype=genotype, input_c=args.in_channels, c=args.init_channels, num_classes=args.nclass, meta_node_num=args.middle_nodes, layers=7, dp=args.dropout_prob, use_sharing=args.use_sharing, double_down_channel=args.double_down_channel, aux=args.aux ) elif args.model == "stage1_double_deep": args.deepsupervision = True args.double_down_channel = True args.genotype_name = 'stage1_double_deep' model_alphas = None genotype = eval('genotypes.%s' % args.genotype_name) model = BuildNasUnetPrune( genotype=genotype, input_c=args.in_channels, c=args.init_channels, num_classes=args.nclass, meta_node_num=args.middle_nodes, layers=args.layers, dp=args.dropout_prob, use_sharing=args.use_sharing, double_down_channel=args.double_down_channel, aux=args.aux ) elif args.model == "stage1_nodouble_deep": args.deepsupervision = True args.double_down_channel = False args.genotype_name = 'stage1_deep' model_alphas = None genotype = eval('genotypes.%s' % args.genotype_name) model = BuildNasUnetPrune( genotype=genotype, input_c=args.in_channels, c=args.init_channels, num_classes=args.nclass, meta_node_num=args.middle_nodes, layers=args.layers, dp=args.dropout_prob, use_sharing=args.use_sharing, double_down_channel=args.double_down_channel, aux=args.aux ) elif args.model == "stage1_nodouble_deep_slim": args.deepsupervision = True args.double_down_channel = False args.genotype_name = 'stage1_deep' model_alphas = None genotype = eval('genotypes.%s' % args.genotype_name) model = BuildNasUnetPruneSlim( genotype=genotype, input_c=args.in_channels, c=args.init_channels, num_classes=args.nclass, meta_node_num=args.middle_nodes, layers=args.layers, dp=args.dropout_prob, use_sharing=args.use_sharing, double_down_channel=args.double_down_channel, aux=args.aux ) elif args.model == "alpha1_stage1_double_deep_ep80": args.deepsupervision = True args.double_down_channel = True args.genotype_name = 'alpha1_stage1_double_deep_ep80' model_alphas = None genotype = eval('genotypes.%s' % args.genotype_name) model = BuildNasUnetPrune( genotype=genotype, input_c=args.in_channels, c=args.init_channels, num_classes=args.nclass, meta_node_num=args.middle_nodes, layers=args.layers, dp=args.dropout_prob, use_sharing=args.use_sharing, 
double_down_channel=args.double_down_channel, aux=args.aux ) elif args.model == "alpha0_stage1_double_deep_ep80": args.deepsupervision = True args.double_down_channel = True args.genotype_name = 'alpha0_stage1_double_deep_ep80' model_alphas = None genotype = eval('genotypes.%s' % args.genotype_name) model = BuildNasUnetPrune( genotype=genotype, input_c=args.in_channels, c=args.init_channels, num_classes=args.nclass, meta_node_num=args.middle_nodes, layers=args.layers, dp=args.dropout_prob, use_sharing=args.use_sharing, double_down_channel=args.double_down_channel, aux=args.aux ) #isic trans elif args.model == "stage1_layer9_110epoch_double_deep_final": args.deepsupervision = True args.double_down_channel = True args.genotype_name = 'stage1_layer9_110epoch_double_deep_final' genotype = eval('genotypes.%s' % args.genotype_name) model = BuildNasUnetPrune( genotype=genotype, input_c=args.in_channels, c=args.init_channels, num_classes=args.nclass, meta_node_num=args.middle_nodes, layers=args.layers, dp=args.dropout_prob, use_sharing=args.use_sharing, double_down_channel=args.double_down_channel, aux=args.aux ) # just normaL cell keep elif args.model == "dd_normal": args.deepsupervision = True args.double_down_channel = True args.genotype_name = 'alpha0_5_stage1_double_deep_ep80' genotype = eval('genotypes.%s' % args.genotype_name) model = BuildNasUnetPruneNormal( genotype=genotype, input_c=args.in_channels, c=args.init_channels, num_classes=args.nclass, meta_node_num=args.middle_nodes, layers=args.layers, dp=args.dropout_prob, use_sharing=args.use_sharing, double_down_channel=args.double_down_channel, aux=args.aux ) # normal+down elif args.model == "dd_normaldown": args.deepsupervision = True args.double_down_channel = True args.genotype_name = 'alpha0_5_stage1_double_deep_ep80' genotype = eval('genotypes.%s' % args.genotype_name) model = BuildNasUnetPruneNormalDown( genotype=genotype, input_c=args.in_channels, c=args.init_channels, num_classes=args.nclass, meta_node_num=args.middle_nodes, layers=args.layers, dp=args.dropout_prob, use_sharing=args.use_sharing, double_down_channel=args.double_down_channel, aux=args.aux ) # normal+up elif args.model == "dd_normalup": args.deepsupervision = True args.double_down_channel = True args.genotype_name = 'alpha0_5_stage1_double_deep_ep80' genotype = eval('genotypes.%s' % args.genotype_name) model = BuildNasUnetPruneNormalUp( genotype=genotype, input_c=args.in_channels, c=args.init_channels, num_classes=args.nclass, meta_node_num=args.middle_nodes, layers=args.layers, dp=args.dropout_prob, use_sharing=args.use_sharing, double_down_channel=args.double_down_channel, aux=args.aux ) # normal+up+down elif args.model == "alpha0_5_stage1_double_deep_ep80": args.deepsupervision = True args.double_down_channel = True args.genotype_name = 'alpha0_5_stage1_double_deep_ep80' model_alphas = None genotype = eval('genotypes.%s' % args.genotype_name) model = BuildNasUnetPrune( genotype=genotype, input_c=args.in_channels, c=args.init_channels, num_classes=args.nclass, meta_node_num=args.middle_nodes, layers=args.layers, dp=args.dropout_prob, use_sharing=args.use_sharing, double_down_channel=args.double_down_channel, aux=args.aux ) # abliation study of channel doubling and deepsupervision elif args.model == "alpha0_5_stage1_double_nodeep_ep80": args.deepsupervision = False args.double_down_channel = True args.genotype_name = 'alpha0_5_stage1_double_nodeep_ep80' model_alphas = None genotype = eval('genotypes.%s' % args.genotype_name) model = BuildNasUnetPrune( 
genotype=genotype, input_c=args.in_channels, c=args.init_channels, num_classes=args.nclass, meta_node_num=args.middle_nodes, layers=args.layers, dp=args.dropout_prob, use_sharing=args.use_sharing, double_down_channel=args.double_down_channel, aux=args.aux ) elif args.model == "alpha0_5_stage1_nodouble_deep_ep80": args.deepsupervision = True args.double_down_channel = False args.genotype_name = 'alpha0_5_stage1_nodouble_deep_ep80' model_alphas = None genotype = eval('genotypes.%s' % args.genotype_name) model = BuildNasUnetPrune( genotype=genotype, input_c=args.in_channels, c=args.init_channels, num_classes=args.nclass, meta_node_num=args.middle_nodes, layers=args.layers, dp=args.dropout_prob, use_sharing=args.use_sharing, double_down_channel=args.double_down_channel, aux=args.aux ) elif args.model == "alpha0_5_stage1_nodouble_nodeep_ep80": args.deepsupervision = False args.double_down_channel = False args.genotype_name = 'alpha0_5_stage1_nodouble_nodeep_ep80' model_alphas = None genotype = eval('genotypes.%s' % args.genotype_name) model = BuildNasUnetPrune( genotype=genotype, input_c=args.in_channels, c=args.init_channels, num_classes=args.nclass, meta_node_num=args.middle_nodes, layers=args.layers, dp=args.dropout_prob, use_sharing=args.use_sharing, double_down_channel=args.double_down_channel, aux=args.aux ) if torch.cuda.device_count() > 1 and args.use_cuda: logger.info('use: %d gpus', torch.cuda.device_count()) model = nn.DataParallel(model) setting = {k: v for k, v in args._get_kwargs()} logger.info(setting) logger.info(genotype) logger.info('param size = %fMB', calc_parameters_count(model)) # init loss if args.loss == 'bce': criterion = nn.BCELoss() elif args.loss == 'bcelog': criterion = nn.BCEWithLogitsLoss() elif args.loss == "dice": criterion = DiceLoss() elif args.loss == "softdice": criterion = SoftDiceLoss() elif args.loss == 'bcedice': criterion = BCEDiceLoss() else: criterion = nn.CrossEntropyLoss() if args.use_cuda: logger.info("load model and criterion to gpu !") model = model.to(args.device) criterion = criterion.to(args.device) # init optimizer optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, weight_decay=args.weight_decay, momentum=args.momentum) # init schedulers Steplr scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, args.epoch) # scheduler=torch.optim.lr_scheduler.StepLR(optimizer=optimizer,step_size=30,gamma=0.1,last_epoch=-1) ############################### check resume ######################### start_epoch = 0 if args.resume is not None: if os.path.isfile(args.resume): logger.info("Loading model and optimizer from checkpoint '{}'".format(args.resume)) checkpoint = torch.load(args.resume, map_location=args.device) start_epoch = checkpoint['epoch'] optimizer.load_state_dict(checkpoint['optimizer']) model.load_state_dict(checkpoint['state_dict']) scheduler.load_state_dict(checkpoint['scheduler']) else: raise FileNotFoundError("No checkpoint found at '{}'".format(args.resume)) #################################### train and val ######################## max_value = 0 for epoch in range(start_epoch, args.epoch): # lr=adjust_learning_rate(args,optimizer,epoch) scheduler.step() logger.info('Epoch: %d lr %e', epoch, scheduler.get_lr()[0]) # train if args.deepsupervision: mean_loss, value1, value2 = train(args, model, criterion, train_loader, optimizer) mr, ms, mp, mf, mjc, md, macc = value1 mmr, mms, mmp, mmf, mmjc, mmd, mmacc = value2 logger.info( "Epoch:{} Train_Loss:{:.3f} Acc:{:.3f} Dice:{:.3f} Jc:{:.3f}".format(epoch, mean_loss, macc, md, 
mjc)) logger.info(" dmAcc:{:.3f} dmDice:{:.3f} dmJc:{:.3f}".format(mmacc, mmd, mmjc)) writer.add_scalar('Train/dmAcc', mmacc, epoch) writer.add_scalar('Train/dRecall', mmr, epoch) writer.add_scalar('Train/dSpecifi', mms, epoch) writer.add_scalar('Train/dPrecision', mmp, epoch) writer.add_scalar('Train/dF1', mmf, epoch) writer.add_scalar('Train/dJc', mmjc, epoch) writer.add_scalar('Train/dDice', mmd, epoch) else: mean_loss, value1 = train(args, model, criterion, train_loader, optimizer) mr, ms, mp, mf, mjc, md, macc = value1 logger.info( "Epoch:{} Train_Loss:{:.3f} Acc:{:.3f} Dice:{:.3f} Jc:{:.3f}".format(epoch, mean_loss, macc, md, mjc)) # write writer.add_scalar('Train/Loss', mean_loss, epoch) writer.add_scalar('Train/mAcc', macc, epoch) writer.add_scalar('Train/Recall', mr, epoch) writer.add_scalar('Train/Specifi', ms, epoch) writer.add_scalar('Train/Precision', mp, epoch) writer.add_scalar('Train/F1', mf, epoch) writer.add_scalar('Train/Jc', mjc, epoch) writer.add_scalar('Train/Dice', md, epoch) # val if args.deepsupervision: vmean_loss, valuev1, valuev2 = infer(args, model, criterion, val_loader) vmr, vms, vmp, vmf, vmjc, vmd, vmacc = valuev1 mvmr, mvms, mvmp, mvmf, mvmjc, mvmd, mvmacc = valuev2 logger.info( "Epoch:{} Val_Loss:{:.3f} Acc:{:.3f} Dice:{:.3f} Jc:{:.3f}".format(epoch, vmean_loss, vmacc, vmd, vmjc)) logger.info(" dmAcc:{:.3f} dmDice:{:.3f} dmJc:{:.3f}".format(mvmacc, mvmd, mvmjc)) writer.add_scalar('Val/mAcc', mvmacc, epoch) writer.add_scalar('Val/Recall', mvmr, epoch) writer.add_scalar('Val/Specifi', mvms, epoch) writer.add_scalar('Val/Precision', mvmp, epoch) writer.add_scalar('Val/F1', mvmf, epoch) writer.add_scalar('Val/Jc', mvmjc, epoch) writer.add_scalar('Val/Dice', mvmd, epoch) else: vmean_loss, valuev1 = infer(args, model, criterion, val_loader) vmr, vms, vmp, vmf, vmjc, vmd, vmacc = valuev1 logger.info( "Epoch:{} Val_Loss:{:.3f} Acc:{:.3f} Dice:{:.3f} Jc:{:.3f}".format(epoch, vmean_loss, vmacc, vmd, vmjc)) is_best = True if (vmjc >= max_value) else False max_value = max(max_value, vmjc) writer.add_scalar('Val/Loss', vmean_loss, epoch) writer.add_scalar('Val/mAcc', vmacc, epoch) writer.add_scalar('Val/Recall', vmr, epoch) writer.add_scalar('Val/Specifi', vms, epoch) writer.add_scalar('Val/Precision', vmp, epoch) writer.add_scalar('Val/F1', vmf, epoch) writer.add_scalar('Val/Jc', vmjc, epoch) writer.add_scalar('Val/Dice', vmd, epoch) state={ 'epoch': epoch, 'optimizer': optimizer.state_dict(), 'state_dict': model.state_dict(), 'scheduler': scheduler.state_dict(), } logger.info("epoch:{} best:{} max_value:{}".format(epoch,is_best,max_value)) if not is_best: torch.save(state,os.path.join(args.save_path,"checkpoint.pth.tar")) else: torch.save(state,os.path.join(args.save_path,"checkpoint.pth.tar")) torch.save(state,os.path.join(args.save_path,"model_best.pth.tar")) writer.close()
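Note that the checkpoint dictionary saved above must store scheduler.state_dict() rather than a second copy of the model weights, otherwise the resume branch (scheduler.load_state_dict(checkpoint['scheduler'])) silently restores the wrong object. A small, model-agnostic sketch of the save/resume pair these training scripts rely on:

import os
import torch

def save_checkpoint(save_path, epoch, model, optimizer, scheduler, is_best):
    # Mirrors the pattern above, with the scheduler state stored correctly.
    state = {
        'epoch': epoch,
        'state_dict': model.state_dict(),
        'optimizer': optimizer.state_dict(),
        'scheduler': scheduler.state_dict(),
    }
    torch.save(state, os.path.join(save_path, 'checkpoint.pth.tar'))
    if is_best:
        torch.save(state, os.path.join(save_path, 'model_best.pth.tar'))

def resume_checkpoint(resume_path, model, optimizer, scheduler, device='cpu'):
    # Mirror of the resume branch: restores weights, optimizer and scheduler state.
    checkpoint = torch.load(resume_path, map_location=device)
    model.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    scheduler.load_state_dict(checkpoint['scheduler'])
    return checkpoint['epoch']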
def main(args): ############ init config ################ #################### init logger ################################### log_dir = './search_exp/' + '/{}'.format(args.model) + \ '/{}'.format(args.dataset) + '/{}_{}'.format(time.strftime('%Y%m%d-%H%M%S'),args.note) logger = get_logger(log_dir) print('RUNDIR: {}'.format(log_dir)) logger.info('{}-Search'.format(args.model)) args.save_path = log_dir args.save_tbx_log = args.save_path + '/tbx_log' writer = SummaryWriter(args.save_tbx_log) ##################### init device ################################# if args.manualSeed is None: args.manualSeed = random.randint(1, 10000) np.random.seed(args.manualSeed) torch.manual_seed(args.manualSeed) args.use_cuda = args.gpus > 0 and torch.cuda.is_available() args.multi_gpu = args.gpus > 1 and torch.cuda.is_available() args.device = torch.device('cuda:0' if args.use_cuda else 'cpu') if args.use_cuda: torch.cuda.manual_seed(args.manualSeed) cudnn.enabled = True cudnn.benchmark = True setting = {k: v for k, v in args._get_kwargs()} logger.info(setting) ####################### init dataset ########################################### logger.info("Dataset for search is {}".format(args.dataset)) train_dataset = datasets_dict[args.dataset](args, args.dataset_root, split='train') val_dataset = datasets_dict[args.dataset](args, args.dataset_root, split='valid') # train_dataset=datasets_dict[args.dataset](args,split='train') # val_dataset=datasets_dict[args.dataset](args,split='valid') num_train = len(train_dataset) indices = list(range(num_train)) split = int(np.floor(args.train_portion * num_train)) # init loss if args.loss == 'bce': criterion = nn.BCELoss() elif args.loss == 'bcelog': criterion = nn.BCEWithLogitsLoss() elif args.loss == "dice": criterion = DiceLoss() elif args.loss == "softdice": criterion = SoftDiceLoss() elif args.loss == 'bcedice': criterion = BCEDiceLoss() else: criterion = nn.CrossEntropyLoss() if args.use_cuda: logger.info("load criterion to gpu !") criterion = criterion.to(args.device) ######################## init model ############################################ switches_normal = [] switches_down = [] switches_up = [] nums_mixop = sum([2 + i for i in range(args.meta_node_num)]) for i in range(nums_mixop): switches_normal.append([True for j in range(len(CellPos))]) for i in range(nums_mixop): switches_down.append([True for j in range(len(CellLinkDownPos))]) for i in range(nums_mixop): switches_up.append([True for j in range(len(CellLinkUpPos))]) # 6-->3-->1 drop_op_down = [2, 3] # 4-->2-->1 drop_op_up = [2, 1] # 7-->4-->1 drop_op_normal = [3, 3] # stage 0 pruning, stage 1 pruning, stage 2 (training) original_train_batch = args.train_batch original_val_batch = args.val_batch for sp in range(2): # build dataloader # model ,numclass=1,im_ch=3,init_channel=16,intermediate_nodes=4,layers=9 if sp == 0: args.model = "UnetLayer7" args.layers = 7 sp_train_batch = original_train_batch sp_val_batch = original_val_batch sp_epoch = args.epochs sp_lr = args.lr else: #args.model = "UnetLayer9" # on the compute platform, UnetLayer9 is the same as UnetLayer9_v2 args.model = "UnetLayer9" args.layers = 9 sp_train_batch = original_train_batch sp_val_batch = original_val_batch sp_lr = args.lr sp_epoch = args.epochs train_queue = data.DataLoader( train_dataset, batch_size=sp_train_batch, sampler=torch.utils.data.sampler.SubsetRandomSampler( indices[:split]), pin_memory=True, num_workers=args.num_workers) val_queue = data.DataLoader( train_dataset, batch_size=sp_train_batch, sampler=torch.utils.data.sampler.SubsetRandomSampler(
indices[split:num_train]), pin_memory=True, num_workers=args.num_workers) test_dataloader = data.DataLoader(val_dataset, batch_size=sp_val_batch, pin_memory=True, num_workers=args.num_workers) logger.info( "stage:{} model:{} epoch:{} lr:{} train_batch:{} val_batch:{}". format(sp, args.model, sp_epoch, sp_lr, sp_train_batch, sp_val_batch)) model = get_models(args, switches_normal, switches_down, switches_up) save_model_path = os.path.join(args.save_path, "stage_{}_model".format(sp)) if not os.path.exists(save_model_path): os.mkdir(save_model_path) if args.multi_gpu: logger.info('use: %d gpus', args.gpus) model = nn.DataParallel(model) model = model.to(args.device) logger.info('param size = %fMB', calc_parameters_count(model)) # init optimizer for arch parameters and weight parameters # final stage, just train the network parameters optimizer_arch = torch.optim.Adam(model.arch_parameters(), lr=args.arch_lr, betas=(0.5, 0.999), weight_decay=args.arch_weight_decay) optimizer_weight = torch.optim.SGD(model.weight_parameters(), lr=sp_lr, weight_decay=args.weight_decay, momentum=args.momentum) scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( optimizer_weight, sp_epoch, eta_min=args.lr_min) #################################### train and val ######################## max_value = 0 for epoch in range(0, sp_epoch): # lr=adjust_learning_rate(args,optimizer,epoch) scheduler.step() logger.info('Epoch: %d lr %e', epoch, scheduler.get_lr()[0]) # train if epoch < args.arch_after: weight_loss_avg, arch_loss_avg, mr, ms, mp, mf, mjc, md, macc = train( args, train_queue, val_queue, model, criterion, optimizer_weight, optimizer_arch, train_arch=False) else: weight_loss_avg, arch_loss_avg, mr, ms, mp, mf, mjc, md, macc = train( args, train_queue, val_queue, model, criterion, optimizer_weight, optimizer_arch, train_arch=True) logger.info("Epoch:{} WeightLoss:{:.3f} ArchLoss:{:.3f}".format( epoch, weight_loss_avg, arch_loss_avg)) logger.info(" Acc:{:.3f} Dice:{:.3f} Jc:{:.3f}".format( macc, md, mjc)) # write writer.add_scalar('Train/W_loss', weight_loss_avg, epoch) writer.add_scalar('Train/A_loss', arch_loss_avg, epoch) writer.add_scalar('Train/Dice', md, epoch) # infer if (epoch + 1) % args.infer_epoch == 0: genotype = model.genotype() logger.info('genotype = %s', genotype) val_loss, (vmr, vms, vmp, vmf, vmjc, vmd, vmacc) = infer(args, model, val_queue, criterion) logger.info( "ValLoss:{:.3f} ValAcc:{:.3f} ValDice:{:.3f} ValJc:{:.3f}" .format(val_loss, vmacc, vmd, vmjc)) writer.add_scalar('Val/loss', val_loss, epoch) is_best = True if (vmjc >= max_value) else False max_value = max(max_value, vmjc) state = { 'epoch': epoch, 'optimizer_arch': optimizer_arch.state_dict(), 'optimizer_weight': optimizer_weight.state_dict(), 'scheduler': scheduler.state_dict(), 'state_dict': model.state_dict(), 'alphas_dict': model.alphas_dict(), } logger.info("epoch:{} best:{} max_value:{}".format( epoch, is_best, max_value)) if not is_best: torch.save( state, os.path.join(save_model_path, "checkpoint.pth.tar")) else: torch.save( state, os.path.join(save_model_path, "checkpoint.pth.tar")) torch.save( state, os.path.join(save_model_path, "model_best.pth.tar")) # one stage end, we should change the operations num (divided 2) weight_down = F.softmax(model.arch_parameters()[0], dim=-1).data.cpu().numpy() weight_up = F.softmax(model.arch_parameters()[1], dim=-1).data.cpu().numpy() weight_normal = F.softmax(model.arch_parameters()[2], dim=-1).data.cpu().numpy() weight_network = F.softmax(model.arch_parameters()[3], 
dim=-1).data.cpu().numpy() logger.info("alphas_down: \n{}".format(weight_down)) logger.info("alphas_up: \n{}".format(weight_up)) logger.info("alphas_normal: \n{}".format(weight_normal)) logger.info("alphas_network: \n{}".format(weight_network)) genotype = model.genotype() logger.info('Stage:{} \n Genotype: {}'.format(sp, genotype)) logger.info( '------Stage {} end ! Then Dropping Paths------'.format(sp)) # 6 4 7 # CellLinkDownPos CellLinkUpPos CellPos # # 6-->3-->1 # drop_op_down = [3, 2] # # 4-->2-->1 # drop_op_up = [2, 1] # # 7-->4-->1 # drop_op_normal = [3, 3] # update switches in 0 stage end if sp == 0: switches_down = update_switches(weight_down.copy(), switches_down.copy(), CellLinkDownPos, drop_op_down[sp]) switches_up = update_switches(weight_up.copy(), switches_up.copy(), CellLinkUpPos, drop_op_up[sp]) switches_normal = update_switches(weight_normal.copy(), switches_normal.copy(), CellPos, drop_op_normal[sp]) logger.info('switches_down = %s', switches_down) logger.info('switches_up = %s', switches_up) logger.info('switches_normal = %s', switches_normal) logging_switches(logger, switches_down, CellLinkDownPos) logging_switches(logger, switches_up, CellLinkUpPos) logging_switches(logger, switches_normal, CellPos) else: # sp==1 is the final stage, we don`t need the keep operations # because we has the model.genotype # show the final one op in 14 mixop switches_down = update_switches(weight_down.copy(), switches_down.copy(), CellLinkDownPos, drop_op_down[sp]) switches_up = update_switches(weight_up.copy(), switches_up.copy(), CellLinkUpPos, drop_op_up[sp]) switches_normal = update_switches_nozero(weight_normal.copy(), switches_normal.copy(), CellPos, drop_op_normal[sp]) logger.info('switches_down = %s', switches_down) logger.info('switches_up = %s', switches_up) logger.info('switches_normal = %s', switches_normal) logging_switches(logger, switches_down, CellLinkDownPos) logging_switches(logger, switches_up, CellLinkUpPos) logging_switches(logger, switches_normal, CellPos) writer.close()
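update_switches and update_switches_nozero are imported from elsewhere in the repo; the sketch below shows one plausible way the pruning step could work (for each mixed op, switch off the drop_num currently-active candidates with the smallest architecture weights). The handling of the zero/none operation and any tie-breaking in the real helpers may differ, so treat this purely as an illustration of the idea.

import numpy as np

def update_switches(weights, switches, op_names, drop_num):
    # weights: (num_mixops, num_active_ops) softmaxed architecture weights
    # switches: list of boolean lists over all candidate ops (True = still active)
    # op_names: kept only to mirror the call signature above; unused in this sketch
    weights = np.asarray(weights)
    for i, row in enumerate(switches):
        active = [j for j, keep in enumerate(row) if keep]
        # indices (within the active ops) of the drop_num weakest candidates
        weakest = np.argsort(weights[i][:len(active)])[:drop_num]
        for k in weakest:
            row[active[k]] = False
    return switches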
def train(cont=False): # for tensorboard tracking logger = get_logger() logger.info("(1) Initiating Training ... ") logger.info("Training on device: {}".format(device)) writer = SummaryWriter() # init model aux_layers = None if net == "SETR-PUP": aux_layers, model = get_SETR_PUP() elif net == "SETR-MLA": aux_layers, model = get_SETR_MLA() elif net == "TransUNet-Base": model = get_TransUNet_base() elif net == "TransUNet-Large": model = get_TransUNet_large() elif net == "UNet": model = UNet(CLASS_NUM) # prepare dataset cluster_model = get_clustering_model(logger) train_dataset = CityscapeDataset(img_dir=data_dir, img_dim=IMG_DIM, mode="train", cluster_model=cluster_model) valid_dataset = CityscapeDataset(img_dir=data_dir, img_dim=IMG_DIM, mode="val", cluster_model=cluster_model) train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True) valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False) logger.info("(2) Dataset Initiated. ") # optimizer epochs = epoch_num if epoch_num > 0 else iteration_num // len( train_loader) + 1 optim = SGD(model.parameters(), lr=lrate, momentum=momentum, weight_decay=wdecay) # optim = Adam(model.parameters(), lr=lrate) scheduler = lr_scheduler.MultiStepLR( optim, milestones=[int(epochs * fine_tune_ratio)], gamma=0.1) cur_epoch = 0 best_loss = float('inf') epochs_since_improvement = 0 # for continue training if cont: model, optim, cur_epoch, best_loss = load_ckpt_continue_training( best_ckpt_src, model, optim, logger) logger.info("Current best loss: {0}".format(best_loss)) with warnings.catch_warnings(): warnings.simplefilter("ignore") for i in range(cur_epoch): scheduler.step() else: model = nn.DataParallel(model) model = model.to(device) logger.info("(3) Model Initiated ... ") logger.info("Training model: {}".format(net) + ". Training Started.") # loss ce_loss = CrossEntropyLoss() if use_dice_loss: dice_loss = DiceLoss(CLASS_NUM) # loop over epochs iter_count = 0 epoch_bar = tqdm.tqdm(total=epochs, desc="Epoch", position=cur_epoch, leave=True) logger.info("Total epochs: {0}. Starting from epoch {1}.".format( epochs, cur_epoch + 1)) for e in range(epochs - cur_epoch): epoch = e + cur_epoch # Training. 
model.train() trainLossMeter = LossMeter() train_batch_bar = tqdm.tqdm(total=len(train_loader), desc="TrainBatch", position=0, leave=True) for batch_num, (orig_img, mask_img) in enumerate(train_loader): orig_img, mask_img = orig_img.float().to( device), mask_img.float().to(device) if net == "TransUNet-Base" or net == "TransUNet-Large": pred = model(orig_img) elif net == "SETR-PUP" or net == "SETR-MLA": if aux_layers is not None: pred, _ = model(orig_img) else: pred = model(orig_img) elif net == "UNet": pred = model(orig_img) loss_ce = ce_loss(pred, mask_img[:].long()) if use_dice_loss: loss_dice = dice_loss(pred, mask_img, softmax=True) loss = 0.5 * (loss_ce + loss_dice) else: loss = loss_ce # Backward Propagation, Update weight and metrics optim.zero_grad() loss.backward() optim.step() # update learning rate for param_group in optim.param_groups: orig_lr = param_group['lr'] param_group['lr'] = orig_lr * (1.0 - iter_count / iteration_num)**0.9 iter_count += 1 # Update loss trainLossMeter.update(loss.item()) # print status if (batch_num + 1) % print_freq == 0: status = 'Epoch: [{0}][{1}/{2}]\t' \ 'Loss {loss.val:.4f} ({loss.avg:.4f})\t'.format(epoch+1, batch_num+1, len(train_loader), loss=trainLossMeter) logger.info(status) # log loss to tensorboard if (batch_num + 1) % tensorboard_freq == 0: writer.add_scalar( 'Train_Loss_{0}'.format(tensorboard_freq), trainLossMeter.avg, epoch * (len(train_loader) / tensorboard_freq) + (batch_num + 1) / tensorboard_freq) train_batch_bar.update(1) writer.add_scalar('Train_Loss_epoch', trainLossMeter.avg, epoch) # Validation. model.eval() validLossMeter = LossMeter() valid_batch_bar = tqdm.tqdm(total=len(valid_loader), desc="ValidBatch", position=0, leave=True) with torch.no_grad(): for batch_num, (orig_img, mask_img) in enumerate(valid_loader): orig_img, mask_img = orig_img.float().to( device), mask_img.float().to(device) if net == "TransUNet-Base" or net == "TransUNet-Large": pred = model(orig_img) elif net == "SETR-PUP" or net == "SETR-MLA": if aux_layers is not None: pred, _ = model(orig_img) else: pred = model(orig_img) elif net == "UNet": pred = model(orig_img) loss_ce = ce_loss(pred, mask_img[:].long()) if use_dice_loss: loss_dice = dice_loss(pred, mask_img, softmax=True) loss = 0.5 * (loss_ce + loss_dice) else: loss = loss_ce # Update loss validLossMeter.update(loss.item()) # print status if (batch_num + 1) % print_freq == 0: status = 'Validation: [{0}][{1}/{2}]\t' \ 'Loss {loss.val:.4f} ({loss.avg:.4f})\t'.format(epoch+1, batch_num+1, len(valid_loader), loss=validLossMeter) logger.info(status) # log loss to tensorboard if (batch_num + 1) % tensorboard_freq == 0: writer.add_scalar( 'Valid_Loss_{0}'.format(tensorboard_freq), validLossMeter.avg, epoch * (len(valid_loader) / tensorboard_freq) + (batch_num + 1) / tensorboard_freq) valid_batch_bar.update(1) valid_loss = validLossMeter.avg writer.add_scalar('Valid_Loss_epoch', valid_loss, epoch) logger.info("Validation Loss of epoch [{0}/{1}]: {2}\n".format( epoch + 1, epochs, valid_loss)) # update optim scheduler scheduler.step() # save checkpoint is_best = valid_loss < best_loss best_loss_tmp = min(valid_loss, best_loss) if not is_best: epochs_since_improvement += 1 logger.info("Epochs since last improvement: %d\n" % (epochs_since_improvement, )) if epochs_since_improvement == early_stop_tolerance: break # early stopping. 
else: epochs_since_improvement = 0 state = { 'epoch': epoch, 'loss': best_loss_tmp, 'model_state_dict': model.state_dict(), 'optimizer_state_dict': optim.state_dict(), } torch.save(state, ckpt_src) logger.info("Checkpoint updated.") best_loss = best_loss_tmp epoch_bar.update(1) writer.close()
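LossMeter is used above purely as a running-average tracker (update(value), .val, .avg); a minimal sketch compatible with that usage, assuming the conventional AverageMeter behaviour:

class LossMeter:
    # Sketch of a running-average meter matching the .update/.val/.avg usage above.
    def __init__(self):
        self.val = 0.0    # most recent value
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count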
def main(args): #################### init logger ################################### log_dir = './logs/' + '{}'.format(args.dataset) + '/{}_{}_{}'.format( args.model, args.note, time.strftime('%Y%m%d-%H%M%S')) logger = get_logger(log_dir) print('RUNDIR: {}'.format(log_dir)) logger.info('{}-Train'.format(args.model)) # setting setting = {k: v for k, v in args._get_kwargs()} logger.info(setting) args.save_path = log_dir args.save_tbx_log = args.save_path + '/tbx_log' writer = SummaryWriter(args.save_tbx_log) ##################### init device ################################# if args.manualSeed is None: args.manualSeed = random.randint(1, 10000) np.random.seed(args.manualSeed) torch.manual_seed(args.manualSeed) args.use_cuda = args.gpus > 0 and torch.cuda.is_available() args.device = torch.device('cuda' if args.use_cuda else 'cpu') if args.use_cuda: torch.cuda.manual_seed(args.manualSeed) cudnn.benchmark = True ####################### init dataset ########################################### train_loader = get_dataloder(args, split_flag="train") val_loader = get_dataloder(args, split_flag="valid") ######################## init model ############################################ # model ############init model ########################### if args.model == "nodouble_deep_init32_ep100": args.deepsupervision = True args.double_down_channel = False args.genotype_name = 'nodouble_deep_init32_ep100' genotype = eval('genotypes.%s' % args.genotype_name) model = BuildNasUnetPrune(genotype=genotype, input_c=args.in_channels, c=32, num_classes=args.nclass, meta_node_num=args.middle_nodes, layers=9, dp=args.dropout_prob, use_sharing=args.use_sharing, double_down_channel=args.double_down_channel, aux=args.aux) elif args.model == "nodouble_deep_isic": args.deepsupervision = True args.double_down_channel = False args.genotype_name = 'stage1_layer9_110epoch_deep_final' genotype = eval('genotypes.%s' % args.genotype_name) model = BuildNasUnetPrune(genotype=genotype, input_c=args.in_channels, c=args.init_channels, num_classes=args.nclass, meta_node_num=args.middle_nodes, layers=args.layers, dp=args.dropout_prob, use_sharing=args.use_sharing, double_down_channel=args.double_down_channel, aux=args.aux) elif args.model == "nodouble_deep_drop02_layer7end": args.deepsupervision = True args.double_down_channel = False args.genotype_name = 'nodouble_deep_drop02_layer7end' genotype = eval('genotypes.%s' % args.genotype_name) model = BuildNasUnetPrune(genotype=genotype, input_c=args.in_channels, c=args.init_channels, num_classes=args.nclass, meta_node_num=args.middle_nodes, layers=args.layers, dp=args.dropout_prob, use_sharing=args.use_sharing, double_down_channel=args.double_down_channel, aux=args.aux) elif args.model == "stage1_nodouble_deep_ep36": args.deepsupervision = True args.double_down_channel = False args.genotype_name = 'stage1_nodouble_deep_ep36' genotype = eval('genotypes.%s' % args.genotype_name) model = BuildNasUnetPrune(genotype=genotype, input_c=args.in_channels, c=args.init_channels, num_classes=args.nclass, meta_node_num=args.middle_nodes, layers=args.layers, dp=args.dropout_prob, use_sharing=args.use_sharing, double_down_channel=args.double_down_channel, aux=args.aux) elif args.model == "stage1_nodouble_deep_ep63": args.deepsupervision = True args.double_down_channel = False args.genotype_name = 'stage1_nodouble_deep_ep63' genotype = eval('genotypes.%s' % args.genotype_name) model = BuildNasUnetPrune(genotype=genotype, input_c=args.in_channels, c=args.init_channels, num_classes=args.nclass, 
meta_node_num=args.middle_nodes, layers=args.layers, dp=args.dropout_prob, use_sharing=args.use_sharing, double_down_channel=args.double_down_channel, aux=args.aux) elif args.model == "stage1_nodouble_deep_ep83": args.deepsupervision = True args.double_down_channel = False args.genotype_name = 'stage1_nodouble_deep_ep83' genotype = eval('genotypes.%s' % args.genotype_name) model = BuildNasUnetPrune(genotype=genotype, input_c=args.in_channels, c=args.init_channels, num_classes=args.nclass, meta_node_num=args.middle_nodes, layers=args.layers, dp=args.dropout_prob, use_sharing=args.use_sharing, double_down_channel=args.double_down_channel, aux=args.aux) elif args.model == "alpha1_stage1_double_deep_ep80": args.deepsupervision = True args.double_down_channel = True args.genotype_name = 'alpha1_stage1_double_deep_ep80' genotype = eval('genotypes.%s' % args.genotype_name) model = BuildNasUnetPrune(genotype=genotype, input_c=args.in_channels, c=args.init_channels, num_classes=args.nclass, meta_node_num=args.middle_nodes, layers=args.layers, dp=args.dropout_prob, use_sharing=args.use_sharing, double_down_channel=args.double_down_channel, aux=args.aux) elif args.model == "alpha0_stage1_double_deep_ep80": args.deepsupervision = True args.double_down_channel = True args.genotype_name = 'alpha0_stage1_double_deep_ep80' genotype = eval('genotypes.%s' % args.genotype_name) model = BuildNasUnetPrune(genotype=genotype, input_c=args.in_channels, c=args.init_channels, num_classes=args.nclass, meta_node_num=args.middle_nodes, layers=args.layers, dp=args.dropout_prob, use_sharing=args.use_sharing, double_down_channel=args.double_down_channel, aux=args.aux) elif args.model == "alpha0_5_stage1_double_deep_ep80": args.deepsupervision = True args.double_down_channel = True args.genotype_name = 'alpha0_5_stage1_double_deep_ep80' genotype = eval('genotypes.%s' % args.genotype_name) model = BuildNasUnetPrune(genotype=genotype, input_c=args.in_channels, c=args.init_channels, num_classes=args.nclass, meta_node_num=args.middle_nodes, layers=args.layers, dp=args.dropout_prob, use_sharing=args.use_sharing, double_down_channel=args.double_down_channel, aux=args.aux) # isic trans elif args.model == "stage1_layer9_110epoch_double_deep_final": args.deepsupervision = True args.double_down_channel = True args.genotype_name = 'stage1_layer9_110epoch_double_deep_final' genotype = eval('genotypes.%s' % args.genotype_name) model = BuildNasUnetPrune(genotype=genotype, input_c=args.in_channels, c=args.init_channels, num_classes=args.nclass, meta_node_num=args.middle_nodes, layers=args.layers, dp=args.dropout_prob, use_sharing=args.use_sharing, double_down_channel=args.double_down_channel, aux=args.aux) #chaos elif args.model == "stage0_double_deep_ep80_newim": args.deepsupervision = True args.double_down_channel = True args.genotype_name = 'stage0_double_deep_ep80_newim' genotype = eval('genotypes.%s' % args.genotype_name) model = BuildNasUnetPrune(genotype=genotype, input_c=args.in_channels, c=args.init_channels, num_classes=args.nclass, meta_node_num=args.middle_nodes, layers=args.layers, dp=args.dropout_prob, use_sharing=args.use_sharing, double_down_channel=args.double_down_channel, aux=args.aux) elif args.model == "stage1_double_deep_ep80": args.deepsupervision = True args.double_down_channel = True args.genotype_name = 'stage1_double_deep_ep80' genotype = eval('genotypes.%s' % args.genotype_name) model = BuildNasUnetPrune(genotype=genotype, input_c=args.in_channels, c=args.init_channels, num_classes=args.nclass, 
meta_node_num=args.middle_nodes, layers=args.layers, dp=args.dropout_prob, use_sharing=args.use_sharing, double_down_channel=args.double_down_channel, aux=args.aux) elif args.model == "stage1_double_deep_ep80_ts": args.deepsupervision = True args.double_down_channel = True args.genotype_name = 'stage1_double_deep_ep80_ts' genotype = eval('genotypes.%s' % args.genotype_name) model = BuildNasUnetPrune(genotype=genotype, input_c=args.in_channels, c=args.init_channels, num_classes=args.nclass, meta_node_num=args.middle_nodes, layers=args.layers, dp=args.dropout_prob, use_sharing=args.use_sharing, double_down_channel=args.double_down_channel, aux=args.aux) # cvc trans elif args.model == "layer7_double_deep": args.deepsupervision = True args.double_down_channel = True args.genotype_name = 'layer7_double_deep' genotype = eval('genotypes.%s' % args.genotype_name) model = BuildNasUnetPrune(genotype=genotype, input_c=args.in_channels, c=args.init_channels, num_classes=args.nclass, meta_node_num=args.middle_nodes, layers=args.layers, dp=args.dropout_prob, use_sharing=args.use_sharing, double_down_channel=args.double_down_channel, aux=args.aux) if torch.cuda.device_count() > 1 and args.use_cuda: logger.info('use: %d gpus', torch.cuda.device_count()) model = nn.DataParallel(model) setting = {k: v for k, v in args._get_kwargs()} logger.info(setting) logger.info(genotype) logger.info('param size = %fMB', calc_parameters_count(model)) # init loss if args.loss == 'bce': criterion = nn.BCELoss() elif args.loss == 'bcelog': criterion = nn.BCEWithLogitsLoss() elif args.loss == "dice": criterion = DiceLoss() elif args.loss == "softdice": criterion = SoftDiceLoss() elif args.loss == 'bcedice': criterion = BCEDiceLoss() elif args.loss == 'multibcedice': criterion = MultiClassEntropyDiceLoss() else: criterion = nn.CrossEntropyLoss() if args.use_cuda: logger.info("load model and criterion to gpu !") model = model.to(args.device) criterion = criterion.to(args.device) # init optimizer optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, weight_decay=args.weight_decay, momentum=args.momentum) # init schedulers Steplr scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( optimizer, args.epoch) # scheduler=torch.optim.lr_scheduler.StepLR(optimizer=optimizer,step_size=30,gamma=0.1,last_epoch=-1) ############################### check resume ######################### start_epoch = 0 if args.resume is not None: if os.path.isfile(args.resume): logger.info( "Loading model and optimizer from checkpoint '{}'".format( args.resume)) checkpoint = torch.load(args.resume, map_location=args.device) start_epoch = checkpoint['epoch'] optimizer.load_state_dict(checkpoint['optimizer']) model.load_state_dict(checkpoint['state_dict']) scheduler.load_state_dict(checkpoint['scheduler']) else: raise FileNotFoundError("No checkpoint found at '{}'".format( args.resume)) #################################### train and val ######################## max_value = 0 for epoch in range(start_epoch, args.epoch): # lr=adjust_learning_rate(args,optimizer,epoch) scheduler.step() # logger.info('Epoch: %d lr %e', epoch, scheduler.get_lr()[0]) # train total_loss = train(args, model, criterion, train_loader, optimizer, epoch, logger) # write writer.add_scalar('Train/total_loss', total_loss, epoch) # val tloss, md = val(args, model, criterion, val_loader, epoch, logger) writer.add_scalar('Val/total_loss', tloss, epoch) is_best = True if (md >= max_value) else False max_value = max(max_value, md) state = { 'epoch': epoch, 'optimizer': 
optimizer.state_dict(), 'state_dict': model.state_dict(), 'scheduler': scheduler.state_dict(), } logger.info("epoch:{} best:{} max_value:{}".format( epoch, is_best, max_value)) if not is_best: torch.save(state, os.path.join(args.save_path, "checkpoint.pth.tar")) else: torch.save(state, os.path.join(args.save_path, "checkpoint.pth.tar")) torch.save(state, os.path.join(args.save_path, "model_best.pth.tar")) writer.close()
def trainer_synapse(args, model, snapshot_path): from datasets.dataset_synapse import Synapse_dataset, RandomGenerator logging.basicConfig(filename=snapshot_path + "/log.txt", level=logging.INFO, format='[%(asctime)s.%(msecs)03d] %(message)s', datefmt='%H:%M:%S') logging.getLogger().addHandler(logging.StreamHandler(sys.stdout)) logging.info(str(args)) base_lr = args.base_lr num_classes = args.num_classes batch_size = args.batch_size * args.n_gpu # max_iterations = args.max_iterations db_train = Synapse_dataset(base_dir=args.root_path, list_dir=args.list_dir, split="train", transform=transforms.Compose( [RandomGenerator(output_size=[args.img_size, args.img_size])])) print("The length of train set is: {}".format(len(db_train))) def worker_init_fn(worker_id): random.seed(args.seed + worker_id) trainloader = DataLoader(db_train, batch_size=batch_size, shuffle=True, num_workers=8, pin_memory=True, worker_init_fn=worker_init_fn) if args.n_gpu > 1: model = nn.DataParallel(model) model.train() ce_loss = CrossEntropyLoss() dice_loss = DiceLoss(num_classes) optimizer = optim.SGD(model.parameters(), lr=base_lr, momentum=0.9, weight_decay=0.0001) writer = SummaryWriter(snapshot_path + '/log') iter_num = 0 max_epoch = args.max_epochs max_iterations = args.max_epochs * len(trainloader) # max_epoch = max_iterations // len(trainloader) + 1 logging.info("{} iterations per epoch. {} max iterations ".format(len(trainloader), max_iterations)) best_performance = 0.0 iterator = tqdm(range(max_epoch), ncols=70) for epoch_num in iterator: for i_batch, sampled_batch in enumerate(trainloader): image_batch, label_batch = sampled_batch['image'], sampled_batch['label'] image_batch, label_batch = image_batch.cuda(), label_batch.cuda() outputs = model(image_batch) loss_ce = ce_loss(outputs, label_batch[:].long()) loss_dice = dice_loss(outputs, label_batch, softmax=True) loss = 0.5 * loss_ce + 0.5 * loss_dice optimizer.zero_grad() loss.backward() optimizer.step() lr_ = base_lr * (1.0 - iter_num / max_iterations) ** 0.9 for param_group in optimizer.param_groups: param_group['lr'] = lr_ iter_num = iter_num + 1 writer.add_scalar('info/lr', lr_, iter_num) writer.add_scalar('info/total_loss', loss, iter_num) writer.add_scalar('info/loss_ce', loss_ce, iter_num) logging.info('iteration %d : loss : %f, loss_ce: %f' % (iter_num, loss.item(), loss_ce.item())) if iter_num % 20 == 0: image = image_batch[1, 0:1, :, :] image = (image - image.min()) / (image.max() - image.min()) writer.add_image('train/Image', image, iter_num) outputs = torch.argmax(torch.softmax(outputs, dim=1), dim=1, keepdim=True) writer.add_image('train/Prediction', outputs[1, ...] * 50, iter_num) labs = label_batch[1, ...].unsqueeze(0) * 50 writer.add_image('train/GroundTruth', labs, iter_num) save_interval = 50 # int(max_epoch/6) if epoch_num > int(max_epoch / 2) and (epoch_num + 1) % save_interval == 0: save_mode_path = os.path.join(snapshot_path, 'epoch_' + str(epoch_num) + '.pth') torch.save(model.state_dict(), save_mode_path) logging.info("save model to {}".format(save_mode_path)) if epoch_num >= max_epoch - 1: save_mode_path = os.path.join(snapshot_path, 'epoch_' + str(epoch_num) + '.pth') torch.save(model.state_dict(), save_mode_path) logging.info("save model to {}".format(save_mode_path)) iterator.close() break writer.close() return "Training Finished!"
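trainer_synapse pairs CrossEntropyLoss with a multi-class DiceLoss(num_classes) that receives raw logits and an integer label map and is called with softmax=True (and, in one trainer further below, an optional per-class weight). The class itself lives in another module; a minimal sketch consistent with that calling convention, one-hot encoding the targets and averaging per-class soft Dice, could look as follows. It is an illustration, not the project's exact implementation.

import torch
import torch.nn as nn
import torch.nn.functional as F

class DiceLoss(nn.Module):
    # Sketch of a multi-class soft Dice loss matching dice_loss(outputs, labels, softmax=True).
    def __init__(self, n_classes):
        super().__init__()
        self.n_classes = n_classes

    def forward(self, inputs, target, weight=None, softmax=False):
        if softmax:
            inputs = torch.softmax(inputs, dim=1)
        # one-hot encode (N, H, W) integer labels to match (N, C, H, W) predictions
        target = F.one_hot(target.long(), self.n_classes).permute(0, 3, 1, 2).float()
        if weight is None:
            weight = [1.0] * self.n_classes
        smooth = 1e-5
        loss = 0.0
        for c in range(self.n_classes):
            pred_c, tgt_c = inputs[:, c], target[:, c]
            intersection = (pred_c * tgt_c).sum()
            dice = (2.0 * intersection + smooth) / (pred_c.sum() + tgt_c.sum() + smooth)
            loss += (1.0 - dice) * weight[c]
        return loss / self.n_classes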
def trainer_HuBMAP(args, model, snapshot_path): from datasets.dataset_HuBMAP import HuBMAP_dataset, RandomGenerator, Generator logging.basicConfig(filename=snapshot_path + "/log.txt", level=logging.INFO, format='[%(asctime)s.%(msecs)03d] %(message)s', datefmt='%H:%M:%S') logging.getLogger().addHandler(logging.StreamHandler(sys.stdout)) logging.info(str(args)) base_lr = args.base_lr num_classes = args.num_classes batch_size = args.batch_size * args.n_gpu # max_iterations = args.max_iterations db_train = HuBMAP_dataset(base_dir=args.root_path, list_dir=args.list_dir, split="train", transform=transforms.Compose( [RandomGenerator(output_size=[args.img_size, args.img_size])])) print("The length of train set is: {}".format(len(db_train))) ### validation set db_val = HuBMAP_dataset(base_dir=args.root_path, list_dir=args.list_dir, split="val", transform=transforms.Compose( [Generator(output_size=[args.img_size, args.img_size])])) print("The length of val set is: {}".format(len(db_val))) def worker_init_fn(worker_id): random.seed(args.seed + worker_id) trainloader = DataLoader(db_train, batch_size=batch_size, shuffle=True, num_workers=8, pin_memory=True, worker_init_fn=worker_init_fn) ### val loader valloader = DataLoader(db_val, batch_size=batch_size, shuffle=True, num_workers=8, pin_memory=True, worker_init_fn=worker_init_fn) if args.n_gpu > 1: model = nn.DataParallel(model) model.train() ce_loss = CrossEntropyLoss() dice_loss = DiceLoss(num_classes) optimizer = optim.SGD(model.parameters(), lr=base_lr, momentum=0.9, weight_decay=0.0001) writer = SummaryWriter(snapshot_path + '/log') iter_num = 0 max_epoch = args.max_epochs max_iterations = args.max_epochs * len(trainloader) # max_epoch = max_iterations // len(trainloader) + 1 logging.info("{} iterations per epoch. 
{} max iterations ".format(len(trainloader), max_iterations)) best_performance = 0.0 low_val_loss_dice = np.inf train_loss_list = [] train_loss_dice_list = [] val_loss_list = [] val_loss_dice_list = [] for epoch_num in range(max_epoch): model.train() # restore train mode; the validation loop below switches the model to eval() total_train_loss = 0 total_train_dice_loss = 0 batch_num = 0 for i_batch, sampled_batch in enumerate(trainloader): print("epoch: "+ str(epoch_num) + " training progress: {:.2f}".format(batch_num/len(trainloader)*100) + "%", end="\r") image_batch, label_batch = sampled_batch['image'], sampled_batch['label'] image_batch, label_batch = image_batch.cuda(), label_batch.cuda() outputs = model(image_batch) loss_ce = ce_loss(outputs, label_batch[:].long()) loss_dice = dice_loss(outputs, label_batch, softmax=True) loss = 0.5 * loss_ce + 0.5 * loss_dice optimizer.zero_grad() loss.backward() optimizer.step() lr_ = base_lr * (1.0 - iter_num / max_iterations) ** 0.9 for param_group in optimizer.param_groups: param_group['lr'] = lr_ ### get total loss total_train_loss += loss.item() total_train_dice_loss += loss_dice.item() ### # update iter_num for the adaptive learning rate # update batch_num for getting the average loss writer.add_scalar('info/lr', lr_, iter_num) iter_num = iter_num + 1 batch_num += 1 avg_train_loss = total_train_loss/batch_num avg_train_loss_dice = total_train_dice_loss/batch_num writer.add_scalar('info/avg_train_loss', avg_train_loss, epoch_num) writer.add_scalar('info/avg_train_loss_dice', avg_train_loss_dice, epoch_num) train_loss_list.append(avg_train_loss) train_loss_dice_list.append(avg_train_loss_dice) np.save('train_loss.npy', train_loss_list) np.save('train_loss_dice.npy', train_loss_dice_list) if epoch_num % 1 == 0: image = image_batch[1, 0:1, :, :] image = (image - image.min()) / (image.max() - image.min()) writer.add_image('train/Image', image, epoch_num) outputs = torch.argmax(torch.softmax(outputs, dim=1), dim=1, keepdim=True) writer.add_image('train/Prediction', outputs[1, ...] 
* 50, epoch_num) labs = label_batch[1, ...].unsqueeze(0) * 50 writer.add_image('train/GroundTruth', labs, epoch_num) ######################### VALIDATION ########################### total_val_loss = 0 total_val_dice_loss = 0 batch_num = 0 for i_batch, sampled_batch in enumerate(valloader): print("epoch: "+ str(epoch_num) + " validation progress: {:.2f}".format(batch_num/len(valloader)*100) + "%", end="\r") model.eval() image_batch, label_batch = sampled_batch['image'], sampled_batch['label'] image_batch, label_batch = image_batch.cuda(), label_batch.cuda() outputs = model(image_batch) np.save('val_pred.npy', outputs.detach().cpu().numpy()) np.save('val_img.npy', image_batch.detach().cpu().numpy()) np.save('val_label.npy',label_batch.detach().cpu().numpy()) loss_ce = ce_loss(outputs, label_batch[:].long()) loss_dice = dice_loss(outputs, label_batch, softmax=True) loss = 0.5 * loss_ce + 0.5 * loss_dice optimizer.zero_grad() ### total_val_loss += loss.item() total_val_dice_loss += loss_dice.item() ### batch_num = batch_num + 1 avg_val_loss = total_val_loss/batch_num avg_val_loss_dice = total_val_dice_loss/batch_num writer.add_scalar('info/avg_val_loss', avg_val_loss, epoch_num) writer.add_scalar('info/avg_val_loss_dice', avg_val_loss_dice, epoch_num) logging.info('Epoch %d : train_loss : %f, train_loss_dice: %f, val_loss: %f, val_loss_dice: %f' % (epoch_num, avg_train_loss, avg_train_loss_dice,avg_val_loss, avg_val_loss_dice)) val_loss_list.append(avg_val_loss) val_loss_dice_list.append(avg_val_loss_dice) np.save('val_loss.npy', val_loss_list) np.save('val_loss_dice.npy', val_loss_dice_list) if epoch_num % 1 == 0: image = image_batch[1, 0:1, :, :] image = (image - image.min()) / (image.max() - image.min()) writer.add_image('val/Image', image, epoch_num) outputs = torch.argmax(torch.softmax(outputs, dim=1), dim=1, keepdim=True) writer.add_image('val/Prediction', outputs[1, ...] * 50, epoch_num) labs = label_batch[1, ...].unsqueeze(0) * 50 writer.add_image('val/GroundTruth', labs, epoch_num) if avg_val_loss_dice < low_val_loss_dice: low_val_loss_dice = avg_val_loss_dice save_mode_path = os.path.join(snapshot_path, 'epoch_' + str(epoch_num) + '.pth') torch.save(model.state_dict(), save_mode_path) logging.info("current best model find!!") del sampled_batch, image_batch, label_batch gc.collect() torch.cuda.empty_cache() ### ''' save_interval = 50 # int(max_epoch/6) if epoch_num > int(max_epoch / 2) and (epoch_num + 1) % save_interval == 0: save_mode_path = os.path.join(snapshot_path, 'epoch_' + str(epoch_num) + '.pth') torch.save(model.state_dict(), save_mode_path) logging.info("save model to {}".format(save_mode_path)) if epoch_num >= max_epoch - 1: save_mode_path = os.path.join(snapshot_path, 'epoch_' + str(epoch_num) + '.pth') torch.save(model.state_dict(), save_mode_path) logging.info("save model to {}".format(save_mode_path)) iterator.close() break ''' writer.close() return "Training Finished!"
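Both the Synapse and HuBMAP trainers decay the learning rate with the same polynomial rule, lr = base_lr * (1 - iter_num / max_iterations) ** 0.9, applied manually to every parameter group after each optimizer step. The small helper below simply factors out that update (the 0.9 exponent is taken from the loops above):

def poly_lr_step(optimizer, base_lr, iter_num, max_iterations, power=0.9):
    # Polynomial learning-rate decay as used inline in the training loops above.
    lr = base_lr * (1.0 - iter_num / max_iterations) ** power
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr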
def main(args): ############ init config ################ model_name = args.model assert model_name in models_dict.keys(),"The Usage model is not exist !" print('Usage model :{}'.format(model_name)) #################### init logger ################################### log_dir = './logs/'+ args.model+'_'+args.note + '/{}'.format(time.strftime('%Y%m%d-%H%M%S')) logger = get_logger(log_dir) print('RUNDIR: {}'.format(log_dir)) logger.info('{}-Train'.format(args.model)) # setting setting={k: v for k, v in args._get_kwargs()} logger.info(setting) args.save_path = log_dir args.save_tbx_log = args.save_path + '/tbx_log' writer = SummaryWriter(args.save_tbx_log) ##################### init device ################################# if args.manualSeed is None: args.manualSeed = random.randint(1, 10000) np.random.seed(args.manualSeed) torch.manual_seed(args.manualSeed) args.use_cuda= args.gpus>0 and torch.cuda.is_available() args.device = torch.device('cuda' if args.use_cuda else 'cpu') if args.use_cuda: torch.cuda.manual_seed(args.manualSeed) cudnn.benchmark = True ####################### init dataset ########################################### train_loader=get_dataloder(args,split_flag="train") val_loader=get_dataloder(args,split_flag="valid") ######################## init model ############################################ # model logger.info("Model Dict has keys: \n {}".format(models_dict.keys())) model=get_models(args) if torch.cuda.device_count() > 1 and args.use_cuda: logger.info('use: %d gpus', torch.cuda.device_count()) model = nn.DataParallel(model) logger.info('param size = %fMB', calc_parameters_count(model)) # init loss if args.loss=='bce': criterion=nn.BCELoss() elif args.loss=='bcelog': criterion=nn.BCEWithLogitsLoss() elif args.loss=="dice": criterion=DiceLoss() elif args.loss=="softdice": criterion=SoftDiceLoss() elif args.loss=='bcedice': criterion=BCEDiceLoss() else: criterion=nn.CrossEntropyLoss() if args.use_cuda: logger.info("load model and criterion to gpu !") model=model.to(args.device) criterion=criterion.to(args.device) # init optimizer if args.model_optimizer=="sgd": #torch.optim.SGD(parametetrs,lr=args.lr,weight_decay=args.weight_decay,momentum=args.momentum) optimizer=torch.optim.SGD(model.parameters(),lr=args.lr,weight_decay=args.weight_decay,momentum=args.momentum) else: optimizer=torch.optim.Adam(model.parameters(),args.lr,[args.beta1, args.beta2], weight_decay=args.weight_decay) # init schedulers Steplr scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer,args.epoch) # scheduler=torch.optim.lr_scheduler.StepLR(optimizer=optimizer,step_size=30,gamma=0.1,last_epoch=-1) ############################### check resume ######################### start_epoch=0 if args.resume is not None: if os.path.isfile(args.resume): logger.info("Loading model and optimizer from checkpoint '{}'".format(args.resume)) checkpoint = torch.load(args.resume, map_location=args.device) start_epoch = checkpoint['epoch'] optimizer.load_state_dict(checkpoint['optimizer']) model.load_state_dict(checkpoint['state_dict']) scheduler.load_state_dict(checkpoint['scheduler']) else: raise FileNotFoundError("No checkpoint found at '{}'".format(args.resume)) #################################### train and val ######################## max_value=0 for epoch in range(start_epoch,args.epoch): # lr=adjust_learning_rate(args,optimizer,epoch) scheduler.step() logger.info('Epoch: %d lr %e', epoch, scheduler.get_lr()[0]) # train mr, ms, mp, mf, mjc, md, macc, mean_loss=train(args, model, criterion, train_loader, 
optimizer, epoch, logger) # write writer.add_scalar('Train/Loss', mean_loss, epoch) writer.add_scalar('Train/mAcc', macc, epoch) writer.add_scalar('Train/Recall', mr, epoch) writer.add_scalar('Train/Specifi', ms, epoch) writer.add_scalar('Train/Precision', mp, epoch) writer.add_scalar('Train/F1', mf, epoch) writer.add_scalar('Train/Jc', mjc, epoch) writer.add_scalar('Train/Dice', md, epoch) # val vmr, vms, vmp, vmf, vmjc, vmd, vmacc, vmean_loss=val(args, model, criterion, val_loader, epoch, logger) writer.add_scalar('Val/Loss', vmean_loss, epoch) writer.add_scalar('Val/mAcc', vmacc, epoch) writer.add_scalar('Val/Recall', vmr, epoch) writer.add_scalar('Val/Specifi', vms, epoch) writer.add_scalar('Val/Precision', vmp, epoch) writer.add_scalar('Val/F1', vmf, epoch) writer.add_scalar('Val/Jc', vmjc, epoch) writer.add_scalar('Val/Dice', vmd, epoch) is_best=True if (vmjc>=max_value) else False max_value=max(max_value,vmjc) state={ 'epoch': epoch, 'optimizer': optimizer.state_dict(), 'state_dict': model.state_dict(), 'scheduler': scheduler.state_dict(), } logger.info("epoch:{} best:{} max_value:{}".format(epoch,is_best,max_value)) if not is_best: torch.save(state,os.path.join(args.save_path,"checkpoint.pth.tar")) else: torch.save(state,os.path.join(args.save_path,"checkpoint.pth.tar")) torch.save(state,os.path.join(args.save_path,"model_best.pth.tar")) writer.close()
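Several of these scripts log calc_parameters_count(model) with a '%fMB' format string. The helper is not shown in this file; a common variant simply reports the number of parameters in millions, as in the sketch below (the project's actual helper may compute the size differently):

def calc_parameters_count(model):
    # Sketch: parameter count in millions, matching the '%fMB' log format used above.
    return sum(p.numel() for p in model.parameters()) / 1e6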
def trainer_synapse(args, model, snapshot_path):
    from datasets.dataset_synapse import Synapse_dataset, LiTS_dataset, KiTS_dataset, RandomGenerator
    logging.basicConfig(filename=snapshot_path + "/log.txt",
                        level=logging.INFO,
                        format='[%(asctime)s.%(msecs)03d] %(message)s',
                        datefmt='%H:%M:%S')
    logging.getLogger().addHandler(logging.StreamHandler(sys.stdout))
    logging.info(str(args))
    base_lr = args.base_lr
    num_classes = args.num_classes
    batch_size = args.batch_size * args.n_gpu
    # max_iterations = args.max_iterations
    # db_train = Synapse_dataset(base_dir=args.root_path, list_dir=args.list_dir, split="train",
    #                            transform=transforms.Compose(
    #                                [RandomGenerator(output_size=[args.img_size, args.img_size])]))
    if args.dataset == 'LiTS':
        db_train = LiTS_dataset(
            base_dir=args.root_path,
            split='train',
            transform=transforms.Compose(
                [RandomGenerator(output_size=[args.img_size, args.img_size])]))
    elif 'LiTS_tumor' in args.dataset:
        db_train = LiTS_dataset(
            base_dir=args.root_path,
            split='train',
            transform=transforms.Compose(
                [RandomGenerator(output_size=[args.img_size, args.img_size])]),
            tumor_only=True)
    elif 'KiTS_tumor' in args.dataset:
        db_train = KiTS_dataset(
            base_dir=args.root_path,
            split='train',
            transform=transforms.Compose(
                [RandomGenerator(output_size=[args.img_size, args.img_size])]),
            tumor_only=True)
    else:
        raise NotImplementedError('dataset not found!')
    print("The length of train set is: {}".format(len(db_train)))

    def worker_init_fn(worker_id):
        random.seed(args.seed + worker_id)

    trainloader = DataLoader(db_train,
                             batch_size=batch_size,
                             shuffle=True,
                             num_workers=8,
                             pin_memory=True,
                             worker_init_fn=worker_init_fn,
                             drop_last=True)
    if args.n_gpu > 1:
        model = nn.DataParallel(model)
    model.train()
    if args.unfreeze_epoch:
        model.freeze_backbone = True
    ce_loss = CrossEntropyLoss()
    dice_loss = DiceLoss(num_classes)
    optimizer = optim.SGD(model.parameters(), lr=base_lr, momentum=0.9, weight_decay=0.0001)
    writer = SummaryWriter(snapshot_path + '/log')
    iter_num = 0
    max_epoch = args.max_epochs
    max_iterations = args.max_epochs * len(trainloader)  # max_epoch = max_iterations // len(trainloader) + 1
    logging.info("{} iterations per epoch. {} max iterations".format(
        len(trainloader), max_iterations))
    best_performance = 0.0
    iterator = tqdm(range(max_epoch), ncols=70)
    for epoch_num in iterator:
        if epoch_num + 1 == args.unfreeze_epoch:
            # after the warm-up phase, unfreeze the backbone and lower the LR
            base_lr /= 10
            model.freeze_backbone = False
            for g in optimizer.param_groups:
                g['lr'] = base_lr
            logging.info('unfreezing backbone, reducing learning rate to {}'.format(base_lr))
        for i_batch, sampled_batch in enumerate(trainloader):
            image_batch, label_batch = sampled_batch['image'], sampled_batch['label']
            image_batch, label_batch = image_batch.cuda(), label_batch.cuda()
            aux_outputs = None
            if args.model == 'deeplab_resnest':
                outputs, aux_outputs = model(image_batch)
            else:
                outputs = model(image_batch)
            loss_ce = ce_loss(outputs, label_batch[:].long())
            if args.dataset == 'LiTS_tumor':
                loss_dice = dice_loss(outputs, label_batch, weight=[1, 1], softmax=True)
            else:
                loss_dice = dice_loss(outputs, label_batch, softmax=True)
            loss = 0.5 * loss_ce + 0.5 * loss_dice
            if aux_outputs is not None:
                # auxiliary head gets the same CE/Dice mix, down-weighted by 0.4
                loss_ce_aux = ce_loss(aux_outputs, label_batch[:].long())
                loss_dice_aux = dice_loss(aux_outputs, label_batch, softmax=True)
                loss += 0.4 * (0.5 * loss_ce_aux + 0.5 * loss_dice_aux)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # polynomial ("poly") learning-rate decay
            lr_ = base_lr * (1.0 - iter_num / max_iterations) ** 0.9
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr_

            iter_num = iter_num + 1
            writer.add_scalar('info/lr', lr_, iter_num)
            writer.add_scalar('info/total_loss', loss, iter_num)
            writer.add_scalar('info/loss_ce', loss_ce, iter_num)
            logging.info('epoch %d iteration %d : loss : %f, loss_ce: %f' %
                         (epoch_num, iter_num, loss.item(), loss_ce.item()))

            if iter_num % 20 == 0:
                image = image_batch[1, 0:1, :, :]
                image = (image - image.min()) / (image.max() - image.min())
                writer.add_image('train/Image', image, iter_num)
                outputs = torch.argmax(torch.softmax(outputs, dim=1), dim=1, keepdim=True)
                writer.add_image('train/Prediction', outputs[1, ...] * 50, iter_num)
                labs = label_batch[1, ...].unsqueeze(0) * 50
                writer.add_image('train/GroundTruth', labs, iter_num)

        eval_interval = 5
        if (epoch_num + 1) % eval_interval == 0:
            tumor_dice = inference(args, model, epoch_num + 1)
            model.train()
            if args.model == 'deeplab_resnest':
                model.mode = 'TRAIN'
            writer.add_scalar('info/tumor_dice', tumor_dice, iter_num)
            if tumor_dice > best_performance:
                best_performance = tumor_dice
                save_mode_path = os.path.join(snapshot_path, 'best_model_ep' + str(epoch_num) + '.pth')
                torch.save(model.state_dict(), save_mode_path)
                logging.info("save model to {}".format(save_mode_path))

        save_interval = 50  # int(max_epoch/6)
        if epoch_num > int(max_epoch / 2) and (epoch_num + 1) % save_interval == 0:
            save_mode_path = os.path.join(snapshot_path, 'epoch_' + str(epoch_num) + '.pth')
            torch.save(model.state_dict(), save_mode_path)
            logging.info("save model to {}".format(save_mode_path))

        if epoch_num >= max_epoch - 1:
            save_mode_path = os.path.join(snapshot_path, 'epoch_' + str(epoch_num) + '.pth')
            torch.save(model.state_dict(), save_mode_path)
            logging.info("save model to {}".format(save_mode_path))
            if args.pretrain_epoch != -1:
                logdir = snapshot_path[:snapshot_path.rfind('/') + 1]
                with open(logdir + 'log_all.txt', "a") as logfile:
                    logfile.write(f'{args.pretrain_epoch}: {best_performance}\n')
            iterator.close()
            break

    writer.close()
    return "Training Finished!"
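# --- Illustration (not part of the original trainer) --------------------------
# The training loop above adjusts the learning rate with a "poly" schedule:
#     lr = base_lr * (1 - iter_num / max_iterations) ** 0.9
# A small standalone helper showing the same rule; the example values in the
# comment below are made up for illustration, not taken from any experiment.
def poly_lr(base_lr, iter_num, max_iterations, power=0.9):
    """Poly decay: returns base_lr at iteration 0 and approaches 0 at max_iterations."""
    return base_lr * (1.0 - iter_num / max_iterations) ** power

# e.g. poly_lr(0.01, 0, 1000) == 0.01, and poly_lr(0.01, 500, 1000) ≈ 0.00536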