def __init__(self, args):
    """Set up an EDCNet training run and optionally resume from a checkpoint.

    Creates the saver, TensorBoard writer, logger, data loaders, model,
    Adam optimizer, the two losses, the LR scheduler and the evaluator.

    Args:
        args: Run configuration. Attributes read here: ``workers``,
            ``rgb_dim``, ``event_dim``, ``lr``, ``weight_decay``, ``cuda``,
            ``gpu_ids``, ``device``, ``use_balanced_weights``, ``dataset``,
            ``loss_type``, ``lr_scheduler``, ``epochs``, ``resume`` and
            ``ft``. ``args.start_epoch`` is written when resuming or
            fine-tuning.

    Raises:
        RuntimeError: If ``args.resume`` is set but is not an existing file.
    """
    self.args = args

    # Experiment bookkeeping: config snapshot, TensorBoard writer, logger.
    self.saver = Saver(args)
    self.saver.save_experiment_config()
    self.summary = TensorboardSummary(self.saver.experiment_dir)
    self.writer = self.summary.create_summary()
    self.logger = self.saver.create_logger()

    kwargs = {'num_workers': args.workers, 'pin_memory': False}
    self.train_loader, self.val_loader, self.test_loader, self.nclass = make_data_loader(args, **kwargs)

    self.model = EDCNet(args.rgb_dim, args.event_dim, num_classes=self.nclass, use_bn=True)
    # The "random init" group gets 10x the learning rate and weight decay
    # of the "fine tune" group.
    train_params = [
        {'params': self.model.random_init_params(),
         'lr': 10 * args.lr,
         'weight_decay': 10 * args.weight_decay},
        {'params': self.model.fine_tune_params(),
         'lr': args.lr,
         'weight_decay': args.weight_decay},
    ]
    self.optimizer = torch.optim.Adam(train_params, lr=args.lr, weight_decay=args.weight_decay)

    if args.cuda:
        self.model = torch.nn.DataParallel(self.model, device_ids=self.args.gpu_ids)
        patch_replication_callback(self.model)
        self.model = self.model.to(self.args.device)

    if args.use_balanced_weights:
        # db_root_dir may return one path or a list of paths; resolve it
        # once and use the first entry of a list.
        root_dir = Path.db_root_dir(args.dataset)
        if isinstance(root_dir, list):
            root_dir = root_dir[0]
        classes_weights_path = os.path.join(root_dir, args.dataset + '_classes_weights.npy')
        if os.path.isfile(classes_weights_path):
            weight = np.load(classes_weights_path)
        else:
            weight = calculate_weigths_labels(args.dataset, self.train_loader, self.nclass, classes_weights_path)
        weight = torch.from_numpy(weight.astype(np.float32))
    else:
        weight = None

    self.criterion = SegmentationLosses(weight=weight, cuda=args.cuda).build_loss(mode=args.loss_type)
    self.criterion_event = SegmentationLosses(weight=weight, cuda=args.cuda).build_loss(mode='event')
    self.scheduler = LR_Scheduler(args.lr_scheduler, args.lr, args.epochs, len(self.train_loader), warmup_epochs=5)
    self.evaluator = Evaluator(self.nclass, self.logger)
    self.saver.save_model_summary(self.model)

    self.best_pred = 0.0
    if args.resume is not None:
        if not os.path.isfile(args.resume):
            raise RuntimeError("=> no checkpoint found at '{}'".format(args.resume))
        # Only map onto the first GPU when CUDA is in use; a hard-coded
        # 'cuda:0' makes torch.load fail on CPU-only runs.
        map_location = 'cuda:0' if args.cuda else 'cpu'
        checkpoint = torch.load(args.resume, map_location=map_location)
        args.start_epoch = checkpoint['epoch']
        if args.cuda:
            # The model is wrapped in DataParallel, so load into .module.
            self.model.module.load_state_dict(checkpoint['state_dict'])
        else:
            self.model.load_state_dict(checkpoint['state_dict'])
        if not args.ft:
            self.optimizer.load_state_dict(checkpoint['optimizer'])
        self.best_pred = checkpoint['best_pred']
        print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))

    # Fine-tuning always restarts the epoch counter.
    if args.ft:
        args.start_epoch = 0
def init_discriminator(self, args):
    """Create the discriminator, its upsampling layer and its loss.

    Sets ``self.discriminator_model``, ``self.interp`` and
    ``self.disc_criterion``.

    Args:
        args: Configuration providing ``cuda`` and ``loss_type``.
    """
    discriminator = FCDiscriminator(num_classes=2)
    # Move to the GPU only when CUDA was requested, matching the flag that
    # is passed to the loss below; an unconditional .cuda() crashes on
    # CPU-only runs.
    if args.cuda:
        discriminator = discriminator.cuda()
    self.discriminator_model = discriminator

    # Fixed output size of 400 for the bilinear upsampling layer.
    self.interp = nn.Upsample(size=400, mode='bilinear')
    self.disc_criterion = SegmentationLosses(
        weight=None, cuda=args.cuda).build_loss(mode=args.loss_type)
def init_optimizer(self, args):
    """Create the generator loss, the joint optimizer and the LR scheduler.

    Sets ``self.generator_criterion``, ``self.generator_params``,
    ``self.discriminator_params``, ``self.model_optim`` and
    ``self.scheduler``. Expects ``self.generator_model`` (exposing
    ``.module``) and ``self.discriminator_model`` to exist already.

    Args:
        args: Configuration providing ``cuda``, ``lr``, ``lr_scheduler``
            and ``epochs``.
    """
    loss_factory = SegmentationLosses(weight=None, cuda=args.cuda)
    self.generator_criterion = loss_factory.build_loss(mode='bce')

    # Generator groups run at 1x and 10x the base rate, the discriminator
    # at 5x.
    generator = self.generator_model.module
    self.generator_params = [
        {'params': generator.get_1x_lr_params(), 'lr': args.lr},
        {'params': generator.get_10x_lr_params(), 'lr': args.lr * 10},
    ]
    self.discriminator_params = [
        {'params': self.discriminator_model.parameters(), 'lr': args.lr * 5},
    ]

    # A single optimizer steps both networks.
    param_groups = self.generator_params + self.discriminator_params
    self.model_optim = torch.optim.Adadelta(param_groups)

    self.scheduler = LR_Scheduler(
        args.lr_scheduler,
        args.lr,
        args.epochs,
        lr_step=30,
        iters_per_epoch=100,
    )
# test_gradients: interactive debugging routine, takes no arguments.
#
# What the code does:
#   * imports its helpers locally (loss factory, numpy, dataloader factory,
#     matplotlib, colour-mapping helper, sys);
#   * builds the 'active_cityscapes_region' loaders via make_dataloader with
#     arguments (513, 513, 1, True) and init_set='set_dummy.txt', keeping only
#     the train loader and the class count;
#   * creates DeepLab(backbone='mobilenet', output_stride=16, mc_dropout=False),
#     an SGD optimizer with two parameter groups (lr 0.001 and 0.001 * 10,
#     momentum 0.9, weight decay 5e-4) and a 'ce' loss built with cuda=True;
#   * moves the model to CUDA and switches it to eval() before the batch loop;
#   * prints a parameter name/value before the loop and parameter names with
#     their gradients after loss.backward(); input() pauses for the user;
#   * `display` is hard-coded to False, so the matplotlib preview of the
#     de-normalised image and coloured label only runs if that flag is edited;
#   * calls sys.exit(0), which terminates the interpreter.
#
# NOTE(review): the original indentation is lost in this text, so the exact
# nesting of `break`, `input()`, `optimizer.step()` and `sys.exit(0)` cannot be
# determined from here — confirm against the formatted source.
# NOTE(review): `DataLoader` and `num_classes` are not used by the visible code.
def test_gradients(): from utils.loss import SegmentationLosses import numpy as np from dataloaders import make_dataloader from torch.utils.data import DataLoader import matplotlib.pyplot as plt from dataloaders.utils import map_segmentation_to_colors import sys kwargs = {'pin_memory': True, 'init_set': 'set_dummy.txt'} _, train_loader, _, _, num_classes = make_dataloader('active_cityscapes_region', 513, 513, 1, True, **kwargs) model = DeepLab(backbone='mobilenet', output_stride=16, mc_dropout=False) train_params = [{'params': model.get_1x_lr_params(), 'lr': 0.001}, {'params': model.get_10x_lr_params(), 'lr': 0.001 * 10}] optimizer = torch.optim.SGD(train_params, momentum=0.9, weight_decay=5e-4, nesterov=False) criterion = SegmentationLosses(weight=None, cuda=True).build_loss(mode='ce') model = model.cuda() model.eval() for name, param in model.named_parameters(): if param.requires_grad: print(name) print(param) break display = False for i, sample in enumerate(train_loader): image, target = sample['image'], sample['label'] image, target = image.cuda(), target.cuda() if display: gt_colored = map_segmentation_to_colors(np.array(target[0].cpu().numpy()).astype(np.uint8), 'cityscapes') image_unnormalized = ((np.transpose(image[0].cpu().numpy(), axes=[1, 2, 0]) * (0.229, 0.224, 0.225) + (0.485, 0.456, 0.406)) * 255).astype(np.uint8) plt.figure() plt.title('display') plt.subplot(211) plt.imshow(image_unnormalized) plt.subplot(212) plt.imshow(gt_colored) plt.show() optimizer.zero_grad() output = model(image) loss = criterion(output, target) loss.backward() for name, param in model.named_parameters(): if param.requires_grad: print(name) print(param.grad) input() optimizer.step() sys.exit(0)
def __init__(self, args):
    """Build the SCNN trainer state from ``args`` and optionally resume.

    Creates the saver, TensorBoard writer, data loaders, SCNN model, SGD
    optimizer, loss, evaluator and LR scheduler, wraps the model in
    ``DataParallel`` when CUDA is enabled, and restores a checkpoint when
    ``args.resume`` is given.

    Args:
        args: Run configuration. ``args.start_epoch`` is overwritten when a
            checkpoint is loaded.

    Raises:
        RuntimeError: If ``args.resume`` is set but is not an existing file.
    """
    self.args = args

    # Experiment bookkeeping: config snapshot and TensorBoard writer.
    saver = Saver(args)
    saver.save_experiment_config()
    self.saver = saver
    self.summary = TensorboardSummary(saver.experiment_dir)
    self.writer = self.summary.create_summary()

    # Data
    loader_options = dict(num_workers=args.workers, pin_memory=True)
    loaders = make_data_loader(args, **loader_options)
    self.train_loader, self.val_loader, self.test_loader, self.nclass = loaders

    # Network and optimizer
    self.model = SCNN(
        nclass=self.nclass,
        backbone=args.backbone,
        output_stride=args.out_stride,
        cuda=args.cuda,
    )
    self.optimizer = torch.optim.SGD(
        self.model.parameters(),
        args.lr,
        momentum=args.momentum,
        weight_decay=args.weight_decay,
        nesterov=args.nesterov,
    )

    # Loss (no class weighting), metrics and learning-rate schedule
    loss_factory = SegmentationLosses(weight=None, cuda=args.cuda)
    self.criterion = loss_factory.build_loss(mode=args.loss_type)
    self.evaluator = Evaluator(self.nclass)
    self.scheduler = LR_Scheduler(
        args.lr_scheduler, args.lr, args.epochs, len(self.train_loader))

    if args.cuda:
        wrapped = torch.nn.DataParallel(self.model, device_ids=self.args.gpu_ids)
        self.model = wrapped.cuda()

    self.best_pred = 0.0
    if args.resume is None:
        return

    # Restore a previous run.
    if not os.path.isfile(args.resume):
        raise RuntimeError("=> no checkpoint found at '{}'" .format(args.resume))
    checkpoint = torch.load(args.resume)
    args.start_epoch = checkpoint['epoch']
    # With CUDA the network sits inside DataParallel, so load into .module.
    network = self.model.module if args.cuda else self.model
    network.load_state_dict(checkpoint['state_dict'])
    if not args.ft:
        self.optimizer.load_state_dict(checkpoint['optimizer'])
    self.best_pred = checkpoint['best_pred']
    print("=> loaded checkpoint '{}' (epoch {})" .format(args.resume, checkpoint['epoch']))
def initialize(self):
    """Create the model, optimizer, losses, evaluators and scheduler.

    Reads ``self.args``, ``self.nclass`` and ``self.train_loader``; sets
    ``self.model``, ``self.optimizer``, ``self.criterion_deeplab``,
    ``self.criterion_unet``, ``self.deeplab_evaluator``,
    ``self.unet_evaluator``, ``self.scheduler`` and ``self.best_pred``.

    Raises:
        NotImplementedError: If ``args.optimizer`` is neither ``'SGD'`` nor
            ``'Adam'``.
    """
    args = self.args
    use_enet = args.architecture == 'enet'

    network = DeepLabAccuracyPredictor(
        num_classes=self.nclass,
        backbone=args.backbone,
        output_stride=args.out_stride,
        sync_bn=args.sync_bn,
        freeze_bn=args.freeze_bn,
        mc_dropout=False,
        enet=use_enet,
        symmetry=args.symmetry,
    )
    param_groups = network.get_param_list(args.lr, use_enet, args.symmetry)

    if args.optimizer == 'SGD':
        optim = torch.optim.SGD(
            param_groups,
            momentum=args.momentum,
            weight_decay=args.weight_decay,
            nesterov=args.nesterov,
        )
    elif args.optimizer == 'Adam':
        optim = torch.optim.Adam(param_groups, weight_decay=args.weight_decay)
    else:
        raise NotImplementedError

    # Optional class-balanced weights for the segmentation loss.
    class_weight = None
    if args.use_balanced_weights:
        raw = calculate_weights_labels(args.dataset, self.train_loader, self.nclass)
        class_weight = torch.from_numpy(raw.astype(np.float32))
    self.criterion_deeplab = SegmentationLosses(
        weight=class_weight, cuda=args.cuda).build_loss(mode=args.loss_type)

    # Two-class loss weighted by the configured "wrong label" weight and its
    # complement.
    wrong_w = args.weight_wrong_label_unet
    unet_weight = torch.FloatTensor([wrong_w, 1 - wrong_w])
    self.criterion_unet = SegmentationLosses(
        weight=unet_weight, cuda=args.cuda).build_loss(mode=args.loss_type)

    self.model = network
    self.optimizer = optim
    self.deeplab_evaluator = Evaluator(self.nclass)
    self.unet_evaluator = Evaluator(2)

    self.scheduler = None
    if args.use_lr_scheduler:
        self.scheduler = LR_Scheduler(
            args.lr_scheduler, args.lr, args.epochs, len(self.train_loader))

    if args.cuda:
        self.model = torch.nn.DataParallel(self.model, device_ids=self.args.gpu_ids)
        patch_replication_callback(self.model)
        self.model = self.model.cuda()

    self.best_pred = 0.0
def __init__(self, config, args):
    """Build the DeepLab trainer state and optionally load weights.

    Args:
        config: Model/training configuration (``backbone``, ``out_stride``,
            ``sync_bn``, ``freeze_bn``, ``lr``, ``momentum``,
            ``weight_decay``, ``loss``, ``lr_scheduler``, ``T``,
            ``lr_step``, ``warmup_epochs``); also passed to
            ``make_data_loader``.
        args: Runtime options (``cuda``, ``resume``, ``start_epoch``).

    Raises:
        RuntimeError: If ``args.resume`` is set but is not an existing file.
    """
    self.args = args
    self.config = config
    # The visdom environment is named after the current working directory.
    self.vis = visdom.Visdom(env=os.getcwd().split('/')[-1])

    # Define Dataloader
    self.train_loader, self.val_loader, self.test_loader, self.nclass = make_data_loader(config)

    # Define network
    model = DeepLab(num_classes=self.nclass,
                    backbone=config.backbone,
                    output_stride=config.out_stride,
                    sync_bn=config.sync_bn,
                    freeze_bn=config.freeze_bn)
    train_params = [{'params': model.get_1x_lr_params(), 'lr': config.lr},
                    {'params': model.get_10x_lr_params(), 'lr': config.lr * 10}]

    # Define Optimizer
    optimizer = torch.optim.SGD(train_params,
                                momentum=config.momentum,
                                weight_decay=config.weight_decay)

    # Define Criterion (no class-balanced weights)
    self.criterion = SegmentationLosses(weight=None, cuda=args.cuda).build_loss(mode=config.loss)
    self.model, self.optimizer = model, optimizer

    # Define Evaluator
    self.evaluator = Evaluator(self.nclass)
    # Define lr scheduler
    self.scheduler = LR_Scheduler(config.lr_scheduler, config.lr, config.T,
                                  len(self.train_loader), config.lr_step,
                                  config.warmup_epochs)

    # Using cuda
    if args.cuda:
        self.model = torch.nn.DataParallel(self.model)
        patch_replication_callback(self.model)
        self.model = self.model.cuda()

    # Resuming checkpoint
    self.best_pred = 0.0
    if args.resume is not None:
        if not os.path.isfile(args.resume):
            raise RuntimeError("=> no checkpoint found at '{}'".format(args.resume))
        # map_location is an argument of torch.load, not of
        # Module.load_state_dict (which raises TypeError on it). Remap to
        # the CPU only when CUDA is not used.
        map_location = None if args.cuda else torch.device('cpu')
        checkpoint = torch.load(args.resume, map_location=map_location)
        # The checkpoint object itself is used as the state dict here.
        if args.cuda:
            self.model.module.load_state_dict(checkpoint)
        else:
            self.model.load_state_dict(checkpoint)
        print("=> loaded checkpoint '{}' (epoch {})"
              .format(args.resume, args.start_epoch))
def __init__(self, para):
    """Build the DeepLab trainer state from ``para``.

    Creates the saver, TensorBoard writer, data loaders, DeepLab model, SGD
    optimizer with 1x/10x learning-rate groups, loss, evaluator and LR
    scheduler. The model is always wrapped in ``DataParallel`` and moved to
    CUDA, and the loss is always built with ``cuda=True``.

    Args:
        para: Run configuration; stored as ``self.args``.
    """
    self.args = para

    # Experiment bookkeeping
    saver = Saver(para)
    saver.save_experiment_config()
    self.saver = saver
    self.summary = TensorboardSummary(saver.experiment_dir)
    self.writer = self.summary.create_summary()

    # Data
    loaders = dataloader(para)
    self.train_loader, self.val_loader, self.test_loader, self.nclass = loaders

    # Network
    net = DeepLab(
        num_classes=self.nclass,
        backbone=para.backbone,
        output_stride=para.out_stride,
        sync_bn=para.sync_bn,
        freeze_bn=para.freeze_bn,
    )

    # Optimizer: one group at the base rate, one at ten times the base rate.
    base_lr = para.lr
    groups = [
        {'params': net.get_1x_lr_params(), 'lr': base_lr},
        {'params': net.get_10x_lr_params(), 'lr': base_lr * 10},
    ]
    self.optimizer = torch.optim.SGD(
        groups,
        momentum=para.momentum,
        weight_decay=para.weight_decay,
        nesterov=para.nesterov,
    )

    # Loss, metrics, schedule
    loss_factory = SegmentationLosses(weight=None, cuda=True)
    self.criterion = loss_factory.build_loss(mode=para.loss_type)
    self.evaluator = Evaluator(self.nclass)
    self.scheduler = LR_Scheduler(
        para.lr_scheduler, para.lr, para.epochs, len(self.train_loader))

    # Multi-GPU wrapper, then move to the GPU.
    self.model = torch.nn.DataParallel(net)
    patch_replication_callback(self.model)
    self.model = self.model.cuda()

    self.best_pred = 0.0
def __init__(self, args):
    """Load a trained DeepLab checkpoint and prepare the output folder.

    Builds the data loaders, model, evaluator and loss, loads the weights
    stored at ``args.resume`` and creates the visualizations folder next to
    that checkpoint.

    Args:
        args: Run configuration. ``args.resume`` must point to a checkpoint
            containing ``'state_dict'`` and ``'epoch'`` entries.
    """
    self.args = args

    kwargs = {'num_workers': args.workers, 'pin_memory': True}
    self.train_loader, self.val_loader, self.test_loader, self.nclass = make_dataloader(
        args.dataset, args.base_size, args.crop_size, args.batch_size, args.overfit, **kwargs)

    self.model = DeepLab(num_classes=self.nclass,
                         backbone=args.backbone,
                         output_stride=args.out_stride,
                         sync_bn=args.sync_bn,
                         freeze_bn=args.freeze_bn)
    self.evaluator = Evaluator(self.nclass)

    if args.cuda:
        self.model = torch.nn.DataParallel(self.model, device_ids=self.args.gpu_ids)
        patch_replication_callback(self.model)
        self.model = self.model.cuda()

    if args.use_balanced_weights:
        # Reuse cached class weights when present, otherwise compute them.
        classes_weights_path = os.path.join(constants.DATASET_ROOT, args.dataset, 'class_weights.npy')
        if os.path.isfile(classes_weights_path):
            weight = np.load(classes_weights_path)
        else:
            weight = calculate_weights_labels(args.dataset, self.train_loader, self.nclass)
        weight = torch.from_numpy(weight.astype(np.float32))
    else:
        weight = None

    self.criterion = SegmentationLosses(
        weight=weight, cuda=args.cuda).build_loss(mode=args.loss_type)

    checkpoint = torch.load(args.resume)
    if args.cuda:
        # The model is wrapped in DataParallel, so load into .module.
        self.model.module.load_state_dict(checkpoint['state_dict'])
    else:
        self.model.load_state_dict(checkpoint['state_dict'])
    print(
        f'=> loaded checkpoint {args.resume} (epoch {checkpoint["epoch"]})'
    )

    self.visualizations_folder = os.path.join(
        os.path.dirname(os.path.realpath(args.resume)),
        constants.VISUALIZATIONS_FOLDER)
    # exist_ok avoids the check-then-create race when several processes
    # start at the same time.
    os.makedirs(self.visualizations_folder, exist_ok=True)
def __init__(self, args):
    """Build the trainer state for the ``build_model`` network.

    Creates the saver, data loaders, model, SGD optimizer, loss, evaluator
    and LR scheduler, then optionally restores a checkpoint.

    Args:
        args: Run configuration. ``args.start_epoch`` is written when
            resuming or fine-tuning.

    Raises:
        RuntimeError: If ``args.resume`` is set but is not an existing file.
    """
    self.args = args
    self.saver = Saver(args)
    self.saver.save_experiment_config()

    kwargs = {'num_workers': args.workers, 'pin_memory': True}
    self.train_loader, self.val_loader, self.test_loader, self.nclass = make_data_loader(
        args, **kwargs)

    # self.model = OCRNet(self.nclass)
    self.model = build_model(2, [32, 32], '44330020')
    self.optimizer = torch.optim.SGD(self.model.parameters(),
                                     lr=args.lr,
                                     momentum=args.momentum,
                                     weight_decay=args.weight_decay,
                                     nesterov=args.nesterov)

    # Fixed two-class weighting when balanced weights are requested.
    if args.use_balanced_weights:
        weight = torch.tensor([0.2, 0.8], dtype=torch.float32)
    else:
        weight = None
    self.criterion = SegmentationLosses(
        weight, cuda=args.cuda).build_loss(mode=args.loss_type)

    self.evaluator = Evaluator(self.nclass)
    self.scheduler = LR_Scheduler(args.lr_scheduler, args.lr, args.epochs,
                                  len(self.train_loader))

    if args.cuda:
        self.model = self.model.cuda()

    self.best_pred = 0.0
    if args.resume is not None:
        if not os.path.isfile(args.resume):
            raise RuntimeError("=> no checkpoint found at '{}'".format(
                args.resume))
        checkpoint = torch.load(args.resume)
        args.start_epoch = checkpoint['epoch']
        # This constructor never wraps the model in DataParallel, so
        # `self.model.module` raised AttributeError on the CUDA path.
        # Unwrap only when a wrapper is actually present.
        if isinstance(self.model, torch.nn.DataParallel):
            network = self.model.module
        else:
            network = self.model
        network.load_state_dict(checkpoint['state_dict'])
        if not args.ft:
            self.optimizer.load_state_dict(checkpoint['optimizer'])
        self.best_pred = checkpoint['best_pred']
        print("=> loaded checkpoint '{}' (epoch {})".format(
            args.resume, checkpoint['epoch']))

    # Fine-tuning always restarts the epoch counter.
    if args.ft:
        args.start_epoch = 0
def __init__(self, args):
    """Build the FPN trainer state from ``args``.

    Creates the saver, data loaders (Cityscapes only), FPN model, optimizer,
    loss and evaluator, and places the model on the requested devices.

    Args:
        args: Run configuration (``dataset``, ``num_workers``, ``net``,
            ``lr``, ``optimizer``, ``weight_decay``, ``cuda``, ``mGPUs``,
            ``gpu_ids``).

    Raises:
        NotImplementedError: If ``args.net`` or ``args.optimizer`` is not
            one of the supported values.
    """
    self.args = args

    # Define Saver
    self.saver = Saver(args)
    self.saver.save_experiment_config()

    # Define Dataloader
    if args.dataset == 'Cityscapes':
        kwargs = {'num_workers': args.num_workers, 'pin_memory': True}
        self.train_loader, self.val_loader, self.test_loader, self.num_class = make_data_loader(args, **kwargs)

    # Define network
    if args.net == 'resnet101':
        blocks = [2, 4, 23, 3]
        fpn = FPN(blocks, self.num_class, back_bone=args.net)
    else:
        # Previously surfaced later as a NameError on `fpn`.
        raise NotImplementedError(
            "unsupported network '{}'".format(args.net))

    # Define Optimizer
    self.lr = self.args.lr
    if args.optimizer == 'adam':
        self.lr = self.lr * 0.1
        # Adam has no `momentum` argument; passing it raised TypeError.
        # NOTE(review): the optimizer is still built with args.lr, not the
        # scaled self.lr — confirm which rate is intended.
        optimizer = torch.optim.Adam(fpn.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    elif args.optimizer == 'sgd':
        optimizer = torch.optim.SGD(fpn.parameters(), lr=args.lr, momentum=0, weight_decay=args.weight_decay)
    else:
        # Previously surfaced later as a NameError on `optimizer`.
        raise NotImplementedError(
            "unsupported optimizer '{}'".format(args.optimizer))

    # Define Criterion
    if args.dataset == 'Cityscapes':
        weight = None
        self.criterion = SegmentationLosses(weight=weight, cuda=args.cuda).build_loss(mode='ce')

    self.model = fpn
    self.optimizer = optimizer

    # Define Evaluator
    self.evaluator = Evaluator(self.num_class)

    # multiple mGPUs
    if args.mGPUs:
        self.model = torch.nn.DataParallel(self.model, device_ids=self.args.gpu_ids)

    # Using cuda
    if args.cuda:
        self.model = self.model.cuda()

    self.best_pred = 0.0
    # Stage boundaries and a stage index stored for later use.
    self.lr_stage = [68, 93]
    self.lr_staget_ind = 0
def __init__(self, args):
    """Prepare validation of DeepLab on the Pascal VOC ``val`` split.

    Creates the validation loader, model, loss and evaluator, wraps the
    model in ``DataParallel`` when CUDA is enabled, and restores weights
    from ``args.resume`` when given.

    Args:
        args: Run configuration. ``args.start_epoch`` is overwritten when a
            checkpoint is loaded.

    Raises:
        RuntimeError: If ``args.resume`` is set but is not an existing file.
    """
    self.args = args

    # Validation data only
    dataset = pascal.VOCSegmentation(args, split='val')
    self.nclass = dataset.NUM_CLASSES
    self.val_loader = DataLoader(
        dataset,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True,
    )

    # Network, loss, metrics
    self.model = DeepLab(
        num_classes=self.nclass,
        backbone=args.backbone,
        output_stride=args.out_stride,
        sync_bn=args.sync_bn,
        freeze_bn=args.freeze_bn,
    )
    loss_factory = SegmentationLosses(weight=None, cuda=args.cuda)
    self.criterion = loss_factory.build_loss(mode=args.loss_type)
    self.evaluator = Evaluator(self.nclass)

    if args.cuda:
        print('device_ids', self.args.gpu_ids)
        self.model = torch.nn.DataParallel(self.model, device_ids=self.args.gpu_ids)
        patch_replication_callback(self.model)
        self.model = self.model.cuda()

    self.best_pred = 0.0
    if args.resume is None:
        return

    # Restore weights and bookkeeping from the checkpoint.
    if not os.path.isfile(args.resume):
        raise RuntimeError("=> no checkpoint found at '{}'".format(
            args.resume))
    checkpoint = torch.load(args.resume)
    args.start_epoch = checkpoint['epoch']
    # With CUDA the network sits inside DataParallel, so load into .module.
    network = self.model.module if args.cuda else self.model
    network.load_state_dict(checkpoint['state_dict'])
    self.best_pred = checkpoint['best_pred']
    print("=> loaded checkpoint '{}' (epoch {})".format(
        args.resume, checkpoint['epoch']))
def __init__(self, args):
    """Create the network, evaluator and loss, then load pretrained weights.

    Args:
        args: Run configuration. ``args.batchnorm_function`` is set to
            ``torch.nn.BatchNorm2d`` before the network is generated;
            ``num_classes``, ``cuda`` and ``resume`` are read.
    """
    # `args` and `self.args` are the same object, so this attribute is
    # visible to generate_net below.
    args.batchnorm_function = torch.nn.BatchNorm2d
    self.args = args
    self.nclass = args.num_classes

    # Network
    self.model = generate_net(args)

    # Metrics and cross-entropy loss (the loss is always built with
    # cuda=True).
    self.evaluator = Evaluator(self.nclass)
    loss_factory = SegmentationLosses(cuda=True)
    self.criterion = loss_factory.build_loss(mode='ce')

    if args.cuda:
        self.model = self.model.cuda()

    # The helper returns three values; none of them is needed here.
    _, _, _ = load_pretrained_mode(self.model, checkpoint_path=args.resume)
# __init__(self, args): prepare a tester for the 'drive' / 'brain' datasets.
#
# What the code does:
#   * hard-codes self.nclass = 4 and self.save_fold = 'brain_re/brain_cedice',
#     creates that folder with mkdir(), and derives self.name from the text
#     after the last '_' of the folder's last path component;
#   * builds the network through segModel(args, nclass).build_model() and takes
#     net.model;
#   * wraps the network in DataParallel and moves it to CUDA unconditionally;
#   * when args.resume is not None, loads checkpoint['state_dict'] into the
#     unwrapped `model`, and rebinds self.model to that unwrapped `model`;
#   * builds the loss from args.cuda / args.loss_type and an Evaluator;
#   * for dataset 'drive' stores the test image / label / mask folders under
#     Path.db_root_dir(dataset); for 'brain' builds self.valid_set from
#     '<root>/Bra-pickle' and the CSV '../data/Brain/test.csv';
#   * leaves self.test_loader set to None.
#
# NOTE(review): the original indentation is lost in this text, so it cannot be
# determined whether `self.model = model` (which discards the DataParallel
# wrapper) runs only when resuming or always — confirm against the formatted
# source.
def __init__(self,args): self.args = args self.nclass = 4 self.save_fold = 'brain_re/brain_cedice' mkdir(self.save_fold) self.name = self.save_fold.split('/')[-1].split('_')[-1] #===for brain========================== # self.nclass = 4 # self.save_fold = 'brain_re' #====================================== net = segModel(self.args,self.nclass) net.build_model() model = net.model #load params resume = args.resume self.model = torch.nn.DataParallel(model) self.model = self.model.cuda() print('==>Load model...') if not resume is None: checkpoint = torch.load(resume) # model.load_state_dict(checkpoint) model.load_state_dict(checkpoint['state_dict']) self.model = model print('==>loding loss func...') self.criterion = SegmentationLosses(cuda=args.cuda).build_loss(mode=args.loss_type) #define evaluator self.evaluator = Evaluator(self.nclass) #get data path root_path = Path.db_root_dir(self.args.dataset) if self.args.dataset == 'drive': folder = 'test' self.test_img = os.path.join(root_path, folder, 'images') self.test_label = os.path.join(root_path, folder, '1st_manual') self.test_mask = os.path.join(root_path, folder, 'mask') elif self.args.dataset == 'brain': path = root_path+'/Bra-pickle' valid_path = '../data/Brain/test.csv' self.valid_set = get_dataset(path,valid_path) print('loading test data...') #define data self.test_loader = None
def test(model_path):
    """Evaluate a DeepLab (DRN backbone) checkpoint on the test loader.

    Prints the running test loss in the progress bar, then the pixel
    accuracy, mean IoU and the accumulated loss.

    Args:
        model_path: Path to a checkpoint containing a ``'state_dict'`` entry.
    """
    args = makeargs()
    kwargs = {'num_workers': args.workers, 'pin_memory': True}
    # Only the test split and the class count are needed here.
    _, _, test_loader, nclass = make_data_loader(args, **kwargs)

    print('Loading model...')
    model = DeepLab(num_classes=8,
                    backbone='drn',
                    output_stride=args.output_stride,
                    sync_bn=args.sync_bn,
                    freeze_bn=args.freeze_bn)
    model.eval()
    checkpoint = torch.load(model_path)
    model = model.cuda()
    model.load_state_dict(checkpoint['state_dict'])
    print('Done')

    criterion = SegmentationLosses(weight=None, cuda=args.cuda).build_loss(mode=args.loss_type)
    evaluator = Evaluator(nclass)
    evaluator.reset()

    print('Model infering')
    test_loss = 0.0
    tbar = tqdm(test_loader, desc='\r')
    for i, sample in enumerate(tbar):
        image, target = sample['image'], sample['label']
        image, target = image.cuda(), target.cuda()
        with torch.no_grad():
            # The forward pass has to run here: without it `output` is
            # undefined when the loss is computed.
            output = model(image)
            loss = criterion(output, target)
        test_loss += loss.item()
        tbar.set_description('Test loss: %.3f' % (test_loss / (i + 1)))

        # Per-pixel class prediction = argmax over axis 1 of the output.
        pred = output.data.cpu().numpy()
        target = target.cpu().numpy()
        pred = np.argmax(pred, axis=1)
        evaluator.add_batch(target, pred)
        print(image.shape)

    Acc = evaluator.Pixel_Accuracy()
    mIoU = evaluator.Mean_Intersection_over_Union()
    print('testing:')
    print("Acc:{}, mIoU:{},".format(Acc, mIoU))
    print('Loss: %.3f' % test_loss)
def __init__(self, args, model, train_set, val_set, test_set, class_weights, saver):
    """Wire up loaders, summaries, optimizer, scheduler and loss for training.

    Args:
        args: Run configuration (``batch_size``, ``workers``, ``lr``,
            ``optimizer``, ``momentum``, ``weight_decay``, ``nesterov``,
            ``use_balanced_weights``, ``use_lr_scheduler``,
            ``lr_scheduler``, ``step_size``, ``cuda``, ``loss_type``).
        model: Network exposing ``get_1x_lr_params`` / ``get_10x_lr_params``.
        train_set: Training dataset; must expose ``num_classes``.
        val_set: Validation dataset.
        test_set: Test dataset.
        class_weights: Array of class weights, used only when
            ``args.use_balanced_weights`` is set.
        saver: Experiment saver providing ``experiment_dir``.

    Raises:
        NotImplementedError: If ``args.optimizer`` is neither ``'SGD'`` nor
            ``'Adam'``.
    """
    self.args = args
    self.saver = saver
    saver.save_experiment_config()

    def _make_loader(dataset, shuffle):
        # All three loaders share batch size and worker count.
        return DataLoader(dataset, batch_size=args.batch_size, shuffle=shuffle, num_workers=args.workers)

    self.train_dataloader = _make_loader(train_set, True)
    self.val_dataloader = _make_loader(val_set, False)
    self.test_dataloader = _make_loader(test_set, False)

    # Separate TensorBoard writers for training and validation curves.
    experiment_dir = self.saver.experiment_dir
    self.train_summary = TensorboardSummary(os.path.join(experiment_dir, "train"))
    self.train_writer = self.train_summary.create_summary()
    self.val_summary = TensorboardSummary(os.path.join(experiment_dir, "validation"))
    self.val_writer = self.val_summary.create_summary()

    self.model = model
    self.dataset_size = {
        'train': len(train_set),
        'val': len(val_set),
        'test': len(test_set),
    }

    param_groups = [
        {'params': model.get_1x_lr_params(), 'lr': args.lr},
        {'params': model.get_10x_lr_params(), 'lr': args.lr * 10},
    ]

    loss_weight = None
    if args.use_balanced_weights:
        loss_weight = torch.from_numpy(class_weights.astype(np.float32))

    if args.optimizer == 'SGD':
        print('Using SGD')
        self.optimizer = torch.optim.SGD(
            param_groups,
            momentum=args.momentum,
            weight_decay=args.weight_decay,
            nesterov=args.nesterov,
        )
    elif args.optimizer == 'Adam':
        print('Using Adam')
        self.optimizer = torch.optim.Adam(param_groups, weight_decay=args.weight_decay)
    else:
        raise NotImplementedError

    # Only the 'step' schedule is handled here; anything else leaves the
    # scheduler unset.
    self.lr_scheduler = None
    if args.use_lr_scheduler and args.lr_scheduler == 'step':
        print('Using step lr scheduler')
        milestones = list(map(int, args.step_size.split(",")))
        self.lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
            self.optimizer, milestones=milestones, gamma=0.1)

    loss_factory = SegmentationLosses(weight=loss_weight, ignore_index=255, cuda=args.cuda)
    self.criterion = loss_factory.build_loss(mode=args.loss_type)
    self.evaluator = Evaluator(train_set.num_classes)
    self.best_pred = 0.0
def define_pixel_criterion(self):
    """Build ``self.criterion``, optionally with per-class weights.

    Weight selection, in priority order:
      1. ``args.use_balanced_weights``: weights cached in
         ``<dataset_dir>/classes_weights.npy``, or computed from the
         training loader when that file does not exist.
      2. ``args.skip_classes`` is not None: ``args.skip_weights``.
      3. Otherwise no weights.
    """
    opts = self.args
    class_weight = None

    if opts.use_balanced_weights:
        cache_file = os.path.join(opts.dataset_dir, 'classes_weights.npy')
        if os.path.isfile(cache_file):
            raw = np.load(cache_file)
        else:
            raw = calculate_weigths_labels(opts.dataset_dir, self.train_loader, self.nclass)
        class_weight = torch.from_numpy(raw.astype(np.float32))
    elif opts.skip_classes is not None:
        class_weight = torch.from_numpy(opts.skip_weights.astype(np.float32))

    # Both weighted branches report the tensor they produced.
    if class_weight is not None:
        print(f"Classes weights : {class_weight}")

    loss_factory = SegmentationLosses(weight=class_weight, cuda=opts.cuda)
    self.criterion = loss_factory.build_loss(mode=opts.loss_type)
def __init__(self, cfgfile):
    """Build a DeepLab model from a config file and load its checkpoint.

    Args:
        cfgfile: Passed to ``parse_cfg``; the result is stored as
            ``self.args`` and indexed with the keys ``'nclass'``,
            ``'backbone'``, ``'out_stride'``, ``'sync_bn'``,
            ``'freeze_bn'``, ``'loss_type'`` and ``'resume'``.

    Raises:
        RuntimeError: If the ``'resume'`` entry is set but is not an
            existing file.
    """
    def _as_bool(value):
        """Interpret a config value as a flag.

        ``bool('False')`` and ``bool('0')`` are True, so textual values
        need explicit handling; non-string values keep plain ``bool()``
        semantics.
        """
        if isinstance(value, str):
            return value.strip().lower() not in ('', '0', 'false', 'no', 'off', 'none')
        return bool(value)

    self.args = parse_cfg(cfgfile)
    self.nclass = int(self.args['nclass'])

    model = DeepLab(num_classes=self.nclass,
                    backbone=self.args['backbone'],
                    output_stride=int(self.args['out_stride']),
                    sync_bn=_as_bool(self.args['sync_bn']),
                    freeze_bn=_as_bool(self.args['freeze_bn']))

    weight = None
    self.criterion = SegmentationLosses(
        weight=weight, cuda=True).build_loss(mode=self.args['loss_type'])
    self.model = model
    self.evaluator = Evaluator(self.nclass)

    # Using cuda: move to the GPU, then wrap for device 0.
    self.model = self.model.cuda()
    self.model = torch.nn.DataParallel(self.model, device_ids=[0])
    patch_replication_callback(self.model)

    self.resume = self.args['resume']
    # Resuming checkpoint
    if self.resume is not None:
        if not os.path.isfile(self.resume):
            raise RuntimeError("=> no checkpoint found at '{}'".format(
                self.resume))
        checkpoint = torch.load(self.resume)
        # The model is wrapped in DataParallel, so load into .module.
        self.model.module.load_state_dict(checkpoint['state_dict'])
        print("=> loaded checkpoint '{}' (epoch {})".format(
            self.resume, checkpoint['epoch']))
# __init__(self, args): build the trainer for a segmentation model plus a
# separate Classifier, then restore weights.
#
# What the code does:
#   * creates the Saver and TensorboardSummary; the summary writer is only
#     created when args.test is false;
#   * builds the data loaders with pin_memory=True;
#   * selects a local `norm` from args.norm ('gn' / 'bn' / 'abn') and
#     args.sync_bn; an unknown value prints a message and calls exit().
#     `norm` is not referenced again in the visible code (the model
#     constructors receive args / args.norm instead);
#   * selects the model from args.model: 'deeplabv3+', 'deeplabv3' or 'fpn'.
#     There is no else branch, so another value leaves `model` unbound;
#   * creates Classifier(self.nclass); the SGD optimizer is built from the
#     model's 1x / 10x learning-rate groups only;
#   * optionally loads class-balanced weights from
#     '<dataset root>/<dataset>_classes_weights.npy', computing them when the
#     file is missing;
#   * with args.cuda, wraps both model and classifier in DataParallel and moves
#     them to CUDA;
#   * args.resume: copies only those checkpoint entries whose keys exist in the
#     current state dict, restores the optimizer unless args.ft, and restores
#     best_pred;
#   * otherwise, args.decoder: copies only matching entries whose key contains
#     'aspp', sets start_epoch to 0, and raises NotImplementedError without
#     CUDA;
#   * classifier checkpoint: raises NotImplementedError when args.classifier is
#     None, RuntimeError when the file is missing, otherwise copies matching
#     entries into the classifier's state dict;
#   * finally resets args.start_epoch to 0 when args.ft is set.
#
# Raises: RuntimeError for a missing checkpoint file; NotImplementedError as
# described above.
#
# NOTE(review): the original indentation is lost in this text, so it cannot be
# determined whether the classifier-loading section belongs to the
# `elif args.decoder` branch or runs unconditionally — confirm against the
# formatted source.
def __init__(self, args): self.args = args # Define Saver self.saver = Saver(args) self.saver.save_experiment_config() # Define Tensorboard Summary self.summary = TensorboardSummary(self.saver.experiment_dir) if not args.test: self.writer = self.summary.create_summary() # Define Dataloader kwargs = {'num_workers': args.workers, 'pin_memory': True} self.train_loader, self.val_loader, self.test_loader, self.nclass = make_data_loader( args, **kwargs) if self.args.norm == 'gn': norm = gn elif self.args.norm == 'bn': if self.args.sync_bn: norm = syncbn else: norm = bn elif self.args.norm == 'abn': if self.args.sync_bn: norm = syncabn(self.args.gpu_ids) else: norm = abn else: print("Please check the norm.") exit() # Define network if self.args.model == 'deeplabv3+': model = DeepLab(args=self.args, num_classes=self.nclass) elif self.args.model == 'deeplabv3': model = DeepLabv3(Norm=self.args.norm, backbone=args.backbone, output_stride=args.out_stride, num_classes=self.nclass, freeze_bn=args.freeze_bn) elif self.args.model == 'fpn': model = FPN(args=args, num_classes=self.nclass) ''' model.cuda() summary(model, input_size=(3, 720, 1280)) exit() ''' self.classifier = Classifier(self.nclass) train_params = [{ 'params': model.get_1x_lr_params(), 'lr': args.lr }, { 'params': model.get_10x_lr_params(), 'lr': args.lr * 10 }] # Define Optimizer optimizer = torch.optim.SGD(train_params, momentum=args.momentum, weight_decay=args.weight_decay, nesterov=args.nesterov) # Define Criterion # whether to use class balanced weights if args.use_balanced_weights: classes_weights_path = os.path.join( Path.db_root_dir(args.dataset), args.dataset + '_classes_weights.npy') if os.path.isfile(classes_weights_path): weight = np.load(classes_weights_path) else: weight = calculate_weigths_labels(args.dataset, self.train_loader, self.nclass) weight = torch.from_numpy(weight.astype(np.float32)) else: weight = None self.criterion = SegmentationLosses( weight=weight, 
cuda=args.cuda).build_loss(mode=args.loss_type) self.model, self.optimizer = model, optimizer # Define Evaluator self.evaluator = Evaluator(self.nclass) # Define lr scheduler self.scheduler = LR_Scheduler(args.lr_scheduler, args.lr, args.epochs, len(self.train_loader)) # Using cuda if args.cuda: self.model = torch.nn.DataParallel(self.model, device_ids=self.args.gpu_ids) #patch_replication_callback(self.model) self.model = self.model.cuda() self.classifier = torch.nn.DataParallel( self.classifier, device_ids=self.args.gpu_ids) self.classifier = self.classifier.cuda() # Resuming checkpoint self.best_pred = 0.0 if args.resume is not None: if not os.path.isfile(args.resume): raise RuntimeError("=> no checkpoint found at '{}'".format( args.resume)) checkpoint = torch.load(args.resume) if args.ft: args.start_epoch = 0 else: args.start_epoch = checkpoint['epoch'] if args.cuda: #self.model.module.load_state_dict(checkpoint['state_dict']) pretrained_dict = checkpoint['state_dict'] model_dict = {} state_dict = self.model.module.state_dict() for k, v in pretrained_dict.items(): if k in state_dict: model_dict[k] = v state_dict.update(model_dict) self.model.module.load_state_dict(state_dict) else: #self.model.load_state_dict(checkpoint['state_dict']) pretrained_dict = checkpoint['state_dict'] model_dict = {} state_dict = self.model.state_dict() for k, v in pretrained_dict.items(): if k in state_dict: model_dict[k] = v state_dict.update(model_dict) self.model.load_state_dict(state_dict) if not args.ft: self.optimizer.load_state_dict(checkpoint['optimizer']) self.best_pred = checkpoint['best_pred'] print("=> loaded checkpoint '{}' (epoch {})".format( args.resume, checkpoint['epoch'])) elif args.decoder is not None: if not os.path.isfile(args.decoder): raise RuntimeError( "=> no checkpoint for decoder found at '{}'".format( args.decoder)) checkpoint = torch.load(args.decoder) args.start_epoch = 0 # As every time loads decoder only should be finetuning if args.cuda: decoder_dict = 
checkpoint['state_dict'] model_dict = {} state_dict = self.model.module.state_dict() for k, v in decoder_dict.items(): if not 'aspp' in k: continue if k in state_dict: model_dict[k] = v state_dict.update(model_dict) self.model.module.load_state_dict(state_dict) else: raise NotImplementedError("Please USE CUDA!!!") if args.classifier is None: raise NotImplementedError("Classifier should be loaded") else: if not os.path.isfile(args.classifier): raise RuntimeError( "=> no checkpoint for clasifier found at '{}'".format( args.classifier)) checkpoint = torch.load(args.classifier) s_dict = checkpoint['state_dict'] model_dict = {} state_dict = self.classifier.state_dict() for k, v in s_dict.items(): if k in state_dict: model_dict[k] = v state_dict.update(model_dict) self.classifier.load_state_dict(state_dict) print("Classifier checkpoint successfully loaded") # Clear start epoch if fine-tuning if args.ft: args.start_epoch = 0
def __init__(self, args):
    """Build the trainer for a network rebuilt from a saved architecture.

    Loads ``genotype.npy`` and ``network_path_space.npy`` from
    ``args.saved_arch_path``, builds the model and decoder, the SGD
    optimizer, loss, evaluator and LR scheduler, and optionally restores a
    checkpoint.

    Args:
        args: Run configuration. ``args.start_epoch`` is written when
            resuming or fine-tuning.

    Raises:
        RuntimeError: If ``args.resume`` is set but is not an existing file.
    """
    self.args = args

    # Define Saver
    self.saver = Saver(args)
    self.saver.save_experiment_config()
    # Define Tensorboard Summary
    self.summary = TensorboardSummary(self.saver.experiment_dir)
    self.writer = self.summary.create_summary()

    # Define Dataloader
    kwargs = {'num_workers': args.workers, 'pin_memory': True}
    self.train_loader, self.val_loader, self.test_loader, self.nclass = make_data_loader(
        args, **kwargs)

    # Saved architecture: cell genotype and network-level path.
    cell_path = os.path.join(args.saved_arch_path, 'genotype.npy')
    network_path_space = os.path.join(args.saved_arch_path, 'network_path_space.npy')
    new_cell_arch = np.load(cell_path)
    new_network_arch = np.load(network_path_space)

    # Define network
    model = newModel(network_arch=new_network_arch,
                     cell_arch=new_cell_arch,
                     num_classes=self.nclass,
                     num_layers=12)
    self.decoder = Decoder(self.nclass, 'autodeeplab', args, False)

    # The optimizer below needs `train_params`; without a live definition
    # the constructor fails with a NameError.
    # TODO: look into different param groups (1x / 10x lr) as done in deeplab.
    train_params = [{'params': model.parameters(), 'lr': args.lr}]

    # Define Optimizer
    optimizer = torch.optim.SGD(train_params,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay,
                                nesterov=args.nesterov)

    # Define Criterion, optionally with class-balanced weights.
    if args.use_balanced_weights:
        classes_weights_path = os.path.join(
            Path.db_root_dir(args.dataset),
            args.dataset + '_classes_weights.npy')
        if os.path.isfile(classes_weights_path):
            weight = np.load(classes_weights_path)
        else:
            weight = calculate_weigths_labels(args.dataset,
                                              self.train_loader,
                                              self.nclass)
        weight = torch.from_numpy(weight.astype(np.float32))
    else:
        weight = None
    self.criterion = SegmentationLosses(
        weight=weight, cuda=args.cuda).build_loss(mode=args.loss_type)
    self.model, self.optimizer = model, optimizer

    # Define Evaluator
    self.evaluator = Evaluator(self.nclass)
    # Define lr scheduler
    # TODO: use min_lr ?
    # TODO: Figure out if len(self.train_loader) should be divided by two
    self.scheduler = LR_Scheduler(
        args.lr_scheduler, args.lr, args.epochs, len(self.train_loader))

    # Using cuda
    if args.cuda:
        if (torch.cuda.device_count() > 1 or args.load_parallel):
            self.model = torch.nn.DataParallel(self.model.cuda())
            patch_replication_callback(self.model)
        self.model = self.model.cuda()
        print('cuda finished')

    # Resuming checkpoint
    self.best_pred = 0.0
    if args.resume is not None:
        if not os.path.isfile(args.resume):
            raise RuntimeError("=> no checkpoint found at '{}'".format(
                args.resume))
        checkpoint = torch.load(args.resume)
        args.start_epoch = checkpoint['epoch']

        if args.clean_module:
            # The checkpoint was saved from a DataParallel wrapper: strip
            # the 'module.' prefix from keys that have it and load into the
            # underlying network. Loading the raw dict first and slicing
            # every key blindly made this branch fail in every case.
            prefix = 'module.'
            new_state_dict = OrderedDict(
                (k[len(prefix):] if k.startswith(prefix) else k, v)
                for k, v in checkpoint['state_dict'].items())
            if isinstance(self.model, torch.nn.DataParallel):
                network = self.model.module
            else:
                network = self.model
            network.load_state_dict(new_state_dict)
        else:
            if (torch.cuda.device_count() > 1 or args.load_parallel):
                self.model.module.load_state_dict(checkpoint['state_dict'])
            else:
                self.model.load_state_dict(checkpoint['state_dict'])

        if not args.ft:
            self.optimizer.load_state_dict(checkpoint['optimizer'])
        self.best_pred = checkpoint['best_pred']
        print("=> loaded checkpoint '{}' (epoch {})".format(
            args.resume, checkpoint['epoch']))

    # Clear start epoch if fine-tuning
    if args.ft:
        args.start_epoch = 0
def __init__(self, args):
    """Assemble the DeepLab training pipeline with temporal contexts.

    Args:
        args: Parsed options. Attributes read here: ``workers``,
            ``backbone``, ``out_stride``, ``sync_bn``, ``freeze_bn``, ``lr``,
            ``momentum``, ``weight_decay``, ``nesterov``, ``cuda``,
            ``lr_scheduler``, ``epochs``, ``gpu_ids``, ``resume``, ``ft`` and
            ``mode``. ``start_epoch`` is written when resuming, fine-tuning
            or running in "val"/"test" mode.

    Raises:
        RuntimeError: If ``args.resume`` is set but is not an existing file.
    """
    self.args = args

    # Experiment bookkeeping: saver and TensorBoard writer.
    self.saver = Saver(args)
    self.saver.save_experiment_config()
    self.summary = TensorboardSummary(self.saver.experiment_dir)
    self.writer = self.summary.create_summary()

    # Data loaders; the factory also returns the number of classes.
    loaders = make_data_loader(args,
                               num_workers=args.workers,
                               pin_memory=True)
    self.train_loader, self.val_loader, self.test_loader, self.nclass = loaders

    # Network and SGD over two parameter groups: the 1x group at args.lr,
    # the 10x group at ten times that.
    network = DeepLab(num_classes=self.nclass,
                      backbone=args.backbone,
                      output_stride=args.out_stride,
                      sync_bn=args.sync_bn,
                      freeze_bn=args.freeze_bn)
    param_groups = [
        {'params': network.get_1x_lr_params(), 'lr': args.lr},
        {'params': network.get_10x_lr_params(), 'lr': args.lr * 10},
    ]
    sgd = torch.optim.SGD(param_groups,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay,
                          nesterov=args.nesterov)

    # Loss, model/optimizer handles and the temporal context buffer.
    self.criterion = SegmentationLosses(cuda=args.cuda)
    self.model = network
    self.optimizer = sgd
    self.contexts = TemporalContexts(history_len=5)

    # Metrics and learning-rate schedule.
    self.evaluator = Evaluator(self.nclass)
    self.scheduler = LR_Scheduler(args.lr_scheduler, args.lr, args.epochs,
                                  len(self.train_loader))

    # On CUDA the model is wrapped in DataParallel before being moved.
    if args.cuda:
        self.model = torch.nn.DataParallel(self.model,
                                           device_ids=self.args.gpu_ids)
        patch_replication_callback(self.model)
        self.model = self.model.cuda()

    # Optionally restore a checkpoint.
    self.best_pred = 0.0
    if args.resume is not None:
        if not os.path.isfile(args.resume):
            raise RuntimeError("=> no checkpoint found at '{}'".format(
                args.resume))
        checkpoint = torch.load(args.resume)
        args.start_epoch = checkpoint['epoch']
        # The CUDA path wrapped the model, so load into the inner module.
        weights_target = self.model.module if args.cuda else self.model
        weights_target.load_state_dict(checkpoint['state_dict'])
        if not args.ft:
            self.optimizer.load_state_dict(checkpoint['optimizer'])
        self.best_pred = checkpoint['best_pred']
        print("=> loaded checkpoint '{}' (epoch {})".format(
            args.resume, checkpoint['epoch']))

    # Fine-tuning and validation/test runs restart the epoch counter and
    # the best score.
    if args.ft or args.mode in ("val", "test"):
        args.start_epoch = 0
        self.best_pred = 0.0
class Trainer(object):
    """Training / evaluation driver for a DeepLab model fed with an image
    and a region proposal.

    Batches are dicts with the keys ``'image'``, ``'rp'`` and ``'label'``;
    the model is called as ``model(image, region_prop)``.
    """

    def __init__(self, args):
        """Build saver, summary writer, loaders, model, optimizer and scheduler.

        Args:
            args: Parsed options. Attributes read here: ``workers``,
                ``backbone``, ``out_stride``, ``sync_bn``, ``freeze_bn``,
                ``lr``, ``momentum``, ``weight_decay``, ``nesterov``,
                ``cuda``, ``lr_scheduler``, ``epochs``, ``gpu_ids``,
                ``resume``, ``ft`` and ``mode``. ``start_epoch`` is written
                when resuming, fine-tuning or in "val"/"test" mode.

        Raises:
            RuntimeError: If ``args.resume`` is set but is not a file.
        """
        self.args = args

        # Define Saver
        self.saver = Saver(args)
        self.saver.save_experiment_config()

        # Define Tensorboard Summary
        self.summary = TensorboardSummary(self.saver.experiment_dir)
        self.writer = self.summary.create_summary()

        # Define Dataloader
        kwargs = {'num_workers': args.workers, 'pin_memory': True}
        self.train_loader, self.val_loader, self.test_loader, self.nclass = make_data_loader(
            args, **kwargs)

        # Define network
        model = DeepLab(num_classes=self.nclass,
                        backbone=args.backbone,
                        output_stride=args.out_stride,
                        sync_bn=args.sync_bn,
                        freeze_bn=args.freeze_bn)
        train_params = [{
            'params': model.get_1x_lr_params(),
            'lr': args.lr
        }, {
            'params': model.get_10x_lr_params(),
            'lr': args.lr * 10
        }]

        # Define Optimizer
        optimizer = torch.optim.SGD(train_params,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay,
                                    nesterov=args.nesterov)

        # Define Criterion
        self.criterion = SegmentationLosses(cuda=args.cuda)
        self.model, self.optimizer = model, optimizer
        # Only referenced by the (currently disabled) temporal propagation.
        self.contexts = TemporalContexts(history_len=5)

        # Define Evaluator
        self.evaluator = Evaluator(self.nclass)

        # Define lr scheduler
        self.scheduler = LR_Scheduler(args.lr_scheduler, args.lr,
                                      args.epochs, len(self.train_loader))

        # Using cuda
        if args.cuda:
            self.model = torch.nn.DataParallel(self.model,
                                               device_ids=self.args.gpu_ids)
            patch_replication_callback(self.model)
            self.model = self.model.cuda()

        # Resuming checkpoint
        self.best_pred = 0.0
        if args.resume is not None:
            if not os.path.isfile(args.resume):
                raise RuntimeError("=> no checkpoint found at '{}'".format(
                    args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            self._unwrapped_model().load_state_dict(checkpoint['state_dict'])
            if not args.ft:
                self.optimizer.load_state_dict(checkpoint['optimizer'])
            self.best_pred = checkpoint['best_pred']
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))

        # Clear start epoch if fine-tuning or in validation/test mode
        if args.ft or args.mode == "val" or args.mode == "test":
            args.start_epoch = 0
            self.best_pred = 0.0

    def _unwrapped_model(self):
        """Return the network itself, without a DataParallel wrapper.

        The model is only wrapped on the CUDA path, so ``self.model.module``
        does not exist on CPU runs.
        """
        if isinstance(self.model, torch.nn.DataParallel):
            return self.model.module
        return self.model

    def training(self, epoch):
        """Run one training epoch and log losses and sample predictions.

        Args:
            epoch: Zero-based epoch index, used for the scheduler and for
                the global step of the logged scalars.
        """
        train_loss = 0.0
        self.model.train()
        tbar = tqdm(self.train_loader)
        num_img_tr = len(self.train_loader)
        # Plot roughly 20 times per epoch. Clamp to 1 so loaders with fewer
        # than 20 batches do not trigger a modulo by zero.
        vis_interval = max(num_img_tr // 20, 1)
        for i, sample in enumerate(tbar):
            image, region_prop, target = sample['image'], sample['rp'], sample['label']
            if self.args.cuda:
                image, region_prop, target = image.cuda(), region_prop.cuda(), target.cuda()
            self.scheduler(self.optimizer, i, epoch, self.best_pred)
            self.optimizer.zero_grad()
            output = self.model(image, region_prop)
            # Class weights are recomputed from each batch.
            batch_weight = torch.from_numpy(
                calculate_weights_batch(sample, self.nclass).astype(np.float32))
            loss = self.criterion.CrossEntropyLoss(output, target,
                                                   weight=batch_weight)
            loss.backward()
            self.optimizer.step()
            train_loss += loss.item()
            tbar.set_description('Train loss: %.3f' % (train_loss / (i + 1)))
            self.writer.add_scalar('train/total_loss_iter', loss.item(),
                                   i + num_img_tr * epoch)

            if i % vis_interval == 0:
                # Predictions are only needed for plotting, so compute them
                # here instead of on every iteration.
                pred = output.clone().data.cpu()
                pred_softmax = F.softmax(pred, dim=1).numpy()
                pred = np.argmax(pred.numpy(), axis=1)
                global_step = i + num_img_tr * epoch
                self.summary.vis_grid(self.writer,
                                      self.args.dataset,
                                      image.data.cpu().numpy()[0],
                                      target.data.cpu().numpy()[0],
                                      pred[0],
                                      region_prop.data.cpu().numpy()[0],
                                      pred_softmax[0],
                                      global_step,
                                      split="Train")

        self.writer.add_scalar('train/total_loss_epoch',
                               train_loss / num_img_tr, epoch)
        print('Loss: {}'.format(train_loss / num_img_tr))

        if self.args.no_val or self.args.save_all:
            # save checkpoint every epoch
            is_best = False
            self.saver.save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'state_dict': self._unwrapped_model().state_dict(),
                    'optimizer': self.optimizer.state_dict(),
                    'best_pred': self.best_pred,
                },
                is_best,
                filename='checkpoint_' + str(epoch + 1) + '_.pth.tar')

    def validation(self, epoch):
        """Evaluate on the validation or test loader and log the metrics.

        The validation loader is used in "train" and "val" mode, the test
        loader in "test" mode. In "train" mode a checkpoint is saved when
        the mIoU improves on ``self.best_pred``.

        Args:
            epoch: Epoch index used as the step of the logged scalars.

        Raises:
            ValueError: If ``self.args.mode`` is not "train", "val" or "test".
        """
        if self.args.mode == "train" or self.args.mode == "val":
            loader = self.val_loader
        elif self.args.mode == "test":
            loader = self.test_loader
        else:
            raise ValueError(
                "unsupported mode '{}'".format(self.args.mode))

        self.model.eval()
        self.evaluator.reset()
        tbar = tqdm(loader, desc='\r')
        idr_thresholds = [0.20, 0.30, 0.40, 0.50, 0.60, 0.65]
        num_itr = len(loader)

        for i, sample in enumerate(tbar):
            image, region_prop, target = sample['image'], sample['rp'], sample['label']
            if self.args.cuda:
                image, region_prop, target = image.cuda(), region_prop.cuda(), target.cuda()
            with torch.no_grad():
                output = self.model(image, region_prop)

            output = output.detach().data.cpu()
            pred_softmax = F.softmax(output, dim=1).numpy()
            pred = np.argmax(pred_softmax, axis=1)
            target = target.cpu().numpy()
            image = image.cpu().numpy()
            region_prop = region_prop.cpu().numpy()

            # Add batch sample into evaluator
            self.evaluator.add_batch(target, pred)

            global_step = i + num_itr * epoch
            self.summary.vis_grid(self.writer,
                                  self.args.dataset,
                                  image[0],
                                  target[0],
                                  pred[0],
                                  region_prop[0],
                                  pred_softmax[0],
                                  global_step,
                                  split="Validation")

        # Fast test during the training
        mIoU = self.evaluator.Mean_Intersection_over_Union()
        recall, precision = self.evaluator.pdr_metric(class_id=2)
        idr_avg = np.array([
            self.evaluator.get_idr(class_value=2, threshold=value)
            for value in idr_thresholds
        ])
        false_idr = self.evaluator.get_false_idr(class_value=2)
        instance_iou = self.evaluator.get_instance_iou(threshold=0.20,
                                                       class_value=2)

        self.writer.add_scalar('val/mIoU', mIoU, epoch)
        self.writer.add_scalar('val/Recall/per_epoch', recall, epoch)
        self.writer.add_scalar('IDR/per_epoch(0.20)', idr_avg[0], epoch)
        self.writer.add_scalar('IDR/avg_epoch', np.mean(idr_avg), epoch)
        self.writer.add_scalar('False_IDR/epoch', false_idr, epoch)
        self.writer.add_scalar('Instance_IOU/epoch', instance_iou, epoch)
        self.writer.add_histogram(
            'Prediction_hist',
            self.evaluator.pred_labels[self.evaluator.gt_labels == 2], epoch)

        print('Validation:')
        print('IDR:{}'.format(idr_avg[0]))
        print('False Positive Rate: {}'.format(false_idr))
        print('Instance_IOU: {}'.format(instance_iou))

        if self.args.mode == "train":
            new_pred = mIoU
            if new_pred > self.best_pred:
                is_best = True
                self.best_pred = new_pred
                self.saver.save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'state_dict': self._unwrapped_model().state_dict(),
                        'optimizer': self.optimizer.state_dict(),
                        'best_pred': self.best_pred,
                    }, is_best)
def __init__(self, args):
    """Set up architecture search: weight and architecture optimizers.

    Creates the saver, TensorBoard writer, two training loaders (A and B),
    loss, the ``AutoDeeplab`` model with an SGD optimizer for its weights
    and an Adam optimizer for its architecture parameters, optional apex
    mixed precision, optional DataParallel, and restores a checkpoint when
    requested.

    Args:
        args: Parsed options. Attributes read here: ``use_amp``,
            ``opt_level``, ``workers``, ``use_balanced_weights``,
            ``dataset``, ``cuda``, ``loss_type``, ``filter_multiplier``,
            ``block_multiplier``, ``step``, ``lr``, ``momentum``,
            ``weight_decay``, ``arch_lr``, ``arch_weight_decay``,
            ``lr_scheduler``, ``epochs``, ``min_lr``, ``gpu_ids``,
            ``resume``, ``clean_module`` and ``ft``. ``start_epoch`` is
            written when resuming or fine-tuning.

    Raises:
        NotImplementedError: If balanced weights are requested but the
            weights file does not exist.
        RuntimeError: If ``args.resume`` is set but is not an existing file.
    """
    self.args = args

    # Define Saver
    self.saver = Saver(args)
    self.saver.save_experiment_config()

    # Define Tensorboard Summary
    self.summary = TensorboardSummary(self.saver.experiment_dir)
    self.writer = self.summary.create_summary()

    self.use_amp = bool(APEX_AVAILABLE and args.use_amp)
    self.opt_level = args.opt_level

    kwargs = {
        'num_workers': args.workers,
        'pin_memory': True,
        'drop_last': True
    }
    self.train_loaderA, self.train_loaderB, self.val_loader, self.test_loader, self.nclass = make_data_loader(
        args, **kwargs)

    if args.use_balanced_weights:
        classes_weights_path = os.path.join(
            Path.db_root_dir(args.dataset),
            args.dataset + '_classes_weights.npy')
        if os.path.isfile(classes_weights_path):
            weight = np.load(classes_weights_path)
        else:
            # Computing the weights would need a single train loader; it is
            # undecided which of the two to use.
            raise NotImplementedError
        weight = torch.from_numpy(weight.astype(np.float32))
    else:
        weight = None
    self.criterion = SegmentationLosses(
        weight=weight, cuda=args.cuda).build_loss(mode=args.loss_type)

    # Define network
    model = AutoDeeplab(self.nclass, 12, self.criterion,
                        self.args.filter_multiplier,
                        self.args.block_multiplier, self.args.step)
    optimizer = torch.optim.SGD(model.weight_parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    self.model, self.optimizer = model, optimizer

    self.architect_optimizer = torch.optim.Adam(
        self.model.arch_parameters(),
        lr=args.arch_lr,
        betas=(0.9, 0.999),
        weight_decay=args.arch_weight_decay)

    # Define Evaluator
    self.evaluator = Evaluator(self.nclass)

    # Define lr scheduler
    # TODO: figure out if len(self.train_loader) should be divided by two
    self.scheduler = LR_Scheduler(args.lr_scheduler,
                                  args.lr,
                                  args.epochs,
                                  len(self.train_loaderA),
                                  min_lr=args.min_lr)

    # Using cuda
    if args.cuda:
        self.model = self.model.cuda()

    # mixed precision
    if self.use_amp and args.cuda:
        keep_batchnorm_fp32 = True if self.opt_level in ('O2', 'O3') else None

        # Compare versions numerically: as strings, '1.10' sorts below
        # '1.3' and would wrongly enable the workaround on newer releases.
        torch_version = tuple(
            int(part) for part in str(torch.__version__).split('.')[:2])

        # fix for older pytorch versions with opt_level 'O1'
        if self.opt_level == 'O1' and torch_version < (1, 3):
            for module in self.model.modules():
                if isinstance(module, torch.nn.modules.batchnorm._BatchNorm):
                    # Hack to fix BN fprop without affine transformation
                    if module.weight is None:
                        module.weight = torch.nn.Parameter(
                            torch.ones(module.running_var.shape,
                                       dtype=module.running_var.dtype,
                                       device=module.running_var.device),
                            requires_grad=False)
                    if module.bias is None:
                        module.bias = torch.nn.Parameter(
                            torch.zeros(module.running_var.shape,
                                        dtype=module.running_var.dtype,
                                        device=module.running_var.device),
                            requires_grad=False)

        self.model, [self.optimizer, self.architect_optimizer] = amp.initialize(
            self.model, [self.optimizer, self.architect_optimizer],
            opt_level=self.opt_level,
            keep_batchnorm_fp32=keep_batchnorm_fp32,
            loss_scale="dynamic")

        print('cuda finished')

    # Using data parallel
    if args.cuda and len(self.args.gpu_ids) > 1:
        if self.opt_level == 'O2' or self.opt_level == 'O3':
            print(
                'currently cannot run with nn.DataParallel and optimization level',
                self.opt_level)
        self.model = torch.nn.DataParallel(self.model,
                                           device_ids=self.args.gpu_ids)
        patch_replication_callback(self.model)
        print('training on multiple-GPUs')

    # Resuming checkpoint
    self.best_pred = 0.0
    if args.resume is not None:
        if not os.path.isfile(args.resume):
            raise RuntimeError("=> no checkpoint found at '{}'".format(
                args.resume))
        checkpoint = torch.load(args.resume)
        args.start_epoch = checkpoint['epoch']
        state_dict = checkpoint['state_dict']

        if args.clean_module:
            # The weights were saved from a DataParallel wrapper: drop the
            # 'module.' prefix. The prefixed dict must NOT be loaded
            # strictly first; that fails on the key mismatch.
            cleaned = OrderedDict()
            for key, value in state_dict.items():
                if key.startswith('module.'):
                    key = key[len('module.'):]
                cleaned[key] = value
            state_dict = cleaned

        # Copy into the unwrapped network. Checking the actual wrapper
        # keeps this in sync with the DataParallel decision above.
        if isinstance(self.model, torch.nn.DataParallel):
            target = self.model.module
        else:
            target = self.model
        copy_state_dict(target.state_dict(), state_dict)

        if not args.ft:
            copy_state_dict(self.optimizer.state_dict(),
                            checkpoint['optimizer'])
        self.best_pred = checkpoint['best_pred']
        print("=> loaded checkpoint '{}' (epoch {})".format(
            args.resume, checkpoint['epoch']))

    # Clear start epoch if fine-tuning
    if args.ft:
        args.start_epoch = 0
def __init__(self, config, args):
    """Set up adversarial domain-adaptation training.

    Builds source and target data loaders, the DeepLab segmentation model,
    a discriminator, their optimizers, the losses, evaluator and LR
    scheduler, and optionally loads segmentation weights from a file.

    Args:
        config: Training configuration. Attributes read here: ``backbone``,
            ``out_stride``, ``sync_bn``, ``freeze_bn``, ``lr``,
            ``lr_ratio``, ``momentum``, ``weight_decay``, ``loss``,
            ``lr_scheduler``, ``epochs``, ``lr_step`` and
            ``warmup_epochs``.
        args: Runtime options. Attributes read here: ``visdom``, ``cuda``,
            ``resume`` and ``start_epoch``.

    Raises:
        RuntimeError: If ``args.resume`` is set but is not an existing file.
    """
    self.args = args
    self.config = config
    self.visdom = args.visdom
    if args.visdom:
        # The visdom environment is named after the working directory.
        self.vis = visdom.Visdom(env=os.path.basename(os.getcwd()),
                                 port=8888)

    # Define Dataloader
    self.train_loader, self.val_loader, self.test_loader, self.nclass = make_data_loader(
        config)
    self.target_train_loader, self.target_val_loader, self.target_test_loader, _ = make_target_data_loader(
        config)

    # Define network
    self.model = DeepLab(num_classes=self.nclass,
                         backbone=config.backbone,
                         output_stride=config.out_stride,
                         sync_bn=config.sync_bn,
                         freeze_bn=config.freeze_bn)

    self.D = Discriminator(num_classes=self.nclass, ndf=16)

    train_params = [{
        'params': self.model.get_1x_lr_params(),
        'lr': config.lr
    }, {
        'params': self.model.get_10x_lr_params(),
        'lr': config.lr * config.lr_ratio
    }]

    # Define Optimizer
    self.optimizer = torch.optim.SGD(train_params,
                                     momentum=config.momentum,
                                     weight_decay=config.weight_decay)
    self.D_optimizer = torch.optim.Adam(self.D.parameters(),
                                        lr=config.lr,
                                        betas=(0.9, 0.99))

    # Define Criterion (no class-balanced weights)
    self.criterion = SegmentationLosses(
        weight=None, cuda=args.cuda).build_loss(mode=config.loss)
    self.entropy_mini_loss = MinimizeEntropyLoss()
    self.bottleneck_loss = BottleneckLoss()
    self.instance_loss = InstanceLoss()

    # Define Evaluator
    self.evaluator = Evaluator(self.nclass)

    # Define lr scheduler
    self.scheduler = LR_Scheduler(config.lr_scheduler, config.lr,
                                  config.epochs, len(self.train_loader),
                                  config.lr_step, config.warmup_epochs)
    self.summary = TensorboardSummary('./train_log')

    # labels for adversarial training
    self.source_label = 0
    self.target_label = 1

    # Using cuda
    if args.cuda:
        self.model = torch.nn.DataParallel(self.model)
        patch_replication_callback(self.model)
        # cudnn.benchmark = True
        self.model = self.model.cuda()

        self.D = torch.nn.DataParallel(self.D)
        patch_replication_callback(self.D)
        self.D = self.D.cuda()

    self.best_pred_source = 0.0
    self.best_pred_target = 0.0

    # Resuming checkpoint
    if args.resume is not None:
        if not os.path.isfile(args.resume):
            raise RuntimeError("=> no checkpoint found at '{}'".format(
                args.resume))
        # map_location is an argument of torch.load, not of
        # load_state_dict; on CPU runs remap the stored tensors to the CPU.
        map_location = None if args.cuda else torch.device('cpu')
        checkpoint = torch.load(args.resume, map_location=map_location)
        # The file holds the bare state dict of the unwrapped network.
        if args.cuda:
            self.model.module.load_state_dict(checkpoint)
        else:
            self.model.load_state_dict(checkpoint)
        print("=> loaded checkpoint '{}' (epoch {})".format(
            args.resume, args.start_epoch))
def __init__(self, args):
    """Set up FPN training on CamVid or Cityscapes.

    Args:
        args: Parsed options. Attributes read here: ``dataset``,
            ``batch_size``, ``num_workers``, ``net``, ``lr``,
            ``optimizer``, ``weight_decay``, ``cuda``, ``mGPUs``,
            ``gpu_ids``, ``resume``, ``save_dir`` and ``checkname``.
            ``start_epoch`` is written when resuming.

    Raises:
        ValueError: If ``args.dataset``, ``args.net`` or ``args.optimizer``
            is not one of the supported values.
        RuntimeError: If resuming is requested but no checkpoint file is
            found.
    """
    self.args = args

    # Define Saver
    self.saver = Saver(args)
    self.saver.save_experiment_config()

    # Define Tensorboard Summary
    self.summary = TensorboardSummary(self.saver.experiment_dir)
    self.writer = self.summary.create_summary()

    # Define Dataloader
    if args.dataset == 'CamVid':
        # Build the paths with os.path.join so they are not tied to the
        # Windows path separator.
        camvid_dir = os.path.join(os.getcwd(), 'data', 'CamVid')
        train_file = os.path.join(camvid_dir, "train.csv")
        val_file = os.path.join(camvid_dir, "val.csv")
        print('=>loading datasets')
        train_data = CamVidDataset(csv_file=train_file, phase='train')
        self.train_loader = torch.utils.data.DataLoader(
            train_data,
            batch_size=args.batch_size,
            shuffle=True,
            num_workers=args.num_workers)
        val_data = CamVidDataset(csv_file=val_file, phase='val', flip_rate=0)
        self.val_loader = torch.utils.data.DataLoader(
            val_data,
            batch_size=args.batch_size,
            shuffle=True,
            num_workers=args.num_workers)
        self.num_class = 32
    elif args.dataset == 'Cityscapes':
        kwargs = {'num_workers': args.num_workers, 'pin_memory': True}
        self.train_loader, self.val_loader, self.test_loader, self.num_class = make_data_loader(
            args, **kwargs)
    else:
        raise ValueError("unsupported dataset '{}'".format(args.dataset))

    # Define network
    if args.net == 'resnet101':
        blocks = [2, 4, 23, 3]
    else:
        raise ValueError("unsupported net '{}'".format(args.net))
    fpn = FPN(blocks, self.num_class, back_bone=args.net)

    # Define Optimizer
    self.lr = self.args.lr
    if args.optimizer == 'adam':
        # NOTE(review): self.lr is scaled here but the optimizer is still
        # built with args.lr - confirm which rate is intended.
        self.lr = self.lr * 0.1
        # Adam has no `momentum` argument; passing one raises TypeError.
        optimizer = torch.optim.Adam(fpn.parameters(),
                                     lr=args.lr,
                                     weight_decay=args.weight_decay)
    elif args.optimizer == 'sgd':
        optimizer = torch.optim.SGD(fpn.parameters(),
                                    lr=args.lr,
                                    momentum=0,
                                    weight_decay=args.weight_decay)
    else:
        raise ValueError("unsupported optimizer '{}'".format(args.optimizer))

    # Define Criterion
    if args.dataset == 'CamVid':
        self.criterion = nn.CrossEntropyLoss()
    else:  # 'Cityscapes'; other values were rejected above
        weight = None
        self.criterion = SegmentationLosses(
            weight=weight, cuda=args.cuda).build_loss(mode='ce')

    self.model = fpn
    self.optimizer = optimizer

    # Define Evaluator
    self.evaluator = Evaluator(self.num_class)

    # multiple mGPUs
    if args.mGPUs:
        self.model = torch.nn.DataParallel(self.model,
                                           device_ids=self.args.gpu_ids)

    # Using cuda
    if args.cuda:
        self.model = self.model.cuda()

    # Resuming checkpoint
    self.best_pred = 0.0
    if args.resume:
        output_dir = os.path.join(args.save_dir, args.dataset, args.checkname)
        runs = sorted(glob.glob(os.path.join(output_dir, 'experiment_*')))
        # The run before the last listed experiment directory is resumed.
        # Presumably the last one was just created by Saver for this run;
        # verify against Saver.
        run_id = int(runs[-1].split('_')[-1]) - 1 if runs else 0
        experiment_dir = os.path.join(output_dir,
                                      'experiment_{}'.format(str(run_id)))
        load_name = os.path.join(experiment_dir, 'checkpoint.pth.tar')
        if not os.path.isfile(load_name):
            raise RuntimeError(
                "=> no checkpoint found at '{}'".format(load_name))
        checkpoint = torch.load(load_name)
        args.start_epoch = checkpoint['epoch']
        self.model.load_state_dict(checkpoint['state_dict'])
        self.optimizer.load_state_dict(checkpoint['optimizer'])
        self.best_pred = checkpoint['best_pred']
        self.lr = checkpoint['optimizer']['param_groups'][0]['lr']
        print("=> loaded checkpoint '{}'(epoch {})".format(
            load_name, checkpoint['epoch']))

    self.lr_stage = [68, 93]
    self.lr_staget_ind = 0
def __init__(self,
             batch_size=32,
             optimizer_name="Adam",
             lr=1e-3,
             weight_decay=1e-5,
             epochs=200,
             model_name="model01",
             gpu_ids=None,
             resume=None,
             tqdm=None,
             is_develop=False):
    """Set up saver, summary writer, data, model, optimizer and loss.

    Args:
        batch_size (int): Batch size passed to the data-loader factory.
        optimizer_name (str): Name forwarded to ``Optimizer``.
        lr (float): Learning rate of the optimizer.
        weight_decay (float): Weight decay of the optimizer.
        epochs (int): Number of training epochs.
        model_name (str): Name of the model, passed to ``Saver``.
        gpu_ids (list | None): GPU ids, e.g. ``[0, 1]``. CPU is used when
            it is None or CUDA is unavailable.
        resume (dict | None): ``{"checkpoint_path": path,
            "fine_tuning": bool}``. Training starts from scratch when None.
        tqdm: Progress-bar object; no progress bar is shown when None.
        is_develop (bool): Forwarded to the data-loader factory.

    Raises:
        RuntimeError: If ``resume`` is given but its checkpoint path is not
            an existing file.
    """
    # Set params
    self.batch_size = batch_size
    self.epochs = epochs
    self.start_epoch = 0
    # is_available must be *called*; the bare function object is always
    # truthy and would enable CUDA on machines without a GPU.
    self.use_cuda = (gpu_ids is not None) and torch.cuda.is_available()
    self.tqdm = tqdm
    self.use_tqdm = tqdm is not None

    # Project utilities (usually no need to change).
    # Define Saver
    self.saver = Saver(model_name, lr, epochs)
    self.saver.save_experiment_config()

    # Define Tensorboard Summary
    self.summary = TensorboardSummary(self.saver.experiment_dir)
    self.writer = self.summary.create_summary()

    # Training components (adapt these to the task).
    # Define Dataloader
    self.train_loader, self.val_loader, self.test_loader, self.num_classes = make_data_loader(
        batch_size, is_develop=is_develop)

    # Define Your Model
    self.model = Modeling(self.num_classes)

    # Define Evaluator
    self.evaluator = Evaluator(self.num_classes)

    # Define Optimizer
    self.optimizer = Optimizer(self.model.parameters(),
                               optimizer_name=optimizer_name,
                               lr=lr,
                               weight_decay=weight_decay)

    # Define Loss
    # Move the class weights to the GPU only when CUDA is actually used;
    # an unconditional .cuda() crashes on CPU-only runs.
    class_weight = torch.tensor([1.0, 1594.0])
    if self.use_cuda:
        class_weight = class_weight.cuda()
    self.criterion = SegmentationLosses(weight=class_weight).build_loss('ce')

    # Using cuda
    if self.use_cuda:
        self.model = torch.nn.DataParallel(self.model,
                                           device_ids=gpu_ids).cuda()

    # Resuming checkpoint
    self.best_pred = 0.0
    if resume is not None:
        if not os.path.isfile(resume["checkpoint_path"]):
            raise RuntimeError("=> no checkpoint found at '{}'".format(
                resume["checkpoint_path"]))
        checkpoint = torch.load(resume["checkpoint_path"])
        self.start_epoch = checkpoint['epoch']
        # The model is wrapped in DataParallel only on the CUDA path.
        if self.use_cuda:
            self.model.module.load_state_dict(checkpoint['state_dict'])
        else:
            self.model.load_state_dict(checkpoint['state_dict'])
        if resume["fine_tuning"]:
            # When fine-tuning, restore the optimizer state and restart
            # the epoch counter.
            self.optimizer.load_state_dict(checkpoint['optimizer'])
            self.start_epoch = 0
        self.best_pred = checkpoint['best_pred']
        print("=> loaded checkpoint '{}' (epoch {})".format(
            resume["checkpoint_path"], checkpoint['epoch']))
def __init__(self, args):
    """Assemble DeepLab training driven by CSV sample lists.

    Args:
        args: Parsed options. Attributes read here: ``backbone``,
            ``out_stride``, ``sync_bn``, ``freeze_bn``, ``lr``,
            ``momentum``, ``weight_decay``, ``nesterov``, ``cuda``,
            ``lr_scheduler``, ``epochs``, ``resume`` and ``ft``.
            ``start_epoch`` is written when resuming or fine-tuning.

    Raises:
        RuntimeError: If ``args.resume`` is set but is not an existing file.
    """
    self.args = args

    # Sample lists; their lengths are kept for later use.
    self.train_dir = './data_list/train_lite.csv'
    self.train_list = pd.read_csv(self.train_dir)
    self.val_dir = './data_list/val_lite.csv'
    self.val_list = pd.read_csv(self.val_dir)
    self.train_length = len(self.train_list)
    self.val_length = len(self.val_list)

    # Experiment bookkeeping: saver and TensorBoard writer.
    self.saver = Saver(args)
    self.saver.save_experiment_config()
    self.summary = TensorboardSummary(self.saver.experiment_dir)
    self.writer = self.summary.create_summary()

    # Approach 2: generator-style loaders.
    generators = make_data_loader2(args)
    self.train_gen, self.val_gen, self.test_gen, self.nclass = generators

    # Network and SGD over two parameter groups (args.lr and 10x args.lr).
    network = DeepLab(num_classes=self.nclass,
                      backbone=args.backbone,
                      output_stride=args.out_stride,
                      sync_bn=args.sync_bn,
                      freeze_bn=args.freeze_bn)
    param_groups = [
        {'params': network.get_1x_lr_params(), 'lr': args.lr},
        {'params': network.get_10x_lr_params(), 'lr': args.lr * 10},
    ]
    sgd = torch.optim.SGD(param_groups,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay,
                          nesterov=args.nesterov)

    # Two unweighted criteria: cross-entropy and dice.
    self.criterion1 = SegmentationLosses(
        weight=None, cuda=args.cuda).build_loss(mode='ce')
    self.criterion2 = SegmentationLosses(
        weight=None, cuda=args.cuda).build_loss(mode='dice')
    self.model = network
    self.optimizer = sgd

    # Metrics and learning-rate schedule.
    self.evaluator = Evaluator(self.nclass)
    self.scheduler = LR_Scheduler(args.lr_scheduler, args.lr, args.epochs,
                                  self.train_length)

    # The model is moved to the GPU without a DataParallel wrapper.
    if args.cuda:
        self.model = self.model.cuda()

    # Optionally restore a checkpoint.
    self.best_pred = 0.0
    if args.resume is not None:
        if not os.path.isfile(args.resume):
            raise RuntimeError(
                "=> no checkpoint found at '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)
        args.start_epoch = checkpoint['epoch']
        # No wrapper on either path, so the weights load the same way with
        # or without CUDA.
        self.model.load_state_dict(checkpoint['state_dict'])
        if not args.ft:
            self.optimizer.load_state_dict(checkpoint['optimizer'])
        self.best_pred = checkpoint['best_pred']
        print("=> loaded checkpoint '{}' (epoch {})"
              .format(args.resume, checkpoint['epoch']))

    # Fine-tuning restarts the epoch counter.
    if args.ft:
        args.start_epoch = 0
def main():
    """Create the model and start the training.

    Trains DeepLab on the source loader, evaluates twice per epoch with
    ``test_mIoU`` (when the number of batches is even), saves a snapshot
    whenever the score improves, and finally stores the last weights, the
    loss/mIoU history and a plot in the snapshot directory.
    """
    args = get_arguments()  # read the options from the command line

    now = time.localtime(time.time())
    # Timestamp fields are intentionally not zero-padded.
    timename = '-' + '-'.join(
        str(part) for part in (now.tm_year, now.tm_mon, now.tm_mday,
                               now.tm_hour, now.tm_min, now.tm_sec))

    exp_name = 'Src2SH_srconly_lr' + str(args.learning_rate) + '_ep' + str(
        args.num_epoch) + '_' + str(args.input_size.split(',')[0] + timename)

    args.snapshot_dir = os.path.join(args.snapshot_root, exp_name)
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(args.snapshot_dir, exist_ok=True)
    os.makedirs(args.log_root, exist_ok=True)
    # Join the paths so the log lands inside log_root even when the option
    # has no trailing separator.
    log_path = os.path.join(args.log_root, exp_name + '_log.txt')

    w, h = map(int, args.input_size.split(','))
    input_size = (w, h)

    # Create network
    DeepLab_net = DeepLab(num_classes=args.num_classes,
                          backbone='resnet',
                          output_stride=16,
                          sync_bn=False,
                          freeze_bn=True)
    train_params = [{
        'params': DeepLab_net.get_1x_lr_params(),
        'lr': args.learning_rate
    }, {
        'params': DeepLab_net.get_10x_lr_params(),
        'lr': args.learning_rate * 10
    }]
    DeepLab_net = DeepLab_net.cuda()

    # Load the source dataset
    src_loader = data.DataLoader(PotsdamDataSet(args.data_dir_src,
                                                args.data_list_src,
                                                crop_size=input_size,
                                                scale=False,
                                                mirror=False,
                                                mean=IMG_MEAN),
                                 batch_size=args.batch_size,
                                 shuffle=True,
                                 num_workers=args.num_workers,
                                 pin_memory=True)
    # Load the validation set
    val_loader = data.DataLoader(PotsdamDataSet(args.data_dir_tgt_val,
                                                args.data_list_tgt_val,
                                                crop_size=input_size,
                                                scale=False,
                                                mirror=False,
                                                mean=IMG_MEAN),
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 num_workers=args.num_workers,
                                 pin_memory=True)

    num_batches = len(src_loader)
    optimizer = optim.SGD(train_params,
                          lr=args.learning_rate,
                          momentum=0.9,
                          weight_decay=5e-4,
                          nesterov=False)
    criterion = SegmentationLosses().build_loss(mode='ce')
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.9)

    num_steps = args.num_epoch * num_batches
    # Column 0: classification loss, column 1: mean IoU, one row per step.
    loss_hist = np.zeros((num_steps, 2))
    index_i = -1
    OA_hist = 0.01  # a snapshot is saved only when the score exceeds this
    printfrq = 10
    testfrq = num_batches / 2

    # The context manager closes the log even if training raises.
    with open(log_path, 'w') as f:
        for epoch in range(args.num_epoch):
            print('lr is {}'.format(
                optimizer.state_dict()['param_groups'][0]['lr']))
            for batch_index, src_data in enumerate(src_loader):
                index_i += 1

                tem_time = time.time()
                DeepLab_net.train()

                images, src_label, im_name = src_data
                images = images.cuda()
                src_label = src_label.cuda()
                src_output = DeepLab_net(images)
                optimizer.zero_grad()
                cls_loss_value = criterion(src_output, src_label)

                # `_` holds the maxima, predict_labels their class indices.
                _, predict_labels = torch.max(src_output, 1)
                lbl_pred = predict_labels.detach().cpu().numpy()
                lbl_true = src_label.detach().cpu().numpy()
                metrics_batch = []
                for label_true, label_pred in zip(lbl_true, lbl_pred):
                    _, _, mean_iu, _ = label_accuracy_score(
                        label_true, label_pred, args.num_classes)
                    metrics_batch.append(mean_iu)
                miu = np.mean(metrics_batch, axis=0)

                cls_loss_value.backward()
                loss_hist[index_i, 0] = cls_loss_value.item()
                loss_hist[index_i, 1] = miu

                optimizer.step()
                batch_time = time.time() - tem_time

                if (batch_index + 1) % printfrq == 0:
                    recent = loss_hist[index_i + 1 - printfrq:index_i + 1]
                    message = (
                        'epoch %d/%d:  %d/%d, time: %.2f, miu = %.1f, cls_loss = %.3f \n'
                        % (epoch + 1, args.num_epoch, batch_index + 1,
                           num_batches, batch_time * printfrq,
                           np.mean(recent[:, 1]) * 100,
                           np.mean(recent[:, 0])))
                    print(message)
                    f.write(message)
                    f.flush()

                if (batch_index + 1) % testfrq == 0:
                    # f is the open log file handle
                    OA_new = test_mIoU(f,
                                       DeepLab_net,
                                       val_loader,
                                       epoch + 1,
                                       input_size,
                                       print_per_batches=10)

                    # Saving the models
                    if OA_new > OA_hist:
                        f.write('Save Model\n')
                        print('Save Model')
                        model_name = exp_name + '_epoch' + repr(
                            epoch + 1) + '_' + repr(
                                (batch_index + 1) / testfrq) + '_miu_' + repr(
                                    int(OA_new * 1000)) + '.pth'
                        torch.save(DeepLab_net.state_dict(),
                                   os.path.join(args.snapshot_dir, model_name))
                        OA_hist = OA_new
            scheduler.step()

    torch.save(DeepLab_net.state_dict(),
               os.path.join(args.snapshot_dir, exp_name + '_final.pth'))
    # Join the path so the statistics file lands inside snapshot_dir rather
    # than next to it under a concatenated name.
    np.savez(os.path.join(args.snapshot_dir, exp_name + '_loss&miu_stat.npz'),
             loss_hist=loss_hist)
    plotfig(loss_hist, args.snapshot_dir)
def __init__(self, args):
    """Assemble every component needed to train a DeepLab model.

    Sets up, in order: the experiment saver, the tensorboard writer, the
    data loaders, the network with its SGD optimizer, the loss, the
    evaluator, the learning-rate scheduler, optional multi-GPU wrapping,
    and finally an optional checkpoint restore.

    Args:
        args: Option container read by attribute access (``workers``,
            ``backbone``, ``out_stride``, ``sync_bn``, ``freeze_bn``,
            ``lr``, ``momentum``, ``weight_decay``, ``nesterov``,
            ``use_balanced_weights``, ``dataset``, ``cuda``, ``gpu_ids``,
            ``loss_type``, ``lr_scheduler``, ``epochs``, ``resume``,
            ``ft``). ``args.start_epoch`` is written when a checkpoint is
            restored and reset to 0 when ``args.ft`` is truthy.

    Raises:
        RuntimeError: ``args.resume`` is set but is not an existing file.
    """
    self.args = args

    # Experiment bookkeeping: persist the configuration for this run.
    self.saver = Saver(args)
    self.saver.save_experiment_config()

    # Tensorboard output goes into the saver's experiment directory.
    self.summary = TensorboardSummary(self.saver.experiment_dir)
    self.writer = self.summary.create_summary()

    # Data loaders; the factory also reports the number of classes.
    loader_options = {'num_workers': args.workers, 'pin_memory': True}
    (self.train_loader, self.val_loader,
     self.test_loader, self.nclass) = make_data_loader(args, **loader_options)

    # Network.
    network = DeepLab(num_classes=self.nclass,
                      backbone=args.backbone,
                      output_stride=args.out_stride,
                      sync_bn=args.sync_bn,
                      freeze_bn=args.freeze_bn)
    # Echo the effective settings (original author's sample output:
    # "2 resnet 16 False False").
    print(self.nclass, args.backbone, args.out_stride, args.sync_bn,
          args.freeze_bn)

    # Two parameter groups: one trained at the base learning rate and one
    # at ten times that rate.
    base_group = {'params': network.get_1x_lr_params(), 'lr': args.lr}
    boosted_group = {'params': network.get_10x_lr_params(),
                     'lr': args.lr * 10}
    sgd = torch.optim.SGD([base_group, boosted_group],
                          momentum=args.momentum,
                          weight_decay=args.weight_decay,
                          nesterov=args.nesterov)

    # Loss, optionally with class-balanced weights that are read from a
    # .npy file under the dataset root or computed from the training set
    # when that file does not exist.
    class_weights = None
    if args.use_balanced_weights:
        weights_file = os.path.join(Path.db_root_dir(args.dataset),
                                    args.dataset + '_classes_weights.npy')
        if os.path.isfile(weights_file):
            raw_weights = np.load(weights_file)
        else:
            raw_weights = calculate_weigths_labels(args.dataset,
                                                   self.train_loader,
                                                   self.nclass)
        class_weights = torch.from_numpy(raw_weights.astype(np.float32))
    loss_factory = SegmentationLosses(weight=class_weights, cuda=args.cuda)
    self.criterion = loss_factory.build_loss(mode=args.loss_type)

    self.model = network
    self.optimizer = sgd

    # Evaluator and learning-rate schedule (sized by batches per epoch).
    self.evaluator = Evaluator(self.nclass)
    self.scheduler = LR_Scheduler(args.lr_scheduler, args.lr, args.epochs,
                                  len(self.train_loader))

    # Multi-GPU: wrap the network, patch replication, then move to GPU.
    if args.cuda:
        self.model = torch.nn.DataParallel(self.model,
                                           device_ids=self.args.gpu_ids)
        patch_replication_callback(self.model)
        self.model = self.model.cuda()

    # Optional checkpoint restore.
    self.best_pred = 0.0
    resume_path = args.resume
    if resume_path is not None:
        if not os.path.isfile(resume_path):
            raise RuntimeError(
                "=> no checkpoint found at '{}'".format(resume_path))
        restored = torch.load(resume_path, map_location='cpu')
        args.start_epoch = restored['epoch']
        # With CUDA the network sits behind DataParallel, so the weights
        # are loaded into the wrapped module.
        weight_target = self.model.module if args.cuda else self.model
        weight_target.load_state_dict(restored['state_dict'])
        # Fine-tuning starts with a fresh optimizer state.
        if not args.ft:
            self.optimizer.load_state_dict(restored['optimizer'])
        self.best_pred = restored['best_pred']
        print("=> loaded checkpoint '{}' (epoch {})".format(
            resume_path, restored['epoch']))

    # Clear start epoch if fine-tuning
    if args.ft:
        args.start_epoch = 0
def __init__(self, args):
    """Build the components for an AutoDeeplab architecture-search run.

    Creates the experiment saver and tensorboard writer, the two training
    loaders plus the validation loader, the loss, the AutoDeeplab network
    with its SGD optimizer, the evaluator, the learning-rate scheduler
    and the Architect, then optionally restores a checkpoint.

    Args:
        args: Option container read by attribute access (``workers``,
            ``use_balanced_weights``, ``dataset``, ``cuda``, ``gpu_ids``,
            ``loss_type``, ``crop_size``, ``lr``, ``momentum``,
            ``weight_decay``, ``lr_scheduler``, ``epochs``, ``resume``,
            ``ft``). ``args.start_epoch`` is written when a checkpoint is
            restored and reset to 0 when ``args.ft`` is truthy.

    Raises:
        RuntimeError: ``args.resume`` is set but is not an existing file.
    """
    self.args = args

    # Define Saver
    self.saver = Saver(args)
    self.saver.save_experiment_config()

    # Define Tensorboard Summary
    self.summary = TensorboardSummary(self.saver.experiment_dir)
    self.writer = self.summary.create_summary()

    # Define Dataloader: two training loaders and one validation loader;
    # the factory also reports the number of classes.
    kwargs = {'num_workers': args.workers, 'pin_memory': True}
    (self.train_loader1, self.train_loader2,
     self.val_loader, self.nclass) = make_data_loader(args, **kwargs)

    # Define Criterion, optionally with class-balanced weights that are
    # read from a .npy file under the dataset root or computed when that
    # file does not exist.
    if args.use_balanced_weights:
        classes_weights_path = os.path.join(
            Path.db_root_dir(args.dataset),
            args.dataset + '_classes_weights.npy')
        if os.path.isfile(classes_weights_path):
            weight = np.load(classes_weights_path)
        else:
            # This object has no `train_loader` attribute (only
            # `train_loader1` / `train_loader2`); use the loader that
            # also sizes the LR schedule below.
            weight = calculate_weigths_labels(args.dataset,
                                              self.train_loader1,
                                              self.nclass)
        weight = torch.from_numpy(weight.astype(np.float32))
    else:
        weight = None
    self.criterion = SegmentationLosses(
        weight=weight, cuda=args.cuda).build_loss(mode=args.loss_type)

    # Define network and optimizer.
    # NOTE(review): the literal 12 is presumably the number of network
    # layers -- confirm against the AutoDeeplab signature.
    model = AutoDeeplab(self.nclass, 12, self.criterion,
                        crop_size=self.args.crop_size)
    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    self.model, self.optimizer = model, optimizer

    # Using cuda
    if args.cuda:
        self.model = torch.nn.DataParallel(self.model,
                                           device_ids=self.args.gpu_ids)
        patch_replication_callback(self.model)
        self.model = self.model.cuda()
        print ('cuda finished')

    # NOTE(review): this rebinding replaces the DataParallel wrapper
    # created above with the bare network again, so from here on
    # `self.model` is unwrapped. The Architect and the checkpoint loader
    # below are written against that bare network (no `.module` access).
    # It is kept to preserve behaviour -- confirm the intent before
    # removing it.
    self.model, self.optimizer = model, optimizer

    # Define Evaluator
    self.evaluator = Evaluator(self.nclass)
    # Define lr scheduler (sized by batches per epoch of the first loader)
    self.scheduler = LR_Scheduler(args.lr_scheduler, args.lr, args.epochs,
                                  len(self.train_loader1))

    self.architect = Architect (self.model, args)

    # Resuming checkpoint
    self.best_pred = 0.0
    if args.resume is not None:
        if not os.path.isfile(args.resume):
            raise RuntimeError("=> no checkpoint found at '{}'"
                               .format(args.resume))
        # Deserialize onto the CPU so a checkpoint written on a GPU can be
        # restored on any machine; load_state_dict copies the values onto
        # whatever device the parameters already live on.
        checkpoint = torch.load(args.resume, map_location='cpu')
        args.start_epoch = checkpoint['epoch']
        # `self.model` is the bare network with or without CUDA (see the
        # note above), so one code path serves both cases.
        self.model.load_state_dict(checkpoint['state_dict'])
        # Fine-tuning starts with a fresh optimizer state.
        if not args.ft:
            self.optimizer.load_state_dict(checkpoint['optimizer'])
        self.best_pred = checkpoint['best_pred']
        print("=> loaded checkpoint '{}' (epoch {})"
              .format(args.resume, checkpoint['epoch']))

    # Clear start epoch if fine-tuning
    if args.ft:
        args.start_epoch = 0