def __init__(self, args):
    self.args = args

    # Define Saver
    self.saver = Saver(args)
    self.saver.save_experiment_config()

    # Define Tensorboard Summary
    self.summary = TensorboardSummary(self.saver.experiment_dir)
    self.writer = self.summary.create_summary()

    # Define Dataloader
    kwargs = {'num_workers': args.workers, 'pin_memory': True}
    self.train_loader, self.val_loader, self.test_loader, self.nclass = make_data_loader(args, **kwargs)

    # Define network
    # ================================== network ================================== #
    network_G = DeepLab(num_classes=self.nclass,
                        backbone=args.backbone,
                        output_stride=args.out_stride,
                        sync_bn=args.sync_bn,
                        freeze_bn=args.freeze_bn)
    network_D = networks.define_D(4, 64, netD='basic', n_layers_D=3, norm='batch',
                                  init_type='normal', init_gain=0.02,
                                  gpu_ids=self.args.gpu_ids)
    # ============================================================================= #

    train_params = [{'params': network_G.get_1x_lr_params(), 'lr': args.lr},
                    {'params': network_G.get_10x_lr_params(), 'lr': args.lr * 10}]

    # Define Optimizer
    # ================================== network ================================== #
    optimizer_G = torch.optim.SGD(train_params, momentum=args.momentum,
                                  weight_decay=args.weight_decay, nesterov=args.nesterov)
    optimizer_D = torch.optim.Adam(network_D.parameters(), lr=0.0002, betas=(0.5, 0.999))
    # ============================================================================= #

    # Define Criterion
    # whether to use class balanced weights
    if args.use_balanced_weights:
        classes_weights_path = os.path.join(Path.db_root_dir(args.dataset),
                                            args.dataset + '_classes_weights.npy')
        if os.path.isfile(classes_weights_path):
            weight = np.load(classes_weights_path)
        else:
            weight = calculate_weigths_labels(args.dataset, self.train_loader, self.nclass)
        weight = torch.from_numpy(weight.astype(np.float32))
    else:
        weight = None

    # ================================= criterion ================================= #
    self.criterionGAN = networks.GANLoss('vanilla').to(args.gpu_ids[0])  # set device manually
    self.criterionL1 = torch.nn.L1Loss()
    self.network_G, self.network_D = network_G, network_D
    self.optimizer_G, self.optimizer_D = optimizer_G, optimizer_D
    # ============================================================================= #

    # Define Evaluator
    self.evaluator = Evaluator(self.nclass)
    # Define lr scheduler
    self.scheduler = LR_Scheduler(args.lr_scheduler, args.lr, args.epochs, len(self.train_loader))

    # Using cuda
    if args.cuda:
        self.network_G = torch.nn.DataParallel(self.network_G, device_ids=self.args.gpu_ids)
        patch_replication_callback(self.network_G)
        self.network_G = self.network_G.cuda()

    # ================================= no resume ================================= #
    # Resuming checkpoint
    self.best_pred = 0.0
    # if args.resume is not None:
    #     if not os.path.isfile(args.resume):
    #         raise RuntimeError("=> no checkpoint found at '{}'".format(args.resume))
    #     checkpoint = torch.load(args.resume)
    #     args.start_epoch = checkpoint['epoch']
    #     if args.cuda:
    #         self.network_G.module.load_state_dict(checkpoint['state_dict'])
    #     else:
    #         self.network_G.load_state_dict(checkpoint['state_dict'])
    #     if not args.ft:
    #         self.optimizer.load_state_dict(checkpoint['optimizer'])
    #     self.best_pred = checkpoint['best_pred']
    #     print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))
    # ============================================================================= #

    # Clear start epoch if fine-tuning
    if args.ft:
        args.start_epoch = 0
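
# A minimal sketch (not from the source) of how one optimization step could combine
# the pieces defined above, following the usual pix2pix recipe. The method name, the
# `image`/`target_mask` tensors, and the discriminator input layout (define_D(4, ...)
# above suggests RGB plus a 1-channel map) are assumptions.
def gan_training_step(self, image, target_mask):
    fake_mask = self.network_G(image)  # generator (segmentation network) output, assumed 1-channel here
    # --- update D: real pairs vs. detached fake pairs ---
    self.optimizer_D.zero_grad()
    pred_fake = self.network_D(torch.cat([image, fake_mask.detach()], dim=1))
    pred_real = self.network_D(torch.cat([image, target_mask], dim=1))
    loss_D = 0.5 * (self.criterionGAN(pred_fake, False) + self.criterionGAN(pred_real, True))
    loss_D.backward()
    self.optimizer_D.step()
    # --- update G: fool D while staying close to the target ---
    self.optimizer_G.zero_grad()
    pred_fake = self.network_D(torch.cat([image, fake_mask], dim=1))
    loss_G = self.criterionGAN(pred_fake, True) + self.criterionL1(fake_mask, target_mask)
    loss_G.backward()
    self.optimizer_G.step()
    return loss_G.item(), loss_D.item()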
def __init__(self, args):
    self.args = args

    # Define Saver
    self.saver = Saver(args)
    self.saver.save_experiment_config()

    # Define Dataloader
    kwargs = {'num_workers': args.workers, 'pin_memory': True}
    self.train_loader, self.val_loader, self.test_loader, self.nclass = make_data_loader(args, **kwargs)

    # Define network
    self.t_net = DeepLab(num_classes=self.nclass,
                         backbone='resnet101',
                         output_stride=args.out_stride,
                         sync_bn=args.sync_bn,
                         freeze_bn=args.freeze_bn)
    checkpoint = torch.load('pretrained/deeplab-resnet.pth.tar')
    self.t_net.load_state_dict(checkpoint['state_dict'])

    self.s_net = DeepLab(num_classes=self.nclass,
                         backbone=args.backbone,
                         output_stride=args.out_stride,
                         sync_bn=args.sync_bn,
                         freeze_bn=args.freeze_bn)
    self.d_net = distiller.Distiller(self.t_net, self.s_net)

    print('Teacher Net: ')
    print(self.t_net)
    print('Student Net: ')
    print(self.s_net)
    print('the number of teacher model parameters: {}'.format(
        sum([p.data.nelement() for p in self.t_net.parameters()])))
    print('the number of student model parameters: {}'.format(
        sum([p.data.nelement() for p in self.s_net.parameters()])))

    self.distill_ratio = 1e-5
    self.batch_size = args.batch_size

    distill_params = [{'params': self.s_net.get_1x_lr_params(), 'lr': args.lr},
                      {'params': self.s_net.get_10x_lr_params(), 'lr': args.lr * 10},
                      {'params': self.d_net.Connectors.parameters(), 'lr': args.lr * 10}]
    init_params = [{'params': self.d_net.Connectors.parameters(), 'lr': args.lr * 10}]

    # Define Optimizer
    self.optimizer = torch.optim.SGD(distill_params, momentum=args.momentum,
                                     weight_decay=args.weight_decay, nesterov=args.nesterov)
    self.init_optimizer = torch.optim.SGD(init_params, momentum=args.momentum,
                                          weight_decay=args.weight_decay, nesterov=args.nesterov)

    # Define Criterion
    # whether to use class balanced weights
    if args.use_balanced_weights:
        classes_weights_path = os.path.join(Path.db_root_dir(args.dataset),
                                            args.dataset + '_classes_weights.npy')
        if os.path.isfile(classes_weights_path):
            weight = np.load(classes_weights_path)
        else:
            weight = calculate_weigths_labels(args.dataset, self.train_loader, self.nclass)
        weight = torch.from_numpy(weight.astype(np.float32))
    else:
        weight = None
    self.criterion = SegmentationLosses(weight=weight, cuda=args.cuda).build_loss(mode=args.loss_type)

    # Define Evaluator
    self.evaluator = Evaluator(self.nclass)
    # Define lr scheduler
    self.scheduler = LR_Scheduler(args.lr_scheduler, args.lr, args.epochs, len(self.train_loader))

    # Using cuda
    if args.cuda:
        self.s_net = torch.nn.DataParallel(self.s_net).cuda()
        self.d_net = torch.nn.DataParallel(self.d_net).cuda()

    # Resuming checkpoint
    self.best_pred = 0.0
    # Clear start epoch if fine-tuning
    if args.ft:
        args.start_epoch = 0
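
# A minimal sketch (not from the source) of how the distiller might be driven: an
# initialization phase that fits only the connectors, then joint training of the
# student. The Distiller forward signature (returning the student output plus a
# feature-matching loss) is an assumption.
def distill_step(self, image, target, init_phase=False):
    optimizer = self.init_optimizer if init_phase else self.optimizer
    optimizer.zero_grad()
    s_out, loss_distill = self.d_net(image)                  # assumed forward contract
    loss = loss_distill.sum() / self.batch_size * self.distill_ratio
    if not init_phase:
        loss = loss + self.criterion(s_out, target)          # add the task loss after warm-up
    loss.backward()
    optimizer.step()
    return loss.item()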
def __init__(self, args):
    self.args = args
    if self.args.sync_bn:
        self.args.batchnorm_function = SynchronizedBatchNorm2d
    else:
        self.args.batchnorm_function = torch.nn.BatchNorm2d
    print(self.args)

    # Define Saver
    self.saver = Saver(self.args)
    # Define Tensorboard Summary
    self.summary = TensorboardSummary()
    self.writer = self.summary.create_summary(self.saver.experiment_dir)

    # Define Dataloader
    kwargs = {'num_workers': self.args.gpus, 'pin_memory': True}
    self.train_loader, self.val_loader, self.test_loader = make_data_loader(self.args, **kwargs)
    self.nclass = self.args.num_classes

    # Define network
    model = generate_net(self.args)
    train_params = [{'params': model.get_conv_weight_params(), 'lr': self.args.lr,
                     'weight_decay': self.args.weight_decay},
                    {'params': model.get_conv_bias_params(), 'lr': self.args.lr * 2,
                     'weight_decay': 0}]

    # Define Optimizer
    if self.args.optim_method == 'sgd':
        optimizer = torch.optim.SGD(train_params, momentum=self.args.momentum,
                                    lr=self.args.lr, weight_decay=0,
                                    nesterov=self.args.nesterov)
    elif self.args.optim_method == 'adagrad':
        optimizer = torch.optim.Adagrad(train_params, lr=self.args.lr,
                                        weight_decay=self.args.weight_decay)
    else:
        raise NotImplementedError('unsupported optim_method: {}'.format(self.args.optim_method))

    # Define Criterion
    # whether to use class balanced weights
    if self.args.use_balanced_weights:
        classes_weights_path = os.path.join(self.args.save_dir, self.args.dataset,
                                            'classes_weights.npy')
        if os.path.isfile(classes_weights_path):
            weight = np.load(classes_weights_path)
        else:
            weight = calculate_weigths_labels(self.args.save_dir, self.args.dataset,
                                              self.train_loader, self.nclass)
        weight = torch.from_numpy(weight.astype(np.float32)).type(torch.FloatTensor)
    else:
        weight = None
    self.criterion = SegmentationLosses(weight=weight, cuda=self.args.cuda,
                                        foreloss_weight=args.foreloss_weight,
                                        seloss_weight=args.seloss_weight).build_loss(mode=self.args.loss_type)
    self.model, self.optimizer = model, optimizer

    # Define Evaluator
    self.evaluator = Evaluator(self.nclass)
    self.evaluator_inner = Evaluator(self.nclass)
    # Define lr scheduler
    self.scheduler = LR_Scheduler(self.args.lr_scheduler, self.args.lr,
                                  self.args.epochs, len(self.train_loader))

    self.model = self.model.cuda()

    # Resuming checkpoint
    self.args.start_epoch = 0
    self.best_pred = 0.0
    if self.args.resume is not None:
        optimizer_state, start_epoch, best_pred = load_pretrained_mode(
            self.model, checkpoint_path=self.args.resume)
        if not self.args.ft and optimizer_state is not None:
            self.optimizer.load_state_dict(optimizer_state)
        self.args.start_epoch = start_epoch
        self.best_pred = best_pred

    # Using cuda
    if self.args.cuda:
        self.model = torch.nn.DataParallel(self.model)
        patch_replication_callback(self.model)
        self.model = self.model.cuda()
def __init__(self, args):
    self.args = args

    # Define Dataloader
    kwargs = {'num_workers': args.workers, 'pin_memory': True}
    self.train_loader, self.val_loader, self.test_loader, self.nclass = make_data_loader(args, **kwargs)

    # Define network
    if args.sync_bn:
        BN = SynchronizedBatchNorm2d
    else:
        BN = nn.BatchNorm2d
    ### deeplabV3 start ###
    self.backbone_model = MobileNetV2(output_stride=args.out_stride, BatchNorm=BN)
    self.assp_model = ASPP(backbone=args.backbone, output_stride=args.out_stride, BatchNorm=BN)
    self.y_model = Decoder(num_classes=self.nclass, backbone=args.backbone, BatchNorm=BN)
    ### deeplabV3 end ###
    self.d_model = DomainClassifer(backbone=args.backbone, BatchNorm=BN)

    f_params = list(self.backbone_model.parameters()) + list(self.assp_model.parameters())
    y_params = list(self.y_model.parameters())
    d_params = list(self.d_model.parameters())

    # Define Optimizer
    if args.optimizer == 'SGD':
        self.task_optimizer = torch.optim.SGD(f_params + y_params, lr=args.lr,
                                              momentum=args.momentum,
                                              weight_decay=args.weight_decay,
                                              nesterov=args.nesterov)
        self.d_optimizer = torch.optim.SGD(d_params, lr=args.lr,
                                           momentum=args.momentum,
                                           weight_decay=args.weight_decay,
                                           nesterov=args.nesterov)
        self.d_inv_optimizer = torch.optim.SGD(f_params, lr=args.lr,
                                               momentum=args.momentum,
                                               weight_decay=args.weight_decay,
                                               nesterov=args.nesterov)
        self.c_optimizer = torch.optim.SGD(f_params + y_params, lr=args.lr,
                                           momentum=args.momentum,
                                           weight_decay=args.weight_decay,
                                           nesterov=args.nesterov)
    elif args.optimizer == 'Adam':
        self.task_optimizer = torch.optim.Adam(f_params + y_params, lr=args.lr)
        self.d_optimizer = torch.optim.Adam(d_params, lr=args.lr)
        self.d_inv_optimizer = torch.optim.Adam(f_params, lr=args.lr)
        self.c_optimizer = torch.optim.Adam(f_params + y_params, lr=args.lr)
    else:
        raise NotImplementedError

    # Define Criterion
    # whether to use class balanced weights
    if args.use_balanced_weights:
        classes_weights_path = 'dataloders\\datasets\\' + args.dataset + '_classes_weights.npy'
        if os.path.isfile(classes_weights_path):
            weight = np.load(classes_weights_path)
        else:
            weight = calculate_weigths_labels(self.train_loader, self.nclass,
                                              classes_weights_path, self.args.dataset)
        weight = torch.from_numpy(weight.astype(np.float32))
    else:
        weight = None
    self.task_loss = SegmentationLosses(weight=weight, cuda=args.cuda).build_loss(mode=args.loss_type)
    self.domain_loss = DomainLosses(cuda=args.cuda).build_loss()
    self.ca_loss = ''

    # Define Evaluator
    self.evaluator = Evaluator(self.nclass)
    # Define lr scheduler
    self.scheduler = LR_Scheduler(args.lr_scheduler, args.lr, args.epochs, len(self.train_loader))

    # Using cuda
    if args.cuda:
        self.backbone_model = torch.nn.DataParallel(self.backbone_model, device_ids=self.args.gpu_ids)
        self.assp_model = torch.nn.DataParallel(self.assp_model, device_ids=self.args.gpu_ids)
        self.y_model = torch.nn.DataParallel(self.y_model, device_ids=self.args.gpu_ids)
        self.d_model = torch.nn.DataParallel(self.d_model, device_ids=self.args.gpu_ids)
        patch_replication_callback(self.backbone_model)
        patch_replication_callback(self.assp_model)
        patch_replication_callback(self.y_model)
        patch_replication_callback(self.d_model)
        self.backbone_model = self.backbone_model.cuda()
        self.assp_model = self.assp_model.cuda()
        self.y_model = self.y_model.cuda()
        self.d_model = self.d_model.cuda()

    # Resuming checkpoint
    self.best_pred = 0.0
    if args.resume is not None:
        if not os.path.isfile(args.resume):
            raise RuntimeError("=> no checkpoint found at '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)
        args.start_epoch = checkpoint['epoch']
        if args.cuda:
            self.backbone_model.module.load_state_dict(checkpoint['backbone_model_state_dict'])
            self.assp_model.module.load_state_dict(checkpoint['assp_model_state_dict'])
            self.y_model.module.load_state_dict(checkpoint['y_model_state_dict'])
            self.d_model.module.load_state_dict(checkpoint['d_model_state_dict'])
        else:
            self.backbone_model.load_state_dict(checkpoint['backbone_model_state_dict'])
            self.assp_model.load_state_dict(checkpoint['assp_model_state_dict'])
            self.y_model.load_state_dict(checkpoint['y_model_state_dict'])
            self.d_model.load_state_dict(checkpoint['d_model_state_dict'])
        if not args.ft:
            self.task_optimizer.load_state_dict(checkpoint['task_optimizer'])
            self.d_optimizer.load_state_dict(checkpoint['d_optimizer'])
            self.d_inv_optimizer.load_state_dict(checkpoint['d_inv_optimizer'])
            self.c_optimizer.load_state_dict(checkpoint['c_optimizer'])
        if self.args.dataset == 'gtav':
            self.best_pred = checkpoint['best_pred']
        print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))

    # Clear start epoch if fine-tuning
    if args.ft:
        args.start_epoch = 0
def __init__(self, args):
    self.args = args

    """ Define Saver """
    self.saver = Saver(args)
    self.saver.save_experiment_config()

    """ Define Tensorboard Summary """
    self.summary = TensorboardSummary(self.saver.experiment_dir)
    self.writer = self.summary.create_summary()

    self.use_amp = self.args.use_amp
    self.opt_level = self.args.opt_level

    if self.args.dist:
        torch.distributed.init_process_group(backend="nccl", init_method='env://')
        local_rank = self.args.local_rank
        torch.cuda.set_device(local_rank)
        device = torch.device("cuda", local_rank)
        dist.barrier()

    """ Define Dataloader """
    kwargs = {'num_workers': args.workers, 'pin_memory': True, 'drop_last': True}
    self.train_loader, self.val_loader, _, self.nclass = make_data_loader(args, **kwargs)

    """ Define Criterion """
    """ whether to use class balanced weights """
    if args.use_balanced_weights:
        classes_weights_path = os.path.join(Path.db_root_dir(args.dataset),
                                            args.dataset + '_classes_weights.npy')
        if os.path.isfile(classes_weights_path):
            weight = np.load(classes_weights_path)
        else:
            weight = calculate_weigths_labels(args.dataset, self.train_loader, self.nclass)
        weight = torch.from_numpy(weight.astype(np.float32))
    else:
        weight = None
    self.criterion = nn.CrossEntropyLoss(weight=weight, ignore_index=255).cuda()
    # self.criterion = DataParallelCriterion(self.criterion, device_ids=self.args.gpu_ids).cuda()

    if args.network == 'searched-dense':
        cell_path = os.path.join(args.saved_arch_path, 'autodeeplab', 'genotype.npy')
        cell_arch = np.load(cell_path)
        if self.args.C == 2:
            C_index = [5]  # 4_15_80e_40a_03-lr_5e-4wd_6e-4alr_1e-3awd 513x513 batch 4
            network_arch = [1, 2, 2, 2, 3, 2, 2, 1, 1, 1, 1, 2]
            low_level_layer = 0
        elif self.args.C == 3:
            C_index = [3, 7]
            network_arch = [1, 2, 3, 2, 2, 3, 2, 3, 2, 3, 2, 3]
            low_level_layer = 0
        elif self.args.C == 4:
            C_index = [2, 5, 8]
            network_arch = [1, 2, 3, 3, 2, 3, 3, 3, 3, 3, 2, 2]
            low_level_layer = 0
        model = ADD(network_arch, C_index, cell_arch, self.nclass, args, low_level_layer)
    elif args.network.startswith('autodeeplab'):
        network_arch = [0, 0, 0, 1, 2, 1, 2, 2, 3, 3, 2, 1]
        cell_path = os.path.join(args.saved_arch_path, 'autodeeplab', 'genotype.npy')
        cell_arch = np.load(cell_path)
        low_level_layer = 2
        if self.args.C == 2:
            C_index = [5]
        elif self.args.C == 3:
            C_index = [3, 7]
        elif self.args.C == 4:
            C_index = [2, 5, 8]
        if args.network == 'autodeeplab-dense':
            model = ADD(network_arch, C_index, cell_arch, self.nclass, args, low_level_layer)
        elif args.network == 'autodeeplab-baseline':
            model = Baselin_Model(network_arch, C_index, cell_arch, self.nclass, args, low_level_layer)

    """ Define Optimizer """
    optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum,
                                weight_decay=args.weight_decay, nesterov=args.nesterov)
    self.model, self.optimizer = model, optimizer

    """ Define Evaluator """
    self.evaluator = []
    for num in range(self.args.C):
        self.evaluator.append(Evaluator(self.nclass))

    """ Define lr scheduler """
    self.scheduler = LR_Scheduler(args.lr_scheduler, args.lr, args.epochs, len(self.train_loader))

    if args.cuda:
        self.model = self.model.cuda()

    """ mixed precision """
    if self.use_amp and args.cuda:
        keep_batchnorm_fp32 = True if (self.opt_level == 'O2' or self.opt_level == 'O3') else None

        """ fix for current pytorch version with opt_level 'O1' """
        if self.opt_level == 'O1' and torch.__version__ < '1.3':
            for module in self.model.modules():
                if isinstance(module, torch.nn.modules.batchnorm._BatchNorm) \
                        or isinstance(module, SynchronizedBatchNorm2d):
                    """ Hack to fix BN fprop without affine transformation """
                    if module.weight is None:
                        module.weight = torch.nn.Parameter(
                            torch.ones(module.running_var.shape,
                                       dtype=module.running_var.dtype,
                                       device=module.running_var.device),
                            requires_grad=False)
                    if module.bias is None:
                        module.bias = torch.nn.Parameter(
                            torch.zeros(module.running_var.shape,
                                        dtype=module.running_var.dtype,
                                        device=module.running_var.device),
                            requires_grad=False)

        # print(keep_batchnorm_fp32)
        self.model, self.optimizer = amp.initialize(
            self.model, self.optimizer, opt_level=self.opt_level,
            keep_batchnorm_fp32=keep_batchnorm_fp32, loss_scale="dynamic")

    if args.cuda and len(self.args.gpu_ids) > 1:
        if self.opt_level == 'O2' or self.opt_level == 'O3':
            print('currently cannot run with nn.DataParallel and optimization level', self.opt_level)
        if self.args.dist:
            self.model = DistributedDataParallel(self.model,
                                                 device_ids=[self.args.local_rank],
                                                 output_device=self.args.local_rank).cuda()
        else:
            self.model = torch.nn.DataParallel(self.model, device_ids=self.args.gpu_ids)
            # patch_replication_callback(self.model)
        print('training on multiple-GPUs')

    """ Resuming checkpoint """
    self.best_pred = 0.0
    if args.resume is not None:
        if not os.path.isfile(args.resume):
            raise RuntimeError("=> no checkpoint found at '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)
        args.start_epoch = checkpoint['epoch']

        """ if the weights are wrapped in module object we have to clean it """
        if args.clean_module:
            self.model.load_state_dict(checkpoint['state_dict'])
            state_dict = checkpoint['state_dict']
            new_state_dict = OrderedDict()
            for k, v in state_dict.items():
                name = k[7:]  # remove 'module.' of dataparallel
                new_state_dict[name] = v
            copy_state_dict(self.model.state_dict(), new_state_dict)
        else:
            if torch.cuda.device_count() > 1:
                copy_state_dict(self.model.module.state_dict(), checkpoint['state_dict'])
            else:
                copy_state_dict(self.model.state_dict(), checkpoint['state_dict'])

        if not args.ft:
            self.optimizer.load_state_dict(checkpoint['optimizer'])
        self.best_pred = checkpoint['best_pred']
        print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))

    """ Clear start epoch if fine-tuning """
    if args.ft:
        args.start_epoch = 0
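
# A minimal sketch (not from the source) of the backward pass this apex/amp setup
# implies once training starts; `loss` is assumed to come from self.criterion.
def amp_backward(self, loss):
    if self.use_amp:
        # apex scales the loss so fp16 gradients do not underflow
        with amp.scale_loss(loss, self.optimizer) as scaled_loss:
            scaled_loss.backward()
    else:
        loss.backward()
    self.optimizer.step()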
def __init__(self, args):
    self.args = args

    # Define Saver
    self.saver = Saver(args)
    self.saver.save_experiment_config()

    # Define Tensorboard Summary
    self.summary = TensorboardSummary(self.saver.experiment_dir)
    self.writer = self.summary.create_summary()

    self.use_amp = True if (APEX_AVAILABLE and args.use_amp) else False
    self.opt_level = args.opt_level

    kwargs = {'num_workers': args.workers, 'pin_memory': True, 'drop_last': True}
    self.train_loaderA, self.train_loaderB, self.val_loader, self.test_loader, self.nclass = make_data_loader(args, **kwargs)

    if args.use_balanced_weights:
        classes_weights_path = os.path.join(Path.db_root_dir(args.dataset),
                                            args.dataset + '_classes_weights.npy')
        if os.path.isfile(classes_weights_path):
            weight = np.load(classes_weights_path)
        else:
            # if so, which trainloader to use?
            weight = calculate_weigths_labels(args.dataset, self.train_loader, self.nclass)
        weight = torch.from_numpy(weight.astype(np.float32))
    else:
        weight = None
    self.criterion = SegmentationLosses(weight=weight, cuda=args.cuda).build_loss(mode=args.loss_type)

    # Define network
    model = AutoDeeplab(num_classes=self.nclass, num_layers=12, criterion=self.criterion,
                        filter_multiplier=self.args.filter_multiplier,
                        block_multiplier=self.args.block_multiplier,
                        step=self.args.step)
    optimizer = torch.optim.SGD(model.weight_parameters(), args.lr,
                                momentum=args.momentum, weight_decay=args.weight_decay)
    self.model, self.optimizer = model, optimizer

    self.architect_optimizer = torch.optim.Adam(self.model.arch_parameters(),
                                                lr=args.arch_lr, betas=(0.9, 0.999),
                                                weight_decay=args.arch_weight_decay)

    # Define Evaluator
    self.evaluator = Evaluator(self.nclass)
    # Define lr scheduler
    self.scheduler = LR_Scheduler(args.lr_scheduler, args.lr, args.epochs,
                                  len(self.train_loaderA), min_lr=args.min_lr)
    # TODO: Figure out if len(self.train_loader) should be divided by two, in other modules as well

    # Using cuda
    if args.cuda:
        self.model = self.model.cuda()

    # mixed precision
    if self.use_amp and args.cuda:
        keep_batchnorm_fp32 = True if (self.opt_level == 'O2' or self.opt_level == 'O3') else None

        # fix for current pytorch version with opt_level 'O1'
        if self.opt_level == 'O1' and torch.__version__ < '1.3':
            for module in self.model.modules():
                if isinstance(module, torch.nn.modules.batchnorm._BatchNorm):
                    # Hack to fix BN fprop without affine transformation
                    if module.weight is None:
                        module.weight = torch.nn.Parameter(
                            torch.ones(module.running_var.shape,
                                       dtype=module.running_var.dtype,
                                       device=module.running_var.device),
                            requires_grad=False)
                    if module.bias is None:
                        module.bias = torch.nn.Parameter(
                            torch.zeros(module.running_var.shape,
                                        dtype=module.running_var.dtype,
                                        device=module.running_var.device),
                            requires_grad=False)

        # print(keep_batchnorm_fp32)
        self.model, [self.optimizer, self.architect_optimizer] = amp.initialize(
            self.model, [self.optimizer, self.architect_optimizer],
            opt_level=self.opt_level, keep_batchnorm_fp32=keep_batchnorm_fp32,
            loss_scale="dynamic")
        print('cuda finished')

    # Using data parallel
    if args.cuda and len(self.args.gpu_ids) > 1:
        if self.opt_level == 'O2' or self.opt_level == 'O3':
            print('currently cannot run with nn.DataParallel and optimization level', self.opt_level)
        self.model = torch.nn.DataParallel(self.model, device_ids=self.args.gpu_ids)
        patch_replication_callback(self.model)
        print('training on multiple-GPUs')

    # checkpoint = torch.load(args.resume)
    # print('about to load state_dict')
    # self.model.load_state_dict(checkpoint['state_dict'])
    # print('model loaded')
    # sys.exit()

    # Resuming checkpoint
    self.best_pred = 0.0
    if args.resume is not None:
        if not os.path.isfile(args.resume):
            raise RuntimeError("=> no checkpoint found at '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)
        args.start_epoch = checkpoint['epoch']

        # if the weights are wrapped in module object we have to clean it
        if args.clean_module:
            self.model.load_state_dict(checkpoint['state_dict'])
            state_dict = checkpoint['state_dict']
            new_state_dict = OrderedDict()
            for k, v in state_dict.items():
                name = k[7:]  # remove 'module.' of dataparallel
                new_state_dict[name] = v
            self.model.load_state_dict(new_state_dict)
        else:
            if torch.cuda.device_count() > 1 or args.load_parallel:
                self.model.module.load_state_dict(checkpoint['state_dict'])
            else:
                self.model.load_state_dict(checkpoint['state_dict'])

        if not args.ft:
            self.optimizer.load_state_dict(checkpoint['optimizer'])
        self.best_pred = checkpoint['best_pred']
        print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))

    # Clear start epoch if fine-tuning
    if args.ft:
        args.start_epoch = 0
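
# A minimal sketch (not from the source) of the alternating bi-level update this
# setup implies: network weights are fit on train_loaderA while the architecture
# parameters are fit on the held-out train_loaderB. The dict-style batch
# unpacking is an assumption based on the deeplab-style dataloaders.
def search_step(self, batchA, batchB):
    imageA, targetA = batchA['image'].cuda(), batchA['label'].cuda()
    imageB, targetB = batchB['image'].cuda(), batchB['label'].cuda()
    # 1) weight update on split A
    self.optimizer.zero_grad()
    self.criterion(self.model(imageA), targetA).backward()
    self.optimizer.step()
    # 2) architecture-parameter update on split B
    self.architect_optimizer.zero_grad()
    self.criterion(self.model(imageB), targetB).backward()
    self.architect_optimizer.step()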
def __init__(self, args):
    self.args = args
    self.batch_size = args.test_batch_size

    # Define Dataloader
    kwargs = {'num_workers': args.workers, 'pin_memory': True}
    self.train_loader, self.val_loader, self.test_loader, self.nclass = make_data_loader(args, **kwargs)

    # Define network
    model = DeepLab(num_classes=self.nclass,
                    backbone=args.backbone,
                    output_stride=args.out_stride,
                    sync_bn=args.sync_bn,
                    freeze_bn=args.freeze_bn)
    train_params = [{'params': model.get_1x_lr_params(), 'lr': args.lr},
                    {'params': model.get_10x_lr_params(), 'lr': args.lr * 10}]

    # Define Optimizer
    optimizer = torch.optim.SGD(train_params, momentum=args.momentum,
                                weight_decay=args.weight_decay, nesterov=args.nesterov)

    # Define Criterion
    # whether to use class balanced weights
    if args.use_balanced_weights:
        classes_weights_path = os.path.join(Path.db_root_dir(args.dataset),
                                            args.dataset + '_classes_weights.npy')
        if os.path.isfile(classes_weights_path):
            weight = np.load(classes_weights_path)
        else:
            weight = calculate_weigths_labels(args.dataset, self.train_loader, self.nclass)
        weight = torch.from_numpy(weight.astype(np.float32))
    else:
        weight = None
    self.criterion = SegmentationLosses(weight=weight, cuda=args.cuda).build_loss(mode=args.loss_type)
    self.model, self.optimizer = model, optimizer

    # Define Evaluator
    self.evaluator = Evaluator(self.nclass)
    # Define lr scheduler
    self.scheduler = LR_Scheduler(args.lr_scheduler, args.lr, args.epochs, len(self.train_loader))

    # Using cuda
    if args.cuda:
        self.model = torch.nn.DataParallel(self.model, device_ids=self.args.gpu_ids)
        patch_replication_callback(self.model)
        self.model = self.model.cuda()

    # Resuming checkpoint
    self.best_pred = 0.0
    if args.ft:
        args.start_epoch = 0
def __init__(self, args):
    self.args = args

    # Define Dataloader
    kwargs = {'num_workers': args.workers, 'pin_memory': True, 'drop_last': True}
    _, self.val_loader, self.test_loader, self.nclass = make_data_loader(args, **kwargs)

    if args.network == 'searched_dense':
        """ 40_5e_lr_38_31.91 """
        cell_path = os.path.join(args.saved_arch_path, 'autodeeplab', 'genotype.npy')
        cell_arch = np.load(cell_path)
        network_arch = [0, 1, 2, 3, 2, 2, 2, 2, 1, 2, 3, 2]
        low_level_layer = 0
        model = Model_2(network_arch, cell_arch, self.nclass, args, low_level_layer)
    elif args.network == 'searched_baseline':
        cell_path = os.path.join(args.saved_arch_path, 'searched_baseline', 'genotype.npy')
        cell_arch = np.load(cell_path)
        network_arch = [0, 1, 2, 2, 3, 2, 2, 1, 2, 1, 1, 2]
        low_level_layer = 1
        model = Model_2_baseline(network_arch, cell_arch, self.nclass, args, low_level_layer)
    elif args.network.startswith('autodeeplab'):
        network_arch = [0, 0, 0, 1, 2, 1, 2, 2, 3, 3, 2, 1]
        cell_path = os.path.join(args.saved_arch_path, 'autodeeplab', 'genotype.npy')
        cell_arch = np.load(cell_path)
        low_level_layer = 2
        if args.network == 'autodeeplab-dense':
            model = Model_2(network_arch, cell_arch, self.nclass, args, low_level_layer)
        elif args.network == 'autodeeplab-baseline':
            model = Model_2_baseline(network_arch, cell_arch, self.nclass, args, low_level_layer)

    if args.use_balanced_weights:
        classes_weights_path = os.path.join(Path.db_root_dir(args.dataset),
                                            args.dataset + '_classes_weights.npy')
        if os.path.isfile(classes_weights_path):
            weight = np.load(classes_weights_path)
        else:
            # note: this evaluation-only class builds no train loader
            weight = calculate_weigths_labels(args.dataset, self.train_loader, self.nclass)
        weight = torch.from_numpy(weight.astype(np.float32))
    else:
        weight = None
    self.criterion = nn.CrossEntropyLoss(weight=weight, ignore_index=255).cuda()
    self.model = model

    # Define Evaluator
    self.evaluator_1 = Evaluator(self.nclass)
    self.evaluator_2 = Evaluator(self.nclass)

    # Using cuda
    if args.cuda:
        self.model = self.model.cuda()

    # Resuming checkpoint
    self.best_pred = 0.0
    if args.resume is not None:
        if not os.path.isfile(args.resume):
            raise RuntimeError("=> no checkpoint found at '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)
        args.start_epoch = checkpoint['epoch']

        # if the weights are wrapped in module object we have to clean it
        if args.clean_module:
            self.model.load_state_dict(checkpoint['state_dict'])
            state_dict = checkpoint['state_dict']
            new_state_dict = OrderedDict()
            for k, v in state_dict.items():
                name = k[7:]  # remove 'module.' of dataparallel
                new_state_dict[name] = v
            self.model.load_state_dict(new_state_dict)
        else:
            self.model.load_state_dict(checkpoint['state_dict'])

        self.best_pred = checkpoint['best_pred']
        print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))
def __init__(self, args):
    self.args = args

    # Define Saver
    self.saver = Saver(args)
    self.saver.save_experiment_config()

    # Define Tensorboard Summary (visualization via tensorboardX)
    self.summary = TensorboardSummary(self.saver.experiment_dir)
    self.writer = self.summary.create_summary()

    # Define Dataloader
    # kwargs = {'num_workers': args.workers, 'pin_memory': True}
    kwargs = {'num_workers': 0, 'pin_memory': True}
    # self.train_loader, self.val_loader, self.test_loader, self.nclass = make_data_loader(args, **kwargs)
    self.train_loader, self.val_loader, self.nclass = make_data_loader(args, **kwargs)

    # whether to use class balanced weights
    if args.use_balanced_weights:
        classes_weights_path = os.path.join(Path.db_root_dir(args.dataset),
                                            args.dataset + '_classes_weights.npy')
        if os.path.isfile(classes_weights_path):
            weight = np.load(classes_weights_path)
        else:
            weight = calculate_weigths_labels(args.dataset, self.train_loader, self.nclass)
        weight = torch.from_numpy(weight.astype(np.float32))
    else:
        weight = None

    # Define Criterion
    self.criterion = SegmentationLosses(weight=weight, cuda=args.cuda).build_loss(mode=args.loss_type)

    # HRNet configuration, shared by the 'hrnet' and 'hrnet+ocr' branches below.
    # EXTRA defines the model structure: the four stages and their parameters.
    HRNET_CFG = {
        'ALIGN_CORNERS': True,
        'EXTRA': {
            'FINAL_CONV_KERNEL': 1,
            'STAGE1': {
                'NUM_MODULES': 1,   # number of HighResolutionModule repetitions
                'NUM_BRANCHES': 1,  # number of branches
                'BLOCK': 'BOTTLENECK',
                'NUM_BLOCKS': 4,
                'NUM_CHANNELS': 64,
                'FUSE_METHOD': 'SUM'
            },
            'STAGE2': {
                'NUM_MODULES': 1,
                'NUM_BRANCHES': 2,
                'BLOCK': 'BASIC',
                'NUM_BLOCKS': [4, 4],
                'NUM_CHANNELS': [48, 96],
                'FUSE_METHOD': 'SUM'
            },
            'STAGE3': {
                'NUM_MODULES': 4,
                'NUM_BRANCHES': 3,
                'BLOCK': 'BASIC',
                'NUM_BLOCKS': [4, 4, 4],
                'NUM_CHANNELS': [48, 96, 192],
                'FUSE_METHOD': 'SUM'
            },
            'STAGE4': {
                'NUM_MODULES': 3,
                'NUM_BRANCHES': 4,
                'BLOCK': 'BASIC',
                'NUM_BLOCKS': [4, 4, 4, 4],
                'NUM_CHANNELS': [48, 96, 192, 384],
                'FUSE_METHOD': 'SUM'
            }
        }
    }

    if args.model_name == 'unet':
        # model = UNet_ac(args.n_channels, args.n_filters, args.n_class).cuda()
        model = UNet_SNws(args.n_channels, args.n_filters, args.n_class,
                          args.using_movavg, args.using_bn).cuda()
        model = UNet_bn(args.n_channels, args.n_filters, args.n_class).cuda()  # overrides the line above
        optimizer = torch.optim.AdamW(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
        # optimizer = torch.optim.AdamW(model.parameters(), lr=args.arch_lr, betas=(0.9, 0.999), weight_decay=args.weight_decay)
        # optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay, nesterov=args.nesterov)
    # elif args.model_name == 'hunet':
    #     model = HUNet(args.n_channels, args.n_filters, args.n_class, args.using_movavg, args.using_bn).cuda()
    #     optimizer = torch.optim.AdamW(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    elif args.model_name == 'unet3+':
        model = UNet3p_SNws(args.n_channels, args.n_filters, args.n_class,
                            args.using_movavg, args.using_bn).cuda()
        optimizer = torch.optim.AdamW(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    elif args.model_name == 'unet3+_aspp':
        # model = UNet3p_aspp(args.n_channels, args.n_filters, args.n_class, args.using_movavg, args.using_bn).cuda()
        # model = UNet3p_aspp_SNws(args.n_channels, args.n_filters, args.n_class, args.using_movavg, args.using_bn).cuda()
        # model = UNet3p_res_aspp_SNws(args.n_channels, args.n_filters, args.n_class, args.using_movavg, args.using_bn).cuda()
        model = UNet3p_res_edge_aspp_SNws(args.n_channels, args.n_filters, args.n_class,
                                          args.using_movavg, args.using_bn).cuda()
        optimizer = torch.optim.AdamW(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    elif args.model_name == 'unet3+_ocr':
        model = UNet3p_res_ocr_SNws(args.n_channels, args.n_filters, args.n_class,
                                    args.using_movavg, args.using_bn).cuda()
        optimizer = torch.optim.AdamW(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    # elif args.model_name == 'unet3+_resnest_aspp':
    #     model = UNet3p_resnest_aspp(args.n_channels, args.n_filters, args.n_class, args.using_movavg, args.using_bn).cuda()
    #     optimizer = torch.optim.AdamW(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    elif args.model_name == 'gscnn':
        model = GSCNN(args.n_channels, args.n_filters, args.n_class,
                      args.using_movavg, args.using_bn).cuda()
        optimizer = torch.optim.AdamW(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    elif args.model_name == 'pspnet':
        model = PSPNet(args.n_channels, args.n_filters, args.n_class).cuda()
        optimizer = torch.optim.AdamW(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    elif args.model_name == 'segnet':
        model = Segnet(args.n_channels, args.n_filters, args.n_class).cuda()
        optimizer = torch.optim.AdamW(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    elif args.model_name == 'hrnet':
        model = HighResolutionNet(args.n_channels, args.n_filters, args.n_class, HRNET_CFG).cuda()
        # model.init_weights()
        optimizer = torch.optim.AdamW(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    elif args.model_name == 'hrnet+ocr':
        # model = HighResolutionNet_OCR(args.n_channels, args.n_filters, args.n_class, HRNET_CFG).cuda()
        model = HighResolutionNet_OCR_SNws(args.n_channels, args.n_filters, args.n_class, HRNET_CFG).cuda()
        # model.init_weights()
        optimizer = torch.optim.AdamW(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    elif args.model_name == 'deeplabv3+':
        # Define network
        model = DeepLab(num_classes=self.nclass,
                        backbone=args.backbone,
                        output_stride=args.out_stride,
                        sync_bn=args.sync_bn,
                        freeze_bn=args.freeze_bn)
        backbone = model.backbone
        # backbone.conv1 = nn.Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        print('change the input channels', backbone.conv1)
        train_params = [{'params': model.get_1x_lr_params(), 'lr': args.lr},
                        {'params': model.get_10x_lr_params(), 'lr': args.lr * 10}]
        # optimizer = torch.optim.SGD(train_params, momentum=args.momentum,
        #                             weight_decay=args.weight_decay, nesterov=args.nesterov)
        optimizer = torch.optim.AdamW(train_params, weight_decay=args.weight_decay)
        # optimizer = torch.optim.AdamW(train_params, lr=args.arch_lr, betas=(0.9, 0.999), weight_decay=args.weight_decay)
    # elif args.model_name == 'autodeeplab':
    #     model = AutoDeeplab(args.n_class, 12, self.criterion, crop_size=args.crop_size)
    #     optimizer = torch.optim.AdamW(model.weight_parameters(), lr=args.lr, weight_decay=args.weight_decay)
    #     optimizer = torch.optim.SGD(model.weight_parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)

    self.model, self.optimizer = model, optimizer

    # Define Evaluator
    self.evaluator = Evaluator(self.nclass)
    # Define lr scheduler
    self.scheduler = LR_Scheduler(args.lr_scheduler, args.lr, args.epochs, len(self.train_loader))

    # Using cuda
    if args.cuda:
        print(self.args.gpu_ids)
        self.model = torch.nn.DataParallel(self.model, device_ids=self.args.gpu_ids)
        patch_replication_callback(self.model)
        self.model = self.model.cuda()

    # Resuming checkpoint
    self.best_pred = 0.0
    if args.resume is not None:
        if not os.path.isfile(args.resume):
            raise RuntimeError("=> no checkpoint found at '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)
        args.start_epoch = checkpoint['epoch']
        if args.cuda:
            self.model.module.load_state_dict(checkpoint['state_dict'])
        else:
            self.model.load_state_dict(checkpoint['state_dict'])
        if not args.ft:
            self.optimizer.load_state_dict(checkpoint['optimizer'])
        self.best_pred = checkpoint['best_pred']
        print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))

    # Clear start epoch if fine-tuning
    if args.ft:
        args.start_epoch = 0
def __init__(self, args):
    self.args = args

    # Define Saver
    self.saver = Saver(args)
    self.saver.save_experiment_config()

    # Define Tensorboard Summary
    self.summary = TensorboardSummary(self.saver.experiment_dir)
    self.writer = self.summary.create_summary()

    # Denormalization statistics for the depth image
    self.mean_depth = torch.as_tensor(0.12176, dtype=torch.float32, device='cpu')
    self.std_depth = torch.as_tensor(0.09752, dtype=torch.float32, device='cpu')

    # Define Dataloader
    kwargs = {'num_workers': args.workers, 'pin_memory': False}
    self.train_loader, self.val_loader, self.test_loader, self.nclass = make_data_loader(args, **kwargs)

    # Define network
    resnet = resnet18(pretrained=True, efficient=False, use_bn=True)
    model = RFNet(resnet, num_classes=self.nclass, use_bn=True)
    train_params = [{'params': model.random_init_params()},
                    {'params': model.fine_tune_params(), 'lr': args.lr,
                     'weight_decay': args.weight_decay}]

    # Define Optimizer
    optimizer = torch.optim.Adam(train_params, lr=args.lr * 4, weight_decay=args.weight_decay * 4)

    # Define Criterion
    # whether to use class balanced weights
    if args.use_balanced_weights:
        classes_weights_path = os.path.join(Path.db_root_dir(args.dataset),
                                            args.dataset + '_classes_weights.npy')
        if os.path.isfile(classes_weights_path):
            weight = np.load(classes_weights_path)
        else:
            weight = calculate_weigths_labels(args.dataset, self.train_loader, self.nclass)
        weight = torch.from_numpy(weight.astype(np.float32))
    else:
        weight = None
    # Define loss function
    self.criterion = SegmentationLosses(weight=weight, cuda=args.cuda).build_loss(mode=args.loss_type)
    self.model, self.optimizer = model, optimizer

    # Define Evaluator
    self.evaluator = Evaluator(self.nclass)
    # Define lr scheduler
    self.scheduler = LR_Scheduler(args.lr_scheduler, args.lr, args.epochs, len(self.train_loader))

    # Using cuda
    if args.cuda:
        self.model = torch.nn.DataParallel(self.model, device_ids=self.args.gpu_ids)
        patch_replication_callback(self.model)
        self.model = self.model.cuda()

    # Resuming checkpoint
    self.best_pred = 0.0
    if args.resume is not None:
        if not os.path.isfile(args.resume):
            raise RuntimeError("=> no checkpoint found at '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)
        args.start_epoch = checkpoint['epoch']
        if args.cuda:
            self.model.module.load_state_dict(checkpoint['state_dict'])
        else:
            self.model.load_state_dict(checkpoint['state_dict'])
        if not args.ft:
            self.optimizer.load_state_dict(checkpoint['optimizer'])
        self.best_pred = checkpoint['best_pred']
        print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))

    # Clear start epoch if fine-tuning
    if args.ft:
        args.start_epoch = 0
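
# A minimal sketch (not from the source) of the denormalization these statistics
# enable, e.g. before logging depth inputs to TensorBoard:
def denormalize_depth(self, depth):
    # inverts (x - mean) / std on a CPU tensor
    return depth * self.std_depth + self.mean_depth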
def __init__(self, args):
    self.args = args

    # Define Saver
    self.saver = Saver(args)
    self.saver.save_experiment_config()

    # Define Tensorboard Summary
    self.summary = TensorboardSummary(self.saver.experiment_dir)
    self.writer = self.summary.create_summary()

    # Define Dataloader
    kwargs = {'num_workers': args.workers, 'pin_memory': True}
    if args.dataset == "fashion_clothes":
        train_set = fashion.FashionDataset(args, Path.db_root_dir("fashion_clothes"), mode='train')
        val_set = fashion.FashionDataset(args, Path.db_root_dir("fashion_clothes"), mode='test')
        self.nclass = train_set.nclass
        print("Train size {}, val size {}".format(len(train_set), len(val_set)))
        self.train_loader = DataLoader(dataset=train_set, batch_size=args.batch_size,
                                       shuffle=True, **kwargs)
        self.val_loader = DataLoader(dataset=val_set, batch_size=args.batch_size,
                                     shuffle=False, **kwargs)
        self.test_loader = None
        assert self.nclass == 7

    self.best_pred = 0.0

    if args.model == 'deeplabv3+':
        model = DeepLab(backbone=args.backbone,
                        output_stride=args.out_stride,
                        sync_bn=args.sync_bn,
                        freeze_bn=args.freeze_bn)
        if args.resume is not None:
            if not os.path.isfile(args.resume):
                raise RuntimeError("=> no checkpoint found at '{}'".format(args.resume))
            if args.cuda:
                checkpoint = torch.load(args.resume)
            else:
                checkpoint = torch.load(args.resume, map_location='cpu')
            args.start_epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))

        # Freeze the backbone
        if args.freeze_backbone:
            set_parameter_requires_grad(model.backbone, False)

        ###### NEW DECODER ######
        # Different types of fine-tuning
        if args.ft_type == 'decoder':
            set_parameter_requires_grad(model, False)
            model.decoder = build_decoder(self.nclass, 'resnet', nn.BatchNorm2d)
            train_params = [{'params': model.get_1x_lr_params(), 'lr': args.lr},
                            {'params': model.get_10x_lr_params(), 'lr': args.lr * 10}]
        elif args.ft_type == 'last_layer':
            set_parameter_requires_grad(model, False)
            model.decoder.last_conv[8] = nn.Conv2d(in_channels=256,
                                                   out_channels=self.nclass,
                                                   kernel_size=1)
            model.decoder.last_conv[8].reset_parameters()
            train_params = [{'params': model.get_1x_lr_params(), 'lr': args.lr},
                            {'params': model.get_10x_lr_params(), 'lr': args.lr * 10}]
        elif args.ft_type == 'all':
            # Reset the last layer to generate the output we want
            model.decoder.last_conv[8] = nn.Conv2d(in_channels=256,
                                                   out_channels=self.nclass,
                                                   kernel_size=1)
            model.decoder.last_conv[8].reset_parameters()
            train_params = [{'params': model.get_1x_lr_params(), 'lr': args.lr},
                            {'params': model.get_10x_lr_params(), 'lr': args.lr * 10}]

    # Define Optimizer
    optimizer = torch.optim.SGD(train_params, momentum=args.momentum,
                                weight_decay=args.weight_decay, nesterov=args.nesterov)

    # Define Criterion
    # whether to use class balanced weights
    if args.use_balanced_weights:
        weight = calculate_weigths_labels(args.dataset, self.train_loader, self.nclass)
        weight = torch.from_numpy(weight.astype(np.float32))
        print("weight is {}".format(weight))
    else:
        weight = None
    self.criterion = SegmentationLosses(weight=weight, cuda=args.cuda).build_loss(mode=args.loss_type)
    self.model, self.optimizer = model, optimizer

    # Define Evaluator
    self.evaluator = Evaluator(self.nclass)
    # Define lr scheduler
    self.scheduler = LR_Scheduler(args.lr_scheduler, args.lr, args.epochs, len(self.train_loader))

    args.start_epoch = 0
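
# For reference, a typical definition of the freezing helper used above; a sketch,
# not necessarily the source's exact implementation:
def set_parameter_requires_grad(module, requires_grad):
    for param in module.parameters():
        param.requires_grad = requires_grad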
def __init__(self, args):
    self.args = args

    # Define Saver
    self.saver = Saver(args)
    self.saver.save_experiment_config()

    # Define Tensorboard Summary
    self.summary = TensorboardSummary(self.saver.experiment_dir)
    self.writer = self.summary.create_summary()

    kwargs = {'num_workers': args.workers, 'pin_memory': True}
    self.train_loader1, self.train_loader2, self.val_loader, self.test_loader, self.nclass = make_data_loader(args, **kwargs)

    if args.use_balanced_weights:
        classes_weights_path = os.path.join(Path.db_root_dir(args.dataset),
                                            args.dataset + '_classes_weights.npy')
        if os.path.isfile(classes_weights_path):
            weight = np.load(classes_weights_path)
        else:
            # if so, which trainloader to use?
            weight = calculate_weigths_labels(args.dataset, self.train_loader, self.nclass)
        weight = torch.from_numpy(weight.astype(np.float32))
    else:
        weight = None
    self.criterion = SegmentationLosses(weight=weight, cuda=args.cuda).build_loss(mode=args.loss_type)

    # Define network
    model = AutoDeeplab(num_classes=self.nclass, num_layers=12, criterion=self.criterion)
    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum, weight_decay=args.weight_decay)
    self.model, self.optimizer = model, optimizer

    # Define Evaluator
    self.evaluator = Evaluator(self.nclass)
    # Define lr scheduler
    self.scheduler = LR_Scheduler(args.lr_scheduler, args.lr, args.epochs, len(self.train_loader1))

    self.architect = Architect(self.model, args)

    # Using cuda
    if args.cuda:
        self.model = torch.nn.DataParallel(self.model.cuda())
        # patch_replication_callback(self.model)
        self.model = self.model.cuda()
        print('cuda finished')

    # Resuming checkpoint
    self.best_pred = 0.0
    if args.resume is not None:
        if not os.path.isfile(args.resume):
            raise RuntimeError("=> no checkpoint found at '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)
        args.start_epoch = checkpoint['epoch']
        if args.cuda:
            self.model.load_state_dict(checkpoint['state_dict'])
        else:
            self.model.load_state_dict(checkpoint['state_dict'])
        if not args.ft:
            self.optimizer.load_state_dict(checkpoint['optimizer'])
        self.best_pred = checkpoint['best_pred']
        print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))

    # Clear start epoch if fine-tuning
    if args.ft:
        args.start_epoch = 0
def __init__(self, args):
    self.args = args

    # Define Saver
    self.saver = Saver(args)
    self.saver.save_experiment_config()

    # Define Tensorboard Summary
    self.summary = TensorboardSummary(self.saver.experiment_dir)
    self.writer = self.summary.create_summary()

    # Define Dataloader
    kwargs = {'num_workers': args.workers, 'pin_memory': True}
    self.train_loader, self.val_loader, self.test_loader, self.nclass = make_data_loader(args, **kwargs)

    cell_path = os.path.join(args.saved_arch_path, 'genotype.npy')
    network_path_space = os.path.join(args.saved_arch_path, 'network_path_space.npy')
    new_cell_arch = np.load(cell_path)
    new_network_arch = np.load(network_path_space)

    # Define network
    model = newModel(network_arch=new_network_arch,
                     cell_arch=new_cell_arch,
                     num_classes=self.nclass,
                     num_layers=12)
    #                output_stride=args.out_stride,
    #                sync_bn=args.sync_bn,
    #                freeze_bn=args.freeze_bn)
    # TODO: look into these
    # TODO: also look into different param groups as done in deeplab below
    # train_params = [{'params': model.get_1x_lr_params(), 'lr': args.lr},
    #                 {'params': model.get_10x_lr_params(), 'lr': args.lr * 10}]
    train_params = [{'params': model.parameters(), 'lr': args.lr}]

    # Define Optimizer
    optimizer = torch.optim.SGD(train_params, momentum=args.momentum,
                                weight_decay=args.weight_decay, nesterov=args.nesterov)

    # Define Criterion
    # whether to use class balanced weights
    if args.use_balanced_weights:
        classes_weights_path = os.path.join(Path.db_root_dir(args.dataset),
                                            args.dataset + '_classes_weights.npy')
        if os.path.isfile(classes_weights_path):
            weight = np.load(classes_weights_path)
        else:
            weight = calculate_weigths_labels(args.dataset, self.train_loader, self.nclass)
        weight = torch.from_numpy(weight.astype(np.float32))
    else:
        weight = None
    self.criterion = SegmentationLosses(weight=weight, cuda=args.cuda).build_loss(mode=args.loss_type)
    self.model, self.optimizer = model, optimizer

    # Define Evaluator
    self.evaluator = Evaluator(self.nclass)
    # Define lr scheduler
    self.scheduler = LR_Scheduler(args.lr_scheduler, args.lr,
                                  args.epochs, len(self.train_loader))  # TODO: use min_lr ?
    # TODO: Figure out if len(self.train_loader) should be divided by two, in other modules as well

    # Using cuda
    if args.cuda:
        if torch.cuda.device_count() > 1 or args.load_parallel:
            self.model = torch.nn.DataParallel(self.model.cuda())
            patch_replication_callback(self.model)
        self.model = self.model.cuda()
        print('cuda finished')

    # Resuming checkpoint
    self.best_pred = 0.0
    if args.resume is not None:
        if not os.path.isfile(args.resume):
            raise RuntimeError("=> no checkpoint found at '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)
        args.start_epoch = checkpoint['epoch']

        # if the weights are wrapped in module object we have to clean it
        if args.clean_module:
            self.model.load_state_dict(checkpoint['state_dict'])
            state_dict = checkpoint['state_dict']
            new_state_dict = OrderedDict()
            for k, v in state_dict.items():
                name = k[7:]  # remove 'module.' of dataparallel
                new_state_dict[name] = v
            self.model.load_state_dict(new_state_dict)
        else:
            if torch.cuda.device_count() > 1 or args.load_parallel:
                self.model.module.load_state_dict(checkpoint['state_dict'])
            else:
                self.model.load_state_dict(checkpoint['state_dict'])

        if not args.ft:
            self.optimizer.load_state_dict(checkpoint['optimizer'])
        self.best_pred = checkpoint['best_pred']
        print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))

    # Clear start epoch if fine-tuning
    if args.ft:
        args.start_epoch = 0
def __init__(self, args):
    self.args = args

    # Define Saver
    self.saver = Saver(args)
    self.saver.save_experiment_config()

    # Define Tensorboard Summary
    # self.summary = TensorboardSummary(self.saver.experiment_dir)
    # self.writer = self.summary.create_summary()

    # Define Dataloader
    kwargs = {'num_workers': args.workers, 'pin_memory': True}
    self.train_loader, self.val_loader, self.test_loader, self.nclass = make_data_loader(args, **kwargs)

    # Define network
    model = DeepLab(num_classes=self.nclass,
                    backbone=args.backbone,
                    output_stride=args.out_stride,
                    sync_bn=args.sync_bn,
                    freeze_bn=args.freeze_bn)
    train_params = [{'params': model.get_1x_lr_params(), 'lr': args.lr},
                    {'params': model.get_10x_lr_params(), 'lr': args.lr * 10}]

    # Define Optimizer
    optimizer = torch.optim.SGD(train_params, momentum=args.momentum,
                                weight_decay=args.weight_decay, nesterov=args.nesterov)

    # Define Criterion
    # whether to use class balanced weights
    if args.use_balanced_weights:
        classes_weights_path = os.path.join(Path.db_root_dir(args.dataset),
                                            args.dataset + '_classes_weights.npy')
        if os.path.isfile(classes_weights_path):
            weight = np.load(classes_weights_path)
        else:
            weight = calculate_weigths_labels(args.dataset, self.train_loader, self.nclass)
        weight = torch.from_numpy(weight.astype(np.float32))
    else:
        weight = None
    self.criterion = SegmentationLosses(weight=weight, cuda=args.cuda).build_loss(mode=args.loss_type)
    self.model, self.optimizer = model, optimizer

    # Define Evaluator
    self.evaluator = Evaluator(self.nclass)
    # Define lr scheduler
    self.scheduler = LR_Scheduler(args.lr_scheduler, args.lr, args.epochs, len(self.train_loader))

    # Using cuda
    if args.cuda:
        self.model = torch.nn.DataParallel(self.model, device_ids=self.args.gpu_ids)
        patch_replication_callback(self.model)
        self.model = self.model.cuda()

    # Resuming checkpoint
    self.best_pred = 0.0
    if args.resume is not None:
        if not os.path.isfile(args.resume):
            raise RuntimeError("=> no checkpoint found at '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)
        args.start_epoch = checkpoint['epoch']
        if args.cuda:
            self.model.module.load_state_dict(checkpoint['state_dict'])
        else:
            self.model.load_state_dict(checkpoint['state_dict'])
        if not args.ft:
            self.optimizer.load_state_dict(checkpoint['optimizer'])
        self.best_pred = checkpoint['best_pred']
        print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))

    # Clear start epoch if fine-tuning
    if args.ft:
        args.start_epoch = 0
def __init__(self, args):
    self.args = args

    # Define Saver
    self.saver = Saver(args)
    self.saver.save_experiment_config()

    # Define Tensorboard Summary
    self.summary = TensorboardSummary(self.saver.experiment_dir)
    if not args.test:
        self.writer = self.summary.create_summary()

    # Define Dataloader
    kwargs = {'num_workers': args.workers, 'pin_memory': True}
    self.train_loader, self.val_loader, self.test_loader, self.nclass = make_data_loader(args, **kwargs)

    if self.args.norm == 'gn':
        norm = gn
    elif self.args.norm == 'bn':
        if self.args.sync_bn:
            norm = syncbn
        else:
            norm = bn
    elif self.args.norm == 'abn':
        if self.args.sync_bn:
            norm = syncabn(self.args.gpu_ids)
        else:
            norm = abn
    else:
        print("Please check the norm.")
        exit()

    # Define network
    if self.args.model == 'deeplabv3+':
        model = DeepLab(args=self.args, num_classes=self.nclass, freeze_bn=args.freeze_bn)
    elif self.args.model == 'deeplabv3':
        model = DeepLabv3(Norm=self.args.norm, backbone=args.backbone,
                          output_stride=args.out_stride, num_classes=self.nclass,
                          freeze_bn=args.freeze_bn)
    elif self.args.model == 'fpn':
        model = FPN(args=args, num_classes=self.nclass)

    '''
    model.cuda()
    summary(model, input_size=(3, 720, 1280))
    exit()
    '''

    train_params = [{'params': model.get_1x_lr_params(), 'lr': args.lr},
                    {'params': model.get_10x_lr_params(), 'lr': args.lr * 10}]

    # Define Optimizer
    optimizer = torch.optim.SGD(train_params, momentum=args.momentum,
                                weight_decay=args.weight_decay, nesterov=args.nesterov)

    # Define Criterion
    # whether to use class balanced weights
    if args.use_balanced_weights:
        classes_weights_path = os.path.join(Path.db_root_dir(args.dataset),
                                            args.dataset + '_classes_weights.npy')
        if os.path.isfile(classes_weights_path):
            weight = np.load(classes_weights_path)
        else:
            weight = calculate_weigths_labels(args.dataset, self.train_loader, self.nclass)
        weight = torch.from_numpy(weight.astype(np.float32))
    else:
        weight = None
    self.criterion = SegmentationLosses(weight=weight, cuda=args.cuda).build_loss(mode=args.loss_type)
    self.model, self.optimizer = model, optimizer

    # Define Evaluator
    self.evaluator = Evaluator(self.nclass)
    # Define lr scheduler
    self.scheduler = LR_Scheduler(args.lr_scheduler, args.lr, args.epochs, len(self.train_loader))

    # Using cuda
    if args.cuda:
        self.model = torch.nn.DataParallel(self.model, device_ids=self.args.gpu_ids)
        patch_replication_callback(self.model)
        self.model = self.model.cuda()

    # Resuming checkpoint
    self.best_pred = 0.0
    if args.resume is not None:
        if not os.path.isfile(args.resume):
            raise RuntimeError("=> no checkpoint found at '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)
        if args.ft:
            args.start_epoch = 0
        else:
            args.start_epoch = checkpoint['epoch']
        if args.cuda:
            # self.model.module.load_state_dict(checkpoint['state_dict'])
            pretrained_dict = checkpoint['state_dict']
            model_dict = {}
            state_dict = self.model.module.state_dict()
            for k, v in pretrained_dict.items():
                if k in state_dict:
                    model_dict[k] = v
            state_dict.update(model_dict)
            self.model.module.load_state_dict(state_dict)
        else:
            # self.model.load_state_dict(checkpoint['state_dict'])
            pretrained_dict = checkpoint['state_dict']
            model_dict = {}
            state_dict = self.model.state_dict()
            for k, v in pretrained_dict.items():
                if k in state_dict:
                    model_dict[k] = v
            state_dict.update(model_dict)
            self.model.load_state_dict(state_dict)
        if not args.ft:
            self.optimizer.load_state_dict(checkpoint['optimizer'])
        self.best_pred = checkpoint['best_pred']
        print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))
    elif args.decoder is not None:
        if not os.path.isfile(args.decoder):
            raise RuntimeError("=> no checkpoint for decoder found at '{}'".format(args.decoder))
        checkpoint = torch.load(args.decoder)
        args.start_epoch = 0  # loading only the decoder always implies fine-tuning
        if args.cuda:
            decoder_dict = checkpoint['state_dict']
            model_dict = {}
            state_dict = self.model.module.state_dict()
            for k, v in decoder_dict.items():
                if 'aspp' not in k:
                    continue
                if k in state_dict:
                    model_dict[k] = v
            state_dict.update(model_dict)
            self.model.module.load_state_dict(state_dict)
        else:
            raise NotImplementedError("Please USE CUDA!!!")

    # Clear start epoch if fine-tuning
    if args.ft:
        args.start_epoch = 0
def __init__(self, args):
    self.args = args

    # Define Saver
    self.saver = Saver(args)
    self.saver.save_experiment_config()

    # Define Tensorboard Summary
    self.summary = TensorboardSummary(self.saver.experiment_dir)
    self.writer = self.summary.create_summary()

    # Define Dataloader
    kwargs = {'num_workers': args.workers, 'pin_memory': True}
    self.train_loader, self.val_loader, self.test_loader, self.nclass = make_data_loader(args, **kwargs)

    # Define network
    model = DeepLab(num_classes=self.nclass,
                    backbone=args.backbone,
                    output_stride=args.out_stride,
                    sync_bn=args.sync_bn,
                    freeze_bn=args.freeze_bn)
    train_params = [{'params': model.get_1x_lr_params(), 'lr': args.lr},
                    {'params': model.get_10x_lr_params(), 'lr': args.lr * 10}]

    # Define Optimizer
    optimizer = torch.optim.SGD(train_params, momentum=args.momentum,
                                weight_decay=args.weight_decay, nesterov=args.nesterov)

    # Define Criterion
    # whether to use class balanced weights
    if args.use_balanced_weights:
        classes_weights_path = os.path.join(Path.db_root_dir(args.dataset),
                                            args.dataset + '_classes_weights.npy')
        if os.path.isfile(classes_weights_path):
            weight = np.load(classes_weights_path)
        else:
            weight = calculate_weigths_labels(args.dataset, self.train_loader, self.nclass)
        weight = torch.from_numpy(weight.astype(np.float32))
    else:
        weight = None
    self.criterion = SegmentationLosses(weight=weight, cuda=args.cuda).build_loss(mode=args.loss_type)
    self.model, self.optimizer = model, optimizer

    # Define Evaluator
    self.evaluator = Evaluator(self.nclass)
    # Define lr scheduler
    self.scheduler = LR_Scheduler(args.lr_scheduler, args.lr, args.epochs, len(self.train_loader))

    # Using cuda
    if args.cuda:
        self.model = torch.nn.DataParallel(self.model, device_ids=self.args.gpu_ids)
        patch_replication_callback(self.model)
        self.model = self.model.cuda()

    # Resuming checkpoint
    self.best_pred = 0.0
    if args.resume is not None:
        if not os.path.isfile(args.resume):
            raise RuntimeError("=> no checkpoint found at '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)
        args.start_epoch = checkpoint['epoch']
        if args.cuda:
            self.model.module.load_state_dict(checkpoint['state_dict'])
        else:
            self.model.load_state_dict(checkpoint['state_dict'])
        if not args.ft:
            self.optimizer.load_state_dict(checkpoint['optimizer'])
        self.best_pred = checkpoint['best_pred']
        print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))

    # Clear start epoch if fine-tuning
    if args.ft:
        args.start_epoch = 0
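
# A minimal usage sketch (not from the source): trainers like these are typically
# built from parsed CLI args and driven epoch by epoch. The Trainer class name,
# the argparse helper, and the training/validation methods are assumed from the
# surrounding codebase.
if __name__ == '__main__':
    args = build_argparser().parse_args()  # hypothetical argparse helper
    trainer = Trainer(args)                # class wrapping the __init__ above
    for epoch in range(trainer.args.start_epoch, trainer.args.epochs):
        trainer.training(epoch)
        trainer.validation(epoch)
    trainer.writer.close()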
def __init__(self, args):
    self.args = args

    # Define Saver
    self.saver = Saver(args)
    self.saver.save_experiment_config()

    # Define Tensorboard Summary
    self.summary = TensorboardSummary(self.saver.experiment_dir)
    self.writer = self.summary.create_summary()

    # Define Dataloader
    kwargs = {'num_workers': args.workers, 'pin_memory': True}
    self.train_loader, self.val_loader, self.test_loader, self.nclass = make_data_loader(args, **kwargs)

    # Define network
    model = DeepLab(num_classes=self.nclass,
                    backbone=args.backbone,
                    output_stride=args.out_stride,
                    sync_bn=args.sync_bn,
                    freeze_bn=args.freeze_bn)

    # When the model is initialized, track_running_stats is True, so running_mean and
    # running_var are created and loaded from the pretrained model. Using batch
    # statistics in training mode makes optimization easier, while the running
    # statistics would be used in eval mode. In this toy example we do not use
    # running statistics at all:
    for child in model.modules():
        if type(child) == nn.BatchNorm2d:
            # use batch stats for both train and eval modes;
            # if running stats are not None, they would still be updated
            child.track_running_stats = False
        if type(child) == nn.Dropout:
            child.p = 0  # no dropout

    train_params = [{'params': model.get_1x_lr_params(), 'lr': args.lr},
                    {'params': model.get_10x_lr_params(), 'lr': args.lr * 10}]

    # Define Optimizer
    optimizer = torch.optim.SGD(train_params, momentum=args.momentum,
                                weight_decay=args.weight_decay, nesterov=args.nesterov)

    # Define Criterion
    # whether to use class balanced weights
    if args.use_balanced_weights:
        classes_weights_path = os.path.join(Path.db_root_dir(args.dataset),
                                            args.dataset + '_classes_weights.npy')
        if os.path.isfile(classes_weights_path):
            weight = np.load(classes_weights_path)
        else:
            weight = calculate_weigths_labels(args.dataset, self.train_loader, self.nclass)
        weight = torch.from_numpy(weight.astype(np.float32))
    else:
        weight = None
    self.criterion = SegmentationLosses(weight=weight, cuda=args.cuda).build_loss(mode=args.loss_type)
    self.model, self.optimizer = model, optimizer

    if args.densecrfloss > 0:
        self.densecrflosslayer = DenseCRFLoss(weight=args.densecrfloss,
                                              sigma_rgb=args.sigma_rgb,
                                              sigma_xy=args.sigma_xy,
                                              scale_factor=args.rloss_scale)
        print(self.densecrflosslayer)

    # Define Evaluator
    self.evaluator = Evaluator(self.nclass)
    # Define lr scheduler
    self.scheduler = LR_Scheduler(args.lr_scheduler, args.lr, args.epochs, len(self.train_loader))

    # Using cuda
    if args.cuda:
        self.model = torch.nn.DataParallel(self.model, device_ids=self.args.gpu_ids)
        patch_replication_callback(self.model)
        self.model = self.model.cuda()

    # Resuming checkpoint
    self.best_pred = 0.0
    if args.resume is not None:
        if not os.path.isfile(args.resume):
            raise RuntimeError("=> no checkpoint found at '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)
        args.start_epoch = checkpoint['epoch']
        if args.cuda:
            self.model.module.load_state_dict(checkpoint['state_dict'])
        else:
            self.model.load_state_dict(checkpoint['state_dict'])
        if not args.ft:
            self.optimizer.load_state_dict(checkpoint['optimizer'])
        self.best_pred = checkpoint['best_pred']
        print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))

    # Clear start epoch if fine-tuning
    if args.ft:
        args.start_epoch = 0

    # Per-epoch logging buffers
    self.trainLoss = []
    self.miou = []
    self.mean_entropy = []
    self.mid_entropy = []
    self.celoss = []
    self.crfloss = []
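
# A minimal sketch (not from the source) of how the dense-CRF regularizer is
# usually combined with the cross-entropy term in rloss-style training;
# `denormalized_image` and `croppings` (the valid-region mask) are assumptions.
def regularized_loss(self, output, target, denormalized_image, croppings):
    probs = nn.Softmax(dim=1)(output)
    celoss = self.criterion(output, target)
    crfloss = self.densecrflosslayer(denormalized_image, probs, croppings)
    return celoss + crfloss.cuda()  # DenseCRFLoss already applies its weight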