def init_generator(self, args):
    """Build the generator network (DeepLab/ResNet), wrap it in DataParallel,
    and optionally warm-start it from a checkpoint.

    Args:
        args: parsed CLI namespace; reads ``args.resume`` (checkpoint path or
            falsy to skip loading).

    Side effects: sets ``self.generator_model`` and moves it to CUDA.
    """
    # sync_bn=None: DeepLab presumably falls back to its own default here —
    # TODO confirm against the DeepLab constructor.
    self.generator_model = DeepLab(num_classes=self.nnclass,
                                   backbone='resnet',
                                   output_stride=16,
                                   sync_bn=None,
                                   freeze_bn=False).cuda()
    self.generator_model = torch.nn.DataParallel(self.generator_model).cuda()
    # patch_replication_callback must follow DataParallel wrapping (SyncBN fix).
    patch_replication_callback(self.generator_model)
    if args.resume:
        print('#--------- load pretrained model --------------#')
        model_dict = self.generator_model.module.state_dict()
        checkpoint = torch.load(args.resume)
        # Keep only checkpoint entries that (a) are not part of the final
        # classifier ('last_conv') and (b) exist in the current model, so a
        # checkpoint with a different head can still initialize the backbone.
        pretrained_dict = {
            k: v
            for k, v in checkpoint['state_dict'].items()
            if 'last_conv' not in k and k in model_dict.keys()
        }
        #pretrained_dict = {k.replace('module.',''):v for k,v in checkpoint['state_dict'].items() if 'last_conv' not in k}
        model_dict.update(pretrained_dict)
        # Load through .module so keys match the unwrapped network.
        self.generator_model.module.load_state_dict(model_dict)
    # Ensure every parameter is trainable (requires_grad is True by default;
    # this guards against a frozen checkpoint state).
    for param in self.generator_model.parameters():
        param.requires_grad = True
def init_target(self, args):
    """Build the target network (DeepLab/ResNet), its optimizer parameter
    groups, and its BCE segmentation loss, then load weights from
    ``args.resume``.

    NOTE(review): unlike init_generator, this loads ``args.resume``
    unconditionally — a missing/None resume path will raise inside
    ``torch.load``; confirm callers always pass a valid checkpoint.

    Side effects: sets ``self.target_model``, ``self.train_params`` and
    ``self.target_criterion``; moves the model to CUDA.
    """
    self.target_model = DeepLab(num_classes=self.nnclass,
                                backbone='resnet',
                                output_stride=16,
                                sync_bn=None,
                                freeze_bn=False)
    # Backbone params at base lr, head params at 10x lr (standard DeepLab recipe).
    self.train_params = [{
        'params': self.target_model.get_1x_lr_params(),
        'lr': args.lr
    }, {
        'params': self.target_model.get_10x_lr_params(),
        'lr': args.lr * 10
    }]
    self.target_model = torch.nn.DataParallel(self.target_model)
    self.target_criterion = SegmentationLosses(
        weight=None, cuda=args.cuda).build_loss(
            mode='bce')  #torch.nn.BCELoss(reduce ='mean')
    patch_replication_callback(self.target_model)
    model_dict = self.target_model.module.state_dict()
    checkpoint = torch.load(args.resume)
    #pretrained_dict = {k:v for k,v in checkpoint['state_dict'].items() if 'last_conv' not in k }
    # Strip the 'module.' prefix a DataParallel-saved checkpoint carries so
    # keys match the unwrapped network.
    pretrained_dict = {
        k.replace('module.', ''): v
        for k, v in checkpoint['state_dict'].items()
    }
    model_dict.update(pretrained_dict)
    self.target_model.module.load_state_dict(model_dict)
    self.target_model = self.target_model.cuda()
    return
def __init__(self, args):
    """Two-stream (temporal flow + spatial RGB) action-recognition trainer.

    Builds saver/tensorboard plumbing, data loaders, the two VGG16-BN
    streams with their Adam optimizers and BCE losses, and (optionally)
    moves both streams to CUDA under DataParallel.

    Args:
        args: parsed CLI namespace (workers, temporal/spatial weights and
            lrs, cuda, gpu_ids, ...).

    Fix: the original block ended with a stray ``'''`` that opened an
    unterminated triple-quoted string — a syntax error. It has been removed.
    """
    self.args = args
    # Define Saver
    self.saver = Saver(args)
    self.saver.save_experiment_config()
    # Define Tensorboard Summary
    self.summary = TensorboardSummary(self.saver.experiment_dir)
    self.writer = self.summary.create_summary()
    # Define Dataloader
    kwargs = {'num_workers': args.workers, 'pin_memory': True}
    self.train_loader, self.val_loader, self.test_loader, self.nclass = make_data_loader(
        args, **kwargs)
    # Define weight (loss-mixing weights for the two streams)
    self.temporal_weight = args.temporal_weight
    self.spatial_weight = args.spatial_weight
    # Define network: flow stream and RGB stream share the same backbone.
    # num_classes=101 presumably targets UCF-101 — TODO confirm.
    temporal_model = Model(name='vgg16_bn', num_classes=101,
                           is_flow=True).get_model()
    spatial_model = Model(name='vgg16_bn', num_classes=101,
                          is_flow=False).get_model()
    # Define Optimizer
    #optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
    temporal_optimizer = torch.optim.Adam(temporal_model.parameters(),
                                          lr=args.temporal_lr)
    spatial_optimizer = torch.optim.Adam(spatial_model.parameters(),
                                         lr=args.spatial_lr)
    # Define Criterion — one BCE loss per stream.
    self.temporal_criterion = nn.BCELoss().cuda()
    self.spatial_criterion = nn.BCELoss().cuda()
    self.temporal_model, self.temporal_optimizer = temporal_model, temporal_optimizer
    self.spatial_model, self.spatial_optimizer = spatial_model, spatial_optimizer
    # Define Evaluator
    self.top1_eval = Evaluator(self.nclass)
    # Using cuda: wrap each stream in DataParallel, apply the SyncBN
    # replication patch, then move to GPU.
    if args.cuda:
        self.temporal_model = torch.nn.DataParallel(
            self.temporal_model, device_ids=self.args.gpu_ids)
        patch_replication_callback(self.temporal_model)
        self.temporal_model = self.temporal_model.cuda()
        self.spatial_model = torch.nn.DataParallel(
            self.spatial_model, device_ids=self.args.gpu_ids)
        patch_replication_callback(self.spatial_model)
        self.spatial_model = self.spatial_model.cuda()
    # Resuming checkpoint
    self.best_accuracy = 0.0
def __init__(self, args):
    """Visualization/evaluation trainer: build the loaders, a DeepLab model
    restored from ``args.resume``, its loss, and the output folder where
    visualizations will be written (created next to the checkpoint).
    """
    self.args = args

    # Data loaders.
    loader_kwargs = {'num_workers': args.workers, 'pin_memory': True}
    (self.train_loader, self.val_loader, self.test_loader,
     self.nclass) = make_dataloader(args.dataset, args.base_size,
                                    args.crop_size, args.batch_size,
                                    args.overfit, **loader_kwargs)

    # Network and evaluator.
    self.model = DeepLab(num_classes=self.nclass,
                         backbone=args.backbone,
                         output_stride=args.out_stride,
                         sync_bn=args.sync_bn,
                         freeze_bn=args.freeze_bn)
    self.evaluator = Evaluator(self.nclass)

    # GPU setup: DataParallel first, then the SyncBN replication patch.
    if args.cuda:
        self.model = torch.nn.DataParallel(self.model,
                                           device_ids=self.args.gpu_ids)
        patch_replication_callback(self.model)
        self.model = self.model.cuda()

    # Class-balanced loss weights: load cached weights if present,
    # otherwise compute them from the training set.
    if args.use_balanced_weights:
        weights_file = os.path.join(constants.DATASET_ROOT, args.dataset,
                                    'class_weights.npy')
        if os.path.isfile(weights_file):
            class_weights = np.load(weights_file)
        else:
            class_weights = calculate_weights_labels(args.dataset,
                                                     self.train_loader,
                                                     self.nclass)
        class_weights = torch.from_numpy(class_weights.astype(np.float32))
    else:
        class_weights = None
    self.criterion = SegmentationLosses(
        weight=class_weights, cuda=args.cuda).build_loss(mode=args.loss_type)

    # Restore weights; a DataParallel-wrapped model loads through .module.
    ckpt = torch.load(args.resume)
    target = self.model.module if args.cuda else self.model
    target.load_state_dict(ckpt['state_dict'])
    print(f'=> loaded checkpoint {args.resume} (epoch {ckpt["epoch"]})')

    # Visualizations go in a sibling folder of the checkpoint file.
    self.visualizations_folder = os.path.join(
        os.path.dirname(os.path.realpath(args.resume)),
        constants.VISUALIZATIONS_FOLDER)
    if not os.path.exists(self.visualizations_folder):
        os.makedirs(self.visualizations_folder)
def initialize(self):
    """Build the joint DeepLab+accuracy-predictor model, its optimizer,
    the two losses (segmentation head and binary correct/incorrect head),
    evaluators, optional LR scheduler, and CUDA/DataParallel setup.

    Reads hyper-parameters from ``self.args``; sets self.model,
    self.optimizer, self.criterion_deeplab, self.criterion_unet,
    self.deeplab_evaluator, self.unet_evaluator, self.scheduler,
    self.best_pred.
    """
    args = self.args
    model = DeepLabAccuracyPredictor(num_classes=self.nclass,
                                     backbone=args.backbone,
                                     output_stride=args.out_stride,
                                     sync_bn=args.sync_bn,
                                     freeze_bn=args.freeze_bn,
                                     mc_dropout=False,
                                     enet=args.architecture == 'enet',
                                     symmetry=args.symmetry)
    train_params = model.get_param_list(args.lr, args.architecture == 'enet',
                                        args.symmetry)
    if args.optimizer == 'SGD':
        optimizer = torch.optim.SGD(train_params,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay,
                                    nesterov=args.nesterov)
    elif args.optimizer == 'Adam':
        optimizer = torch.optim.Adam(train_params,
                                     weight_decay=args.weight_decay)
    else:
        raise NotImplementedError
    # Optional class-balanced weights for the segmentation loss.
    if args.use_balanced_weights:
        weight = calculate_weights_labels(args.dataset, self.train_loader,
                                          self.nclass)
        weight = torch.from_numpy(weight.astype(np.float32))
    else:
        weight = None
    self.criterion_deeplab = SegmentationLosses(
        weight=weight, cuda=args.cuda).build_loss(mode=args.loss_type)
    # Binary (wrong/right) loss for the accuracy-predictor head; the two
    # class weights sum to 1 by construction.
    self.criterion_unet = SegmentationLosses(weight=torch.FloatTensor(
        [args.weight_wrong_label_unet, 1 - args.weight_wrong_label_unet]),
                                             cuda=args.cuda).build_loss(
                                                 mode=args.loss_type)
    self.model, self.optimizer = model, optimizer
    self.deeplab_evaluator = Evaluator(self.nclass)
    # The accuracy predictor is evaluated as a 2-class problem.
    self.unet_evaluator = Evaluator(2)
    if args.use_lr_scheduler:
        self.scheduler = LR_Scheduler(args.lr_scheduler, args.lr, args.epochs,
                                      len(self.train_loader))
    else:
        self.scheduler = None
    # DataParallel wrapping must precede the SyncBN replication patch.
    if args.cuda:
        self.model = torch.nn.DataParallel(self.model,
                                           device_ids=self.args.gpu_ids)
        patch_replication_callback(self.model)
        self.model = self.model.cuda()
    self.best_pred = 0.0
def __init__(self, config):
    """Config-dict driven DeepLab trainer.

    Builds saver/tensorboard, data loaders, the DeepLab network with 1x/10x
    LR parameter groups, SGD optimizer, (optionally class-balanced)
    segmentation loss, evaluator, LR scheduler, CUDA setup, and optional
    checkpoint restore.

    Args:
        config: nested dict with 'training', 'network', 'image', 'dataset'
            sections.

    Fix: the cuda and non-cuda restore branches were identical; under CUDA
    the model is wrapped in DataParallel, so the state dict must be loaded
    through ``.module`` (matching every other trainer in this file).
    """
    self.config = config
    self.best_pred = 0.0
    # Define Saver
    self.saver = Saver(config)
    self.saver.save_experiment_config()
    # Define Tensorboard Summary
    self.summary = TensorboardSummary(
        self.config['training']['tensorboard']['log_dir'])
    self.writer = self.summary.create_summary()
    self.train_loader, self.val_loader, self.test_loader, self.nclass = initialize_data_loader(
        config)
    # Define network
    model = DeepLab(num_classes=self.nclass,
                    backbone=self.config['network']['backbone'],
                    output_stride=self.config['image']['out_stride'],
                    sync_bn=self.config['network']['sync_bn'],
                    freeze_bn=self.config['network']['freeze_bn'])
    # Backbone at base lr, head at 10x lr (standard DeepLab recipe).
    train_params = [{
        'params': model.get_1x_lr_params(),
        'lr': self.config['training']['lr']
    }, {
        'params': model.get_10x_lr_params(),
        'lr': self.config['training']['lr'] * 10
    }]
    # Define Optimizer
    optimizer = torch.optim.SGD(
        train_params,
        momentum=self.config['training']['momentum'],
        weight_decay=self.config['training']['weight_decay'],
        nesterov=self.config['training']['nesterov'])
    # Define Criterion: whether to use class balanced weights
    if self.config['training']['use_balanced_weights']:
        classes_weights_path = os.path.join(
            self.config['dataset']['base_path'],
            self.config['dataset']['dataset_name'] + '_classes_weights.npy')
        if os.path.isfile(classes_weights_path):
            weight = np.load(classes_weights_path)
        else:
            weight = calculate_weigths_labels(
                self.config, self.config['dataset']['dataset_name'],
                self.train_loader, self.nclass)
        weight = torch.from_numpy(weight.astype(np.float32))
    else:
        weight = None
    self.criterion = SegmentationLosses(
        weight=weight, cuda=self.config['network']['use_cuda']).build_loss(
            mode=self.config['training']['loss_type'])
    self.model, self.optimizer = model, optimizer
    # Define Evaluator
    self.evaluator = Evaluator(self.nclass)
    # Define lr scheduler
    self.scheduler = LR_Scheduler(self.config['training']['lr_scheduler'],
                                  self.config['training']['lr'],
                                  self.config['training']['epochs'],
                                  len(self.train_loader))
    # Using cuda: wrap, patch SyncBN replication, then move to GPU.
    if self.config['network']['use_cuda']:
        self.model = torch.nn.DataParallel(self.model)
        patch_replication_callback(self.model)
        self.model = self.model.cuda()
    # Resuming checkpoint
    if self.config['training']['weights_initialization'][
            'use_pretrained_weights']:
        restore_from = self.config['training']['weights_initialization'][
            'restore_from']
        if not os.path.isfile(restore_from):
            raise RuntimeError(
                "=> no checkpoint found at '{}'".format(restore_from))
        if self.config['network']['use_cuda']:
            checkpoint = torch.load(restore_from)
        else:
            checkpoint = torch.load(restore_from,
                                    map_location={'cuda:0': 'cpu'})
        self.config['training']['start_epoch'] = checkpoint['epoch']
        if self.config['network']['use_cuda']:
            # FIX: load through .module — the model is DataParallel-wrapped
            # on CUDA, so top-level keys would not match otherwise.
            self.model.module.load_state_dict(checkpoint['state_dict'])
        else:
            self.model.load_state_dict(checkpoint['state_dict'])
        # if not self.config['ft']:
        self.optimizer.load_state_dict(checkpoint['optimizer'])
        self.best_pred = checkpoint['best_pred']
        print("=> loaded checkpoint '{}' (epoch {})".format(
            restore_from, checkpoint['epoch']))
def main(cfg, gpus):
    """Top-level training loop: build encoder/decoder, dataset loaders,
    optimizers, then train for ``cfg.TRAIN.num_epoch`` epochs with
    per-epoch checkpointing and optional validation.

    NOTE(review): this function also reads a module-global ``args``
    (use_float16, lesslabel, batchsize, workers, ...) in addition to the
    ``cfg`` parameter — confirm ``args`` is defined at module scope.
    """
    # Network Builders
    if args.use_float16:
        # Mixed-precision training: GradScaler handles loss scaling.
        from torch.cuda.amp import autocast as autocast, GradScaler
        scaler = GradScaler()
    else:
        scaler = None
    # 'lesslabel' presumably selects a reduced 42-class label set — TODO confirm.
    if args.lesslabel:
        label_num_ = 42
    else:
        label_num_ = args.num_class
    net_encoder = ModelBuilder.build_encoder(
        arch=cfg.MODEL.arch_encoder.lower(),
        fc_dim=cfg.MODEL.fc_dim,
        weights=cfg.MODEL.weights_encoder)
    net_decoder = ModelBuilder.build_decoder(
        arch=cfg.MODEL.arch_decoder.lower(),
        fc_dim=cfg.MODEL.fc_dim,
        num_class=label_num_,
        weights=cfg.MODEL.weights_decoder)
    # 255 is the ignore label for unannotated pixels.
    crit = nn.NLLLoss(ignore_index=255)
    # Deep-supervision decoders get an extra auxiliary-loss scale.
    if cfg.MODEL.arch_decoder.endswith('deepsup'):
        segmentation_module = SegmentationModule(net_encoder, net_decoder,
                                                 crit,
                                                 cfg.TRAIN.deep_sup_scale)
    else:
        segmentation_module = SegmentationModule(net_encoder, net_decoder,
                                                 crit)
    # Dataset and Loader
    # dataset_train = TrainDataset(
    #     cfg.DATASET.root_dataset,
    #     cfg.DATASET.list_train,
    #     cfg.DATASET,
    #     batch_per_gpu=cfg.TRAIN.batch_size_per_gpu)
    if args.use_clipdataset:
        dataset_train = BaseDataset_longclip(args, 'train')
    else:
        dataset_train = BaseDataset(args, 'train')
    loader_train = torch.utils.data.DataLoader(
        dataset_train,
        batch_size=args.batchsize,  # we have modified data_parallel
        shuffle=True,  # we do not use this param
        num_workers=args.workers,
        drop_last=True,
        pin_memory=True)
    print('1 Epoch = {} iters'.format(cfg.TRAIN.epoch_iters))
    dataset_val = BaseDataset(args, 'val')
    loader_val = torch.utils.data.DataLoader(dataset_val,
                                             batch_size=args.batchsize,
                                             shuffle=False,
                                             num_workers=args.workers)
    # create loader iterator
    # load nets into gpu
    segmentation_module.cuda(args.start_gpu)
    if args.gpu_num > 1:
        # Build the device list starting from args.start_gpu, then wrap in
        # DataParallel and apply the SyncBN replication patch.
        train_gpu_ = list(range(args.gpu_num))
        train_gpu_ = [int(gpu_ + args.start_gpu) for gpu_ in train_gpu_]
        print(train_gpu_)
        segmentation_module = torch.nn.DataParallel(segmentation_module,
                                                    device_ids=train_gpu_)
        patch_replication_callback(segmentation_module)
    # Set up optimizers
    nets = (net_encoder, net_decoder, crit)
    optimizers = create_optimizers(nets, cfg)
    # Main loop
    history = {'train': {'epoch': [], 'loss': [], 'acc': []}}
    # test(segmentation_module,loader_val,args)
    for epoch in range(cfg.TRAIN.start_epoch, cfg.TRAIN.num_epoch):
        #checkpoint(nets,optimizers, history, args, epoch+1)
        print('Epoch {}'.format(epoch))
        train(segmentation_module, loader_train, optimizers, history,
              epoch + 1, cfg, args, scaler=scaler)
        ###################
        # checkpointing after every epoch
        checkpoint(nets, optimizers, history, args, epoch + 1)
        if args.validation:
            test(segmentation_module, loader_val, args)
    print('Training Done!')
def __init__(self, args):
    """UNet/UNetNested trainer: saver, tensorboard, loaders, network,
    Adam optimizer, (optionally class-balanced) loss, evaluator, CUDA
    setup, and optional checkpoint resume.

    Args:
        args: parsed CLI namespace; ``args.backbone`` selects 'unet' or
            'unetNested'.

    Raises:
        NotImplementedError: if ``args.backbone`` is not a supported value
            (fix: previously ``model`` stayed None and the code crashed
            later with an opaque AttributeError on ``model.get_params()``).
    """
    self.args = args
    # Define Saver
    self.saver = Saver(args)
    self.saver.save_experiment_config()
    # Define Tensorboard Summary
    self.summary = TensorboardSummary(self.saver.experiment_dir)
    self.writer = self.summary.create_summary()
    # Define Dataloader
    kwargs = {'num_workers': args.workers, 'pin_memory': True}
    self.train_loader, self.val_loader, self.test_loader, self.nclass = make_data_loader(
        args, **kwargs)
    model = None
    # Define network (4-channel input images)
    if self.args.backbone == 'unet':
        model = UNet(in_channels=4,
                     n_classes=self.nclass,
                     sync_bn=args.sync_bn)
        print("using UNet")
    if self.args.backbone == 'unetNested':
        model = UNetNested(in_channels=4,
                           n_classes=self.nclass,
                           sync_bn=args.sync_bn)
        print("using UNetNested")
    if model is None:
        # Fail fast with a clear message instead of an AttributeError below.
        raise NotImplementedError(
            "unsupported backbone: {}".format(self.args.backbone))
    # train_params = [{'params': model.get_params(), 'lr': args.lr}]
    train_params = [{'params': model.get_params()}]
    # Define Optimizer
    # optimizer = torch.optim.SGD(train_params, momentum=args.momentum,
    #                             weight_decay=args.weight_decay, nesterov=args.nesterov)
    optimizer = torch.optim.Adam(train_params,
                                 self.args.learn_rate,
                                 weight_decay=args.weight_decay,
                                 amsgrad=True)
    # Define Criterion: whether to use class balanced weights
    if args.use_balanced_weights:
        classes_weights_path = os.path.join(
            Path.db_root_dir(args.dataset),
            args.dataset + '_classes_weights.npy')
        if os.path.isfile(classes_weights_path):
            weight = np.load(classes_weights_path)
        else:
            weight = calculate_weigths_labels(args.dataset, self.train_loader,
                                              self.nclass)
        weight = torch.from_numpy(weight.astype(np.float32))
    else:
        weight = None
    self.criterion = SegmentationLosses(
        weight=weight, cuda=args.cuda).build_loss(mode=args.loss_type)
    self.model, self.optimizer = model, optimizer
    # Define Evaluator
    self.evaluator = Evaluator(self.nclass)
    # Define lr scheduler
    # self.scheduler = LR_Scheduler(args.lr_scheduler, args.lr, args.epochs, len(self.train_loader))
    # Using cuda: wrap, patch SyncBN replication, then move to GPU.
    if args.cuda:
        self.model = torch.nn.DataParallel(self.model,
                                           device_ids=self.args.gpu_ids)
        patch_replication_callback(self.model)
        self.model = self.model.cuda()
    # Resuming checkpoint
    self.best_pred = 0.0
    if args.resume is not None:
        if not os.path.isfile(args.resume):
            raise RuntimeError(
                "=> no checkpoint found at '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)
        args.start_epoch = checkpoint['epoch']
        if args.cuda:
            # DataParallel-wrapped: load through .module.
            self.model.module.load_state_dict(checkpoint['state_dict'])
        else:
            self.model.load_state_dict(checkpoint['state_dict'])
        if not args.ft:
            self.optimizer.load_state_dict(checkpoint['optimizer'])
        self.best_pred = checkpoint['best_pred']
        print("=> loaded checkpoint '{}' (epoch {})".format(
            args.resume, checkpoint['epoch']))
    # Clear start epoch if fine-tuning
    if args.ft:
        args.start_epoch = 0
def initialize(self):
    """Build the segmentation network (DeepLab / ENet / FastSCNN), its
    optimizer, loss, evaluator, optional LR scheduler, and CUDA setup.

    Reads hyper-parameters from ``self.args``.

    Raises:
        NotImplementedError: on an unknown ``args.architecture`` or
            ``args.optimizer`` (fix: the architecture chain previously had
            no ``else``, so an unknown value left ``model``/``train_params``
            unbound and crashed later with a NameError).
    """
    args = self.args
    if args.architecture == 'deeplab':
        print('Using Deeplab')
        model = DeepLab(num_classes=self.nclass,
                        backbone=args.backbone,
                        output_stride=args.out_stride,
                        sync_bn=args.sync_bn,
                        freeze_bn=args.freeze_bn)
        # Backbone at base lr, head at 10x lr (standard DeepLab recipe).
        train_params = [{
            'params': model.get_1x_lr_params(),
            'lr': args.lr
        }, {
            'params': model.get_10x_lr_params(),
            'lr': args.lr * 10
        }]
    elif args.architecture == 'enet':
        print('Using ENet')
        model = ENet(num_classes=self.nclass,
                     encoder_relu=True,
                     decoder_relu=True)
        train_params = [{'params': model.parameters(), 'lr': args.lr}]
    elif args.architecture == 'fastscnn':
        print('Using FastSCNN')
        model = FastSCNN(3, self.nclass)
        train_params = [{'params': model.parameters(), 'lr': args.lr}]
    else:
        # Fail fast on an unsupported architecture (consistent with the
        # optimizer chain below).
        raise NotImplementedError
    if args.optimizer == 'SGD':
        optimizer = torch.optim.SGD(train_params,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay,
                                    nesterov=args.nesterov)
    elif args.optimizer == 'Adam':
        optimizer = torch.optim.Adam(train_params,
                                     weight_decay=args.weight_decay)
    else:
        raise NotImplementedError
    # Optional class-balanced loss weights.
    if args.use_balanced_weights:
        weight = calculate_weights_labels(args.dataset, self.train_loader,
                                          self.nclass)
        weight = torch.from_numpy(weight.astype(np.float32))
    else:
        weight = None
    self.criterion = SegmentationLosses(
        weight=weight, cuda=args.cuda).build_loss(mode=args.loss_type)
    self.model, self.optimizer = model, optimizer
    self.evaluator = Evaluator(self.nclass)
    if args.use_lr_scheduler:
        self.scheduler = LR_Scheduler(args.lr_scheduler, args.lr, args.epochs,
                                      len(self.train_loader))
    else:
        self.scheduler = None
    # DataParallel wrapping must precede the SyncBN replication patch.
    if args.cuda:
        self.model = torch.nn.DataParallel(self.model,
                                           device_ids=self.args.gpu_ids)
        patch_replication_callback(self.model)
        self.model = self.model.cuda()
    self.best_pred = 0.0
def main(cfg, gpu, args):
    """Video segmentation inference: build the requested method's module,
    restore weights from ``args.load``, then evaluate per-video mIoU/fwIoU,
    global pixel metrics, and video-consistency accuracy over the split.

    Args:
        cfg: model config (encoder/decoder arch, fc_dim).
        gpu: device index used by the test loops.
        args: CLI namespace (method, load, dataroot, split, batchsize, ...).
    """
    # 'lesslabel' presumably selects a reduced 42-class label set — TODO confirm.
    if args.lesslabel:
        num_class = 42
    else:
        num_class = args.num_class
    torch.cuda.set_device(gpu)
    # Network Builders: td4_psp is self-contained; every other method shares
    # the encoder/decoder pair built here.
    if args.method == 'tdnet':
        segmentation_module = td4_psp(args=args, backbone='resnet18')
    else:
        net_encoder = ModelBuilder.build_encoder(arch=cfg.MODEL.arch_encoder,
                                                 fc_dim=cfg.MODEL.fc_dim,
                                                 weights='')
        net_decoder = ModelBuilder.build_decoder(arch=cfg.MODEL.arch_decoder,
                                                 fc_dim=cfg.MODEL.fc_dim,
                                                 num_class=num_class,
                                                 weights='',
                                                 use_softmax=True)
        # -1 is the ignore label here (unlike the training script's 255).
        crit = nn.NLLLoss(ignore_index=-1)
        if args.method == 'netwarp':
            segmentation_module = NetWarp(net_encoder, net_decoder, crit,
                                          args, cfg.TRAIN.deep_sup_scale)
        elif args.method == 'ETC':
            segmentation_module = ETC(net_encoder, net_decoder, crit, args,
                                      cfg.TRAIN.deep_sup_scale)
        elif args.method == 'nonlocal3d':
            segmentation_module = Non_local3d(args, net_encoder, crit)
        elif args.method == 'our_warp':
            segmentation_module = ClipWarpNet(net_encoder, net_decoder, crit,
                                              args)
        elif args.method == 'propnet':
            segmentation_module = PropNet(net_encoder, net_decoder, crit,
                                          args)
        elif args.method == 'our_warp_merge':
            segmentation_module = OurWarpMerge(net_encoder, net_decoder, crit,
                                               args)
        elif args.method == 'clip_psp':
            segmentation_module = Clip_PSP(net_encoder, crit, args)
        elif args.method == 'clip_ocr':
            segmentation_module = ClipOCRNet(net_encoder, crit, args)
        elif args.method == 'netwarp_ocr':
            segmentation_module = NetWarp_ocr(net_encoder, crit, args)
        elif args.method == 'etc_ocr':
            segmentation_module = ETC_ocr(net_encoder, crit, args)
        else:
            raise NotImplementedError
    segmentation_module.cuda(args.start_gpu)
    to_load = torch.load(args.load,
                         map_location=torch.device("cuda:" +
                                                   str(args.start_gpu)))
    new_state_dict = OrderedDict()
    for k, v in to_load.items():
        # Strip the leading 'module.' prefix (7 chars) that a
        # DataParallel-saved checkpoint carries.
        name = k[7:]
        new_state_dict[name] = v
    segmentation_module.load_state_dict(new_state_dict)
    if args.gpu_num > 1:
        # Device list starts at args.start_gpu; wrap in DataParallel and
        # apply the SyncBN replication patch.
        train_gpu_ = list(range(args.gpu_num))
        train_gpu_ = [int(gpu_ + args.start_gpu) for gpu_ in train_gpu_]
        print(train_gpu_)
        segmentation_module = torch.nn.DataParallel(segmentation_module,
                                                    device_ids=train_gpu_)
        patch_replication_callback(segmentation_module)
    # One video name per line in '<split>.txt'; drop the trailing newline.
    with open(os.path.join(args.dataroot, args.split + '.txt')) as f:
        lines = f.readlines()
    videolists = [line[:-1] for line in lines]
    # Dataset and Loader: `evaluator` accumulates over the whole split,
    # `eval_video` is reset per video.
    evaluator = Evaluator(num_class)
    eval_video = Evaluator(num_class)
    evaluator.reset()
    eval_video.reset()
    total_vmIOU = 0.0
    total_vfwIOU = 0.0
    total_video = len(videolists)
    total_VC_acc = []
    for video in videolists:
        eval_video.reset()
        if args.method == 'clip_psp' or args.method == 'clip_ocr':
            test_dataset = TestDataset_longclip(args.dataroot, video, args,
                                                is_train=False)
        else:
            test_dataset = TestDataset_clip(args.dataroot, video, args,
                                            is_train=False)
        loader_test = torch.utils.data.DataLoader(test_dataset,
                                                  batch_size=args.batchsize,
                                                  shuffle=False,
                                                  num_workers=0,
                                                  drop_last=False)
        ####
        if args.method == 'nonlocal3d':
            gtlist_, predlist_, h, w = test_all(segmentation_module,
                                                loader_test, gpu, args,
                                                evaluator, eval_video, video)
        else:
            gtlist_, predlist_, h, w = test(segmentation_module, loader_test,
                                            gpu, args, evaluator, eval_video,
                                            video)
        # Video-consistency accuracy over clips of length args.vc_clip_num.
        accs = get_common(gtlist_, predlist_, args.vc_clip_num, h, w)
        print(sum(accs) / len(accs))
        total_VC_acc.extend(accs)
        ####
        v_mIOU = eval_video.Mean_Intersection_over_Union()
        total_vmIOU += v_mIOU
        v_fwIOU = eval_video.Frequency_Weighted_Intersection_over_Union()
        print(video, v_mIOU)
        total_vfwIOU += v_fwIOU
    # Per-video averages.
    total_vmIOU = total_vmIOU / total_video
    total_vfwIOU = total_vfwIOU / total_video
    # Split-wide pixel metrics.
    Acc = evaluator.Pixel_Accuracy()
    Acc_class = evaluator.Pixel_Accuracy_Class()
    mIoU = evaluator.Mean_Intersection_over_Union()
    FWIoU = evaluator.Frequency_Weighted_Intersection_over_Union()
    print(
        "Acc:{}, Acc_class:{}, mIoU:{}, fwIoU: {}, video mIOU: {}, video fwIOU: {}"
        .format(Acc, Acc_class, mIoU, FWIoU, total_vmIOU, total_vfwIOU))
    VC_Acc = np.array(total_VC_acc)
    VC_Acc = np.nanmean(VC_Acc)
    print("Video Consistency num :{} acc:{}".format(args.vc_clip_num, VC_Acc))
    print('Inference done!')
def __init__(self, args):
    """Pairwise-consistency trainer (LiTS / CHAOS): redirects stdout to a
    timestamped log, builds loaders, the ConsistentDeepLab network with
    1x/10x LR groups, Adam optimizer, loss, evaluator, scheduler, CUDA
    setup, and optional checkpoint resume.

    Raises:
        NotImplementedError: for datasets other than 'pairwise_lits' /
            'pairwise_chaos'.
    """
    self.args = args
    # Define Saver
    # self.saver = Saver(args)
    # Record the running process: tee stdout into a timestamped log file.
    self.saver = Saver(args)
    sys.stdout = Logger(
        os.path.join(
            self.saver.experiment_dir,
            'log_train-%s.txt' % time.strftime("%Y-%m-%d-%H-%M-%S")))
    self.saver.save_experiment_config()
    # Define Tensorboard Summary
    self.summary = TensorboardSummary(self.saver.experiment_dir)
    self.writer = self.summary.create_summary()
    # Define Dataloader
    kwargs = {'num_workers': args.workers, 'pin_memory': True}
    self.train_loader, self.val_loader, self.test_loader, self.nclass = make_data_loader(
        args, **kwargs)
    # The proxy task widens the label space: LiTS fixes both at 3 classes,
    # CHAOS doubles the native class count.
    if args.dataset == 'pairwise_lits':
        proxy_nclasses = self.nclass = 3
    elif args.dataset == 'pairwise_chaos':
        proxy_nclasses = 2 * self.nclass
    else:
        raise NotImplementedError
    # Define network
    model = ConsistentDeepLab(in_channels=3,
                              num_classes=proxy_nclasses,
                              pretrained=args.pretrained,
                              backbone=args.backbone,
                              output_stride=args.out_stride,
                              sync_bn=args.sync_bn,
                              freeze_bn=args.freeze_bn)
    # Backbone at base lr, head at 10x lr (standard DeepLab recipe).
    train_params = [{
        'params': model.get_1x_lr_params(),
        'lr': args.lr
    }, {
        'params': model.get_10x_lr_params(),
        'lr': args.lr * 10
    }]
    # Define Optimizer
    # optimizer = torch.optim.SGD(train_params, momentum=args.momentum,
    #                             weight_decay=args.weight_decay, nesterov=args.nesterov)
    optimizer = torch.optim.Adam(train_params,
                                 weight_decay=args.weight_decay)
    # Define Criterion: whether to use class balanced weights
    if args.use_balanced_weights:
        weights = calculate_weigths_labels(args.dataset, self.train_loader,
                                           proxy_nclasses)
    else:
        weights = None
    # Initializing loss
    print("Initializing loss: {}".format(args.loss_type))
    self.criterion = losses.init_loss(args.loss_type, weights=weights)
    self.model, self.optimizer = model, optimizer
    # Define Evaluator (on the native class count, not the proxy one).
    self.evaluator = Evaluator(self.nclass)
    # Define lr scheduler
    self.scheduler = LR_Scheduler(args.lr_scheduler, args.lr, args.epochs,
                                  len(self.train_loader))
    # Using cuda: wrap, patch SyncBN replication, then move to GPU.
    if args.cuda:
        self.model = torch.nn.DataParallel(self.model,
                                           device_ids=self.args.gpu_ids)
        patch_replication_callback(self.model)
        self.model = self.model.cuda()
    # Resuming checkpoint
    self.best_pred = 0.0
    if args.resume is not None:
        if not os.path.isfile(args.resume):
            raise RuntimeError("=> no checkpoint found at '{}'".format(
                args.resume))
        checkpoint = torch.load(args.resume)
        args.start_epoch = checkpoint['epoch']
        if args.cuda:
            # DataParallel-wrapped: load through .module.
            self.model.module.load_state_dict(checkpoint['state_dict'])
        else:
            self.model.load_state_dict(checkpoint['state_dict'])
        if not args.ft:
            self.optimizer.load_state_dict(checkpoint['optimizer'])
        self.best_pred = checkpoint['best_pred']
        print("=> loaded checkpoint '{}' (epoch {})".format(
            args.resume, checkpoint['epoch']))
    # Clear start epoch if fine-tuning
    if args.ft:
        args.start_epoch = 0
def __init__(self, args):
    """Passive-learning trainer: saver/tensorboard, loaders (train set
    padded to a batch-size multiple), the chosen network
    (DeepLab / ENet / FastSCNN), optimizer, loss, evaluator, optional LR
    scheduler, CUDA setup, and optional checkpoint resume.

    Raises:
        NotImplementedError: on an unknown ``args.architecture`` or
            ``args.optimizer`` (fix: the architecture chain previously had
            no ``else``, so an unknown value left ``model``/``train_params``
            unbound and crashed later with a NameError).
    """
    self.args = args
    self.saver = PassiveSaver(args)
    self.saver.save_experiment_config()
    self.summary = TensorboardSummary(self.saver.experiment_dir)
    self.writer = self.summary.create_summary()
    kwargs = {'pin_memory': False, 'memory_hog': args.memory_hog}
    self.train_set, self.train_loader, self.val_loader, self.test_loader, self.nclass = make_dataloader(
        args.dataset, args.base_size, args.crop_size, args.batch_size,
        args.workers, args.overfit, **kwargs)
    # Pad the train set so every batch is full.
    self.train_set.make_dataset_multiple_of_batchsize(args.batch_size)
    if args.architecture == 'deeplab':
        print('Using Deeplab')
        model = DeepLab(num_classes=self.nclass,
                        backbone=args.backbone,
                        output_stride=args.out_stride,
                        sync_bn=args.sync_bn,
                        freeze_bn=args.freeze_bn)
        # Backbone at base lr, head at 10x lr (standard DeepLab recipe).
        train_params = [{
            'params': model.get_1x_lr_params(),
            'lr': args.lr
        }, {
            'params': model.get_10x_lr_params(),
            'lr': args.lr * 10
        }]
    elif args.architecture == 'enet':
        print('Using ENet')
        model = ENet(num_classes=self.nclass,
                     encoder_relu=True,
                     decoder_relu=True)
        train_params = [{'params': model.parameters(), 'lr': args.lr}]
    elif args.architecture == 'fastscnn':
        print('Using FastSCNN')
        model = FastSCNN(3, self.nclass)
        train_params = [{'params': model.parameters(), 'lr': args.lr}]
    else:
        # Fail fast on an unsupported architecture (consistent with the
        # optimizer chain below).
        raise NotImplementedError
    if args.optimizer == 'SGD':
        optimizer = torch.optim.SGD(train_params,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay,
                                    nesterov=args.nesterov)
    elif args.optimizer == 'Adam':
        optimizer = torch.optim.Adam(train_params,
                                     weight_decay=args.weight_decay)
    else:
        raise NotImplementedError
    # Optional class-balanced loss weights.
    if args.use_balanced_weights:
        weight = calculate_weights_labels(args.dataset, self.train_loader,
                                          self.nclass)
        weight = torch.from_numpy(weight.astype(np.float32))
    else:
        weight = None
    self.criterion = SegmentationLosses(
        weight=weight, cuda=args.cuda).build_loss(mode=args.loss_type)
    self.model, self.optimizer = model, optimizer
    self.evaluator = Evaluator(self.nclass)
    if args.use_lr_scheduler:
        self.scheduler = LR_Scheduler(args.lr_scheduler, args.lr, args.epochs,
                                      len(self.train_loader))
    else:
        self.scheduler = None
    # DataParallel wrapping must precede the SyncBN replication patch.
    if args.cuda:
        self.model = torch.nn.DataParallel(self.model,
                                           device_ids=self.args.gpu_ids)
        patch_replication_callback(self.model)
        self.model = self.model.cuda()
    self.best_pred = 0.0
    if args.resume is not None:
        if not os.path.isfile(args.resume):
            raise RuntimeError(f"=> no checkpoint found at {args.resume}")
        checkpoint = torch.load(args.resume)
        args.start_epoch = checkpoint['epoch']
        if args.cuda:
            # DataParallel-wrapped: load through .module.
            self.model.module.load_state_dict(checkpoint['state_dict'])
        else:
            self.model.load_state_dict(checkpoint['state_dict'])
        if not args.ft:
            self.optimizer.load_state_dict(checkpoint['optimizer'])
        self.best_pred = checkpoint['best_pred']
        print(
            f'=> loaded checkpoint {args.resume} (epoch {checkpoint["epoch"]})'
        )
def __init__(self, args):
    """Domain-adaptation trainer for the target model (optic disc/cup
    segmentation, judging by the 'disc'/'cup' best-pred keys — TODO
    confirm): restores a source-trained DeepLab (minus its final conv),
    then sets up SGD with 1x/10x LR groups, a BCE-with-logits criterion,
    an LR scheduler, and a 3-class evaluator.

    NOTE(review): ``args.resume`` is loaded unconditionally at the top —
    callers must always provide a source checkpoint.
    """
    # Define Saver
    self.saver = Saver(args)
    self.saver.save_experiment_config()
    # Define Tensorboard Summary
    self.summary = TensorboardSummary(self.saver.experiment_dir)
    self.writer = self.summary.create_summary()
    checkpoint = torch.load(args.resume)
    args.start_epoch = checkpoint['epoch']
    # Define Dataloader (source domain, target domain)
    kwargs = {'num_workers': args.workers, 'pin_memory': True}
    self.source_loader, self.target_loader, _, self.nclass = make_data_loader(
        args, **kwargs)
    # Define Target Model
    self.target_model = DeepLab(num_classes=self.nclass,
                                backbone=args.backbone,
                                output_stride=args.out_stride,
                                sync_bn=args.sync_bn,
                                freeze_bn=args.freeze_bn)
    # Using cuda
    self.best_pred = {'disc': 0.0, 'cup': 0.0}
    self.target_model = torch.nn.DataParallel(self.target_model)
    patch_replication_callback(self.target_model)
    self.target_model = self.target_model.cuda()
    # Warm-start from the source checkpoint, skipping the final classifier
    # ('last_conv') so the target head is trained from scratch.
    model_dict = self.target_model.module.state_dict()
    pretrained_dict = {
        k: v
        for k, v in checkpoint['state_dict'].items() if 'last_conv' not in k
    }
    model_dict.update(pretrained_dict)
    self.target_model.module.load_state_dict(model_dict)
    self.target_model.train()
    self.set_requires_grad('target', True)
    # Define learning rate and optimizer params: backbone at base lr,
    # head at 10x lr.
    target_params = [{
        'params': self.target_model.module.get_1x_lr_params(),
        'lr': args.lr
    }, {
        'params': self.target_model.module.get_10x_lr_params(),
        'lr': args.lr * 10
    }]
    target_optim = torch.optim.SGD(target_params,
                                   momentum=args.momentum,
                                   weight_decay=args.weight_decay,
                                   nesterov=args.nesterov)
    target_optim.zero_grad()
    self.target_criterion = torch.nn.BCEWithLogitsLoss()
    self.target_optim = target_optim
    # Define lr scheduler
    self.scheduler = LR_Scheduler(args.lr_scheduler, args.lr, args.epochs,
                                  len(self.target_loader))
    self.evaluator = Evaluator(3)
def main(cfg, gpus):
    """Video-segmentation training loop: build the requested method's
    module, the clip dataset/loader, and the optimizer; optionally resume
    from epoch ``args.resume_epoch``; train with checkpointing every 20
    epochs and optional validation.

    NOTE(review): this function reads a module-global ``args`` in addition
    to the ``cfg`` parameter — confirm ``args`` is defined at module scope.
    """
    # Network Builders
    label_num_ = args.num_class
    if args.method == 'tdnet':
        # OHEM loss keeps the hardest n_min pixels per GPU batch.
        n_img_per_gpu = int(args.batchsize / args.gpu_num)
        n_min = n_img_per_gpu * args.cropsize * args.cropsize // 16
        loss_fn = OhemCELoss2D(thresh=0.7, n_min=n_min, ignore_index=255)
        segmentation_module = td4_psp(args=args,
                                      backbone='resnet18',
                                      loss_fn=loss_fn)
        segmentation_module.pretrained_init()
    else:
        net_encoder = ModelBuilder.build_encoder(
            arch=cfg.MODEL.arch_encoder.lower(),
            fc_dim=cfg.MODEL.fc_dim,
            weights=cfg.MODEL.weights_encoder,
            args=args)
        net_decoder = ModelBuilder.build_decoder(
            arch=cfg.MODEL.arch_decoder.lower(),
            fc_dim=cfg.MODEL.fc_dim,
            num_class=label_num_,
            weights=cfg.MODEL.weights_decoder)
        # 255 is the ignore label for unannotated pixels.
        crit = nn.NLLLoss(ignore_index=255)
        if args.method == 'netwarp':
            segmentation_module = NetWarp(net_encoder, net_decoder, crit,
                                          args, cfg.TRAIN.deep_sup_scale)
        elif args.method == 'ETC':
            segmentation_module = ETC(net_encoder, net_decoder, crit, args,
                                      cfg.TRAIN.deep_sup_scale)
        elif args.method == 'nonlocal3d':
            segmentation_module = Non_local3d(args, net_encoder, crit)
        elif args.method == 'our_warp':
            # A positive deepsup_scale enables the auxiliary loss branch.
            if args.deepsup_scale > 0.:
                segmentation_module = ClipWarpNet(net_encoder, net_decoder,
                                                  crit, args,
                                                  args.deepsup_scale)
            else:
                segmentation_module = ClipWarpNet(net_encoder, net_decoder,
                                                  crit, args)
        elif args.method == 'propnet':
            segmentation_module = PropNet(net_encoder, net_decoder, crit,
                                          args,
                                          deep_sup_scale=args.deepsup_scale)
        elif args.method == 'our_warp_merge':
            segmentation_module = OurWarpMerge(net_encoder, net_decoder, crit,
                                               args, deep_sup_scale=0.4)
        elif args.method == 'clip_psp':
            segmentation_module = Clip_PSP(net_encoder, crit, args,
                                           deep_sup_scale=0.4)
        elif args.method == 'clip_ocr':
            segmentation_module = ClipOCRNet(net_encoder, crit, args,
                                             deep_sup_scale=0.4)
        elif args.method == 'netwarp_ocr':
            segmentation_module = NetWarp_ocr(net_encoder, crit, args,
                                              deep_sup_scale=0.4)
        elif args.method == 'etc_ocr':
            segmentation_module = ETC_ocr(net_encoder, crit, args,
                                          deep_sup_scale=0.4)
        else:
            raise (NotImplementedError)
    # Dataset and Loader: long clips for the clip_psp/clip_ocr methods,
    # regular clips otherwise.
    if args.method == 'clip_psp' or args.method == 'clip_ocr':
        dataset_train = BaseDataset_longclip(args, 'train')
    else:
        dataset_train = BaseDataset_clip(args, 'train')
    loader_train = torch.utils.data.DataLoader(
        dataset_train,
        batch_size=args.batchsize,  # we have modified data_parallel
        shuffle=True,  # we do not use this param
        num_workers=args.workers,
        drop_last=True,
        pin_memory=False)
    print('1 Epoch = {} iters'.format(cfg.TRAIN.epoch_iters))
    # load nets into gpu
    segmentation_module.cuda(args.start_gpu)
    optimizer = create_optimizers(segmentation_module, cfg, args)
    if args.resume_epoch != 0:
        to_load = torch.load(
            os.path.join('./resume',
                         'model_epoch_{}.pth'.format(args.resume_epoch)),
            map_location=torch.device("cuda:" + str(args.start_gpu)))
        from collections import OrderedDict
        new_state_dict = OrderedDict()
        for k, v in to_load.items():
            # Strip the leading 'module.' prefix (7 chars) that a
            # DataParallel-saved checkpoint carries.
            name = k[7:]
            new_state_dict[name] = v
        cfg.TRAIN.start_epoch = args.resume_epoch
        segmentation_module.load_state_dict(new_state_dict)
        optimizer.load_state_dict(
            torch.load(
                os.path.join('./resume',
                             'opt_epoch_{}.pth'.format(args.resume_epoch)),
                map_location=torch.device("cuda:" + str(args.start_gpu))))
        print('resume from epoch {}'.format(args.resume_epoch))
    if args.gpu_num > 1:
        # Device list starts at args.start_gpu; wrap in DataParallel and
        # apply the SyncBN replication patch.
        train_gpu_ = list(range(args.gpu_num))
        train_gpu_ = [int(gpu_ + args.start_gpu) for gpu_ in train_gpu_]
        print(train_gpu_)
        segmentation_module = torch.nn.DataParallel(segmentation_module,
                                                    device_ids=train_gpu_)
        patch_replication_callback(segmentation_module)
    # print(segmentation_module)
    # Set up optimizers
    # Main loop
    history = {'train': {'epoch': [], 'loss': [], 'acc': []}}
    #if len(args.resume_dir)>0:
    #    resume_epoch = args.resume_dir.split('.')[]
    for epoch in range(cfg.TRAIN.start_epoch, cfg.TRAIN.num_epoch):
        print('Epoch {}'.format(epoch))
        #checkpoint(optimizer,segmentation_module, history, args, epoch+1)
        train(segmentation_module, loader_train, optimizer, history,
              epoch + 1, cfg, args)
        ###################
        # checkpointing every 20 epochs
        if (epoch + 1) % 20 == 0:
            checkpoint(optimizer, segmentation_module, history, args,
                       epoch + 1)
        if args.validation:
            test(segmentation_module, args)
    # print('Training Done!')
def __init__(self, args, cuda=None):
    """Carvana DeepLab trainer: device selection, tensorboard writer,
    metric tracker, (optionally class-weighted) cross-entropy loss, the
    model under DataParallel with the SyncBN replication patch, an SGD
    optimizer with 1x/10x LR groups, and the data loader.

    Args:
        args: parsed CLI namespace (gpu string, num_classes, lr, ...).
        cuda: request CUDA; effective only if it is actually available.

    Fix: comparison to None now uses ``is None`` (PEP 8) instead of
    ``== None``.
    """
    self.args = args
    os.environ["CUDA_VISIBLE_DEVICES"] = self.args.gpu
    self.cuda = cuda and torch.cuda.is_available()
    self.device = torch.device('cuda' if self.cuda else 'cpu')
    self.current_MIoU = 0
    self.best_MIou = 0
    self.current_epoch = 0
    self.current_iter = 0
    # set TensorboardX
    self.writer = SummaryWriter()
    # Metric definition
    self.Eval = Eval(self.args.num_classes)
    # loss definition: optionally load (computing on first run) per-class
    # weights from disk.
    if self.args.loss_weight_file is not None:
        classes_weights_path = os.path.join(self.args.loss_weights_dir,
                                            self.args.loss_weight_file)
        print(classes_weights_path)
        if not os.path.isfile(classes_weights_path):
            logger.info('calculating class weights...')
            calculate_weigths_labels(self.args)
        class_weights = np.load(classes_weights_path)
        pprint.pprint(class_weights)
        weight = torch.from_numpy(class_weights.astype(np.float32))
        logger.info('loading class weights successfully!')
    else:
        weight = None
    # 255 is the ignore label for unannotated pixels.
    self.loss = nn.CrossEntropyLoss(weight=weight, ignore_index=255)
    self.loss.to(self.device)
    # model: use ImageNet pretraining only when no project checkpoint is given.
    self.model = DeepLab(output_stride=self.args.output_stride,
                         class_num=self.args.num_classes,
                         pretrained=self.args.imagenet_pretrained
                         and self.args.pretrained_ckpt_file is None,
                         bn_momentum=self.args.bn_momentum,
                         freeze_bn=self.args.freeze_bn)
    # args.gpu is a string like "0,1"; ceil(len/2) presumably counts the
    # comma-separated device ids — TODO confirm.
    self.model = nn.DataParallel(self.model,
                                 device_ids=range(
                                     ceil(len(self.args.gpu) / 2)))
    patch_replication_callback(self.model)
    self.model.to(self.device)
    # Backbone ("1x") at base lr, head ("10x") at 10x lr.
    self.optimizer = torch.optim.SGD(
        params=[
            {
                "params": self.get_params(self.model.module, key="1x"),
                "lr": self.args.lr,
            },
            {
                "params": self.get_params(self.model.module, key="10x"),
                "lr": 10 * self.args.lr,
            },
        ],
        momentum=self.args.momentum,
        # dampening=self.args.dampening,
        weight_decay=self.args.weight_decay,
        # nesterov=self.args.nesterov
    )
    # dataloader
    self.dataloader = CarvanaDataLoader(self.args)
    self.epoch_num = ceil(self.args.iter_max /
                          self.dataloader.train_iterations)