# Training hyper-parameters and data pipeline for a saliency model on DUTS.
args = {
    'iter_num': 30000,        # total training iterations
    'train_batch_size': 10,
    'last_iter': 0,           # resume point (0 = from scratch)
    'lr': 1e-3,
    'lr_decay': 0.9,          # exponent — presumably poly LR decay; confirm in training loop
    'weight_decay': 5e-4,
    'momentum': 0.9,
    'snapshot': '',           # checkpoint name to resume from ('' = none)
    'crop_size': 380
}

# Geometric augmentations applied jointly to image and ground-truth mask.
joint_transform = joint_transforms.Compose([
    joint_transforms.RandomCrop(args['crop_size']),
    joint_transforms.RandomHorizontallyFlip(),
    joint_transforms.RandomRotate(10),
])
# Image-only transform: tensor conversion + ImageNet mean/std normalization.
img_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
target_transform = transforms.ToTensor()

train_set = ImageFolder(duts_train_path, joint_transform, img_transform, target_transform)
# NOTE(review): this DataLoader call is truncated in this chunk — its remaining
# arguments continue outside the visible source.
train_loader = DataLoader(train_set, batch_size=args['train_batch_size'], num_workers=12, shuffle=True,
# NOTE(review): chunk starts mid-dict — the opening of this args literal is
# outside the visible source.
    'momentum': 0.95,
    'snapshot': '',
    # Checkpoint used to warm-start training.
    'pretrain': os.path.join(ckpt_path, 'VideoSaliency_2019-12-24 22:05:11', '50000.pth'),
    # 'pretrain': '',
    'imgs_file': 'Pre-train/pretrain_all_seq_DUT_TR_DAFB2_DAVSOD2.txt',
    # 'imgs_file': 'video_saliency/train_all_DAFB2_DAVSOD_5f.txt',
    'train_loader': 'both'
    # 'train_loader': 'video_sequence'
}

imgs_file = os.path.join(datasets_root, args['imgs_file'])
# imgs_file = os.path.join(datasets_root, 'video_saliency/train_all_DAFB3_seq_5f.txt')

# Joint image+mask augmentation: resize, crop, flip, small rotation.
joint_transform = joint_transforms.Compose([
    joint_transforms.ImageResize(520),
    joint_transforms.RandomCrop(473),
    joint_transforms.RandomHorizontallyFlip(),
    joint_transforms.RandomRotate(10)
])
# joint_seq_transform = joint_transforms.Compose([
#     joint_transforms.ImageResize(520),
#     joint_transforms.RandomCrop(473)
# ])
input_size = (473, 473)
# Image-only transform with ImageNet normalization.
img_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
target_transform = transforms.ToTensor()
# Configuration and data pipeline for training R3Net on DUTS.
exp_name = 'R3Net'

# Optimizer / schedule hyper-parameters.
args = dict(
    iter_num=12000,
    train_batch_size=16,
    last_iter=0,
    lr=1e-3,
    lr_decay=0.9,
    weight_decay=5e-4,
    momentum=0.9,
    snapshot='',
)

# Augmentations shared by image and mask, then image-only normalization
# (ImageNet statistics) and mask-to-tensor conversion.
joint_transform = joint_transforms.Compose([
    joint_transforms.RandomCrop(300),
    joint_transforms.RandomHorizontallyFlip(),
    joint_transforms.RandomRotate(10),
])
img_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])
target_transform = transforms.ToTensor()

train_set = ImageFolder(duts_path, joint_transform, img_transform, target_transform)
train_loader = DataLoader(
    train_set,
    batch_size=args['train_batch_size'],
    num_workers=12,
    shuffle=True,
)
# NOTE(review): chunk starts mid-dict — the opening of this args literal is
# outside the visible source.
    'add_graph': False,
    'poly_train': True
}

# Path.
check_mkdir(ckpt_path)
check_mkdir(os.path.join(ckpt_path, exp_name))
vis_path = os.path.join(ckpt_path, exp_name, 'log')
check_mkdir(vis_path)
# Per-run log file named by wall-clock timestamp.
log_path = os.path.join(ckpt_path, exp_name, str(datetime.datetime.now()) + '.txt')
writer = SummaryWriter(log_dir=vis_path, comment=exp_name)

# Transform Data.
joint_transform = joint_transforms.Compose([
    joint_transforms.RandomRotate(),
    joint_transforms.Resize((args['scale'], args['scale']))
])
val_joint_transform = joint_transforms.Compose(
    [joint_transforms.Resize((args['scale'], args['scale']))])
img_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    # maybe can optimized.
])
target_transform = transforms.ToTensor()

# Prepare Data Set.
train_set = ImageFolder(msd_training_root, joint_transform, img_transform, target_transform)
print("Train set: {}".format(train_set.__len__()))
# NOTE(review): this DataLoader call is truncated — remaining arguments continue
# outside the visible source.
train_loader = DataLoader(train_set,
# NOTE(review): chunk starts mid-dict — the opening of this args literal is
# outside the visible source.
    'out_stride': 16,  # 8 or 16
    'sync_bn': None,  # whether to use sync bn (default: auto)
    'freeze_bn': False,
    'pre_train': True
}

# Image transform: tensor conversion only (normalization deliberately disabled).
transform = transforms.Compose([
    transforms.ToTensor()
    #transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
to_pil = transforms.ToPILImage()

# Joint image+mask transforms for train (resize + flip) and val (resize only).
joint_transform = joint_transforms.Compose([
    joint_transforms.Resize((args['img_size_h'], args['img_size_w'])),
    #joint_transforms.RandomCrop(args['crop_size']),
    joint_transforms.RandomHorizontallyFlip()
])
joint_transform_val = joint_transforms.Compose([
    joint_transforms.Resize((args['img_size_h'], args['img_size_w'])),
])

# NOTE: the same `transform` is used for both image and target here.
train_set = ImageFolder(train_cuhkshadow_path, transform=transform, target_transform=transform,
                        joint_transform=joint_transform, is_train=True,
                        batch_size=args['train_batch_size'])
# NOTE(review): this DataLoader call is truncated — remaining arguments continue
# outside the visible source.
train_loader = DataLoader(train_set, batch_size=args['train_batch_size'],
# batch size of 8 with resolution of 416*416 is exactly OK for the GTX 1080Ti GPU args = { 'iter_num': 3000, 'train_batch_size': 8, 'last_iter': 0, 'lr': 5e-3, 'lr_decay': 0.9, 'weight_decay': 5e-4, 'momentum': 0.9, 'snapshot': '', 'scale': 416 } joint_transform = joint_transforms.Compose([ joint_transforms.RandomHorizontallyFlip(), joint_transforms.Resize((args['scale'], args['scale'])) ]) val_joint_transform = joint_transforms.Compose( [joint_transforms.Resize((args['scale'], args['scale']))]) img_transform = transforms.Compose([ transforms.ToTensor(), transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) ]) target_transform = transforms.ToTensor() to_pil = transforms.ToPILImage() train_set = ImageFolder(sbu_training_root, joint_transform, img_transform, target_transform) train_loader = DataLoader(train_set, batch_size=args['train_batch_size'], num_workers=8,
def train_online(net, seq_name='breakdance'):
    """Online fine-tuning of `net` on the first frame of one DAVIS sequence.

    Uses a tiny LR and a per-iteration poly decay on two parameter groups
    (biases get 2x LR and no weight decay). Returns the adapted network.
    NOTE(review): original indentation was lost in this chunk; statement
    grouping below is reconstructed — verify loop nesting against history.
    """
    online_args = {
        'iter_num': 100,
        'train_batch_size': 1,
        'lr': 1e-10,
        'lr_decay': 0.95,
        'weight_decay': 5e-4,
        'momentum': 0.95,
    }
    # Only a deterministic resize — online adaptation uses no random augmentation.
    joint_transform = joint_transforms.Compose([
        joint_transforms.ImageResize(380),
        # joint_transforms.RandomCrop(473),
        # joint_transforms.RandomHorizontallyFlip(),
        # joint_transforms.RandomRotate(10)
    ])
    target_transform = transforms.ToTensor()
    # train_set = VideoFSImageFolder(to_test['davis'], seq_name, use_first=True, joint_transform=joint_transform, transform=img_transform)
    train_set = VideoFirstImageFolder(to_test['davis'], gt_root, seq_name,
                                      joint_transform=joint_transform,
                                      transform=img_transform,
                                      target_transform=target_transform)
    online_train_loader = DataLoader(train_set,
                                     batch_size=online_args['train_batch_size'],
                                     num_workers=1,
                                     shuffle=False)
    # criterion = nn.MSELoss().cuda()
    criterion = nn.BCEWithLogitsLoss().cuda()
    erosion = Erosion2d(1, 1, 5, soft_max=False).cuda()
    net.train()
    net.cuda()
    # fix_parameters(net.named_parameters())
    # Two parameter groups: biases (2x LR, no decay) vs. all other weights.
    optimizer = optim.SGD([{
        'params': [
            param for name, param in net.named_parameters()
            if name[-4:] == 'bias'
        ],
        'lr': 2 * online_args['lr']
    }, {
        'params': [
            param for name, param in net.named_parameters()
            if name[-4:] != 'bias'
        ],
        'lr': online_args['lr'],
        'weight_decay': online_args['weight_decay']
    }], momentum=online_args['momentum'])

    for curr_iter in range(0, online_args['iter_num']):
        total_loss_record, loss0_record, loss1_record = AvgMeter(), AvgMeter(), AvgMeter()
        loss2_record = AvgMeter()
        for i, data in enumerate(online_train_loader):
            # Poly decay computed from the OUTER iteration counter.
            optimizer.param_groups[0]['lr'] = 2 * online_args['lr'] * (
                1 - float(curr_iter) / online_args['iter_num'])**online_args['lr_decay']
            optimizer.param_groups[1]['lr'] = online_args['lr'] * (
                1 - float(curr_iter) / online_args['iter_num'])**online_args['lr_decay']
            inputs, labels = data
            batch_size = inputs.size(0)
            inputs = Variable(inputs).cuda()
            labels = Variable(labels).cuda()
            optimizer.zero_grad()
            # Dispatch to the per-architecture training step.
            # NOTE(review): if args['model'] matches none of these branches,
            # total_loss is unbound and the next line raises NameError.
            if args['model'] == 'BASNet':
                total_loss, loss0, loss1, loss2 = train_BASNet(net, inputs, criterion, erosion, labels)
            elif args['model'] == 'R3Net':
                total_loss, loss0, loss1, loss2 = train_R3Net(net, inputs, criterion, erosion, labels)
            elif args['model'] == 'DSSNet':
                total_loss, loss0, loss1, loss2 = train_DSSNet(net, inputs, criterion, erosion, labels)
            elif args['model'] == 'CPD':
                total_loss, loss0, loss1, loss2 = train_CPD(net, inputs, criterion, erosion, labels)
            elif args['model'] == 'RAS':
                total_loss, loss0, loss1, loss2 = train_RAS(net, inputs, criterion, erosion, labels)
            elif args['model'] == 'PoolNet':
                total_loss, loss0, loss1, loss2 = train_PoolNet(net, inputs, criterion, erosion, labels)
            elif args['model'] == 'F3Net':
                total_loss, loss0, loss1, loss2 = train_F3Net(net, inputs, criterion, erosion, labels)
            elif args['model'] == 'R2Net':
                total_loss, loss0, loss1, loss2 = train_R2Net(net, inputs, criterion, erosion, labels)
            total_loss.backward()
            optimizer.step()
            total_loss_record.update(total_loss.data, batch_size)
            loss0_record.update(loss0.data, batch_size)
            loss1_record.update(loss1.data, batch_size)
            loss2_record.update(loss2.data, batch_size)
            # loss3_record.update(loss3.data, batch_size)
            # loss4_record.update(loss4.data, batch_size)
            log = '[iter %d], [total loss %.5f], [loss0 %.8f], [loss1 %.8f], [loss2 %.8f], [lr %.13f]' % \
                (curr_iter, total_loss_record.avg, loss0_record.avg, loss1_record.avg,
                 loss2_record.avg, optimizer.param_groups[1]['lr'])
            print(log)
    # Save the adapted weights once fine-tuning finishes.
    # NOTE(review): placement after the loops is reconstructed — the flattened
    # original does not show whether the snapshot was saved per iteration.
    print('taking snapshot ...')
    torch.save(net.state_dict(),
               os.path.join(ckpt_path, exp_name,
                            str(args['snapshot']) + '_' + seq_name + '_online.pth'))
    # torch.save(optimizer.state_dict(),
    #            os.path.join(ckpt_path, exp_name, '%d_optim.pth' % curr_iter))
    return net
# 'train_loader': 'video_image' 'train_loader': 'flow_image3', # 'train_loader': 'video_sequence' 'image_size': 430, 'crop_size': 380, 'self_distill': 0.1, 'teacher_distill': 0.6 } imgs_file = os.path.join(datasets_root, args['imgs_file']) # imgs_file = os.path.join(datasets_root, 'video_saliency/train_all_DAFB3_seq_5f.txt') joint_transform = joint_transforms.Compose([ joint_transforms.ImageResize(args['image_size']), joint_transforms.RandomCrop(args['crop_size']), # joint_transforms.ColorJitter(hue=[-0.1, 0.1], saturation=0.05), joint_transforms.RandomHorizontallyFlip(), joint_transforms.RandomRotate(10) ]) # joint_transform = joint_transforms.Compose([ # joint_transforms.ImageResize(290), # joint_transforms.RandomCrop(256), # joint_transforms.RandomHorizontallyFlip(), # joint_transforms.RandomRotate(10) # ]) # joint_seq_transform = joint_transforms.Compose([ # joint_transforms.ImageResize(520), # joint_transforms.RandomCrop(473) # ])
# Testing/inference setup for a shadow model (STFM) on the SBU test split.
import torch
from dataset import ImageFolder
from PIL import Image
from torch.autograd import Variable
from torchvision import transforms
from config import sbu_testing_root
from model import STFM

torch.cuda.set_device(0)

ckpt_path = 'model\\'   # Windows-style relative checkpoint directory
exp_name = ''
args = {'snapshot': 'STFA', 'scale': 256}

# NOTE(review): `joint_transforms` is used below but not imported in this chunk —
# presumably imported elsewhere in the file; verify.
joint_transform = joint_transforms.Compose(
    [joint_transforms.Resize((args['scale'], args['scale']))])
val_joint_transform = joint_transforms.Compose(
    [joint_transforms.Resize((args['scale'], args['scale']))])
img_transform = transforms.Compose([
    transforms.Resize(args['scale']),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
target_transform = transforms.ToTensor()
to_test = {'sbu': sbu_testing_root}
to_pil = transforms.ToPILImage()


# NOTE(review): `main` is truncated here — its body continues outside the
# visible source.
def main():
# NOTE(review): chunk starts mid-method — the enclosing method/class is outside
# the visible source.
        return self.im2te(img), self.lb2te(label)


class ImToTensor(object):
    """Callable converting a PIL image to a float tensor (normalization disabled)."""

    def __call__(self, im):
        trans = transforms.Compose([
            transforms.ToTensor(),
            # transforms.Normalize(mean=[x/255.0 for x in [85.86, 91.79, 85.00]],
            #                      std=[x/255.0 for x in [35.79, 35.13, 36.51]]),
        ])
        return trans(im)


class MaskToTensor(object):
    """Callable converting a PIL label mask to a LongTensor of class indices."""

    def __call__(self, label):
        return torch.from_numpy(np.array(label, dtype=np.int32)).long()


if __name__ == '__main__':
    # Smoke test: load the Potsdam list and fetch one sample.
    label = [i_id.strip() for i_id in open('./list/top_potsdam.txt')]
    transform_train = joint_transforms.Compose([
        joint_transforms.RandomCrop(384),
        joint_transforms.Scale(400),
        joint_transforms.RandomRotate(10),
        joint_transforms.RandomHorizontallyFlip(),
    ])
    dataset = RSData('train',
                     '/home/jinqizhao/dataset/image/Remote_sensing/potsdam/2_Ortho_RGB_seg/',
                     '/home/jinqizhao/dataset/image/Remote_sensing/potsdam/Label_gray/',
                     label, transforms=transform_train)
    img, label = dataset[0]
    print(1)
def main(train_args):
    """Train DeepLabV3 on the remote-sensing ('yaogan') dataset.

    Builds transforms/loaders, optionally resumes from a snapshot whose
    filename encodes epoch and metrics, then runs the epoch loop.
    NOTE(review): original indentation was lost in this chunk; statement
    grouping below is reconstructed — verify against history.
    """
    check_mkdir(os.path.join(train_args['ckpt_path'], args['exp']))
    check_mkdir(
        os.path.join(train_args['ckpt_path'], args['exp'],
                     train_args['exp_name']))
    model = DeepLabV3('1')
    # print(model)
    device = torch.device("cuda")
    num_gpu = list(range(torch.cuda.device_count()))
    """###############------use gpu--------###############"""
    if args['use_gpu']:
        ts = time.time()
        print(torch.cuda.current_device())
        print(torch.cuda.get_device_name(0))
        model = nn.DataParallel(model, device_ids=num_gpu)
        model = model.to(device)
        # NOTE(review): '{}' is never substituted — the elapsed time is printed
        # as a second argument via the builtin format(); likely meant
        # "...{}".format(time.time() - ts).
        print("Finish cuda loading ,time elapsed {}", format(time.time() - ts))
    else:
        print("please check your gpu device,start training on cpu")
    """###############-------中间开始训练--------###############"""
    # Resume logic: empty snapshot means fresh run.
    if len(train_args['snapshot']) == 0:
        curr_epoch = 1
        train_args['best_record'] = {
            'epoch': 0,
            'val_loss': 1e10,
            'acc': 0,
            'acc_cls': 0,
            'mean_iu': 0,
            'fwavacc': 0
        }
        # model.apply(weights_init)
    else:
        print("train resume from " + train_args['snapshot'])
        state_dict = torch.load(
            os.path.join(train_args['ckpt_path'], args['exp'],
                         train_args['exp_name'], train_args['snapshot']))
        # Strip the 'module.' prefix added by DataParallel when saving.
        new_state_dict = OrderedDict()
        for k, v in state_dict.items():
            name = k[7:]
            new_state_dict[name] = v
        model.load_state_dict(new_state_dict)
        # model.load_state_dict(
        #     torch.load(os.path.join(train_args['ckpt_path'],args['exp'],train_args['exp_name'], train_args['snapshot'])))
        # Snapshot filename encodes epoch/metrics at fixed underscore positions.
        split_snapshot = train_args['snapshot'].split('_')
        curr_epoch = int(split_snapshot[1]) + 1
        train_args['best_record'] = {
            'epoch': int(split_snapshot[1]),
            'val_loss': float(split_snapshot[3]),
            'acc': float(split_snapshot[5]),
            'acc_cls': float(split_snapshot[7]),
            'mean_iu': float(split_snapshot[9]),
            'fwavacc': float(split_snapshot[11])
        }
    model.train()
    # 4-channel mean/std — input presumably has an extra band beyond RGB.
    mean_std = ([0.485, 0.456, 0.406, 0.450], [0.229, 0.224, 0.225, 0.225])
    """#################---数据增强和数据变换等操作------########"""
    input_transform = standard_transforms.Compose([
        standard_transforms.ToTensor(),
        standard_transforms.Normalize(*mean_std)
    ])  ##Nomorlized
    target_transform = extended_transforms.MaskToTensor()  # target to tensor
    joint_transform = joint_transforms.Compose([
        joint_transforms.RandomHorizontallyFlip(),
        joint_transforms.RandomCrop((256, 256), padding=0),
        joint_transforms.Rotate(degree=90)
    ])  ###data_augment
    restore = standard_transforms.Compose([
        extended_transforms.DeNormalize(*mean_std),
        extended_transforms.channel_4_to_channel_3(4, 3),  ## defaults to 3 channels; a 4-channel input is converted to 3
        standard_transforms.ToPILImage(),
    ])  # de-normalize; the result is a PIL image
    visualize = standard_transforms.Compose([
        standard_transforms.Resize(256),
        standard_transforms.CenterCrop(256),  ## center crop; could be removed here
        standard_transforms.ToTensor()
    ])  # resize then convert to tensor
    """#################---数据加载------########"""
    # NOTE: joint_transform is built above but deliberately NOT passed here.
    train_set = yaogan(mode='train', cls=train_args['training_cls'],
                       joint_transform=None,
                       input_transform=input_transform,
                       target_transform=target_transform)
    train_loader = DataLoader(train_set,
                              batch_size=train_args['batch_size'],
                              num_workers=train_args['num_works'],
                              shuffle=True)
    val_set = yaogan(mode='val', cls=train_args['training_cls'],
                     input_transform=input_transform,
                     target_transform=target_transform)
    val_loader = DataLoader(val_set, batch_size=1,
                            num_workers=train_args['num_works'],
                            shuffle=False)
    # test_set=yaogan(mode='test',cls=train_args['training_cls'],joint_transform=None,
    #                 input_transform=input_transform,target_transform=None)
    # test_loader=DataLoader(test_set,batch_size=1,
    #                        num_workers=train_args['num_works'], shuffle=False)
    optimizer = optim.Adadelta(model.parameters(), lr=train_args['lr'])
    ##define a weighted loss (0weight for 0 label)
    # weight=[0.09287939 ,0.02091968 ,0.02453979, 0.25752962 ,0.33731845, 1.,
    #         0.09518322, 0.52794035 ,0.24298112 ,0.02657369, 0.15057124 ,0.36864611,
    #         0.25835161,0.16672758 ,0.40728756 ,0.00751281]
    """###############-------训练数据权重--------###############"""
    if train_args['weight'] is not None:
        weight = [0.1, 1.]
        weight = torch.Tensor(weight)
    else:
        weight = None
    # NOTE(review): reduction='elementwise_mean' is the pre-1.0 PyTorch spelling
    # of 'mean'; it errors on modern torch — confirm the pinned version.
    criterion = nn.CrossEntropyLoss(weight=weight,
                                    reduction='elementwise_mean',
                                    ignore_index=-100).to(device)
    # criterion=nn.BCELoss(weight=weight,reduction='elementwise_mean').cuda()
    check_mkdir(train_args['ckpt_path'])
    check_mkdir(os.path.join(train_args['ckpt_path'], args['exp']))
    check_mkdir(
        os.path.join(train_args['ckpt_path'], args['exp'],
                     train_args['exp_name']))
    # Record the full training configuration next to the checkpoints.
    open(
        os.path.join(train_args['ckpt_path'], args['exp'],
                     train_args['exp_name'],
                     str(time.time()) + '.txt'), 'w').write(str(train_args) + '\n\n')
    """###############-------start training--------###############"""
    for epoch in range(curr_epoch, train_args['epoch_num'] + 1):
        adjust_lr(optimizer, epoch)
        train(train_loader, model, criterion, optimizer, epoch, train_args, device)
        val_loss = validate(val_loader, model, criterion, optimizer, restore,
                            epoch, train_args, visualize, device)
    writer.close()
# NOTE(review): chunk starts mid-method — the enclosing method/class is outside
# the visible source.
        return self.im2te(im), self.lb2te(gt)


class ImToTensor(object):
    """Callable converting a PIL image to a float tensor (normalization disabled)."""

    def __call__(self, im):
        trans = transforms.Compose([
            transforms.ToTensor(),
            # transforms.Normalize(mean=[x/255.0 for x in [85.86, 91.79, 85.00]],
            #                      std=[x/255.0 for x in [35.79, 35.13, 36.51]]),
        ])
        return trans(im)


class MaskToTensor(object):
    """Callable converting a PIL label mask to a LongTensor of class indices."""

    def __call__(self, gt):
        return torch.from_numpy(np.array(gt, dtype=np.int32)).long()


if __name__ == '__main__':
    # Smoke test: iterate the whole PRCV dataset once.
    transform_train = joint_transforms.Compose([
        joint_transforms.Scale(512),
    ])
    dataset = PRCVData('train', '/data/jinqizhao/', './list/tank_val_list.txt',
                       trans=transform_train)
    for i in range(len(dataset)):
        im, gt = dataset[i]
        print(i)
    print(1)
def train_online(net, seq_name='breakdance'):
    """Online fine-tuning on the first frames of one DAVIS sequence.

    The network emits five outputs; outputs 1-4 are compared against
    progressively narrowed label slices (labels.narrow along dim 0), which
    assumes train_batch_size == 5. Returns the adapted network.
    NOTE(review): original indentation was lost in this chunk; statement
    grouping below is reconstructed — verify loop nesting against history.
    """
    online_args = {
        'iter_num': 100,
        'train_batch_size': 5,   # must stay 5: the narrow() slices below depend on it
        'lr': 1e-8,
        'lr_decay': 0.95,
        'weight_decay': 5e-4,
        'momentum': 0.95,
    }
    # Deterministic resize only — no random augmentation for online adaptation.
    joint_transform = joint_transforms.Compose([
        joint_transforms.ImageResize(473),
        # joint_transforms.RandomCrop(473),
        # joint_transforms.RandomHorizontallyFlip(),
        # joint_transforms.RandomRotate(10)
    ])
    target_transform = transforms.ToTensor()
    train_set = VideoFirstImageFolder(to_test['davis'], gt_root, seq_name,
                                      online_args['train_batch_size'],
                                      joint_transform, img_transform,
                                      target_transform)
    online_train_loader = DataLoader(train_set,
                                     batch_size=online_args['train_batch_size'],
                                     num_workers=1,
                                     shuffle=False)
    # Two parameter groups: biases (2x LR, no decay) vs. all other weights.
    optimizer = optim.SGD([{
        'params': [
            param for name, param in net.named_parameters()
            if name[-4:] == 'bias'
        ],
        'lr': 2 * online_args['lr']
    }, {
        'params': [
            param for name, param in net.named_parameters()
            if name[-4:] != 'bias'
        ],
        'lr': online_args['lr'],
        'weight_decay': online_args['weight_decay']
    }], momentum=online_args['momentum'])
    criterion = nn.BCEWithLogitsLoss().cuda()
    net.train().cuda()
    fix_parameters(net.named_parameters())

    for curr_iter in range(0, online_args['iter_num']):
        total_loss_record, loss0_record, loss1_record = AvgMeter(), AvgMeter(), AvgMeter()
        loss2_record, loss3_record, loss4_record = AvgMeter(), AvgMeter(), AvgMeter()
        for i, data in enumerate(online_train_loader):
            # Poly decay computed from the OUTER iteration counter.
            optimizer.param_groups[0]['lr'] = 2 * online_args['lr'] * (
                1 - float(curr_iter) / online_args['iter_num'])**online_args['lr_decay']
            optimizer.param_groups[1]['lr'] = online_args['lr'] * (
                1 - float(curr_iter) / online_args['iter_num'])**online_args['lr_decay']
            inputs, labels = data
            batch_size = inputs.size(0)
            inputs = Variable(inputs).cuda()
            labels = Variable(labels).cuda()
            optimizer.zero_grad()
            outputs0, outputs1, outputs2, outputs3, outputs4 = net(inputs)
            # Each deeper output is matched to a shorter suffix of the batch.
            loss0 = criterion(outputs0, labels)
            loss1 = criterion(outputs1, labels.narrow(0, 1, 4))
            loss2 = criterion(outputs2, labels.narrow(0, 2, 3))
            loss3 = criterion(outputs3, labels.narrow(0, 3, 2))
            loss4 = criterion(outputs4, labels.narrow(0, 4, 1))
            total_loss = loss0 + loss1 + loss2 + loss3 + loss4
            total_loss.backward()
            optimizer.step()
            total_loss_record.update(total_loss.data, batch_size)
            loss0_record.update(loss0.data, batch_size)
            loss1_record.update(loss1.data, batch_size)
            loss2_record.update(loss2.data, batch_size)
            loss3_record.update(loss3.data, batch_size)
            loss4_record.update(loss4.data, batch_size)
            log = '[iter %d], [total loss %.5f], [loss0 %.5f], [loss1 %.5f], [loss2 %.5f], [loss3 %.5f], ' \
                  '[loss4 %.5f], [lr %.13f]' % \
                  (curr_iter, total_loss_record.avg, loss0_record.avg, loss1_record.avg,
                   loss2_record.avg, loss3_record.avg, loss4_record.avg,
                   optimizer.param_groups[1]['lr'])
            print(log)
    return net
    # NOTE(review): method of an enclosing dataset class whose header is
    # outside the visible source.
    def __len__(self):
        # Dataset size = number of collected sample paths.
        return len(self.imgs)


if __name__ == '__main__':
    # Standalone smoke-test setup for the dataset module.
    from torchvision import transforms
    import joint_transforms
    from torch.utils.data import DataLoader
    from config import msra10k_path, video_seq_path, video_seq_gt_path, video_train_path
    import numpy as np

    joint_transform = joint_transforms.Compose([
        joint_transforms.ImageResize(550),
        joint_transforms.RandomCrop(473),
        joint_transforms.ColorJitter(hue=[-0.1, 0.1], saturation=0.05),
        joint_transforms.RandomHorizontallyFlip(),
        joint_transforms.RandomRotate(10)
    ])
    joint_seq_transform = joint_transforms.Compose(
        [joint_transforms.ImageResize(520),
         joint_transforms.RandomCrop(473)])
    img_transform = transforms.Compose([
        # transforms.ColorJitter(hue=[-0.1, 0.1]),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    target_transform = transforms.ToTensor()
    input_size = (473, 473)
# GateNet training configuration and data pipeline.
ckpt_path = './model'
exp_name = 'model_gatenet'

# Optimizer / schedule hyper-parameters.
args = {
    'iter_num': 100000,
    'train_batch_size': 4,
    'last_iter': 0,
    'lr': 1e-3,
    'lr_decay': 0.9,
    'weight_decay': 0.0005,
    'momentum': 0.9,
    'snapshot': '',
}

# --- data augmentation --------------------------------------------------
joint_transform = joint_transforms.Compose([
    joint_transforms.RandomCrop(384, 384),  # change to resize
    joint_transforms.RandomHorizontallyFlip(),
    joint_transforms.RandomRotate(10),
])
img_transform = transforms.Compose([
    transforms.ColorJitter(0.1, 0.1, 0.1),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])
target_transform = transforms.ToTensor()
# ------------------------------------------------------------------------

train_set = ImageFolder(train_data, joint_transform, img_transform, target_transform)
train_loader = DataLoader(
    train_set,
    batch_size=args['train_batch_size'],
    num_workers=0,
    shuffle=True,
)

# Multi-scale training alternative:
# train_set = ImageFolder_multi_scale(train_data, joint_transform, img_transform, target_transform)