# NOTE(review): truncated fragment — it begins mid-function (the enclosing
# `def` and the `if os.path.isfile(...)` branch are not visible) and is cut
# off mid-statement in the final `test_dataset = TSNDataSet(...)` call.
# Left byte-identical; do not reformat until the missing context is recovered.
# NOTE(review): the "loaded checkpoint" message formats `args.evaluate` but the
# sibling else-branch refers to `args.resume` — presumably the resume path was
# intended here; confirm against the full function before fixing.
print(("=> loaded checkpoint '{}' (epoch {})".format( args.evaluate, checkpoint['epoch']))) else: print(("=> no checkpoint found at '{}'".format(args.resume))) # dataset train_transform = T.Compose([ train_augmentation, ut_transforms.Stack(roll=args.arch == 'BNInception'), ut_transforms.ToTorchFormatTensor(div=args.arch != 'BNInception'), ut_transforms.IdentityTransform(), ]) test_transform = T.Compose([ ut_transforms.GroupScale(int(scale_size)), ut_transforms.GroupCenterCrop(crop_size), ut_transforms.Stack(roll=args.arch == 'BNInception'), ut_transforms.ToTorchFormatTensor(div=args.arch != 'BNInception'), ut_transforms.IdentityTransform(), ]) train_dataset = TSNDataSet(num_segments=args.num_segments, dataset=args.dataset, new_length=data_length, modality=args.modality, transform=train_transform, split=args.split, train=True) test_dataset = TSNDataSet(num_segments=args.num_segments, dataset=args.dataset,
def main():
    """Entry point: build the FCN model, data loaders, optimizer, then train/evaluate.

    Parses the global ``parser`` arguments, optionally restores a checkpoint
    (``--resume``) or initial weights (``--weight``), and either runs a single
    validation pass (``--evaluate``) or the full training loop, saving the
    best-mIoU checkpoint as it goes.
    """
    global args, best_mIoU
    args = parser.parse_args()

    # Restrict CUDA to the requested devices; afterwards args.gpus is a count.
    os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(str(gpu) for gpu in args.gpus)
    args.gpus = len(args.gpus)

    if args.no_partialbn:
        sync_bn.Synchronize.init(args.gpus)

    # Dataset-specific class count and the label value excluded from the loss.
    if args.dataset in ('VOCAug', 'VOC2012', 'COCO'):
        num_class = 21
        ignore_label = 255
        scale_series = [10, 20, 30, 60]  # NOTE(review): unused in this function
    elif args.dataset == 'cityscapes':
        num_class = 19
        ignore_label = 0
        scale_series = [15, 30, 45, 90]  # NOTE(review): unused in this function
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    model = models.FCN(num_class, base_model=args.arch, dropout=args.dropout,
                       partial_bn=not args.no_partialbn)
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()  # per-parameter-group lr/decay multipliers
    model = torch.nn.DataParallel(model, device_ids=range(args.gpus)).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_mIoU = checkpoint['best_mIoU']
            model.load_state_dict(checkpoint['state_dict'])
            # BUGFIX: this message previously formatted args.evaluate instead of
            # the checkpoint path that was actually loaded (args.resume).
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    if args.weight:
        if os.path.isfile(args.weight):
            print("=> loading initial weight '{}'".format(args.weight))
            checkpoint = torch.load(args.weight)
            model.load_state_dict(checkpoint['state_dict'])
        else:
            print("=> no model file found at '{}'".format(args.weight))

    cudnn.benchmark = True
    cudnn.fastest = True

    # Data loading: the dataset class is looked up by name, e.g. 'cityscapesDataSet'.
    train_loader = torch.utils.data.DataLoader(
        getattr(ds, args.dataset + 'DataSet')(
            data_list=args.train_list,
            transform=torchvision.transforms.Compose([
                tf.GroupRandomHorizontalFlip(),
                tf.GroupRandomScale(size=(0.5, 2.0),
                                    interpolation=(cv2.INTER_LINEAR, cv2.INTER_NEAREST)),
                tf.GroupRandomCrop(size=args.train_size),
                tf.GroupRandomPad(size=args.train_size,
                                  padding=(input_mean, (ignore_label, ))),
                tf.GroupRandomRotation(degree=(-10, 10),
                                       interpolation=(cv2.INTER_LINEAR, cv2.INTER_NEAREST),
                                       padding=(input_mean, (ignore_label, ))),
                tf.GroupRandomBlur(applied=(True, False)),
                tf.GroupNormalize(mean=(input_mean, (0, )), std=(input_std, (1, ))),
            ])),
        batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=True, drop_last=True)

    val_loader = torch.utils.data.DataLoader(
        getattr(ds, args.dataset + 'DataSet')(
            data_list=args.val_list,
            transform=torchvision.transforms.Compose([
                tf.GroupCenterCrop(size=args.test_size),
                # NOTE(review): 'GroupConcerPad' looks like a typo for
                # 'GroupCenterPad' — confirm against the tf module before renaming.
                tf.GroupConcerPad(size=args.test_size,
                                  padding=(input_mean, (ignore_label, ))),
                tf.GroupNormalize(mean=(input_mean, (0, )), std=(input_std, (1, ))),
            ])),
        batch_size=args.batch_size * 3, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    # Loss (NLLLoss expects log-probabilities), SGD over the policy groups,
    # and the segmentation evaluator.
    criterion = torch.nn.NLLLoss(ignore_index=ignore_label).cuda()
    for group in policies:
        print('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'], group['decay_mult']))
    optimizer = torch.optim.SGD(policies, args.lr,
                                momentum=args.momentum, weight_decay=args.weight_decay)
    evaluator = EvalSegmentation(num_class, ignore_label)

    if args.evaluate:
        validate(val_loader, model, criterion, 0, evaluator)
        return

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on the validation set and checkpoint the best model
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            mIoU = validate(val_loader, model, criterion,
                            (epoch + 1) * len(train_loader), evaluator)
            is_best = mIoU > best_mIoU
            best_mIoU = max(mIoU, best_mIoU)
            save_checkpoint({
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'best_mIoU': best_mIoU,
            }, is_best)
# NOTE(review): `__call__` below belongs to a transform class (instantiated as
# `I3Dscale()` further down) whose `class` header is outside this chunk;
# indentation has been reconstructed from the mangled original.
def __call__(self, data):
    """Rescale tensor values from [0, 1] to [-1, 1]."""
    return 2.0 * data - 1.0


# Training pipeline: resize smaller edge to 256, take a random 224x224 crop
# with horizontal flipping, then stack and rescale to [-1, 1].
train_transforms = T.Compose([
    ut_transforms.GroupScale(256),
    ut_transforms.GroupRandomCrop(224),
    ut_transforms.GroupRandomHorizontalFlip(),
    ut_transforms.GroupToTensor(),
    ut_transforms.StackTensor(),
    I3Dscale(),
])

# Validation pipeline: center-crop at the full 256 resolution
# (fully-convolutional evaluation at test time).
val_transforms = T.Compose([
    ut_transforms.GroupScale(256),
    ut_transforms.GroupCenterCrop(256),
    ut_transforms.GroupToTensor(),
    ut_transforms.StackTensor(),
    I3Dscale(),
])

# Consecutive yields fixed-length clips (default 64 frames); test_mode='else'
# makes the val split load a single 64-frame clip per video for testing.
train_dataset = Consecutive(dataset=args.dataset, train=True,
                            transform=train_transforms)
val_dataset = Consecutive(dataset=args.dataset, train=False,
                          transform=val_transforms, test_mode='else')

train_loader = DataLoader(train_dataset,
                          batch_size=args.batch_size,
                          shuffle=True,
                          num_workers=args.workers,
                          pin_memory=True)
# (translation) The Chinese note below says: "For now only RGB frames and the
# kinetics/ucf101 datasets are supported."
# NOTE(review): truncated fragment — the final statement
# `train_loader = DataLoader(train_dataset,` is cut off mid-call and its
# continuation is not visible in this chunk, so the line is left byte-identical
# rather than reformatted.  Clips are sampled with interval=2, so the default
# 64-frame window yields 32 frames per clip (per the inline comments).
# 暂时只支持RGB frames 和kinetics\ucf101数据集 train_transforms = T.Compose([ ut_transforms.GroupRandomScale( size_low=256, size_high=320), # randomly resize smaller edge to [256, 320] ut_transforms.GroupRandomCrop(224), # randomlly crop a 224x224 patch ut_transforms.GroupToTensor(), # ut_transforms.GroupNormalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), ut_transforms.StackTensor() ]) val_transforms = T.Compose([ ut_transforms.GroupScale(256), # scale to 256 and do fully-convolutional ut_transforms.GroupCenterCrop(256), ut_transforms.GroupToTensor(), # ut_transforms.GroupNormalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), ut_transforms.StackTensor() ]) train_dataset = Consecutive( dataset=args.dataset, train=True, interval=2, transform=train_transforms) #default 64/2 = 32 frames val_dataset = Consecutive(dataset=args.dataset, train=False, interval=2, transform=val_transforms, test_mode='else') # also 32 frames train_loader = DataLoader(train_dataset,
# NOTE(review): truncated fragment — the final statement
# `val_loader = DataLoader( val_dataset, batch_size=1,` is cut off mid-call and
# its continuation is not visible in this chunk, so the line is left
# byte-identical rather than reformatted.  This is a near-duplicate of the
# earlier I3D setup, differing only in GroupCenterCrop(224) for validation
# (224x224 center crop instead of the 256 fully-convolutional variant).
# The leading `__call__` presumably belongs to the I3Dscale transform class
# whose header is outside this chunk — it maps tensor values from [0, 1]
# to [-1, 1].
def __call__(self, data): return data*2 - 1.0 train_transforms = T.Compose([ ut_transforms.GroupScale(256), # resize smaller edge to 256 ut_transforms.GroupRandomCrop(224), # randomlly crop a 224x224 patch ut_transforms.GroupRandomHorizontalFlip(), ut_transforms.GroupToTensor(), ut_transforms.StackTensor(), I3Dscale() ]) val_transforms = T.Compose([ ut_transforms.GroupScale(256), ut_transforms.GroupCenterCrop(224), # center crop 224x224 patch ut_transforms.GroupToTensor(), ut_transforms.StackTensor(), I3Dscale() ]) train_dataset = Consecutive(dataset=args.dataset, train=True, transform=train_transforms) #default 64 frames val_dataset = Consecutive(dataset=args.dataset, train=False, transform=val_transforms, test_mode='else') train_loader = DataLoader( train_dataset, batch_size=args.batch_size, shuffle=True, num_workers=args.workers, pin_memory=True) val_loader = DataLoader( val_dataset, batch_size=1,