# Example 1
        print(("=> loaded checkpoint '{}' (epoch {})".format(
            args.evaluate, checkpoint['epoch'])))
    else:
        print(("=> no checkpoint found at '{}'".format(args.resume)))

# dataset
# The roll/div flags only differ for the BNInception backbone, so the
# arch check is computed once and reused in both pipelines.
_bninception = args.arch == 'BNInception'

train_transform = T.Compose([
    train_augmentation,
    ut_transforms.Stack(roll=_bninception),
    ut_transforms.ToTorchFormatTensor(div=not _bninception),
    ut_transforms.IdentityTransform(),
])

test_transform = T.Compose([
    ut_transforms.GroupScale(int(scale_size)),
    ut_transforms.GroupCenterCrop(crop_size),
    ut_transforms.Stack(roll=_bninception),
    ut_transforms.ToTorchFormatTensor(div=not _bninception),
    ut_transforms.IdentityTransform(),
])

train_dataset = TSNDataSet(
    num_segments=args.num_segments, dataset=args.dataset,
    new_length=data_length, modality=args.modality,
    transform=train_transform, split=args.split, train=True)

test_dataset = TSNDataSet(num_segments=args.num_segments,
                          dataset=args.dataset,
# Example 2
def main():
    """Build, optionally restore, and train/evaluate an FCN segmentation model.

    Parses CLI arguments, pins the visible GPUs, constructs the model and
    the train/val data loaders for the selected dataset, then either runs a
    single validation pass (``--evaluate``) or the full train/validate loop,
    checkpointing whenever the best mIoU improves.
    """
    global args, best_mIoU
    args = parser.parse_args()

    # Restrict CUDA to the requested devices, then reuse args.gpus as a count.
    os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(str(gpu) for gpu in args.gpus)
    args.gpus = len(args.gpus)

    if args.no_partialbn:
        sync_bn.Synchronize.init(args.gpus)

    # Dataset-specific class count, ignore label, and multi-scale series
    # (scale_series is set here but not referenced below — kept for parity
    # with code elsewhere in the project).
    if args.dataset in ('VOCAug', 'VOC2012', 'COCO'):
        num_class = 21
        ignore_label = 255
        scale_series = [10, 20, 30, 60]
    elif args.dataset == 'cityscapes':
        num_class = 19
        ignore_label = 0
        scale_series = [15, 30, 45, 90]
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    # Grab normalization stats and per-group optimizer policies from the
    # bare model before wrapping it in DataParallel.
    model = models.FCN(num_class, base_model=args.arch, dropout=args.dropout, partial_bn=not args.no_partialbn)
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    model = torch.nn.DataParallel(model, device_ids=range(args.gpus)).cuda()

    # Resume full training state (epoch counter, best metric, weights).
    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_mIoU = checkpoint['best_mIoU']
            # NOTE(review): unbound-method call; equivalent to
            # model.load_state_dict(...) unless a subclass overrides it.
            torch.nn.Module.load_state_dict(model, checkpoint['state_dict'])
            # Bug fix: this message previously formatted args.evaluate, so it
            # reported the wrong (often empty) path instead of args.resume.
            print(("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    # Load initial weights only (no epoch / best-metric restore).
    if args.weight:
        if os.path.isfile(args.weight):
            print(("=> loading initial weight '{}'".format(args.weight)))
            checkpoint = torch.load(args.weight)
            torch.nn.Module.load_state_dict(model, checkpoint['state_dict'])
        else:
            print(("=> no model file found at '{}'".format(args.weight)))

    cudnn.benchmark = True
    cudnn.fastest = True

    # Data loading code
    train_loader = torch.utils.data.DataLoader(
        getattr(ds, args.dataset + 'DataSet')(data_list=args.train_list, transform=torchvision.transforms.Compose([
            tf.GroupRandomHorizontalFlip(),
            tf.GroupRandomScale(size=(0.5, 2.0), interpolation=(cv2.INTER_LINEAR, cv2.INTER_NEAREST)),
            tf.GroupRandomCrop(size=args.train_size),
            tf.GroupRandomPad(size=args.train_size, padding=(input_mean, (ignore_label, ))),
            tf.GroupRandomRotation(degree=(-10, 10), interpolation=(cv2.INTER_LINEAR, cv2.INTER_NEAREST), padding=(input_mean, (ignore_label, ))),
            tf.GroupRandomBlur(applied=(True, False)),
            tf.GroupNormalize(mean=(input_mean, (0, )), std=(input_std, (1, ))),
        ])), batch_size=args.batch_size, shuffle=True, num_workers=args.workers, pin_memory=True, drop_last=True)

    val_loader = torch.utils.data.DataLoader(
        getattr(ds, args.dataset + 'DataSet')(data_list=args.val_list, transform=torchvision.transforms.Compose([
            tf.GroupCenterCrop(size=args.test_size),
            # NOTE(review): 'GroupConcerPad' looks like a typo (corner pad?)
            # but must match the name declared in the transforms module —
            # confirm before renaming.
            tf.GroupConcerPad(size=args.test_size, padding=(input_mean, (ignore_label, ))),
            tf.GroupNormalize(mean=(input_mean, (0, )), std=(input_std, (1, ))),
        ])), batch_size=args.batch_size * 3, shuffle=False, num_workers=args.workers, pin_memory=True)

    # define loss function (criterion) optimizer and evaluator
    # NLLLoss expects log-probabilities; presumably the model ends in
    # log_softmax — confirm against models.FCN.
    criterion = torch.nn.NLLLoss(ignore_index=ignore_label).cuda()
    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(group['name'], len(group['params']), group['lr_mult'], group['decay_mult'])))
    optimizer = torch.optim.SGD(policies, args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
    evaluator = EvalSegmentation(num_class, ignore_label)

    if args.evaluate:
        validate(val_loader, model, criterion, 0, evaluator)
        return

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            mIoU = validate(val_loader, model, criterion, (epoch + 1) * len(train_loader), evaluator)
            # remember best mIoU and save checkpoint
            is_best = mIoU > best_mIoU
            best_mIoU = max(mIoU, best_mIoU)
            save_checkpoint({
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'best_mIoU': best_mIoU,
            }, is_best)
    def __call__(self, data):
        return data*2 - 1.0


train_transforms = T.Compose([
    ut_transforms.GroupScale(256),              # shorter edge -> 256
    ut_transforms.GroupRandomCrop(224),         # random 224x224 patch
    ut_transforms.GroupRandomHorizontalFlip(),
    ut_transforms.GroupToTensor(),
    ut_transforms.StackTensor(),
    I3Dscale(),
])

val_transforms = T.Compose([
    ut_transforms.GroupScale(256),
    ut_transforms.GroupCenterCrop(256),         # keep 256: fully-convolutional test
    ut_transforms.GroupToTensor(),
    ut_transforms.StackTensor(),
    I3Dscale(),
])



# Default clip length of 64 frames; validation loads one 64-frame clip to test.
train_dataset = Consecutive(
    dataset=args.dataset, train=True, transform=train_transforms)
val_dataset = Consecutive(
    dataset=args.dataset, train=False, transform=val_transforms,
    test_mode='else')

train_loader = DataLoader(train_dataset, batch_size=args.batch_size,
                          shuffle=True, num_workers=args.workers,
                          pin_memory=True)
# Example 4
# NOTE: for now only RGB frames and the Kinetics / UCF101 datasets are supported

# Training pipeline: random short-side scale in [256, 320], then a random
# 224x224 crop (normalization is intentionally left commented out).
train_transforms = T.Compose([
    ut_transforms.GroupRandomScale(
        size_low=256,
        size_high=320),  # randomly resize smaller edge to [256, 320]
    ut_transforms.GroupRandomCrop(224),  # randomly crop a 224x224 patch
    ut_transforms.GroupToTensor(),
    # ut_transforms.GroupNormalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ut_transforms.StackTensor()
])

# Validation pipeline: deterministic 256 scale + 256 center crop.
val_transforms = T.Compose([
    ut_transforms.GroupScale(256),  # scale to 256 and do fully-convolutional
    ut_transforms.GroupCenterCrop(256),
    ut_transforms.GroupToTensor(),
    # ut_transforms.GroupNormalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ut_transforms.StackTensor()
])

# interval=2 subsamples every other frame, so clips are 64/2 = 32 frames.
train_dataset = Consecutive(
    dataset=args.dataset, train=True, interval=2,
    transform=train_transforms)  #default 64/2 = 32 frames
val_dataset = Consecutive(dataset=args.dataset,
                          train=False,
                          interval=2,
                          transform=val_transforms,
                          test_mode='else')  # also 32 frames

train_loader = DataLoader(train_dataset,
    def __call__(self, data):
        return data*2 - 1.0


# Training pipeline: fixed 256 scale, random 224 crop + flip, then the
# I3Dscale step maps tensors into the [-1, 1] input range.
train_transforms = T.Compose([
    ut_transforms.GroupScale(256), # resize smaller edge to 256
    ut_transforms.GroupRandomCrop(224), # randomly crop a 224x224 patch
    ut_transforms.GroupRandomHorizontalFlip(),
    ut_transforms.GroupToTensor(),
    ut_transforms.StackTensor(),
    I3Dscale()
])

# Validation pipeline: deterministic center crop (224 here, unlike the
# fully-convolutional 256 variant used elsewhere in this file).
val_transforms = T.Compose([
    ut_transforms.GroupScale(256),
    ut_transforms.GroupCenterCrop(224), # center crop 224x224 patch
    ut_transforms.GroupToTensor(),
    ut_transforms.StackTensor(),
    I3Dscale()
])

train_dataset = Consecutive(dataset=args.dataset, train=True, transform=train_transforms) #default 64 frames
val_dataset = Consecutive(dataset=args.dataset, train=False, transform=val_transforms, test_mode='else')

train_loader = DataLoader(
    train_dataset, batch_size=args.batch_size, 
    shuffle=True, num_workers=args.workers,
    pin_memory=True)

val_loader = DataLoader(
    val_dataset, batch_size=1,