Example #1
0
def main():
    """Run the evaluation pass of a TimeCycle model over ``davis.DavisSet``.

    Relies on module-level state defined elsewhere in the file: ``args``
    (reads ``workers``, ``resume``, ``save_path``), ``params`` (reads
    ``batchSize``) and ``use_cuda``.  Nothing is returned; the work is done
    by ``test``.
    """
    val_loader = torch.utils.data.DataLoader(
        davis.DavisSet(params, is_train=False),
        batch_size=int(params['batchSize']), shuffle=False,
        num_workers=args.workers, pin_memory=True)

    model = tc.TimeCycle()
    model = Wrap(model, 'forward_affinity')

    model = torch.nn.DataParallel(model).cuda()

    cudnn.benchmark = False
    print('    Total params: %.2fM' % (sum(p.numel() for p in model.parameters())/1000000.0))

    # Load checkpoint.  If args.resume is not an existing file the model
    # keeps the weights it was constructed with.
    if os.path.isfile(args.resume):
        print('==> Resuming from checkpoint..')
        checkpoint = torch.load(args.resume)
        partial_load(checkpoint['state_dict'], model)
        # Drop the reference so the loaded checkpoint can be freed before
        # evaluation starts.
        del checkpoint

    model.eval()

    if not os.path.exists(args.save_path):
        os.makedirs(args.save_path)

    # The original literal was '\Testing': '\T' is not a valid escape
    # sequence, so it printed a stray backslash and triggers a warning on
    # recent Python versions.  A leading newline is the evident intent.
    print('\nTesting')
    test(val_loader, model, 1, use_cuda)
Example #2
0
def main():
    """Run the evaluation pass of a TimeCycle model on DAVIS or JHMDB data.

    Relies on module-level state defined elsewhere in the file: ``args``,
    ``params``, ``vis`` and ``use_cuda``.  Several ``args`` fields are
    overwritten with fixed values before the model is built.  Nothing is
    returned; the work is done by ``test``.
    """
    # Fixed settings written onto args before constructing the model.
    args.kldv_coef = 1
    args.long_coef = 1

    args.frame_transforms = 'crop'
    args.frame_aug = 'grid'
    args.npatch = 49
    args.img_size = 256
    args.pstride = [0.5, 0.5]
    args.patch_size = [64, 64, 3]

    args.visualize = False

    model = tc.TimeCycle(args, vis=vis).cuda()

    # Probe the model with a dummy 320x320 clip and take the trailing two
    # dimensions of its second output; mapScale is 320 integer-divided by
    # that size.
    params['mapScale'] = model(torch.zeros(1, 10, 3, 320, 320).cuda(),
                               just_feats=True)[1].shape[-2:]
    params['mapScale'] = 320 // np.array(params['mapScale'])

    # The dataset class is selected by whether 'jhmdb' occurs in
    # args.filelist.
    if 'jhmdb' not in args.filelist:
        val_set = davis.DavisSet(params, is_train=False)
    else:
        val_set = jhmdb.JhmdbSet(params, is_train=False)

    val_loader = torch.utils.data.DataLoader(
        val_set,
        batch_size=int(params['batchSize']), shuffle=False,
        num_workers=args.workers, pin_memory=True)

    cudnn.benchmark = False
    print('    Total params: %.2fM' %
          (sum(p.numel() for p in model.parameters()) / 1000000.0))

    # Load checkpoint.  If args.resume is not an existing file the model
    # keeps the weights it was constructed with.
    if os.path.isfile(args.resume):
        print('==> Resuming from checkpoint..')
        checkpoint = torch.load(args.resume)

        utils.partial_load(checkpoint['model'], model, skip_keys=['head'])

        # Drop the reference so the loaded checkpoint can be freed before
        # evaluation starts.
        del checkpoint

    model.eval()
    model = model.cuda()

    if not os.path.exists(args.save_path):
        os.makedirs(args.save_path)

    # The original literal was '\Testing': '\T' is not a valid escape
    # sequence, so it printed a stray backslash and triggers a warning on
    # recent Python versions.  A leading newline is the evident intent.
    print('\nTesting')
    # NOTE(review): the original had a commented-out torch.no_grad() here, so
    # gradients stay enabled during test() -- confirm that is intentional.
    test(val_loader, model, 1, use_cuda, args)
Example #3
0
def main():
    """Run the evaluation pass of a wrapped TimeCycle model over ``davis.DavisSet``.

    Relies on module-level state defined elsewhere in the file: ``args``
    (reads ``workers``, ``resume``, ``save_path``), ``params`` and
    ``use_cuda``.  Nothing is returned; the work is done by ``test``, which
    is called with gradient tracking disabled.
    """
    model = tc.TimeCycle(args).cuda()
    model = Wrap(model)

    # Probe the model with a dummy 320x320 clip and take the trailing two
    # dimensions of its second output; mapScale is 320 integer-divided by
    # that size.
    params['mapScale'] = model(torch.zeros(1, 10, 3, 320, 320).cuda(),
                               None,
                               True,
                               func='forward')[1].shape[-2:]
    params['mapScale'] = 320 // np.array(params['mapScale'])

    val_loader = torch.utils.data.DataLoader(
        davis.DavisSet(params, is_train=False),
        batch_size=int(params['batchSize']),
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True)

    cudnn.benchmark = False
    print('    Total params: %.2fM' %
          (sum(p.numel() for p in model.parameters()) / 1000000.0))

    # Load checkpoint.  If args.resume is not an existing file the model
    # keeps the weights it was constructed with.
    if os.path.isfile(args.resume):
        print('==> Resuming from checkpoint..')
        checkpoint = torch.load(args.resume)
        # Weights are loaded into the inner model held by the wrapper.
        utils.partial_load(checkpoint['model'], model.model)

        # Drop the reference so the loaded checkpoint can be freed before
        # evaluation starts.
        del checkpoint

    model.eval()
    model = torch.nn.DataParallel(model).cuda()

    if not os.path.exists(args.save_path):
        os.makedirs(args.save_path)

    # The original literal was '\Testing': '\T' is not a valid escape
    # sequence, so it printed a stray backslash and triggers a warning on
    # recent Python versions.  A leading newline is the evident intent.
    print('\nTesting')
    with torch.no_grad():
        test(val_loader, model, 1, use_cuda)
Example #4
0
def main(args):
    """Train a TimeCycle model, or only evaluate it when ``args.test_only`` is set.

    The function builds the train/validation datasets (optionally through an
    on-disk cache), their samplers and loaders, the model, the optimizer and
    a per-iteration warmup/multi-step LR schedule, optionally restores a
    checkpoint, and then runs the epoch loop, saving a checkpoint after each
    epoch when ``args.output_dir`` is set.

    Args:
        args: options namespace.  Attributes read here include ``apex``,
            ``apex_opt_level``, ``output_dir``, ``device``, ``data_path``,
            ``fast_test``, ``frame_transforms``, ``clip_len``,
            ``frame_skip``, ``cache_dataset``, ``distributed``,
            ``clips_per_video``, ``batch_size``, ``workers``, ``sync_bn``,
            ``lr``, ``world_size``, ``lr_warmup_epochs``, ``lr_milestones``,
            ``lr_gamma``, ``gpu``, ``data_parallel``, ``resume``,
            ``start_epoch``, ``test_only``, ``epochs`` and ``print_freq``.
            ``args.start_epoch`` is overwritten when resuming.

    Raises:
        RuntimeError: if ``args.apex`` is set but Python is older than 3 or
            ``amp`` is ``None``.
    """
    if args.apex:
        if sys.version_info < (3, 0):
            raise RuntimeError(
                "Apex currently only supports Python 3. Aborting.")
        if amp is None:
            raise RuntimeError(
                "Failed to import apex. Please install apex from https://www.github.com/nvidia/apex "
                "to enable mixed-precision training.")

    if args.output_dir:
        utils.mkdir(args.output_dir)

    vis = utils.Visualize(args)

    utils.init_distributed_mode(args)
    print(args)
    print("torch version: ", torch.__version__)
    print("torchvision version: ", torchvision.__version__)

    device = torch.device(args.device)

    torch.backends.cudnn.benchmark = True

    # Data loading code
    print("Loading data")
    # With args.fast_test the training directory is the validation one.
    traindir = os.path.join(
        args.data_path, 'train_256' if not args.fast_test else 'val_256_bob')
    valdir = os.path.join(args.data_path, 'val_256_bob')
    normalize = T.Normalize(mean=[0.43216, 0.394666, 0.37645],
                            std=[0.22803, 0.22145, 0.216989])

    frame_transform_train = utils.make_frame_transform(args.frame_transforms)

    transform_train = torchvision.transforms.Compose([
        frame_transform_train,
        T.ToFloatTensorInZeroOne(),
        T.Resize((256, 256)),
        normalize,
    ])

    # Defined before make_dataset so the helper never depends on a name that
    # is bound later in the function.
    transform_test = torchvision.transforms.Compose([
        T.ToFloatTensorInZeroOne(),
        T.Resize((256, 256)),
        normalize
    ])

    def make_dataset(is_train):
        """Build the train (``is_train=True``) or validation dataset."""
        _transform = transform_train if is_train else transform_test

        if 'kinetics' in args.data_path.lower():
            # Fix: use the split-specific transform (the original always
            # passed transform_train, even for the validation split) and a
            # real one-element tuple for the extensions.
            return Kinetics400(traindir if is_train else valdir,
                               frames_per_clip=args.clip_len,
                               step_between_clips=1,
                               transform=_transform,
                               extensions=('mp4',),
                               frame_rate=args.frame_skip)
        else:
            return VideoList(
                args,
                is_train,
                frame_gap=args.frame_skip,
                transform=_transform,
            )

    print("Loading training data")
    st = time.time()
    cache_path = _get_cache_path(traindir)

    if args.cache_dataset and os.path.exists(cache_path):
        print("Loading dataset_train from {}".format(cache_path))
        # NOTE: torch.load unpickles the file; only load caches that this
        # script (or another trusted source) produced.
        dataset, _ = torch.load(cache_path)
        dataset.transform = transform_train
    else:
        if args.distributed:
            print("It is recommended to pre-compute the dataset cache "
                  "on a single-gpu first, as it will be faster")
        dataset = make_dataset(is_train=True)

        if args.cache_dataset:
            print("Saving dataset_train to {}".format(cache_path))
            utils.mkdir(os.path.dirname(cache_path))
            utils.save_on_master((dataset, traindir), cache_path)

    if hasattr(dataset, 'video_clips'):
        dataset.video_clips.compute_clips(args.clip_len, 1, frame_rate=15)

    print("Took", time.time() - st)

    print("Loading validation data")
    cache_path = _get_cache_path(valdir)

    if args.cache_dataset and os.path.exists(cache_path):
        print("Loading dataset_test from {}".format(cache_path))
        # NOTE: same unpickling caveat as for the training cache.
        dataset_test, _ = torch.load(cache_path)
        dataset_test.transform = transform_test
    else:
        if args.distributed:
            print("It is recommended to pre-compute the dataset cache "
                  "on a single-gpu first, as it will be faster")
        dataset_test = make_dataset(is_train=False)

        if args.cache_dataset:
            print("Saving dataset_test to {}".format(cache_path))
            utils.mkdir(os.path.dirname(cache_path))
            utils.save_on_master((dataset_test, valdir), cache_path)

    # Fix: check the object that is actually used (the original tested
    # `dataset` and then accessed `dataset_test.video_clips`).
    if hasattr(dataset_test, 'video_clips'):
        dataset_test.video_clips.compute_clips(args.clip_len, 1, frame_rate=15)

    def make_data_sampler(is_train, dataset):
        """Pick a sampler: clip samplers for clip datasets, else random/None."""
        if hasattr(dataset, 'video_clips'):
            _sampler = RandomClipSampler if is_train else UniformClipSampler
            return _sampler(dataset.video_clips, args.clips_per_video)
        else:
            return torch.utils.data.sampler.RandomSampler(
                dataset) if is_train else None

    print("Creating data loaders")
    train_sampler = make_data_sampler(True, dataset)
    test_sampler = make_data_sampler(False, dataset_test)

    if args.distributed:
        # NOTE(review): test_sampler is None for a dataset without
        # `video_clips`; confirm DistributedSampler accepts that.
        train_sampler = DistributedSampler(train_sampler)
        test_sampler = DistributedSampler(test_sampler)

    data_loader = torch.utils.data.DataLoader(dataset,
                                              batch_size=args.batch_size,
                                              sampler=train_sampler,
                                              num_workers=args.workers,
                                              pin_memory=True,
                                              collate_fn=collate_fn)

    data_loader_test = torch.utils.data.DataLoader(dataset_test,
                                                   batch_size=args.batch_size,
                                                   sampler=test_sampler,
                                                   num_workers=args.workers,
                                                   pin_memory=True,
                                                   collate_fn=collate_fn)

    print("Creating model")
    import resnet
    import timecycle as tc
    model = tc.TimeCycle(args)

    model.to(device)

    if args.distributed and args.sync_bn:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)

    criterion = nn.CrossEntropyLoss()

    # NOTE(review): `lr` is computed but not used; Adam below is created with
    # a fixed learning rate of 3e-4 -- confirm this is intended.
    lr = args.lr * args.world_size
    optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)
    if args.apex:
        model, optimizer = amp.initialize(model,
                                          optimizer,
                                          opt_level=args.apex_opt_level)

    # convert scheduler to be per iteration, not per epoch, for warmup that lasts
    # between different epochs
    warmup_iters = args.lr_warmup_epochs * len(data_loader)
    lr_milestones = [len(data_loader) * m for m in args.lr_milestones]
    lr_scheduler = WarmupMultiStepLR(optimizer,
                                     milestones=lr_milestones,
                                     gamma=args.lr_gamma,
                                     warmup_iters=warmup_iters,
                                     warmup_factor=1e-5)

    # `model_without_ddp` always refers to the unwrapped module so that
    # checkpoints are saved and loaded without the parallel wrapper.
    model_without_ddp = model
    if args.distributed:
        model = torch.nn.parallel.DistributedDataParallel(
            model, device_ids=[args.gpu])
        model_without_ddp = model.module

    if args.data_parallel:
        model = torch.nn.parallel.DataParallel(model)
        model_without_ddp = model.module

    if args.resume:
        checkpoint = torch.load(args.resume, map_location='cpu')
        model_without_ddp.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
        # Continue with the epoch after the one stored in the checkpoint.
        args.start_epoch = checkpoint['epoch'] + 1

    if args.test_only:
        evaluate(model, criterion, data_loader_test, device=device)
        return

    print("Start training")
    start_time = time.time()
    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        train_one_epoch(model,
                        criterion,
                        optimizer,
                        lr_scheduler,
                        data_loader,
                        device,
                        epoch,
                        args.print_freq,
                        args.apex,
                        vis=vis)
        if args.output_dir:
            checkpoint = {
                'model': model_without_ddp.state_dict(),
                'optimizer': optimizer.state_dict(),
                'lr_scheduler': lr_scheduler.state_dict(),
                'epoch': epoch,
                'args': args
            }
            # One file per epoch plus a rolling 'checkpoint.pth'.
            utils.save_on_master(
                checkpoint,
                os.path.join(args.output_dir, 'model_{}.pth'.format(epoch)))
            utils.save_on_master(
                checkpoint, os.path.join(args.output_dir, 'checkpoint.pth'))

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print('Training time {}'.format(total_time_str))