def video_transform(input_config, clip_input):
    normalize = transforms.Normalize(mean=input_config['mean'],
                                     std=input_config['std'])
    transform = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.RandomCrop((224, 224)),
        # transforms.CenterCrop((224, 224)),    # we did not use center crop in our paper
        # transforms.RandomHorizontalFlip(),    # we did not use mirror in our paper
        transforms.ToTensor(),
        normalize,
    ])
    return transform(clip_input)
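A minimal usage sketch, assuming `clip_input` has already been decoded by the data pipeline into whatever clip format this `transforms` module accepts; the mean/std values below are the standard ImageNet statistics and are illustrative only, since the real ones come from the model's `input_config`.

# Illustrative per-channel statistics (ImageNet values), not the actual config.
example_config = {'mean': [0.485, 0.456, 0.406], 'std': [0.229, 0.224, 0.225]}
clip_tensor = video_transform(example_config, clip_input)  # resized, cropped, normalized clip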
def prepare_data():
    # Split the video list: all videos are used for training and the first
    # 20% of the same list are reused as the test split.
    vnames = get_video_names(cfg.DATASET.VIDEO_LIST)
    train_split = vnames
    test_split = vnames[:int(0.2 * len(train_split))]

    train_transforms = transforms.Compose([
        video_transforms.Resize(cfg.DATA_TRANSFORM.LOADSIZE),
        video_transforms.RandomCrop(cfg.DATA_TRANSFORM.FINESIZE),
        # video_transforms.RandomHorizontalFlip(),
    ])
    test_transforms = transforms.Compose([
        video_transforms.Resize(cfg.DATA_TRANSFORM.LOADSIZE),
        video_transforms.CenterCrop(cfg.DATA_TRANSFORM.FINESIZE),
    ])

    video_root = cfg.DATASET.VIDEO_ROOT
    annot_path = cfg.DATASET.ANNOT_JSON_SAVE_PATH

    train_dataset = Dataset(train_split, video_root, annot_path, train_transforms)
    train_dataloader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=cfg.TRAIN.BATCH_SIZE,
        shuffle=True,
        num_workers=cfg.NUM_WORKERS,
        drop_last=True,
        pin_memory=True)

    test_dataset = Dataset(test_split, video_root, annot_path, test_transforms)
    test_dataloader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=cfg.TEST.BATCH_SIZE,
        shuffle=False,
        num_workers=cfg.NUM_WORKERS,
        drop_last=False,
        pin_memory=True)

    dataloaders = {'train': train_dataloader, 'test': test_dataloader}
    return dataloaders
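For orientation, a hedged sketch of how the returned dictionary might be consumed; the epoch count key and the structure of each batch are assumptions, since the items yielded depend on the `Dataset` class used above.

dataloaders = prepare_data()
for epoch in range(cfg.TRAIN.NUM_EPOCHS):        # NUM_EPOCHS is a hypothetical config key
    for batch in dataloaders['train']:           # batch structure depends on Dataset.__getitem__
        ...                                      # forward/backward pass goes here
    for batch in dataloaders['test']:
        ...                                      # evaluation pass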
normalize = transforms.Normalize(mean=input_config['mean'],
                                 std=input_config['std'])
val_sampler = sampler.RandomSampling(num=args.clip_length,
                                     interval=args.frame_interval,
                                     speed=[1.0, 1.0])
val_loader = VideoIter(
    video_prefix=os.path.join(data_root, 'raw', 'data'),  # change this part accordingly
    txt_list=os.path.join(data_root, 'raw', 'list_cvt', 'testlist01.txt'),  # change this part accordingly
    sampler=val_sampler,
    force_color=True,
    video_transform=transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.RandomCrop((224, 224)),
        # transforms.CenterCrop((224, 224)),    # we did not use center crop in our paper
        # transforms.RandomHorizontalFlip(),    # we did not use mirror in our paper
        transforms.ToTensor(),
        normalize,
    ]),
    name='test',
    return_item_subpath=True,
)
eval_iter = torch.utils.data.DataLoader(
    val_loader,
    batch_size=args.batch_size,
    shuffle=True,
    num_workers=4,  # change this part accordingly
    pin_memory=True)
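Because `return_item_subpath=True`, each batch from `eval_iter` is expected to carry the clip's sub-path alongside the data and label (the same unpacking appears in the predict loop further below); with `RandomCrop` and `shuffle=True`, every evaluation pass sees differently sampled clips. A small iteration sketch:

for datas, targets, video_subpaths in eval_iter:
    # datas: batch of transformed clips, targets: class labels,
    # video_subpaths: relative paths identifying each source video
    ...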
else:
    sym_net = torch.nn.DataParallel(sym_net)

criterion = torch.nn.CrossEntropyLoss()
net = static_model(net=sym_net,
                   criterion=criterion,
                   model_prefix=args.model_prefix)
net.load_checkpoint(epoch=args.load_epoch)

# data iterator:
data_root = "../dataset/{}".format(args.dataset)
video_location = os.path.join(data_root, 'raw', 'test_data')
normalize = transforms.Normalize(mean=input_config['mean'],
                                 std=input_config['std'])
val_sampler = sampler.RandomSampling(num=args.clip_length,
                                     interval=args.frame_interval,
                                     speed=[1.0, 1.0],
                                     seed=1)
val_loader = VideoIter(video_prefix=video_location,
                       csv_list=os.path.join(data_root, 'raw', 'list_cvt', args.list_file),
                       sampler=val_sampler,
                       force_color=True,
                       video_transform=transforms.Compose([
                           transforms.Resize((256, 256)),
                           transforms.RandomCrop((224, 224)),
                           transforms.ToTensor(),
                           normalize]),
                       name='predict',
                       return_item_subpath=True,)
eval_iter = torch.utils.data.DataLoader(val_loader,
                                        batch_size=args.batch_size,
                                        shuffle=False,
                                        num_workers=args.workers,
                                        pin_memory=True)

# main loop
net.net.eval()
sum_batch_elapse = 0.
softmax = torch.nn.Softmax(dim=1)

field_names = ['VideoID', 'Video', 'ClassID']
pred_rows = []
pred_file = 'track1_pred.csv'

i_batch = 0
for datas, targets, video_subpaths in eval_iter:
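The snippet cuts off at the start of the prediction loop. What follows is only a plausible continuation sketched from the variables defined above (`softmax`, `pred_rows`, `field_names`, `pred_file`), not the original loop body; in particular, calling `net.net(datas)` for the forward pass and reusing the clip sub-path for both the VideoID and Video columns are assumptions.

import csv

with torch.no_grad():
    for datas, targets, video_subpaths in eval_iter:
        outputs = net.net(datas)                  # assumed: DataParallel backbone returns class logits
        probs = softmax(outputs)                  # logits -> per-class probabilities
        class_ids = probs.argmax(dim=1).tolist()  # predicted class per clip
        for subpath, class_id in zip(video_subpaths, class_ids):
            # placeholder mapping: sub-path reused for both ID columns
            pred_rows.append({'VideoID': subpath, 'Video': subpath, 'ClassID': class_id})

with open(pred_file, 'w', newline='') as f:
    writer = csv.DictWriter(f, fieldnames=field_names)
    writer.writeheader()
    writer.writerows(pred_rows)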