def video_transform(input_config, clip_input):
    # Normalization statistics come from the model's input config.
    normalize = transforms.Normalize(mean=input_config['mean'], std=input_config['std'])
    transform = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.RandomCrop((224, 224)),
        # transforms.CenterCrop((224, 224)), # we did not use center crop in our paper
        # transforms.RandomHorizontalFlip(), # we did not use mirror in our paper
        transforms.ToTensor(),
        normalize,
    ])

    return transform(clip_input)
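
A minimal usage sketch, assuming a torchvision-style `transforms` module that operates on PIL images; the `input_config` values below are the common ImageNet statistics, an assumption rather than part of the original:

from PIL import Image

# Hypothetical config: ImageNet mean/std; substitute the statistics of your pretrained backbone.
input_config = {'mean': [0.485, 0.456, 0.406], 'std': [0.229, 0.224, 0.225]}
frame = Image.open('frame_0001.jpg')  # hypothetical frame path
clip_tensor = video_transform(input_config, frame)  # normalized 3x224x224 tensor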
Example #2
def prepare_data():
    # split the video list: hold out the first 20% of videos for testing,
    # train on the remainder so the two splits are disjoint
    vnames = get_video_names(cfg.DATASET.VIDEO_LIST)
    num_test = int(0.2 * len(vnames))
    test_split = vnames[:num_test]
    train_split = vnames[num_test:]

    train_transforms = transforms.Compose([
        video_transforms.Resize(cfg.DATA_TRANSFORM.LOADSIZE),
        video_transforms.RandomCrop(cfg.DATA_TRANSFORM.FINESIZE),
        # video_transforms.RandomHorizontalFlip(),  # horizontal flip disabled
    ])

    test_transforms = transforms.Compose([
        video_transforms.Resize(cfg.DATA_TRANSFORM.LOADSIZE),
        video_transforms.CenterCrop(cfg.DATA_TRANSFORM.FINESIZE)
    ])

    video_root = cfg.DATASET.VIDEO_ROOT
    annot_path = cfg.DATASET.ANNOT_JSON_SAVE_PATH
    train_dataset = Dataset(train_split, video_root, annot_path, train_transforms)
    train_dataloader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=cfg.TRAIN.BATCH_SIZE,
        shuffle=True,
        num_workers=cfg.NUM_WORKERS,
        drop_last=True,
        pin_memory=True)

    test_dataset = Dataset(test_split, video_root, annot_path, test_transforms)
    test_dataloader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=cfg.TEST.BATCH_SIZE,
        shuffle=False,
        num_workers=cfg.NUM_WORKERS,
        drop_last=False,
        pin_memory=True)

    dataloaders = {'train': train_dataloader, 'test': test_dataloader}
    return dataloaders
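
A short usage sketch; `cfg` is assumed to be already populated (e.g., loaded from a config file), which the snippet does not show:

dataloaders = prepare_data()
for batch in dataloaders['train']:
    # what `batch` unpacks to depends on Dataset.__getitem__ (e.g., clips and labels)
    pass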
Example #3
    normalize = transforms.Normalize(mean=input_config['mean'],
                                     std=input_config['std'])
    val_sampler = sampler.RandomSampling(num=args.clip_length,
                                         interval=args.frame_interval,
                                         speed=[1.0, 1.0])
    val_loader = VideoIter(
        video_prefix=os.path.join(data_root, 'raw', 'data'),  # change this path accordingly
        txt_list=os.path.join(data_root, 'raw', 'list_cvt', 'testlist01.txt'),  # change this path accordingly
        sampler=val_sampler,
        force_color=True,
        video_transform=transforms.Compose([
            transforms.Resize((256, 256)),
            transforms.RandomCrop((224, 224)),
            # transforms.CenterCrop((224, 224)), # we did not use center crop in our paper
            # transforms.RandomHorizontalFlip(), # we did not use mirror in our paper
            transforms.ToTensor(),
            normalize,
        ]),
        name='test',
        return_item_subpath=True,
    )

    eval_iter = torch.utils.data.DataLoader(
        val_loader,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=4,  # change this part accordingly
        pin_memory=True)
Example #4
	else:
		sym_net = torch.nn.DataParallel(sym_net)
		criterion = torch.nn.CrossEntropyLoss()

	net = static_model(net=sym_net, criterion=criterion, model_prefix=args.model_prefix)
	net.load_checkpoint(epoch=args.load_epoch)

	# data iterator:
	data_root = "../dataset/{}".format(args.dataset)
	video_location = os.path.join(data_root, 'raw', 'test_data')

	normalize = transforms.Normalize(mean=input_config['mean'], std=input_config['std'])

	val_sampler = sampler.RandomSampling(num=args.clip_length, interval=args.frame_interval, speed=[1.0, 1.0], seed=1)
	val_loader = VideoIter(
		video_prefix=video_location,
		csv_list=os.path.join(data_root, 'raw', 'list_cvt', args.list_file),
		sampler=val_sampler,
		force_color=True,
		video_transform=transforms.Compose([
			transforms.Resize((256, 256)),
			transforms.RandomCrop((224, 224)),
			transforms.ToTensor(),
			normalize,
		]),
		name='predict',
		return_item_subpath=True,
	)

	eval_iter = torch.utils.data.DataLoader(val_loader, batch_size=args.batch_size, shuffle=False, num_workers=args.workers, pin_memory=True)

	# main loop
	net.net.eval()
	sum_batch_elapse = 0.
	softmax = torch.nn.Softmax(dim=1)
	field_names = ['VideoID', 'Video', 'ClassID']
	pred_rows = []
	pred_file = 'track1_pred.csv'

	i_batch = 0
	for datas, targets, video_subpaths in eval_iter:
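
The example ends at the top of the evaluation loop. A minimal sketch of a loop body consistent with the setup above (softmax over class scores, one row per clip, then a CSV dump); the forward call, GPU transfer, and row layout are assumptions, not the original code:

import csv

with torch.no_grad():
    for datas, targets, video_subpaths in eval_iter:
        outputs = net.net(datas.cuda())  # assumes a GPU and that the module returns raw class scores
        preds = softmax(outputs).argmax(dim=1).cpu().tolist()
        for subpath, class_id in zip(video_subpaths, preds):
            pred_rows.append([i_batch, subpath, class_id])  # hypothetical row matching field_names
            i_batch += 1

with open(pred_file, 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(field_names)
    writer.writerows(pred_rows)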