def __init__(self, dataset_folder, spt_path, boxes_file, vid2idx, mode='train',
             get_loader=get_default_video_loader, sample_size=112, classes_idx=None):
    self.dataset_folder = dataset_folder
    self.sample_size = sample_size
    self.boxes_file = boxes_file
    self.vid2idx = vid2idx
    self.mode = mode
    self.data, self.max_frames, self.max_actions = make_dataset_names(
        dataset_folder, spt_path, boxes_file, mode)
    self.loader = get_loader()
    self.classes_idx = classes_idx

    # mean = [112.07945832, 112.87372333, 106.90993363]  # ucf-101 24 classes
    mean = [103.29825354, 104.63845484, 90.79830328]  # jhmdb from .png
    spatial_transform = Compose([Scale(sample_size),  # [Resize(sample_size),
                                 ToTensor(),
                                 Normalize(mean, [1, 1, 1])])
    self.spatial_transform = spatial_transform
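
# Hedged illustration (added, not from this repo): with std = [1, 1, 1] the
# Normalize step above reduces to per-channel mean subtraction; the mean values
# being in the ~90-112 range suggests ToTensor here keeps pixels on the 0-255
# scale (an assumption about this codebase's ToTensor).
import torch

def subtract_channel_mean(clip, mean):
    """clip: float tensor of shape (C, H, W) holding raw 0-255 pixel values."""
    return clip - torch.tensor(mean).view(-1, 1, 1)  # broadcast over H, W

# e.g. subtract_channel_mean(torch.full((3, 112, 112), 255.0),
#                            [103.29825354, 104.63845484, 90.79830328])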
split_txt_path = os.path.abspath(
    os.path.join(root_path, dataset_cfg.dataset.split_txt_path))

### get videos id
actions = dataset_cfg.dataset.classes
cls2idx = {actions[i]: i for i in range(0, len(actions))}
vid2idx, vid_names = get_vid_dict(dataset_frames)

# # get mean
# mean = [112.07945832, 112.87372333, 106.90993363]  # ucf-101 24 classes
mean = [0.5, 0.5, 0.5]
std = [0.5, 0.5, 0.5]

spatial_transform = Compose([
    Scale(sample_size),  # [Resize(sample_size),
    ToTensor(),
    Normalize(mean, std)
])
temporal_transform = LoopPadding(sample_duration)

n_classes = len(actions)

#######################################################
#        Part 1-1 - train TPN - without reg           #
#######################################################
print(' -----------------------------------------------------')
print('|        Part 1-1 - train TPN - without reg           |')
print(' -----------------------------------------------------')

## Define Dataloaders
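
# Hedged sketch (an assumption about what LoopPadding does, based on common
# 3D-CNN data pipelines, not copied from this repo): the temporal transform
# repeats frame indices from the start of the clip until sample_duration
# indices are available, so clips shorter than sample_duration still produce a
# fixed-length input.
class LoopPaddingSketch(object):
    def __init__(self, size):
        self.size = size

    def __call__(self, frame_indices):
        out = list(frame_indices)
        for index in out:
            if len(out) >= self.size:
                break
            out.append(index)
        return out

# e.g. LoopPaddingSketch(8)([1, 2, 3]) -> [1, 2, 3, 1, 2, 3, 1, 2]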
cls2idx = {classes[i]: i for i in range(0, len(classes))}

dataset_folder = '/gpu-data2/sgal/JHMDB-act-detector-frames'
splt_txt_path = '/gpu-data2/sgal/splits'
boxes_file = '/gpu-data2/sgal/poses.json'

sample_size = 112
sample_duration = 16  # len(images)
batch_size = 1
n_threads = 0

mean = [103.29825354, 104.63845484, 90.79830328]  # jhmdb from .png

spatial_transform = Compose([Scale(sample_size),  # [Resize(sample_size),
                             # CenterCrop(sample_size),
                             ToTensor(),
                             Normalize(mean, [1, 1, 1])])
temporal_transform = LoopPadding(sample_duration)

data = Video(dataset_folder, frames_dur=sample_duration,
             spatial_transform=spatial_transform,
             temporal_transform=temporal_transform,
             json_file=boxes_file,
             split_txt_path=splt_txt_path, mode='train',
             classes_idx=cls2idx)
data_loader = torch.utils.data.DataLoader(data, batch_size=batch_size,
                                          shuffle=False, num_workers=n_threads,
                                          pin_memory=True)

# clips, (h,w), gt_tubes, gt_bboxes, n_actions, n_frames = next(data_loader.__iter__())
# for i in data:
#     clips, (h,w), gt_tubes, gt_bboxes, n_actions, n_frames = i
#     # print('gt_bboxes.shape :', gt_bboxes)
#     # print('gt_bboxes.shape :', gt_bboxes.shape)
#     # print('gt_tubes :', gt_tubes)
#     # print('clips.shape :', clips.shape)
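
# Hedged note (added, not part of the repo): batch_size=1 lets DataLoader's
# default collate work even though gt_tubes / gt_bboxes can have a different
# size for every video. For batch_size > 1 one would typically pass a custom
# collate_fn that stacks the fixed-size clips and keeps the variable-size
# annotations in plain lists, e.g. the sketch below (return order assumed from
# the commented-out probe above).
def list_collate(batch):
    clips = torch.stack([item[0] for item in batch])  # clips share one shape
    others = [item[1:] for item in batch]              # per-video annotations
    return clips, others

# data_loader = torch.utils.data.DataLoader(data, batch_size=4, shuffle=True,
#                                           num_workers=n_threads,
#                                           collate_fn=list_collate)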
sample_size = 112
sample_duration = 16  # len(images)
batch_size = 1
n_threads = 0

# # get mean
# mean = [103.75581543, 104.79421473, 91.16894564]  # jhmdb
mean = [103.29825354, 104.63845484, 90.79830328]  # jhmdb from .png

# generate model
last_fc = False

scale_size = [sample_size, sample_size]
spatial_transform = Compose([
    Scale(sample_size),  # [Resize(sample_size),
    ToTensor()
])
temporal_transform = LoopPadding(sample_duration)

spatial_transform2 = Compose([  # [Resize(sample_size),
    ToTensor()
])

## UCF code
dataset_folder = '/gpu-data/sgal/UCF-101-frames'
boxes_file = './pyannot.pkl'

actions = ['Basketball', 'BasketballDunk', 'Biking', 'CliffDiving', 'CricketBowling',
           'Diving', 'Fencing', 'FloorGymnastics', 'GolfSwing', 'HorseRiding',
           'IceDancing', 'LongJump', 'PoleVault', 'RopeClimbing', 'SalsaSpin',
           'SkateBoarding', 'Skiing', 'Skijet', 'SoccerJuggling',