def __init__(self, dataset_folder, spt_path, boxes_file, vid2idx, mode='train', get_loader=get_default_video_loader, sample_size=112, classes_idx=None):
    """Index the requested split and prepare the fixed spatial transform.

    Args:
        dataset_folder: root directory holding the extracted video frames.
        spt_path: split-description path forwarded to make_dataset_names.
        boxes_file: annotation file with per-action bounding boxes.
        vid2idx: mapping from video name to integer id.
        mode: split selector (default 'train'), forwarded to make_dataset_names.
        get_loader: factory returning the frame-loader callable.
        sample_size: spatial side length frames are scaled to.
        classes_idx: optional class-name -> label mapping (stored as-is).
    """
    # Plain bookkeeping attributes.
    self.dataset_folder = dataset_folder
    self.sample_size = sample_size
    self.boxes_file = boxes_file
    self.vid2idx = vid2idx
    self.mode = mode
    self.classes_idx = classes_idx

    # Build the sample index for this split once, up front.
    self.data, self.max_frames, self.max_actions = make_dataset_names(
        dataset_folder, spt_path, boxes_file, mode)
    self.loader = get_loader()

    # Per-channel mean; std is left at 1 for every channel.
    # mean = [112.07945832, 112.87372333, 106.90993363] # ucf-101 24 classes
    jhmdb_mean = [103.29825354, 104.63845484, 90.79830328]  # jhmdb from .png
    self.spatial_transform = Compose([
        Scale(sample_size),  # [Resize(sample_size),
        ToTensor(),
        Normalize(jhmdb_mean, [1, 1, 1]),
    ])
# NOTE(review): this chunk begins mid-statement — the fragment on the next code
# line is the closing tail of an assignment whose opening call (presumably
# something like `boxes_file = os.path.abspath(`) lies above the visible
# region. It is left byte-identical rather than guessed at.
                os.path.join(root_path, dataset_cfg.dataset.boxes_file))
# Resolve the train/test split file relative to the project root.
split_txt_path = os.path.abspath(
    os.path.join(root_path, dataset_cfg.dataset.split_txt_path))

### get videos id
actions = dataset_cfg.dataset.classes
# Map each action-class name to its integer label (position in the list).
cls2idx = {actions[i]: i for i in range(0, len(actions))}
# vid2idx: video name -> integer id; vid_names: the name list itself.
vid2idx, vid_names = get_vid_dict(dataset_frames)

# # get mean
# mean = [112.07945832, 112.87372333, 106.90993363] # ucf-101 24 classes
# Fixed normalization constants (not dataset statistics).
mean = [0.5, 0.5, 0.5]
std = [0.5, 0.5, 0.5]
spatial_transform = Compose([
    Scale(sample_size),  # [Resize(sample_size),
    ToTensor(),
    Normalize(mean, std)
])
# Pad clips shorter than sample_duration by looping their frames.
temporal_transform = LoopPadding(sample_duration)
n_classes = len(actions)

#######################################################
# Part 1-1 - train nTPN - without reg #
#######################################################
print(' -----------------------------------------------------')
print('| Part 1-1 - train TPN - without reg |')
print(' -----------------------------------------------------')
def get_default_video_loader():
    """Return a video-loading callable pre-bound to the default image loader."""
    return functools.partial(video_loader,
                             image_loader=get_default_image_loader())


if __name__ == '__main__':
    dataset_path = '/gpu-data2/sgal/UCF-101-frames'
    output_path = '/gpu-data2/sgal/UCF-101-pickle'

    # Immediate subdirectories of the dataset root are the class folders.
    classes = next(os.walk(dataset_path, True))[1]
    loader = get_default_video_loader()

    sample_size = 112
    mean = [112.07945832, 112.87372333, 106.90993363] # ucf-101 24 classes
    spatial_transform = Compose([Scale(sample_size), # [Resize(sample_size),
                                 ToTensor(),
                                 Normalize(mean, [1, 1, 1])])

    # Mirror the class/video directory layout under the output root.
    for cls in classes:
        class_dst = os.path.join(output_path, cls)
        videos = next(os.walk(os.path.join(dataset_path, cls), True))[1]
        if not os.path.exists(class_dst):
            os.mkdir(class_dst)
        for vid in videos:
            video_path = os.path.join(cls, vid)
            path = os.path.join(dataset_path, video_path)
            print(path)
            video_dst = os.path.join(output_path, cls, vid)
            if not os.path.exists(video_dst):
                os.mkdir(video_dst)
from utils.get_dataset_mean import get_dataset_mean_and_std
from utils.create_video_id import get_vid_dict

# Smoke-test driver: build the transform pipeline, instantiate
# Video_Dataset_small_clip, and fetch a single item.
np.random.seed(42)

dataset_folder = '../../dataset_frames'
boxes_file = '../../dataset_actions_annots.json'
split_txt = '../../00sequences.txt'
sample_size = 112

vid2idx, vid_names = get_vid_dict(dataset_folder)

scale = 1
# NOTE(review): rev_scale is unused in this script — presumably kept for
# parity with other drivers; confirm before deleting.
rev_scale = 255 if scale == 1 else 1

# FIX: the dataset-wide statistics returned here were immediately overwritten
# by the hard-coded (0.5, 0.5, 0.5) values below, so the (expensive) full pass
# over the dataset was pure wasted work — the call is disabled. Re-enable it
# (and drop the hard-coded values) if real statistics are wanted. Note the
# original also passed the literal `scale = 1` instead of the `scale` variable.
# mean, std = get_dataset_mean_and_std('kth', scale=scale)
std = (0.5, 0.5, 0.5)
mean = (0.5, 0.5, 0.5)
print(f'mean {mean}, {std} std')

spatial_transform = Compose([Scale(sample_size),
                             ToTensor(),
                             Normalize(mean, std)
                             ])

print(f'boxes_file {boxes_file}')
data = Video_Dataset_small_clip(video_path=dataset_folder, bboxes_file=boxes_file,
                                split_txt_path=split_txt,
                                spatial_transform=spatial_transform, scale=scale)
# data = Video_Dataset_whole_video(video_path=dataset_folder, bboxes_file=boxes_file,
#                                  split_txt_path=split_txt, spatial_transform=spatial_transform,
#                                  vid2idx=vid2idx
#                                  )

# Fetch one sample to exercise the __getitem__ path end-to-end.
ret = data[5]
print(f'ret : {ret}')