# Example #1 — extraction fragment marker (non-code residue from the source dump)
    def __init__(self, dataset_folder, spt_path,  boxes_file, vid2idx, mode='train',get_loader=get_default_video_loader,
                 sample_size=112,  classes_idx=None):
        """Index the dataset and prepare the frame loader and spatial transform.

        Args:
            dataset_folder: root directory holding the extracted video frames.
            spt_path: path to the split file consumed by ``make_dataset_names``.
            boxes_file: path to the bounding-box annotation file.
            vid2idx: mapping from video name to integer id.
            mode: dataset split selector (default ``'train'``).
            get_loader: factory returning the per-video frame loader.
            sample_size: target spatial size fed to ``Scale``.
            classes_idx: optional mapping from class name to index.
        """
        # Plain copies of the constructor arguments.
        self.dataset_folder = dataset_folder
        self.boxes_file = boxes_file
        self.vid2idx = vid2idx
        self.mode = mode
        self.sample_size = sample_size
        self.classes_idx = classes_idx

        # Scan split + annotation files once, up front; also records the
        # longest clip and the maximum number of actions per video.
        self.data, self.max_frames, self.max_actions = make_dataset_names(
            dataset_folder, spt_path, boxes_file, mode)
        self.loader = get_loader()

        # Per-channel mean of the JHMDB frames decoded from .png; the std
        # is deliberately left at 1 so only the mean is subtracted.
        jhmdb_mean = [103.29825354, 104.63845484, 90.79830328]
        self.spatial_transform = Compose([
            Scale(sample_size),
            ToTensor(),
            Normalize(jhmdb_mean, [1, 1, 1]),
        ])
# NOTE(review): the next line is the tail of a truncated statement — its
# opening call (presumably something like `boxes_file = os.path.abspath(`)
# lies outside this chunk, so the statement is incomplete as seen here.
os.path.join(root_path, dataset_cfg.dataset.boxes_file))
    # Resolve the train/test split file relative to the project root.
    split_txt_path = os.path.abspath(
        os.path.join(root_path, dataset_cfg.dataset.split_txt_path))

    ### get videos id
    # Build class-name -> index and video-name -> index lookup tables.
    actions = dataset_cfg.dataset.classes
    cls2idx = {actions[i]: i for i in range(0, len(actions))}
    vid2idx, vid_names = get_vid_dict(dataset_frames)

    # # get mean
    # mean = [112.07945832, 112.87372333, 106.90993363]  # ucf-101 24 classes
    # Fixed 0.5 mean/std normalisation (maps inputs roughly to [-1, 1]),
    # used here instead of the dataset-specific mean above.
    mean = [0.5, 0.5, 0.5]
    std = [0.5, 0.5, 0.5]

    # Per-frame preprocessing: resize, convert to tensor, normalise.
    spatial_transform = Compose([
        Scale(sample_size),  # [Resize(sample_size),
        ToTensor(),
        Normalize(mean, std)
    ])
    # Pad clips shorter than sample_duration by looping their frames.
    temporal_transform = LoopPadding(sample_duration)

    n_classes = len(actions)

    #######################################################
    #          Part 1-1 - train TPN - without reg         #
    #######################################################

    print(' -----------------------------------------------------')
    print('|          Part 1-1 - train TPN - without reg         |')
    print(' -----------------------------------------------------')
# Example #3 — extraction fragment marker (non-code residue from the source dump)
def get_default_video_loader():
    """Return ``video_loader`` pre-bound to the default image loader."""
    return functools.partial(video_loader,
                             image_loader=get_default_image_loader())

if __name__ == '__main__':
    # NOTE(review): this __main__ section appears to be two separate example
    # scripts fused together: (1) up to the first for-loop, a UCF-101
    # directory-mirroring script, and (2) from the `from utils...` imports
    # onward, an unrelated dataset smoke test — confirm against upstream.

    # Hard-coded, machine-specific paths for the UCF-101 frame dump.
    dataset_path = '/gpu-data2/sgal/UCF-101-frames'
    output_path =  '/gpu-data2/sgal/UCF-101-pickle'

    # Class names are the immediate subdirectories of the frames root.
    classes = next(os.walk(dataset_path, True))[1]
    loader = get_default_video_loader()

    sample_size = 112
    mean = [112.07945832, 112.87372333, 106.90993363]  # ucf-101 24 classes

    # NOTE(review): neither `loader` nor `spatial_transform` is used below
    # in this visible chunk — presumably leftovers from a larger script.
    spatial_transform = Compose([Scale(sample_size),  # [Resize(sample_size),
                                 ToTensor(),
                                 Normalize(mean, [1, 1, 1])])

    # Mirror the <class>/<video> directory tree under output_path.
    for cls in classes:

        videos = next(os.walk(os.path.join(dataset_path, cls), True))[1]

        if not os.path.exists(os.path.join(output_path, cls)):
            os.mkdir(os.path.join(output_path, cls))
        for vid in videos:
            video_path = os.path.join(cls,vid)
            path = os.path.join(dataset_path,video_path)
            print(path)            
            if not os.path.exists(os.path.join(output_path,cls,vid)):
                os.mkdir(os.path.join(output_path,cls,vid))
    # --- second fused script: dataset smoke test (KTH paths below) ---
    from utils.get_dataset_mean import get_dataset_mean_and_std
    from utils.create_video_id import get_vid_dict
    # Fixed seed for reproducible sampling inside the dataset.
    np.random.seed(42)
    dataset_folder = '../../dataset_frames'
    boxes_file = '../../dataset_actions_annots.json'
    split_txt = '../../00sequences.txt'

    sample_size = 112
    vid2idx, vid_names = get_vid_dict(dataset_folder)
    scale = 1
    # NOTE(review): `rev_scale` is never used in this visible chunk.
    rev_scale = 255 if scale == 1 else 1
    mean, std = get_dataset_mean_and_std('kth', scale = 1)

    # NOTE(review): the computed KTH mean/std above are immediately
    # overwritten with fixed 0.5 values here — likely a debugging override.
    std = (0.5,0.5,0.5)
    mean = (0.5,0.5,0.5)
    print(f'mean {mean}, {std} std')
    spatial_transform = Compose([Scale(sample_size), ToTensor(),
                                 Normalize(mean, std)
                                 ])
    print(f'boxes_file {boxes_file}')
    data = Video_Dataset_small_clip(video_path=dataset_folder, bboxes_file=boxes_file,
                                    split_txt_path=split_txt, spatial_transform=spatial_transform, scale=scale)


    # data = Video_Dataset_whole_video(video_path=dataset_folder, bboxes_file=boxes_file,
    #                                  split_txt_path=split_txt, spatial_transform=spatial_transform,
    #                                  vid2idx=vid2idx
    #                                  )

    # Fetch one sample as a smoke test of the dataset pipeline.
    ret = data[5]
    print(f'ret : {ret}')