コード例 #1
0
    def __init__(self, opt):
        """Initialize this dataset class.

        Parameters:
            opt (Option class) -- stores all the experiment flags; needs to be a subclass of BaseOptions
        """
        BaseDataset.__init__(self, opt)
        # Domain directories: <dataroot>/<phase>A and <dataroot>/<phase>B
        # (e.g. '/path/to/data/trainA' and '/path/to/data/trainB').
        self.dir_A = os.path.join(opt.dataroot, opt.phase + 'A')
        self.dir_B = os.path.join(opt.dataroot, opt.phase + 'B')

        # Discover the class sub-folders of each domain directory.
        self.classes_A, self.class_to_idx_A = self._find_classes(self.dir_A)
        self.classes_B, self.class_to_idx_B = self._find_classes(self.dir_B)
        # make_dataset returns a list of (sample path, class index) tuples.
        samples_A = make_dataset(self.dir_A, self.class_to_idx_A, extensions=self.img_extension, is_valid_file=None)
        samples_B = make_dataset(self.dir_B, self.class_to_idx_B, extensions=self.img_extension, is_valid_file=None)
        self.A_paths = [path for path, _ in samples_A]
        self.B_paths = [path for path, _ in samples_B]
        self.A_targets = [target for _, target in samples_A]
        self.B_targets = [target for _, target in samples_B]
        self.A_size = len(self.A_paths)  # dataset A size
        self.B_size = len(self.B_paths)  # dataset B size
        # When translating B -> A, input/output channel counts swap roles.
        flipped = self.opt.direction == 'BtoA'
        input_nc = self.opt.output_nc if flipped else self.opt.input_nc
        output_nc = self.opt.input_nc if flipped else self.opt.output_nc
        self.transform_A = get_transform(self.opt, grayscale=(input_nc == 1))
        self.transform_B = get_transform(self.opt, grayscale=(output_nc == 1))
コード例 #2
0
ファイル: DataTools.py プロジェクト: laurentperrinet/SPC_2L
    def __init__(self,
                 root,
                 transform=None,
                 target_transform=None,
                 loader=default_loader,
                 retun_idx=False):
        """Image-folder dataset with optional index return.

        Args:
            root: dataset root; one class per sub-directory.
            transform / target_transform: applied to sample / target.
            loader: callable that opens an image given its path.
            retun_idx: (sic — name kept for interface compatibility)
                presumably makes __getitem__ also return the sample
                index; confirm in the class body.
        """
        classes, class_to_idx = find_classes(root)
        IMG_EXTENSIONS = [
            '.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif'
        ]

        # Newer torchvision make_dataset accepts an extensions argument;
        # fall back to the legacy two-argument signature otherwise.
        # BUG FIX: the bare `except:` also swallowed KeyboardInterrupt and
        # genuine scan errors; only a signature mismatch (TypeError) should
        # trigger the fallback.
        try:
            imgs = make_dataset(root, class_to_idx, IMG_EXTENSIONS)
        except TypeError:
            imgs = make_dataset(root, class_to_idx)

        if len(imgs) == 0:
            raise (RuntimeError("Found 0 images in subfolders of: " + root +
                                "\n"
                                "Supported image extensions are: " +
                                ",".join(IMG_EXTENSIONS)))

        self.root = root
        self.imgs = imgs
        self.classes = classes
        self.class_to_idx = class_to_idx
        self.transform = transform
        self.target_transform = target_transform
        self.loader = loader
        self.retun_idx = retun_idx
コード例 #3
0
ファイル: imagenet.py プロジェクト: sinnr1992/curvature
def imagenet(root: str,
             img_size: int = 224,
             batch_size: int = 32,
             augment: bool = True,
             shuffle: bool = True,
             workers: int = 6,
             splits: Union[str, Tuple[str, str], Tuple[str, str, str]] = ('train', 'val'),
             seed: int = 42):
    """Build ImageNet DataLoaders for the requested splits.

    Expects the usual layout <root>/train and <root>/val with one class
    per sub-directory. The 'val' directory is shuffled with `seed` and
    split 50/50 into val and test halves.

    Returns a single DataLoader when one split is requested, otherwise a
    list of loaders in (train, val, test) order.
    """
    train_dir = os.path.join(root, 'train')
    val_test_dir = os.path.join(root, 'val')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

    # Deterministic eval pipeline: resize to 8/7 of the crop, center-crop.
    val_transform_list = [transforms.Resize(int(img_size * 8 / 7)),
                          transforms.CenterCrop(img_size),
                          transforms.ToTensor(),
                          normalize]
    val_transform = transforms.Compose(val_transform_list)
    val_mapper = train_mapper = DatasetMapper(val_transform)

    if augment:
        train_transform_list = [transforms.RandomResizedCrop(img_size),
                                transforms.RandomHorizontalFlip(),
                                transforms.ToTensor(),
                                normalize]
        train_transform = transforms.Compose(train_transform_list)
        train_mapper = DatasetMapper(train_transform)

    loader_list = list()
    if "train" in splits:
        classes, class_to_idx = find_classes(train_dir)
        dataset = make_dataset(train_dir, class_to_idx, IMG_EXTENSIONS)
        dataset = DatasetFromList(dataset)
        dataset = MapDataset(dataset, train_mapper)
        loader_list.append(data.DataLoader(dataset, batch_size, shuffle=shuffle, num_workers=workers, pin_memory=True,
                                           worker_init_fn=worker_init_reset_seed))
    # BUG FIX: `if "val" or "test" in splits:` was always true because the
    # non-empty literal "val" is truthy; test membership for each name.
    if "val" in splits or "test" in splits:
        classes, class_to_idx = find_classes(val_test_dir)
        val_test_set = make_dataset(val_test_dir, class_to_idx, IMG_EXTENSIONS)

        # Reproducible 50/50 split of the validation directory.
        random.seed(seed)
        random.shuffle(val_test_set)
        val_set = val_test_set[:int(round(len(val_test_set) / 2))]
        test_set = val_test_set[int(round(len(val_test_set) / 2)):]

        if "val" in splits:
            val_set = DatasetFromList(val_set)
            val_set = MapDataset(val_set, val_mapper)
            loader_list.append(data.DataLoader(val_set, batch_size, num_workers=workers, pin_memory=True))
        if "test" in splits:
            test_set = DatasetFromList(test_set)
            test_set = MapDataset(test_set, val_mapper)
            loader_list.append(data.DataLoader(test_set, batch_size, num_workers=workers, pin_memory=True))

    if len(loader_list) == 1:
        return loader_list[0]
    return loader_list
コード例 #4
0
 def __init__(self, root_path, train_dir, valid_dir):
     """Index train and valid splits under root_path into one sample list.

     NOTE(review): train_dir / valid_dir are accepted but the hard-coded
     'train' / 'valid' sub-directories are used instead — confirm intent.
     """
     self.classes, self.class_to_idx = find_classes(root_path / 'train')
     per_split = [
         make_dataset(root_path / split,
                      self.class_to_idx,
                      extensions=IMG_EXTENSIONS)
         for split in ('train', 'valid')
     ]
     # Training samples first, then validation samples.
     self.samples = per_split[0] + per_split[1]
コード例 #5
0
 def __init__(self, image_path, label_path, transform, image_extensions):
     """Paired image/label folder dataset sharing one class index."""
     super(LoadDataset, self).__init__(image_path, transform=transform,
                                         target_transform=None)
     self.image_path = image_path
     self.label_path = label_path
     self.transform = transform
     self.image_extensions = image_extensions
     ##
     # The class index is derived from the image tree and reused for the
     # label tree, so both must share the same sub-directory names.
     classes, class_to_idx = self._find_classes(self.image_path)
     self.image_files = make_dataset(self.image_path, class_to_idx,
                                     extensions=self.image_extensions,
                                     is_valid_file=None)
     self.label_files = make_dataset(self.label_path, class_to_idx,
                                     extensions=self.image_extensions,
                                     is_valid_file=None)
コード例 #6
0
def test_make_dataset_no_valid_files(tmpdir, kwargs, expected_error_msg):
    """make_dataset must raise FileNotFoundError when no file qualifies."""
    tmpdir = pathlib.Path(tmpdir)

    # One class folder per file: two standard image types plus an unknown one.
    for folder, filename in (("a", "a.png"), ("b", "b.jpeg"), ("c", "c.unknown")):
        (tmpdir / folder).mkdir()
        (tmpdir / folder / filename).touch()

    with pytest.raises(FileNotFoundError, match=expected_error_msg):
        make_dataset(str(tmpdir), **kwargs)
コード例 #7
0
ファイル: kinetics.py プロジェクト: yyht/videowalk
    def __init__(self,
                 root,
                 frames_per_clip,
                 step_between_clips=1,
                 frame_rate=None,
                 extensions=('mp4', ),
                 transform=None,
                 cached=None,
                 _precomputed_metadata=None):
        """Kinetics-400 style video dataset: one class per sub-directory.

        NOTE(review): `cached` is accepted but never referenced in this
        constructor — confirm whether any caller relies on it.
        """
        super(Kinetics400, self).__init__(root)
        # BUG FIX: removed the no-op self-assignment `extensions = extensions`.

        # One class per sub-directory of root, indexed alphabetically.
        classes = list(sorted(list_dir(root)))
        class_to_idx = {classes[i]: i for i in range(len(classes))}

        self.samples = make_dataset(self.root,
                                    class_to_idx,
                                    extensions,
                                    is_valid_file=None)
        self.classes = classes
        video_list = [x[0] for x in self.samples]
        # Index every fixed-length clip across the collected videos.
        self.video_clips = VideoClips(
            video_list,
            frames_per_clip,
            step_between_clips,
            frame_rate,
            _precomputed_metadata,
        )
        self.transform = transform
コード例 #8
0
ファイル: multi_imagefolder.py プロジェクト: flavda/datasets
    def __init__(self,
                 roots,
                 transform=None,
                 target_transform=None,
                 loader=default_loader):
        """Hold several image folders side by side, one sample list per root.

        All roots must contain the same number of images.
        """
        assert isinstance(roots, (tuple, list))
        self.classes_list = []
        self.class_to_idx_list = []
        self.imgs_list = []
        for root in roots:
            classes, class_to_idx = find_classes(root)
            imgs = make_dataset(root, class_to_idx)
            if not imgs:
                raise (RuntimeError("Found 0 images in subfolders of: " +
                                    root + "\n"
                                    "Supported image extensions are: " +
                                    ",".join(IMG_EXTENSIONS)))
            # Accumulate per-root metadata in parallel lists.
            self.classes_list.append(classes)
            self.class_to_idx_list.append(class_to_idx)
            self.imgs_list.append(imgs)

        # Sanity check: every root must contribute the same sample count.
        expected = len(self.imgs_list[0])
        for imgs in self.imgs_list:
            assert len(imgs) == expected

        self.roots = roots
        self.transform = transform
        self.target_transform = target_transform
        self.loader = loader
コード例 #9
0
    def __init__(self,
                 root_dir,
                 extensions,
                 transforms=None,
                 target_transforms=None,
                 test=False):
        """Checkerboard dataset: one class per sub-directory of root_dir.

        NOTE(review): `test` is accepted but never used here, and
        `dataset_labels` maps os.listdir entries to 0..999 in listing
        order (not sorted) — confirm both against callers.
        """
        super(CheckerboardDataset, self).__init__(root_dir, transforms,
                                                  target_transforms)

        file_name_list = os.listdir(root_dir)
        labels_list = range(1000)
        self.dataset_labels = dict(zip(file_name_list, labels_list))
        self.root_dir = root_dir
        self.transforms = transforms
        self.target_transforms = target_transforms
        self.extensions = extensions
        classes, class_to_idx = self._find_classes(self.root_dir)
        samples = make_dataset(root_dir,
                               class_to_idx,
                               self.extensions,
                               is_valid_file=None)
        # BUG FIX: removed leftover debug `print(samples)`, which dumped the
        # entire sample list to stdout on every construction.
        self.classes = classes
        self.class_to_idx = class_to_idx
        self.samples = samples
        self.targets = [s[1] for s in samples]
コード例 #10
0
    def __init__(self,
                 root,
                 loader,
                 extensions=None,
                 transform=None,
                 target_transform=None,
                 is_valid_file=None):
        """Generic folder dataset: one class per sub-directory of root."""
        super(DatasetFolder, self).__init__(root,
                                            transform=transform,
                                            target_transform=target_transform)
        classes, class_to_idx = self._find_classes(self.root)
        samples = make_dataset(self.root, class_to_idx, extensions,
                               is_valid_file)
        if not samples:
            raise (RuntimeError("Found 0 files in subfolders of: " +
                                self.root + "\n"
                                "Supported extensions are: " +
                                ",".join(extensions)))

        self.loader = loader
        self.extensions = extensions

        self.classes = classes
        self.class_to_idx = class_to_idx
        self.samples = samples
        # Per-sample class indices, aligned one-to-one with self.samples.
        self.targets = [label for _, label in samples]
コード例 #11
0
    def __init__(self, root, train=True, transform=None, download=True):
        """Folder dataset with a fixed per-class 80/20 train/test split."""
        self.root = root
        root = os.path.join(root, self.foldername)

        if download:
            self.download()

        classes, class_to_idx = self._find_classes(root)
        samples = make_dataset(root, class_to_idx, IMG_EXTENSIONS)

        # Bucket the file paths by class index.
        by_class = defaultdict(list)
        for path, target in samples:
            by_class[target].append(path)

        # The first 80% of each class's files form the train split,
        # the remainder the test split.
        newdatapaths = []
        labels = []
        for target, paths in by_class.items():
            cut = int(0.8 * len(paths))
            for path in (paths[:cut] if train else paths[cut:]):
                newdatapaths.append(path)
                labels.append(target)

        self.train = train
        self.transform = transform
        self.labels = labels
        self.datapaths = newdatapaths
        self.cache = {}
コード例 #12
0
    def __init__(self,
                 root_list,
                 transform=None,
                 target_transform=None,
                 loader=default_loader):
        """ImageFolder over a list of roots; class indices are offset per root
        so every root's classes occupy a distinct index range.
        """
        if not isinstance(root_list, (list, tuple)):
            # BUG FIX: the message previously formatted the undefined name
            # `dataset_list`, so this path raised NameError instead of the
            # intended RuntimeError.
            raise RuntimeError(
                "dataset_list should be a list of strings, got {}".format(
                    root_list))

        super(ImageFolderList,
              self).__init__(root_list[0],
                             loader,
                             IMG_EXTENSIONS,
                             transform=transform,
                             target_transform=target_transform)
        if len(root_list) > 1:
            for root in root_list[1:]:
                classes, class_to_idx = self._find_classes(root)
                # Shift this root's class indices past all classes seen so far.
                for k in class_to_idx.keys():
                    class_to_idx[k] += len(self.classes)
                samples = make_dataset(root, class_to_idx, IMG_EXTENSIONS)
                self.classes += classes
                self.class_to_idx.update(class_to_idx)
                self.samples += samples
        self.targets = [s[1] for s in self.samples]
        self.imgs = self.samples
コード例 #13
0
ファイル: dataset.py プロジェクト: keng000/lagavulin
def train_test_split_for_dir(root_path: Path,
                             test_size: float,
                             random_state: int = 42):
    """Split an ImageFolder-style directory tree into train/val copies.

    The dataset rooted at ``root_path`` (torchvision.datasets.ImageFolder
    layout) is split into train / test subsets, and each subset is copied
    next to ``root_path`` as a ``train/`` and ``val/`` directory.

    TODO: consider having this function build the train/test ImageFolder
    objects directly instead.

    Raises:
        FileNotFoundError: if ``root_path`` does not exist.
        ValueError: if ``test_size`` is outside [0, 1].
    """
    if not root_path.exists():
        # Include the offending path so the failure is actionable.
        raise FileNotFoundError(f"root_path does not exist: {root_path}")
    elif not (0 <= test_size <= 1.0):
        raise ValueError(f"test_size must be within [0, 1], got {test_size}")

    classes, class_to_idx = find_classes(root_path)
    dataset = make_dataset(root_path, class_to_idx, IMG_EXTENSIONS)
    train, val = train_test_split(dataset,
                                  test_size=test_size,
                                  shuffle=True,
                                  random_state=random_state)

    split_dataset = {'train': train, 'val': val}

    # Copy every file into <parent>/<split>/<class index>/.
    dst_path_root = root_path.parent
    for set_ in ['train', 'val']:
        for file_path, class_ in tqdm(split_dataset[set_], desc=set_):
            file_path = Path(file_path)
            dst_dir = dst_path_root / set_ / str(class_)
            dst_dir.mkdir(exist_ok=True, parents=True)
            shutil.copy(file_path, dst_dir / file_path.name)
コード例 #14
0
def read_all_images(root, num_workers=4):
    """Load every image under root and return a {path: image} cache."""
    classes, class_to_idx = find_classes(root)
    dataset = make_dataset(root, class_to_idx)
    if len(dataset) == 0:
        raise (RuntimeError("Found 0 images in subfolders of: " + root + "\n" +
                            "Supported image extensions are: " +
                            ",".join(IMG_EXTENSIONS)))

    paths = [path for path, _ in dataset]
    num_images = len(paths)

    print("Reading {0} images with {1} workers".format(num_images,
                                                       num_workers))
    if num_workers > 1:
        # Decode in parallel across worker processes.
        images = parallel_process(paths,
                                  read_image_for_pytorch,
                                  n_jobs=num_workers)
    else:
        images = [read_image_for_pytorch(p) for p in tqdm(paths)]

    # Map each sample path to its decoded image.
    image_cache = {}
    for (path, _), image in zip(dataset, images):
        image_cache[path] = image
    return image_cache
コード例 #15
0
    def __init__(
        self, *roots, transforms=None, target_transforms=None, loader=default_loader
    ):
        """Parallel dataset over several roots.

        Every root must yield the same number of samples and classes; the
        per-root class lists, index maps, and sample lists are kept side
        by side in parallel lists.
        """
        classes_ = []
        class_to_idx_ = []
        samples_ = []
        for root in roots:
            classes, class_to_idx = find_classes(root)
            samples = make_dataset(root, class_to_idx, IMG_EXTENSIONS)
            if len(samples) == 0:
                raise (
                    RuntimeError(
                        "Found 0 files in subfolders of: " + root + "\n"
                        "Supported extensions are: " + ",".join(IMG_EXTENSIONS)
                    )
                )
            classes_.append(classes)
            # BUG FIX: previously appended the accumulator list itself
            # (`class_to_idx_`) instead of this root's mapping, producing a
            # self-referential list and losing every class_to_idx dict.
            class_to_idx_.append(class_to_idx)
            samples_.append(samples)
            if len(samples_[0]) != len(samples):
                raise ValueError(
                    "Dataset folders must have the same number of samples."
                )
            if len(classes_[0]) != len(classes):
                raise ValueError(
                    "Dataset folders must have the same number of classes."
                )
        super().__init__(roots, samples_, transforms, target_transforms)

        self.loader = loader
        self.extensions = IMG_EXTENSIONS

        self.classes = classes_
        self.class_to_idx = class_to_idx_
コード例 #16
0
    def __init__(self,
                 root,
                 transform=None,
                 target_transform=None,
                 loader=default_loader,
                 is_valid_file=None,
                 valid_classes=None):
        """DatasetFolder optionally restricted to ``valid_classes``.

        When ``is_valid_file`` is supplied, extension filtering is disabled
        (make_dataset treats the two as mutually exclusive).
        """
        super(DatasetFolder, self).__init__(root,
                                            transform=transform,
                                            target_transform=target_transform)
        classes, class_to_idx = self._find_classes(self.root,
                                                   valid_classes=valid_classes)
        extensions = torchvision.datasets.folder.IMG_EXTENSIONS if is_valid_file is None else None
        samples = make_dataset(self.root, class_to_idx, extensions,
                               is_valid_file)
        if len(samples) == 0:
            # BUG FIX: when is_valid_file was given, extensions is None and
            # ",".join(extensions) raised TypeError, masking the intended
            # RuntimeError; only mention extensions when they exist.
            msg = "Found 0 files in subfolders of: " + self.root + "\n"
            if extensions is not None:
                msg += "Supported extensions are: " + ",".join(extensions)
            raise RuntimeError(msg)

        self.loader = loader
        self.extensions = extensions

        self.classes = classes
        self.class_to_idx = class_to_idx
        self.samples = samples
        self.targets = [s[1] for s in samples]

        self.imgs = self.samples
コード例 #17
0
ファイル: data.py プロジェクト: tanjasper/SecureVision
    def __init__(self, roots, loader, extensions, transform=None, target_transform=None):
        """Dataset over several roots; each sample's target is its root index.

        Samples from all roots are concatenated; targets mark which root a
        sample came from (not its class).
        """
        samples = []
        root_lengths = []

        for i in range(len(roots)):
            root = roots[i]
            classes, class_to_idx = self._find_classes(root)
            temp_samples = make_dataset(root, class_to_idx, extensions)  # should be the path names of the images
            samples = samples + temp_samples
            # BUG FIX: record the per-root count, not the cumulative count;
            # with cumulative counts the targets list built below no longer
            # lined up one-to-one with self.samples.
            root_lengths.append(len(temp_samples))
            if len(samples) == 0:
                raise(RuntimeError("Found 0 files in subfolders of: " + root + "\n"
                                   "Supported extensions are: " + ",".join(extensions)))

        self.root = root
        self.loader = loader
        self.extensions = extensions

        self.classes = classes
        self.class_to_idx = class_to_idx
        self.samples = samples
        #self.targets = [s[1] for s in samples]
        self.targets = [i for (i, s) in enumerate(root_lengths) for a in range(s)]
        # Above line makes a list of idxs for each image where the idx is the root idx
        # s is the root_length, i is the root_idx. For each s, repeat idx i s times.

        self.transform = transform
        self.target_transform = target_transform
コード例 #18
0
    def from_directory(cls, dir_path: str) -> LabeledVideoPaths:
        """
        Factory function that creates a LabeledVideoPaths object by parsing the structure
        of the given directory's subdirectories into the classification labels. It
        expects the directory format to be the following:
             dir_path/<class_name>/<video_name>.mp4

        Classes are indexed from 0 to the number of classes, alphabetically.

        E.g.
            dir_path/class_x/xxx.ext
            dir_path/class_x/xxy.ext
            dir_path/class_x/xxz.ext
            dir_path/class_y/123.ext
            dir_path/class_y/nsdf3.ext
            dir_path/class_y/asd932_.ext

        Would produce two classes labeled 0 and 1 with 3 videos paths associated with each.

        Args:
            dir_path (str): Root directory to the video class directories .
        """
        assert g_pathmgr.exists(dir_path), f"{dir_path} not found."

        # Find all classes based on directory names. These classes are then sorted and indexed
        # from 0 to the number of classes.
        # NOTE(review): iterdir() yields pathlib.Path objects and `.name` is
        # never taken, so class_to_idx is keyed by Path, not str. Confirm
        # that the make_dataset in scope accepts Path keys (torchvision's
        # joins each key onto the directory, which tolerates path-likes).
        classes = sorted(
            (f for f in pathlib.Path(dir_path).iterdir() if f.is_dir()))
        class_to_idx = {classes[i]: i for i in range(len(classes))}
        # Only mp4/avi files are indexed; other extensions are skipped.
        video_paths_and_label = make_dataset(dir_path,
                                             class_to_idx,
                                             extensions=("mp4", "avi"))
        assert (len(video_paths_and_label) >
                0), f"Failed to load dataset from {dir_path}."
        return cls(video_paths_and_label)
コード例 #19
0
    def load_data(
            self,
            data: Union[str, Tuple[List[str], List[Any]]],
            dataset: Optional[Any] = None) -> Sequence[Mapping[str, Any]]:
        """Build {INPUT, TARGET} records from a class-folder directory, or
        filter inherited records down to files with allowed extensions."""
        if self.isdir(data):
            classes, class_to_idx = self.find_classes(data)
            if not classes:
                # No class sub-folders: treat the directory as unlabeled.
                return self.predict_load_data(data)
            self.set_state(LabelsState(classes))

            if dataset is not None:
                dataset.num_classes = len(classes)

            samples = make_dataset(data, class_to_idx, extensions=self.extensions)
            return [{
                DefaultDataKeys.INPUT: input,
                DefaultDataKeys.TARGET: target
            } for input, target in samples]

        kept = filter(
            lambda sample: has_file_allowed_extension(
                sample[DefaultDataKeys.INPUT], self.extensions),
            super().load_data(data, dataset),
        )
        return list(kept)
コード例 #20
0
    def __init__(self, root, loader, extensions, transform=None, target_transform=None):
        """Folder dataset with an on-disk pickle cache of the sample scan."""
        classes, class_to_idx = find_classes(root)
        samples_cache_path = os.path.join(root, 'samples.pickle')
        if os.path.exists(samples_cache_path):
            # Reuse the cached scan instead of walking the tree again.
            with open(samples_cache_path, 'rb') as rf:
                samples = pickle.load(rf)
            print('=> read {} samples from cache: {}'.format(len(samples), samples_cache_path))
        else:
            samples = make_dataset(root, class_to_idx, extensions)
            # Only write the cache when the dataset root is writable.
            if os.access(root, os.W_OK):
                print('=> caching {} samples to: {}'.format(len(samples), samples_cache_path))
                with open(samples_cache_path, 'wb') as wf:
                    pickle.dump(samples, wf)
        if not samples:
            raise (RuntimeError("Found 0 files in subfolders of: " + root + "\n"
                                "Supported extensions are: " + ",".join(extensions)))

        self.root = root
        self.loader = loader
        self.extensions = extensions

        self.classes = classes
        self.class_to_idx = class_to_idx
        self.samples = samples

        self.transform = transform
        self.target_transform = target_transform
コード例 #21
0
    def __init__(self,
                 root,
                 annotation_path,
                 frames_per_clip,
                 step_between_clips=1,
                 fold=1,
                 train=True,
                 framewiseTransform=False,
                 transform=None):
        """HMDB51 video dataset restricted to one of the three official folds."""
        super(HMDB51, self).__init__(root)
        if not 1 <= fold <= 3:
            raise ValueError(
                "fold should be between 1 and 3, got {}".format(fold))

        extensions = ('avi', )
        self.fold = fold
        self.train = train

        # One class per sub-directory of root, indexed alphabetically.
        classes = list(sorted(list_dir(root)))
        class_to_idx = {name: index for index, name in enumerate(classes)}
        self.samples = make_dataset(self.root,
                                    class_to_idx,
                                    extensions,
                                    is_valid_file=None)
        self.classes = classes
        video_list = [path for path, _ in self.samples]
        clips = VideoClips(video_list, frames_per_clip, step_between_clips)
        # Keep only the videos listed for this fold/split in the annotations.
        self.indices = self._select_fold(video_list, annotation_path, fold,
                                         train)
        self.video_clips = clips.subset(self.indices)
        self.video_list = [video_list[i] for i in self.indices]
        self.framewiseTransform = framewiseTransform
        self.transform = transform
コード例 #22
0
    def __init__(self,
                 root,
                 loader,
                 extensions,
                 transform=None,
                 target_transform=None,
                 label_probability=False):
        """Folder dataset that can expose targets as one-hot probability rows."""
        classes, class_to_idx = self._find_classes(root)
        samples = make_dataset(root, class_to_idx, extensions)
        if not samples:
            raise (RuntimeError("Found 0 files in subfolders of: " + root +
                                "\n"
                                "Supported extensions are: " +
                                ",".join(extensions)))
        self.root = root
        self.loader = loader
        self.extensions = extensions
        self.label_probability = label_probability

        self.classes = classes
        self.nclasses = len(self.classes)
        self.class_to_idx = class_to_idx
        self.samples = samples
        if self.label_probability:
            # One-hot float32 matrix: one row per sample, one column per class.
            onehot = numpy.zeros((len(samples), self.nclasses), dtype='f')
            for row, (_, label) in enumerate(samples):
                onehot[row, label] = 1
            self.targets = onehot
        else:
            self.targets = [label for _, label in samples]
        self.transform = transform
        self.target_transform = target_transform
コード例 #23
0
    def __init__(
        self,
        root: str,
        loader: Callable[[str], Any],
        extensions: Optional[Tuple[str, ...]] = None,
        input_transform: Optional[Callable] = None,
        reconstruction_transform: Optional[Callable] = None,
        target_transform: Optional[Callable] = None,
        is_valid_file: Optional[Callable[[str], bool]] = None,
    ) -> None:
        """Folder dataset with a separate transform for reconstruction targets."""
        super(GenerativeDatasetFolder,
              self).__init__(root,
                             transform=input_transform,
                             target_transform=target_transform)
        classes, class_to_idx = self._find_classes(self.root)
        samples = make_dataset(self.root, class_to_idx, extensions,
                               is_valid_file)
        if not samples:
            # Only mention extensions when extension filtering was active.
            msg = "Found 0 files in subfolders of: {}\n".format(self.root)
            if extensions is not None:
                msg += "Supported extensions are: {}".format(
                    ",".join(extensions))
            raise RuntimeError(msg)

        self.r_transform = reconstruction_transform

        self.loader = loader
        self.extensions = extensions

        self.classes = classes
        self.class_to_idx = class_to_idx
        self.samples = samples
        self.targets = [label for _, label in samples]
コード例 #24
0
def load_format_paths(folder_path, extension):
    """Return parallel arrays of sample paths and integer class labels
    for files with the given extension under folder_path."""
    classes, class_to_idx = find_classes(folder_path)
    samples = make_dataset(folder_path, class_to_idx, [extension])
    paths = np.array([path for path, _ in samples])
    labels = np.array([int(label) for _, label in samples])
    return paths, labels
コード例 #25
0
    def __init__(self,
                 root,
                 annotation_path,
                 frames_per_clip,
                 step_between_clips=1,
                 frame_rate=None,
                 fold=1,
                 train=True,
                 transform=None,
                 _precomputed_metadata=None,
                 num_workers=1,
                 _video_width=0,
                 _video_height=0,
                 _video_min_dimension=0,
                 _audio_samples=0):
        """UCF101 video dataset restricted to one of the three official folds.

        Scans ``root`` (one class sub-directory per action, .avi files),
        builds a VideoClips index — cached on disk as ucf101_metadata.pt —
        and keeps only the clips listed for this ``fold``/``train`` split.

        NOTE(review): `_precomputed_metadata` is accepted but never used in
        this body; metadata comes only from the on-disk cache — confirm.
        """
        super(UCF101, self).__init__(root)
        if not 1 <= fold <= 3:
            raise ValueError(
                "fold should be between 1 and 3, got {}".format(fold))

        extensions = ('avi', )
        self.fold = fold
        self.train = train

        # One class per sub-directory of root, indexed alphabetically.
        classes = list(sorted(list_dir(root)))
        class_to_idx = {classes[i]: i for i in range(len(classes))}
        self.samples = make_dataset(self.root,
                                    class_to_idx,
                                    extensions,
                                    is_valid_file=None)
        self.classes = classes
        video_list = [x[0] for x in self.samples]

        # Reuse the expensive clip-metadata scan when a cache file exists.
        metadata_filepath = os.path.join(root, 'ucf101_metadata.pt')
        if os.path.exists(metadata_filepath):
            metadata = torch.load(metadata_filepath)
        else:
            metadata = None
        video_clips = VideoClips(
            video_list,
            frames_per_clip,
            step_between_clips,
            frame_rate,
            metadata,
            num_workers=num_workers,
            _video_width=_video_width,
            _video_height=_video_height,
            _video_min_dimension=_video_min_dimension,
            _audio_samples=_audio_samples,
        )
        # Persist freshly computed metadata for subsequent runs.
        if not os.path.exists(metadata_filepath):
            torch.save(video_clips.metadata, metadata_filepath)

        self.video_clips_metadata = video_clips.metadata
        # Restrict to the videos named in the fold's annotation file.
        self.indices = self._select_fold(video_list, annotation_path, fold,
                                         train)
        self.video_clips = video_clips.subset(self.indices)
        self.transform = transform
コード例 #26
0
    def __init__(self,
                 root,
                 annotation_path,
                 frames_per_clip,
                 step_between_clips=1,
                 frame_rate=None,
                 fold=1,
                 train=True,
                 transform=None,
                 _precomputed_metadata=None,
                 num_workers=1,
                 _video_width=0,
                 _video_height=0,
                 _video_min_dimension=0,
                 _audio_samples=0):
        """UCF101 variant that pickles the VideoClips metadata per split.

        NOTE(review): the metadata pickle is written when missing but never
        read back in this body — the cache appears to be consumed elsewhere
        (or is write-only); confirm before relying on it.
        """
        super(MYUCF101, self).__init__(root)
        if not 1 <= fold <= 3:
            raise ValueError(
                "fold should be between 1 and 3, got {}".format(fold))

        extensions = ('avi', )
        self.fold = fold
        self.train = train

        # One class per sub-directory of root, indexed alphabetically.
        classes = list(sorted(list_dir(root)))
        class_to_idx = {classes[i]: i for i in range(len(classes))}
        self.samples = make_dataset(self.root,
                                    class_to_idx,
                                    extensions,
                                    is_valid_file=None)
        self.classes = classes
        video_list = [x[0] for x in self.samples]
        # Index every fixed-length clip; `_precomputed_metadata` skips the scan.
        video_clips = VideoClips(
            video_list,
            frames_per_clip,
            step_between_clips,
            frame_rate,
            _precomputed_metadata,
            num_workers=num_workers,
            _video_width=_video_width,
            _video_height=_video_height,
            _video_min_dimension=_video_min_dimension,
            _audio_samples=_audio_samples,
        )

        # Cache the computed metadata, keyed by split/fold/clip parameters.
        meta_data_str_ = os.path.join(
            root,
            f"meta_data_train_{train}_fold_{fold}_frames_{frames_per_clip}_skip_"
            f"{step_between_clips}.pickle")
        if not os.path.exists(meta_data_str_):
            with open(meta_data_str_, 'wb') as ff:
                pickle.dump(video_clips.metadata, ff)

        self.video_clips_metadata = video_clips.metadata
        # Restrict to the videos named in the fold's annotation file.
        self.indices = self._select_fold(video_list, annotation_path, fold,
                                         train)
        self.video_clips = video_clips.subset(self.indices)
        self.transform = transform
コード例 #27
0
ファイル: n_vcdb.py プロジェクト: song020/NDVC
    def __init__(self, video_root='/DB/VCDB/core_dataset', fps=1, extensions=('mp4', 'flv')):
        """Index the VCDB core videos and sample frames at ``fps``.

        BUG FIX: the ``extensions`` default was a mutable list, which is
        shared across calls (classic mutable-default pitfall); an immutable
        tuple is equivalent for make_dataset and safe.
        """
        self.video_root = video_root
        self.classes, self.class_to_idx = self._find_classes(self.video_root)

        self.videos = make_dataset(self.video_root, self.class_to_idx, extensions)
        self.video_list, self.meta = self.__read_video_meta()
        print('sampling')
        self.frames, self.frames_cnt = self.__sampling_frames(fps=fps)

        # NOTE(review): 'sampling' is printed a second time after sampling
        # finishes — possibly meant to be a completion message; confirm
        # before changing the output.
        print('sampling')
コード例 #28
0
 def get_data(self):
     """Scan the current split's folder; return (paths, targets, classes)."""
     split_dir = os.path.join(self.root, self.split_folder[self.split])
     classes, class_to_idx = self._find_classes(split_dir)
     pairs = make_dataset(split_dir,
                          class_to_idx,
                          is_valid_file=is_image_file)
     # Unzip (path, target) pairs into two parallel tuples.
     data, targets = zip(*pairs)
     logger.info(
         f"Dataset summary: #examples={len(data)}; #classes={len(classes)}")
     return data, targets, classes
コード例 #29
0
ファイル: datasets.py プロジェクト: vietnamican/tinyimagenet
 def make_dataset(
     directory: str,
     class_to_idx,
     extensions=IMG_EXTENSIONS,
     is_valid_file=None,
 ):
     """Delegate to the module-level make_dataset with this file's defaults.

     NOTE(review): inside a method body the bare name `make_dataset`
     resolves to the module-level function (class attributes are not in
     scope), so this is a thin wrapper rather than a recursive call —
     confirm a module-level make_dataset import exists.
     """
     return make_dataset(directory,
                         class_to_idx,
                         extensions=extensions,
                         is_valid_file=is_valid_file)
コード例 #30
0
def create_image_to_label(directory, batch_size=16, ahead=4):
    """Build a (num_images, num_labels) indicator matrix for a dataset and
    persist it as the 'image_to_label' mmap of the experiment directory.

    Parameters:
        directory (str) -- experiment directory containing the dataset info
        batch_size (int) -- batch size for the segmentation prefetcher
        ahead (int)      -- number of prefetched batches kept in flight
    """
    ed = expdir.ExperimentDirectory(directory)
    info = ed.load_info()

    # Bug fix: was the Python 2 statement `print info.dataset`, a syntax
    # error under Python 3 and inconsistent with the print() calls below.
    print(info.dataset)
    if 'broden' in info.dataset:
        ds = loadseg.SegmentationData(info.dataset)
        categories = ds.category_names()
        shape = (ds.size(), len(ds.label))

        pf = loadseg.SegmentationPrefetcher(ds,
                                            categories=categories,
                                            once=True,
                                            batch_size=batch_size,
                                            ahead=ahead,
                                            thread=False)

        image_to_label = np.zeros(shape, dtype='int32')

        batch_count = 0
        for batch in pf.batches():
            if batch_count % 100 == 0:
                print('Processing batch %d ...' % batch_count)
            for rec in batch:
                image_index = rec['i']
                for cat in categories:
                    # A record's category entry may be an ndarray or a list of
                    # label ids; mark every label that appears for this image.
                    if ((type(rec[cat]) is np.ndarray and rec[cat].size > 0)
                            or type(rec[cat]) is list and len(rec[cat]) > 0):
                        image_to_label[image_index][np.unique(rec[cat])] = True
            batch_count += 1
    elif 'imagenet' in info.dataset or 'ILSVRC' in info.dataset:
        classes, class_to_idx = find_classes(info.dataset)
        imgs = make_dataset(info.dataset, class_to_idx)
        _, labels = zip(*imgs)
        labels = np.array(labels)

        L = 1000  # ImageNet/ILSVRC class count
        shape = (len(labels), L)

        image_to_label = np.zeros(shape)

        # One-hot encode each image's label.
        for i in range(L):
            image_to_label[labels == i, i] = 1
    else:
        # Bug fix: was `assert (False)` -- stripped under `python -O`, which
        # would leave `shape` unbound below; raise explicitly instead.
        raise ValueError('unsupported dataset: %s' % info.dataset)

    mmap = ed.open_mmap(part='image_to_label',
                        mode='w+',
                        dtype=bool,
                        shape=shape)
    mmap[:] = image_to_label[:]
    ed.finish_mmap(mmap)
    f = ed.mmap_filename(part='image_to_label')

    print('Finished and saved index_to_label at %s' % f)
コード例 #31
0
    def __init__(self, data_path, image_cache, do_random_flips=False,
                 normalization=transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))):
        """Build an image-folder dataset backed by a preloaded image cache.

        Parameters:
            data_path (str)        -- root directory; one sub-directory per class
            image_cache (dict)     -- mapping of image path to preloaded image
            do_random_flips (bool) -- whether to randomly flip samples
            normalization          -- transform applied to normalize images
        """
        found_classes, found_class_to_idx = find_classes(data_path)
        samples = make_dataset(data_path, found_class_to_idx)
        if not samples:
            raise(RuntimeError("Found 0 images in subfolders of: " + data_path + "\n"
                               "Supported image extensions are: " + ",".join(IMG_EXTENSIONS)))

        self.root = data_path
        self.imgs = samples
        self.classes = found_classes
        self.class_to_idx = found_class_to_idx
        self.normalization = normalization
        self.do_random_flips = do_random_flips
        self.image_cache = image_cache
コード例 #32
0
def read_all_images(root, num_workers=4):
    """Read every image under ``root`` into memory.

    Parameters:
        root (str)        -- image-folder root; one sub-directory per class
        num_workers (int) -- if > 1, decode images with a parallel worker pool

    Returns:
        dict mapping each image path to its decoded image.

    Raises:
        RuntimeError -- if no images are found under ``root``.
    """
    classes, class_to_idx = find_classes(root)
    dataset = make_dataset(root, class_to_idx)
    if len(dataset) == 0:
        raise (RuntimeError("Found 0 images in subfolders of: " + root + "\n" +
                            "Supported image extensions are: " + ",".join(IMG_EXTENSIONS)))

    # Unpack paths directly instead of indexing dataset[i][0] in a range loop;
    # the class targets are not needed here.
    paths = [path for path, _target in dataset]
    num_images = len(paths)

    print("Reading {0} images with {1} workers".format(num_images, num_workers))
    if num_workers > 1:
        images = parallel_process(paths, read_image_for_pytorch, n_jobs=num_workers)
    else:
        images = [read_image_for_pytorch(p) for p in tqdm(paths)]

    # Pair each path with its decoded image (same order as `paths`).
    return dict(zip(paths, images))