def __init__(self, root, transform=None, labelfile=None):
    """Index paired ground-truth / generated image folders under *root*.

    Sibling directories ``<name>`` and ``<name>_gt`` are paired by sorted
    order; only files with an allowed image extension are kept.

    Args:
        root: directory containing the paired sub-directories.
        transform: optional transform stored for later use.
        labelfile: unused here; kept for interface compatibility.

    Raises:
        RuntimeError: if no ground-truth images are found.
    """
    self.root = root
    self.transform = transform

    dir_names = [d.name for d in os.scandir(self.root) if d.is_dir()]
    generated_dirs = sorted(d for d in dir_names if not d.endswith('_gt'))
    gt_dirs = sorted(d for d in dir_names if d.endswith('_gt'))
    assert len(gt_dirs) == len(generated_dirs)

    def _collect(subdirs):
        # Flatten every subdir's listing, then keep only allowed image files.
        found = []
        for sub in subdirs:
            found.extend(os.path.join(sub, entry)
                         for entry in os.listdir(os.path.join(self.root, sub)))
        return [p for p in found
                if has_file_allowed_extension(p, IMG_EXTENSIONS)]

    self.gt_filenames = _collect(gt_dirs)
    self.generated_filenames = _collect(generated_dirs)
    assert len(self.gt_filenames) == len(self.generated_filenames)

    if not self.gt_filenames:
        raise RuntimeError("Found 0 files in folder: " + root + "\n"
                           "Supported extensions are: " +
                           ",".join(IMG_EXTENSIONS))
    self.loader = default_loader
def find_images(dir):
    """Return sorted paths of the image files directly inside *dir*."""
    return [os.path.join(dir, name)
            for name in sorted(os.listdir(dir))
            if has_file_allowed_extension(name, IMG_EXTENSIONS)]
def make_dataset(dir, class_to_idx, extensions, test_fun, num_images):
    """Collect ``(path, class_idx)`` samples filtered by *test_fun*.

    Needed by class ``CustomImageFolder``.  Prints how many samples were
    accepted per class (assumes a binary 0/1 labelling).
    """
    images = []
    counts = {0: 0, 1: 0}
    base = os.path.expanduser(dir)
    for target in sorted(os.listdir(base)):
        class_dir = os.path.join(base, target)
        if not os.path.isdir(class_dir):
            continue
        for root, _, fnames in sorted(os.walk(class_dir)):
            for fname in sorted(fnames):
                if not (has_file_allowed_extension(fname, extensions)
                        and test_fun(fname, class_to_idx[target], num_images)):
                    continue
                label = class_to_idx[target]
                counts[1 if label else 0] += 1
                images.append((os.path.join(root, fname), label))
    print('- number of images in: class0', counts[0], ' / class1', counts[1])
    return images
def __init__(self, root, loader=default_loader, extensions=IMG_EXTENSIONS,
             transform=None, target_transform=None, normalize_lab=None):
    """Recursively index every non-hidden image file under *root*.

    Args:
        root: dataset root directory ('~' is expanded).
        loader: callable used later to load an image from a path.
        extensions: allowed file extensions.
        transform / target_transform: stored for later use.
        normalize_lab: optional ``(mean, std)`` pair; converted to float
            tensors when given.

    Raises:
        RuntimeError: if no matching files are found.
    """
    self.images = []
    self.transform = transform
    self.target_transform = target_transform
    self.loader = loader
    self.normalize_lab = normalize_lab

    root = os.path.expanduser(root)
    for rootdir, _, fnames in sorted(os.walk(root)):
        for fname in sorted(fnames):
            # Skip hidden files (e.g. .DS_Store) before the extension test.
            if fname.startswith('.'):
                continue
            if has_file_allowed_extension(fname, extensions):
                self.images.append(os.path.join(rootdir, fname))

    if not self.images:
        raise RuntimeError("Found 0 files in folder of: " + root + "\n"
                           "Supported extensions are: " + ",".join(extensions))

    if normalize_lab is not None:
        # Pre-convert (mean, std) to float tensors for later normalisation.
        mean = torch.Tensor(normalize_lab[0]).float()
        std = torch.Tensor(normalize_lab[1]).float()
        self.normalize_lab = (mean, std)
def make_dataset(self):
    """Return parallel sorted lists of image and mask filenames.

    Both ``self.dir_images`` and ``self.dir_masks`` must hold the same
    number of files with allowed extensions.
    """
    def _allowed(names):
        return [n for n in names
                if has_file_allowed_extension(n, self.extensions)]

    img_names = _allowed(sorted(os.listdir(self.dir_images)))
    mask_names = _allowed(sorted(os.listdir(self.dir_masks)))
    assert len(img_names) == len(mask_names)
    return img_names, mask_names
def make_dataset(folder):
    """Return paths of all image files directly inside *folder* (listdir order)."""
    return [os.path.join(folder, file)
            for file in os.listdir(folder)
            if has_file_allowed_extension(file, IMG_EXTENSIONS)]
def make_dataset(self, image_name_list=None):
    """Build the list of image paths for this dataset.

    If *image_name_list* is given, each name is joined to ``self.root`` and
    the list is truncated to ``self.file_count_limit`` (which is first
    initialised from the list length when unset).  Otherwise ``self.root``
    is scanned for files with an allowed extension, up to the limit.

    Returns:
        list[str]: image paths.
    """
    if image_name_list is not None:
        if self.file_count_limit is None:
            self.file_count_limit = len(image_name_list)
        return [os.path.join(self.root, fname)
                for fname in image_name_list[:self.file_count_limit]]

    images = []
    root_dir = os.path.expanduser(self.root)
    for fname in os.listdir(root_dir):
        if has_file_allowed_extension(fname, self.extensions):
            # BUG FIX: the old code compared the raw listdir index with the
            # limit, so the break was skipped whenever the file at exactly
            # that index was not an image (and non-images counted toward the
            # limit).  Count accepted images instead.
            if self.file_count_limit is not None and \
                    len(images) == self.file_count_limit:
                break
            images.append(os.path.join(root_dir, fname))
    return images
def make_dataset(dir, class_to_idx, extensions, domains, start=1934):
    """Collect samples restricted to the requested (decade, region) domains.

    Filenames are expected to look like ``<year>_<city>_...``; the city is
    mapped to a region via ``REGIONS_DICT`` and the year is bucketed into
    10-year bins anchored at *start*.

    Returns:
        (images, meta): ``(path, class_idx)`` pairs and ``[year, region]``
        metadata, in lockstep.
    """
    images, meta = [], []
    top = os.path.expanduser(dir)
    for target in sorted(os.listdir(top)):
        class_dir = os.path.join(top, target)
        if not os.path.isdir(class_dir):
            continue
        for root, _, fnames in sorted(os.walk(class_dir)):
            for fname in sorted(fnames):
                if not has_file_allowed_extension(fname, extensions):
                    continue
                path = os.path.join(root, fname)
                stem = path.split('/')[-1]
                year = int(stem.split('_')[0])
                region = REGIONS_DICT[stem.split('_')[1]]
                # Decade bucket anchored at *start*.
                pivot_year = start + (year - start) // 10 * 10
                if (pivot_year, region) in domains:
                    images.append((path, class_to_idx[target]))
                    meta.append([year, region])
    return images, meta
def load_data(
        self,
        data: Union[str, Tuple[List[str], List[Any]]],
        dataset: Optional[Any] = None) -> Sequence[Mapping[str, Any]]:
    """Load classification samples from a class-folder tree or a file list.

    For a directory input, classes are discovered from subfolders; when no
    class subfolders exist the call falls back to prediction-style loading.
    Otherwise filenames are filtered by ``self.extensions``.
    Side effects: registers the class labels via ``set_state`` and, when
    *dataset* is given, sets ``dataset.num_classes``.
    """
    if self.isdir(data):
        classes, class_to_idx = self.find_classes(data)
        if not classes:
            # No class subfolders -> treat the folder as unlabelled data.
            return self.predict_load_data(data)
        else:
            self.set_state(LabelsState(classes))
        if dataset is not None:
            dataset.num_classes = len(classes)
        data = make_dataset(data, class_to_idx, extensions=self.extensions)
        return [{
            DefaultDataKeys.INPUT: input,
            DefaultDataKeys.TARGET: target
        } for input, target in data]
    # Non-directory input: defer to the parent loader, then drop any sample
    # whose input path does not carry an allowed extension.
    return list(
        filter(
            lambda sample: has_file_allowed_extension(
                sample[DefaultDataKeys.INPUT], self.extensions),
            super().load_data(data, dataset),
        ))
def load_data(self, data: Union[Tuple[str, str], Tuple[List[str], List[str]]],
              dataset: BaseAutoDataset) -> Sequence[Mapping[str, Any]]:
    """Pair input files with target files for segmentation-style loading.

    *data* is either ``(input_dir, target_dir)`` or two parallel file lists.
    For directories, only filenames present in BOTH folders are kept (a
    warning is emitted if anything is dropped).  Pairs whose extension is
    not allowed are silently filtered out.

    Raises:
        MisconfigurationException: if the input/target counts differ.
    """
    input_data, target_data = data
    if self.isdir(input_data) and self.isdir(target_data):
        input_files = os.listdir(input_data)
        target_files = os.listdir(target_data)
        # Keep only names present in both folders.
        all_files = set(input_files).intersection(set(target_files))
        if len(all_files) != len(input_files) or len(all_files) != len(
                target_files):
            rank_zero_warn(
                f"Found inconsistent files in input_dir: {input_data} and target_dir: {target_data}. Some files"
                " have been dropped.",
                UserWarning,
            )
        input_data = [os.path.join(input_data, file) for file in all_files]
        target_data = [
            os.path.join(target_data, file) for file in all_files
        ]
    if not isinstance(input_data, list) and not isinstance(
            target_data, list):
        # Single (file, file) pair -> promote to one-element lists.
        input_data = [input_data]
        target_data = [target_data]
    if len(input_data) != len(target_data):
        raise MisconfigurationException(
            f"The number of input files ({len(input_data)}) and number of target files ({len(target_data)}) must be"
            " the same.",
        )
    # Drop pairs where either side has a disallowed extension.
    data = filter(
        lambda sample:
        (has_file_allowed_extension(sample[0], self.extensions) and
         has_file_allowed_extension(sample[1], self.extensions)),
        zip(input_data, target_data),
    )
    data = [{
        DefaultDataKeys.INPUT: input,
        DefaultDataKeys.TARGET: target
    } for input, target in data]
    return data
def from_folder(cls,
                folder: Union[str, pathlib.Path],
                transform: Optional[Callable] = _default_valid_transforms,
                loader: Callable = _pil_loader,
                batch_size: int = 64,
                num_workers: Optional[int] = None,
                **kwargs):
    """
    Creates a ImageClassificationData object from folders of images arranged in this way: ::

        folder/dog_xxx.png
        folder/dog_xxy.png
        folder/dog_xxz.png
        folder/cat_123.png
        folder/cat_nsdf3.png
        folder/cat_asd932_.png

    Args:
        folder: Path to the data folder.
        transform: Image transform to apply to the data.
        loader: A function to load an image given its path.
        batch_size: Batch size for data loading.
        num_workers: The number of workers to use for parallelized loading.
            Defaults to None which equals the number of available CPU threads.

    Returns:
        ImageClassificationData: the constructed data module

    Examples:
        >>> img_data = ImageClassificationData.from_folder("my_folder/") # doctest: +SKIP
    """
    if not os.path.isdir(folder):
        raise MisconfigurationException("folder should be a directory")

    filenames = os.listdir(folder)

    # NOTE(review): this raises when *any* file has a disallowed extension
    # (e.g. a stray .DS_Store), not only when no image is present — confirm
    # whether `all(...)` was intended before relaxing it.
    if any(not has_file_allowed_extension(f, IMG_EXTENSIONS) for f in filenames):
        # BUG FIX: the message was a plain string, so {IMG_EXTENSIONS} and
        # {folder} were never interpolated; make it an f-string.
        raise MisconfigurationException(
            f"No images with allowed extensions {IMG_EXTENSIONS} were found in {folder}"
        )

    test_ds = (FlashDatasetFolder(
        folder,
        transform=transform,
        loader=loader,
        with_targets=False,
        img_paths=[os.path.join(folder, f) for f in filenames]))

    datamodule = cls(
        test_ds=test_ds,
        batch_size=batch_size,
        num_workers=num_workers,
    )

    datamodule.data_pipeline = ImageClassificationDataPipeline(
        valid_transform=transform, loader=loader)
    return datamodule
def is_image_file(filename):
    """Checks if a file is an allowed image extension.

    Args:
        filename (string): path to a file

    Returns:
        bool: True if the filename ends with a known image extension
    """
    # Thin wrapper binding the module-wide IMG_EXTENSIONS whitelist.
    return has_file_allowed_extension(filename, IMG_EXTENSIONS)
def make_dataset(dir, extensions):
    """Recursively collect all files under *dir* with an allowed extension."""
    return [os.path.join(root, fname)
            for root, _, fnames in sorted(os.walk(dir))
            for fname in sorted(fnames)
            if has_file_allowed_extension(fname, extensions)]
def make_dataset(dir, extensions):
    """Recursively collect allowed files under *dir* ('~' is expanded)."""
    images = []
    for root, _, fnames in sorted(os.walk(os.path.expanduser(dir))):
        images.extend(os.path.join(root, fname)
                      for fname in sorted(fnames)
                      if has_file_allowed_extension(fname, extensions))
    return images
def _load_data_dir(cls,
                   data: Any,
                   dataset: Optional[AutoDataset] = None) -> List[str]:
    """Load ``[path, label]`` samples from a (path, label) list or a folder tree.

    Args:
        data: either a list of ``(path_or_dir, label)`` pairs, or a root
            directory laid out one subfolder per class.
        dataset: optional dataset whose ``num_classes`` is set as a side
            effect.

    Returns:
        The collected samples.
    """
    if isinstance(data, list):
        # BUG FIX: *dataset* is Optional — guard before writing num_classes.
        if dataset is not None:
            dataset.num_classes = len(data)
        out = []
        for p, label in data:
            if os.path.isdir(p):
                # A directory entry contributes every allowed file with the
                # same label.
                for f in os.listdir(p):
                    if has_file_allowed_extension(f, IMG_EXTENSIONS):
                        out.append([os.path.join(p, f), label])
            elif os.path.isfile(p) and has_file_allowed_extension(
                    p, IMG_EXTENSIONS):
                out.append([p, label])
        return out
    classes, class_to_idx = cls._find_classes(data)
    # BUG FIX: same Optional guard on this branch.
    if dataset is not None:
        dataset.num_classes = len(classes)
    return make_dataset(data, class_to_idx, IMG_EXTENSIONS, None)
def read_images(self, dir, extensions):
    """Map report id -> image path for images whose name matches ID_PATTERN."""
    images = {}
    for root, _, fnames in sorted(os.walk(dir)):
        for fname in sorted(fnames):
            if not has_file_allowed_extension(fname, extensions):
                continue
            match = self.ID_PATTERN.search(fname)
            if match is not None:
                images[match.group(1)] = os.path.join(root, fname)
    return images
def item_class(dir, extensions):
    """Recursively build an ItemClass tree mirroring the directory layout."""
    from torchvision.datasets.folder import has_file_allowed_extension
    # Use a distinct local name instead of shadowing the function name.
    node = ItemClass(os.path.basename(dir), [])
    for target in sorted(os.listdir(dir)):
        entry = os.path.join(dir, target)
        if os.path.isdir(entry):
            node.add_sub_class(ImageDir.item_class(entry, extensions))
        elif has_file_allowed_extension(entry, extensions):
            node.items.append(entry)
    return node
def __init__(self, root, loader=None, transform=None):
    """Index test images under ``root/test/images``.

    Class names are discovered from ``root/train`` so train/test share the
    same label ordering.
    """
    assert os.path.exists(root)
    self.classes, class_to_idx = find_classes(os.path.join(root, 'train'))
    test_dir = os.path.join(root, 'test', 'images')
    self.image_paths = [
        os.path.join(parent, fname)
        for parent, _, fnames in sorted(os.walk(test_dir))
        for fname in sorted(fnames)
        if has_file_allowed_extension(fname, IMG_EXTENSIONS)
    ]
    self.loader = loader if loader is not None else pil_loader
    self.transform = transform
def read_reports(self, dir, extensions):
    """Map report id -> report path; optionally pre-extract text sections."""
    reports = {}
    for root, _, fnames in sorted(os.walk(dir)):
        for fname in sorted(fnames):
            if not has_file_allowed_extension(fname, extensions):
                continue
            match = self.ID_PATTERN.search(fname)
            if match is None:
                continue
            report = os.path.join(root, fname)
            if self.cache_text:
                # Warm the section cache up front.
                self.extract_section(report)
            reports[match.group(1)] = report
    return reports
def _get_predicting_files(samples: Union[Sequence, str]) -> List[str]:
    """Normalise *samples* (a path, or dirs, or files) into image-file paths.

    A mixed list of files and directories yields an empty result, matching
    the all-dirs / all-files branches.
    """
    if isinstance(samples, str):
        samples = [samples]
    if isinstance(samples, (list, tuple)):
        if all(os.path.isdir(s) for s in samples):
            files = [os.path.join(d, f)
                     for d in samples for f in os.listdir(d)]
        elif all(os.path.isfile(s) for s in samples):
            files = samples
        else:
            files = []
    else:
        files = []
    return [p for p in files
            if has_file_allowed_extension(p, IMG_EXTENSIONS)]
def _load_data_dir(
    cls,
    data: Any,
    dataset: Optional[AutoDataset] = None,
) -> Tuple[Optional[List[str]], List[Tuple[str, int]]]:
    """Load ``(classes, samples)`` from a (path, label) list or a folder tree.

    Args:
        data: either a list of ``(path_or_dir, label)`` pairs, or a root
            directory laid out one subfolder per class.
        dataset: optional dataset whose ``num_classes`` is set as a side
            effect.

    Returns:
        ``(classes, samples)``; ``classes`` is ``None`` for list input.

    Raises:
        ValueError: if a labelled directory holds more than one file.
        TypeError: if an entry is neither a file nor a directory.
    """
    if isinstance(data, list):
        # TODO: define num_classes elsewhere. This is a bad assumption since the list of
        # labels might not contain the complete set of ids so that you can infer the total
        # number of classes to train in your dataset.
        # BUG FIX: *dataset* is Optional — guard before writing num_classes.
        if dataset is not None:
            dataset.num_classes = len(data)
        out: List[Tuple[str, int]] = []
        for p, label in data:
            if os.path.isdir(p):
                # TODO: there is an issue here when a path is provided along with labels.
                # os.listdir cannot assure the same file order as the passed labels list.
                files_list: List[str] = os.listdir(p)
                if len(files_list) > 1:
                    raise ValueError(
                        f"The provided directory contains more than one file."
                        f"Directory: {p} -> Contains: {files_list}")
                for f in files_list:
                    if has_file_allowed_extension(f, IMG_EXTENSIONS):
                        out.append([os.path.join(p, f), label])
            elif os.path.isfile(p) and has_file_allowed_extension(
                    str(p), IMG_EXTENSIONS):
                out.append([p, label])
            else:
                raise TypeError(f"Unexpected file path type: {p}.")
        return None, out
    else:
        classes, class_to_idx = cls._find_classes(data)
        # TODO: define num_classes elsewhere. This is a bad assumption since the list of
        # labels might not contain the complete set of ids so that you can infer the total
        # number of classes to train in your dataset.
        # BUG FIX: same Optional guard on this branch.
        if dataset is not None:
            dataset.num_classes = len(classes)
        return classes, make_dataset(data, class_to_idx, IMG_EXTENSIONS, None)
def predict_load_data(self, data: Union[str, List[str]],
                      dataset: Optional[Any] = None) -> Sequence[Mapping[str, Any]]:
    """Expand a directory into its files and keep only allowed extensions."""
    if self.isdir(data):
        data = [os.path.join(data, file) for file in os.listdir(data)]
    if not isinstance(data, list):
        data = [data]
    samples = super().predict_load_data(data)
    return [s for s in samples
            if has_file_allowed_extension(s[DefaultDataKeys.INPUT],
                                          self.extensions)]
def make_dataset(list_file, class_to_idx, extensions, domains):
    """Parse a '<fname> <year>-<viewpoint> <target>' listing into samples.

    Returns:
        (images, meta): ``(path, class_idx)`` pairs and
        ``[year, viewpoint]`` integer metadata, in lockstep.
    """
    images, meta = [], []
    with open(list_file) as f:
        for line in f:
            fname, domain, target = line.strip().split(' ')
            year, viewpoint = domain.split('-')
            if has_file_allowed_extension(fname, extensions) \
                    and (year, viewpoint) in domains:
                images.append((fname, class_to_idx[target]))
                meta.append([int(year), int(viewpoint)])
    return images, meta
def make_dataset(fol, class_to_idx, extensions):
    """ImageFolder-style scan: one ``(path, class_idx)`` per allowed file."""
    samples = []
    base = os.path.expanduser(fol)
    for target in sorted(os.listdir(base)):
        class_dir = os.path.join(base, target)
        if not os.path.isdir(class_dir):
            continue
        for root, _, fnames in sorted(os.walk(class_dir)):
            samples.extend((os.path.join(root, fname), class_to_idx[target])
                           for fname in sorted(fnames)
                           if has_file_allowed_extension(fname, extensions))
    return samples
def make_dataset(dir, class_to_idx, extensions):
    """Read the SVRT dataset; the label is encoded in the filename.

    Note: *class_to_idx* is unused — image names start with 'sample_0_...'
    or 'sample_1_...' and the digit at index 7 is the class.
    """
    samples = []
    for root, _, fnames in sorted(os.walk(os.path.expanduser(dir))):
        for fname in sorted(fnames):
            if not has_file_allowed_extension(fname, extensions):
                continue
            # 'sample_<label>_...' -> char 7 is the 0/1 label.
            samples.append((os.path.join(root, fname), int(fname[7])))
    return samples
def make_dataset_withbbox(dir, class_to_idx, extensions, bounding_box):
    """Collect ``(path, class_idx, bbox_row)`` triples.

    The numeric filename stem selects the row of *bounding_box*.
    """
    samples = []
    base = os.path.expanduser(dir)
    for target in sorted(class_to_idx.keys()):
        class_dir = os.path.join(base, target)
        if not os.path.isdir(class_dir):
            continue
        for root, _, fnames in sorted(os.walk(class_dir)):
            for fname in sorted(fnames):
                if not has_file_allowed_extension(fname, extensions):
                    continue
                # Filename stem is the row index into the bbox array.
                idx = int(fname.split('.')[0])
                samples.append((os.path.join(root, fname),
                                class_to_idx[target],
                                bounding_box[idx, :]))
    return samples
def _make_dataset(dir, class_to_idx, extensions):
    """Collect ``(path, track_idx)`` samples.

    The track index is the integer formed by the digits in the class-folder
    name (e.g. 'track_012' -> 12).
    """
    samples = []
    base = os.path.expanduser(dir)
    for target in sorted(class_to_idx.keys()):
        class_dir = os.path.join(base, target)
        if not os.path.isdir(class_dir):
            continue
        for root, _, fnames in sorted(os.walk(class_dir)):
            for fname in sorted(fnames):
                if not has_file_allowed_extension(fname, extensions):
                    continue
                track_idx = int(''.join(ch for ch in target if ch.isdigit()))
                samples.append((os.path.join(root, fname), track_idx))
    return samples
def make_dataset(dir, class_to_idx, img_to_classes, read_labels):
    """Multi-label samples: ``(path, one-hot target vector, numeric image id)``.

    When *read_labels* is false the target stays all-zero.
    """
    samples = []
    base = os.path.expanduser(dir)
    for entry in sorted(os.listdir(base)):
        path = os.path.join(base, entry)
        if os.path.isdir(path):
            continue
        if not has_file_allowed_extension(
                path, ['.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif']):
            continue
        img_id = entry.split(".")[0]
        target = np.zeros(len(class_to_idx), dtype=np.float32)
        if read_labels:
            # Set a 1 at every class index this image belongs to.
            target[[class_to_idx[c] for c in img_to_classes[img_id]]] = 1
        samples.append((path, target, int(img_id)))
    return samples
def is_valid_file(x: str) -> bool:
    """Return True if *x* has one of the allowed extensions."""
    # Closure predicate: *extensions* comes from the enclosing scope.
    return has_file_allowed_extension(
        x, cast(tuple[str, ...], extensions))
def is_valid_file(x):
    """Return True if *x* has one of the allowed extensions."""
    # Closure predicate: *extensions* comes from the enclosing scope.
    return has_file_allowed_extension(x, extensions)