def gen_cls_dsmd_file_from_datafolder(
        root_dir, c2l_path, dsmd_path, classnames=None):
    """ Build and save the class2label map and the dataset metadata (dsmd)
    of a DataFolder, restricted to the requested classes.

    A DataFolder is a directory layout for image classification problems in
    which every sub-directory holds the images of one class:
    -----------------------
    ├── class1
    │   ├── 1.png
    │   └── 2.png
    │   ...
    ├── class2
    │   ├── 3.png
    │   └── 4.png
    └── ...
    -----------------------

    The label of a class is its position in `classnames` (0, 1, 2, ...).
    The metadata maps each image filename to the label of the class
    directory it was found in. Both mappings are written with
    'mv.save_dsmd()'.

    Args:
        root_dir (str): DataFolder root directory.
        c2l_path (str): where to save the class name -> label mapping.
        dsmd_path (str): where to save the filename -> label mapping.
        classnames (list[str]): classes to include. When omitted, every
            entry listed under `root_dir` is treated as a class.

    Raises:
        AssertionError: if `root_dir` or one of the class directories is
            not a directory.
        FileExistsError: if the same filename is found more than once
            among the visited class directories.

    Note:
        Intended as the companion of 'gen_cls_ds_from_datafolder()', which
        is why image filenames have to be unique across the DataFolder.
    """
    assert mv.isdir(root_dir)

    selected = mv.listdir(root_dir) if classnames is None else classnames

    name_to_label = {}
    file_to_label = {}

    for index, name in enumerate(selected):
        name_to_label[name] = index

        source_dir = mv.joinpath(root_dir, name)
        assert mv.isdir(source_dir)

        for image_name in mv.listdir(source_dir):
            # Filenames are the metadata keys, so a repeat would silently
            # overwrite the label of an earlier image.
            if image_name in file_to_label:
                raise FileExistsError(
                    'filename {} already exists'.format(image_name))
            file_to_label[image_name] = index

    mv.save_dsmd(c2l_path, name_to_label)
    mv.save_dsmd(dsmd_path, file_to_label)
def test_mkdirs():
    """ Check 'mv.mkdirs()' on DATA_DIR and on a fresh sub-directory. """
    # Calling mkdirs on DATA_DIR must not raise FileExistsError
    # (presumably DATA_DIR already exists when the tests run).
    with not_raises(FileExistsError):
        mv.mkdirs(DATA_DIR)

    # A new sub-directory must exist after the call; remove it afterwards.
    tmp_dir = mv.joinpath(DATA_DIR, 'temporary_subdir')
    mv.mkdirs(tmp_dir)
    assert mv.isdir(tmp_dir)
    mv.rmtree(tmp_dir)
def experiment_exists(experiment):
    """ Tell whether an experiment is already recorded under 'runs'.

    Each entry of the 'runs' directory is split on '_' at most three times
    and the last piece is taken as that entry's experiment name.

    Args:
        experiment (str): experiment name to look for.

    Returns:
        (bool): True if some entry of 'runs' carries that experiment name,
            False otherwise (including when 'runs' is not a directory).
    """
    runs_root = 'runs'
    if not mv.isdir(runs_root):
        return False

    recorded = [entry.split('_', 3)[-1] for entry in mv.listdir(runs_root)]
    return experiment in recorded
def gen_cls_ds_from_datafolder(
        in_dir, out_dir, auto_mkdirs=True, classnames=None):
    """ Flatten a DataFolder into one directory of images.

    Every image of the selected class directories is copied into `out_dir`.
    The DataFolder itself is only read, never modified.

    Args:
        in_dir (str): DataFolder root directory.
        out_dir (str): directory receiving the copies. It is emptied
            before anything is copied.
        auto_mkdirs (bool): whether to create the parent directory of
            `out_dir` beforehand.
        classnames (list[str]): classes to collect. When omitted, every
            entry listed under `in_dir` is treated as a class.

    Raises:
        AssertionError: if `in_dir` or one of the class directories is not
            a directory.

    Note:
        Intended as the companion of
        'gen_cls_dsmd_file_from_datafolder()', which also describes the
        DataFolder layout. Files are copied with `non_overwrite=True`, so
        image filenames have to be unique across the DataFolder; a repeated
        filename is expected to end in a FileExistsError.
    """
    assert mv.isdir(in_dir)

    # Start from a clean output directory.
    if auto_mkdirs:
        mv.mkdirs(mv.parentdir(out_dir))
    mv.empty_dir(out_dir)

    selected = mv.listdir(in_dir) if classnames is None else classnames

    for name in selected:
        source_dir = mv.joinpath(in_dir, name)
        assert mv.isdir(source_dir)

        image_names = natsorted(mv.listdir(source_dir))
        mv.copyfiles(image_names, out_dir, source_dir, non_overwrite=True)
def test_copyfiles():
    """ Check 'mv.copyfiles()' overwrite handling and 'mv.empty_dir()'.

    The temporary destination directory is removed in a `finally` clause,
    so a failing assertion or copy does not leave files behind for later
    test runs.
    """
    dst_dir = mv.joinpath(DATA_DIR, 'temporary_subdir')
    mv.mkdirs(dst_dir)

    try:
        src_paths = ['brain_001.dcm', 'brain_002.dcm']

        # First copy: both files must land in the destination.
        mv.copyfiles(src_paths, dst_dir, DCM_DIR)
        assert len(mv.listdir(dst_dir)) == 2

        # Copying onto existing files is allowed when overwriting is on ...
        with not_raises(FileExistsError):
            mv.copyfiles(src_paths, dst_dir, DCM_DIR, non_overwrite=False)

        # ... and must be refused when overwriting is off.
        with pytest.raises(FileExistsError):
            mv.copyfiles(src_paths, dst_dir, DCM_DIR, non_overwrite=True)

        # empty_dir must keep the directory itself but drop its contents.
        mv.empty_dir(dst_dir)
        assert mv.isdir(dst_dir)
        assert len(mv.listdir(dst_dir)) == 0
    finally:
        # Guarded so that a missing directory cannot mask the real failure.
        if mv.isdir(dst_dir):
            mv.rmtree(dst_dir)
def isdicomdir(path):
    """ Judge whether a given directory is a valid dicom directory.

    A directory is a dicom directory when it contains at least one entry
    and every entry in it is a dicom file. An empty directory, a directory
    holding any non-dicom entry, and a path that is not a directory are
    all rejected.

    Args:
        path(str): given directory path.

    Returns:
        (bool): True if given directory path is a dicom directory,
            otherwise False.
    """
    if not mv.isdir(path):
        return False

    file_names = mv.listdir(path)

    # "At least one dicom file": an empty directory does not qualify.
    if not file_names:
        return False

    # "Only contains dicoms": stop at the first entry that is not a dicom.
    return all(
        isdicom(mv.joinpath(path, file_name)) for file_name in file_names)
def __init__(self, cfg, mode, build_transform, image_loader):
    """ Set up the file list, transform and image loader for one mode.

    The dataset source is looked up from the config by mode
    (cfg.DATA.TRAIN_DSMD / VAL_DSMD / TEST_DSMD) and may be either:
      - a dsmd file: it is loaded with 'mv.load_dsmd()', its keys become
        `self.filenames`, and each one is joined with cfg.DATA.IMAGE_DIR
        to form `self.filepaths`;
      - a directory of images: there is no metadata (`self.dsmd` is None),
        the directory listing becomes `self.filenames`, and each one is
        joined with that directory to form `self.filepaths`.

    Args:
        cfg: configuration object providing the cfg.DATA.* entries above;
            it is also passed to `build_transform`.
        mode: one of mv.ModeKey.TRAIN / VAL / TEST.
        build_transform (callable): called as
            build_transform(cfg, is_train) to create `self.transform`.
        image_loader: stored as `self.image_loader`.

    Raises:
        KeyError: if `mode` is not one of the three supported modes.
        AssertionError: if the configured source is neither a file nor a
            directory.
    """
    self.is_train = (mode == mv.ModeKey.TRAIN)

    self.mode2dsmd = {
        mv.ModeKey.TRAIN: cfg.DATA.TRAIN_DSMD,
        mv.ModeKey.VAL: cfg.DATA.VAL_DSMD,
        mv.ModeKey.TEST: cfg.DATA.TEST_DSMD,
    }

    dsmd_path = self.mode2dsmd[mode]
    assert mv.isfile(dsmd_path) or mv.isdir(dsmd_path)

    if mv.isfile(dsmd_path):  # for a dsmd file
        self.dsmd = mv.load_dsmd(dsmd_path)
        self.filenames = list(self.dsmd.keys())
        image_dir = cfg.DATA.IMAGE_DIR
    else:  # for a directory containing test images
        self.dsmd = None
        self.filenames = list(mv.listdir(dsmd_path))
        image_dir = dsmd_path

    # Both branches expose real paths: the directory listing holds bare
    # names, so they are joined with the directory they came from.
    self.filepaths = [
        mv.joinpath(image_dir, filename) for filename in self.filenames
    ]

    self.transform = build_transform(cfg, self.is_train)
    self.image_loader = image_loader