def batch_bdc2rws_contour(dcm_dir, bdc_dir, rws_dir, **kwargs):
    """ Batch-convert BDC contour annotations to rws format.

    Every title (file name without extension) found in both ``dcm_dir`` and
    ``bdc_dir`` is converted by calling ``bdc2rws_contour``. A warning is
    logged when the two directories do not contain exactly the same titles.

    Args:
        dcm_dir (str): dicom files directory.
        bdc_dir (str): bdc annotation files directory.
        rws_dir (str): rws annotation files directory (passed to
            ``mv.mkdirs`` before conversion starts).
        **kwargs: forwarded unchanged to ``bdc2rws_contour``.

    N.B. dicom title should be exactly the same as the annotation file
    title. e.g. 123.dcm, 123.txt
    """
    mv.mkdirs(rws_dir)

    dcm_names = mv.listdir(dcm_dir)
    bdc_names = mv.listdir(bdc_dir)
    dcm_titles = [mv.splitext(name)[0] for name in dcm_names]
    bdc_titles = [mv.splitext(name)[0] for name in bdc_names]

    # only titles present on both sides can be converted
    shared_titles = list(set(dcm_titles) & set(bdc_titles))

    counts_agree = len(dcm_names) == len(bdc_names) == len(shared_titles)
    if not counts_agree:
        logging.warning('dicoms & annotations do not exactly match')

    for title in tqdm(shared_titles):
        bdc2rws_contour(
            mv.joinpath(dcm_dir, title + '.dcm'),
            mv.joinpath(bdc_dir, title + '.txt'),
            mv.joinpath(rws_dir, title + '.json'),
            **kwargs
        )
def test_dsmd_io_det():
    """Round-trip a detection dsmd through save_dsmd / load_dsmd.

    The dsmd is loaded from the ground-truth file, written to a temporary
    file and read back from that temporary file, so the comparison checks
    what was actually saved.
    """
    tmp_dir = mv.joinpath(DATA_DIR, 'temporary_subdir')
    tmp_path = mv.joinpath(tmp_dir, 'tmp_dsmd.txt')

    dsmd_loaded = mv.load_dsmd(DSMD_DET_GT, DSMD_DET_C2L, mode='det')
    mv.save_dsmd(tmp_path, dsmd_loaded, DSMD_DET_C2L, mode='det')
    # reload from the file just written (not from the original GT file),
    # otherwise the save path is never verified
    dsmd_reloaded = mv.load_dsmd(tmp_path, DSMD_DET_C2L, mode='det')
    assert_equal_dsmds(dsmd_loaded, dsmd_reloaded)

    mv.rmtree(tmp_dir)
def test_dsmd_io_cls(dsmd_file):
    """Round-trip a classification dsmd file through save_dsmd / load_dsmd."""
    scratch_dir = mv.joinpath(DATA_DIR, 'temporary_subdir')
    scratch_file = mv.joinpath(scratch_dir, 'tmp_dsmd.txt')

    original = mv.load_dsmd(dsmd_file)
    mv.save_dsmd(scratch_file, original)
    restored = mv.load_dsmd(scratch_file)
    assert_equal_dsmds(original, restored)

    # remove the scratch directory holding the saved file
    mv.rmtree(scratch_dir)
def test_imread_imwrite(img):
    """Write ``img`` to disk and check that reading it back is lossless."""
    out_dir = mv.joinpath(DATA_DIR, 'temporary_subdir')
    out_path = mv.joinpath(out_dir, mv.basename(PNG_IMG_PATH))
    mv.mkdirs(out_dir)

    # imwrite reports success through a truthy return value
    assert mv.imwrite(img, out_path)

    # UNCHANGED keeps the stored channel layout / depth as-is
    reloaded = mv.imread(out_path, mv.ImreadMode.UNCHANGED)
    assert_image_equal(img, reloaded)

    mv.rmtree(out_dir)
def save_ckpt_to_dir(model, ckpt_dir, identifier):
    """ Save a checkpoint and point the 'latest.pth' link at it.

    Args:
        model (Module): module whose params are to be saved.
        ckpt_dir (str): directory to save the checkpoint file.
        identifier (str): ckpt identifier (e.g. number of epochs); the
            checkpoint is stored as '<identifier>.pth'.
    """
    checkpoint_file = mv.joinpath(ckpt_dir, str(identifier) + '.pth')
    latest_link = mv.joinpath(ckpt_dir, 'latest.pth')

    mv.save_checkpoint(model, checkpoint_file)
    # the link target is made absolute before linking
    mv.symlink(mv.abspath(checkpoint_file), latest_link)
def bdc2dsmd_det_2d(annot_dir,
                    image_dir=None,
                    class2label=None,
                    ignore_label_name=True,
                    replace_ext=lambda x: x):
    """ Build a 2D detection dsmd from a directory of BDC annotations.

    N.B. annotation file name and image file name should be the same.

    Args:
        annot_dir (str): directory containing the annotation files.
        image_dir (str): currently unused by this function.
        class2label (dict): class name to label mapping. Its length gives
            the number of classes; a single class is assumed when None.
        ignore_label_name (bool): if True, every bounding box is assigned
            label 0; otherwise the label is looked up in ``class2label``.
        replace_ext (callable): maps an annotation file name to the key
            used in the resulting dsmd.

    Returns:
        the result of ``mv.make_dsmd`` applied to a dict that maps each
        key to a list (one entry per class) of float32 arrays of shape
        (num_bboxes, 4).
    """
    num_classes = len(class2label) if class2label is not None else 1

    def to_label(class_name):
        # Look the name up in the mapping. The previous code returned the
        # whole class2label dict here, which cannot be used as a list index.
        return 0 if ignore_label_name else class2label[class_name]

    filenames = mv.listdir(annot_dir)
    keys = [replace_ext(filename) for filename in filenames]

    # The shared empty array is never modified in place; entries are
    # rebound to new arrays below.
    empty_bboxes = np.zeros((0, 4), dtype=np.float32)
    dsmd = {key: [empty_bboxes] * num_classes for key in keys}

    for filename, key in zip(filenames, keys):
        annot_filepath = mv.joinpath(annot_dir, filename)
        # presumably load_bdc_dr_bbox applies the callable to each class
        # name found in the file - confirm against its definition
        bboxes = load_bdc_dr_bbox(annot_filepath, to_label)
        for label, bbox in bboxes:
            bbox = np.array(bbox, dtype=np.float32).reshape(-1, 4)
            dsmd[key][label] = np.concatenate(
                (dsmd[key][label], bbox), axis=0)

    return mv.make_dsmd(dsmd)
def init_logging(log_dir=None, config_file=None):
    """ Configure logging from a YAML dictConfig file.

    Args:
        log_dir (str): directory receiving 'info.log' and 'error.log'.
            Defaults to the current working directory.
        config_file (str): YAML logging configuration. Defaults to
            'configs/default_log_config.yaml' located two directory levels
            above this file.
    """
    log_dir = os.getcwd() if log_dir is None else log_dir

    if config_file is None:
        base_dir = mv.parentdir(mv.parentdir(__file__))
        config_file = mv.joinpath(base_dir, 'configs/default_log_config.yaml')

    with open(config_file, 'rt') as stream:
        config = yaml.safe_load(stream.read())

    # redirect both file handlers into the requested log directory
    handlers = config['handlers']
    handlers['info_file_handler']['filename'] = mv.joinpath(log_dir,
                                                            'info.log')
    handlers['error_file_handler']['filename'] = mv.joinpath(log_dir,
                                                             'error.log')

    # the directory is created before the handlers are instantiated
    mv.mkdirs(log_dir)
    logging.config.dictConfig(config)
def test_mkdirs():
    """mkdirs tolerates an existing directory and creates a missing one."""
    # calling mkdirs on a directory that already exists must not raise
    with not_raises(FileExistsError):
        mv.mkdirs(DATA_DIR)

    new_dir = mv.joinpath(DATA_DIR, 'temporary_subdir')
    mv.mkdirs(new_dir)
    assert mv.isdir(new_dir)

    mv.rmtree(new_dir)
def test_gen_cls_ds():
    """Check metadata generation and image collection from a DataFolder."""
    work_dir = mv.joinpath(DATA_DIR, 'temporary_subdir')
    mv.mkdirs(work_dir)
    c2l_out = mv.joinpath(work_dir, 'tmp_c2l.txt')
    dsmd_out = mv.joinpath(work_dir, 'tmp_dsmd.txt')

    mv.gen_cls_dsmd_file_from_datafolder(DF_DIR, c2l_out, dsmd_out)

    # load reference and generated files, then compare them pairwise
    ref_dsmd, gen_dsmd, ref_c2l, gen_c2l = (
        mv.load_dsmd(path)
        for path in (DSMD_DF, dsmd_out, CLS2LBL, c2l_out)
    )
    assert_equal_dsmds(ref_dsmd, gen_dsmd)
    assert_equal_dsmds(ref_c2l, gen_c2l)

    # start from an empty directory before collecting the images
    mv.empty_dir(work_dir)
    mv.gen_cls_ds_from_datafolder(DF_DIR, work_dir)
    assert len(mv.listdir(work_dir)) == 8

    mv.rmtree(work_dir)
def load_ckpt_from_dir(model, ckpt_dir, identifier='latest'):
    """ Load the checkpoint '<identifier>.pth' from a directory.

    Args:
        model (Module): module whose params are to be loaded.
        ckpt_dir (str): directory to load the checkpoint file from.
        identifier (str): ckpt identifier (e.g. number of epochs).
            Defaults to 'latest'.
    """
    checkpoint_name = str(identifier) + '.pth'
    mv.load_checkpoint(model, mv.joinpath(ckpt_dir, checkpoint_name))
def batch_mask2rws(mask_dir, rws_dir, **kwargs):
    """ Batch-convert mask annotations to rws format.

    Each entry of ``mask_dir`` is converted with ``mask2rws``; the mask is
    read from '<title>.png' and the result written to '<title>.json'.

    Args:
        mask_dir (str): mask files directory.
        rws_dir (str): rws annotation files directory (passed to
            ``mv.mkdirs`` before conversion starts).
        **kwargs: forwarded unchanged to ``mask2rws``.

    N.B. dicom file title should be exactly the same as the mask file
    title. e.g. 123.dcm, 123.png
    """
    mv.mkdirs(rws_dir)

    titles = [mv.splitext(name)[0] for name in mv.listdir(mask_dir)]
    for title in tqdm(titles):
        mask2rws(
            mv.joinpath(mask_dir, title + '.png'),
            mv.joinpath(rws_dir, title + '.json'),
            **kwargs
        )
def gen_cls_dsmd_file_from_datafolder(
        root_dir, c2l_path, dsmd_path, classnames=None):
    """ Generate classification metadata files from a DataFolder.

    A DataFolder is a directory structure for image classification
    problems in which each sub-directory holds the images of one class:

    -----------------------
    ├── class1
    │   ├── 1.png
    │   └── 2.png
    │   ...
    ├── class2
    │   ├── 3.png
    │   └── 4.png
    └── ...
    -----------------------

    Labels are assigned by position in ``classnames`` (0, 1, 2, ...).

    Args:
        root_dir (str): root data directory containing all the images.
        c2l_path (str): file path to save class2label info.
        dsmd_path (str): file path to save dataset metadata file.
        classnames (list[str]): names of specified classes.
            If not given, all entries of ``root_dir`` are considered.

    Raises:
        FileExistsError: if the same file name occurs more than once
            across the class directories.

    Note:
        This function is expected to be used together with
        'gen_cls_ds_from_datafolder()'.
    """
    assert mv.isdir(root_dir)

    if classnames is None:
        classnames = mv.listdir(root_dir)

    class2label = {}
    dsmd = {}
    for label, classname in enumerate(classnames):
        class2label[classname] = label

        class_dir = mv.joinpath(root_dir, classname)
        assert mv.isdir(class_dir)

        for filename in mv.listdir(class_dir):
            if filename not in dsmd:
                dsmd[filename] = label
                continue
            # file names are the dsmd keys, so they must be globally unique
            raise FileExistsError(
                'filename {} already exists'.format(filename))

    mv.save_dsmd(c2l_path, class2label)
    mv.save_dsmd(dsmd_path, dsmd)
def split_dsmd_file(dsmd_filepath, datasplit=None, shuffle=True,
                    suffix='.csv'):
    """ Split a dataset metadata file into several parts.

    With the default split, the dataset metadata file is divided into
    'train.csv' and 'val.csv' (plus e.g. 'test.csv' if requested in
    ``datasplit``). The resulting files are put in the same directory as
    the specified dsmd file.

    Args:
        dsmd_filepath (str): file path of dataset metadata.
        datasplit (dict[str, float]): how to split the dataset.
            e.g. {'train': 0.9, 'val': 0.1, 'test': 0.0}.
            Defaults to {'train': 0.9, 'val': 0.1}.
        shuffle (bool): whether to shuffle the dataset before splitting.
        suffix (str): suffix appended to each split name to form the
            output file name.

    Note:
        0.0 < sum of all ratios in datasplit <= 1.0
        If there's no image in a split, the corresponding dsmd file will
        not be saved.
    """
    if datasplit is None:
        datasplit = {'train': 0.9, 'val': 0.1}

    dsmd_dir = mv.parentdir(dsmd_filepath)
    dsmd = mv.load_dsmd(dsmd_filepath)
    num_total = len(dsmd)

    keys = list(dsmd.keys())
    if shuffle:
        random.shuffle(keys)

    # number of entries of each split, keyed by output file path
    sum_ratio = 0.0
    splits = {}
    for mode, ratio in datasplit.items():
        file_path = mv.joinpath(dsmd_dir, mode + suffix)
        splits[file_path] = int(num_total * ratio)
        sum_ratio += ratio
    assert 0.0 < sum_ratio <= 1.0

    start_index = 0
    for file_path, num_cur_split in splits.items():
        # clamp to the valid index range; start_index is 0 or a previously
        # clamped end_index, so it never needs clamping itself
        end_index = min(max(start_index + num_cur_split, 0), num_total)

        keys_split = natsorted(keys[start_index:end_index])
        dsmd_split = {key: dsmd[key] for key in keys_split}

        # empty splits are skipped; each non-empty split is written once
        if dsmd_split:
            mv.save_dsmd(file_path, dsmd_split)

        start_index = end_index
def test_copyfiles():
    """Exercise copyfiles / non_overwrite_cp / empty_dir on dicom files."""
    target_dir = mv.joinpath(DATA_DIR, 'temporary_subdir')
    mv.mkdirs(target_dir)

    names = ['brain_001.dcm', 'brain_002.dcm']
    mv.copyfiles(names, target_dir, DCM_DIR)
    assert len(mv.listdir(target_dir)) == 2

    # copying onto an existing file must fail in non-overwrite mode
    first_source = mv.joinpath(DCM_DIR, names[0])
    with pytest.raises(FileExistsError):
        mv.non_overwrite_cp(first_source, target_dir)

    # ... while overwriting is allowed when explicitly requested
    with not_raises(FileExistsError):
        mv.copyfiles(names, target_dir, DCM_DIR, non_overwrite=False)

    with pytest.raises(FileExistsError):
        mv.copyfiles(names, target_dir, DCM_DIR, non_overwrite=True)

    # empty_dir removes the content but keeps the directory itself
    mv.empty_dir(target_dir)
    assert mv.isdir(target_dir)
    assert len(mv.listdir(target_dir)) == 0

    mv.rmtree(target_dir)
def test_split_dsmd_file(dsmd_file):
    """Split a copied dsmd file with and without shuffling."""
    work_dir = mv.joinpath(DATA_DIR, 'temporary_subdir')
    work_file = mv.joinpath(work_dir, 'tmp_dsmd.txt')
    mv.mkdirs(work_dir)
    mv.cp(dsmd_file, work_file)

    ratios = {'train': 0.9, 'val': 0.1, 'test': 0.0}
    train_csv = mv.joinpath(work_dir, 'train.csv')
    val_csv = mv.joinpath(work_dir, 'val.csv')
    test_csv = mv.joinpath(work_dir, 'test.csv')

    def load_and_check_parts():
        # the empty 'test' split must not produce a file
        assert mv.isfile(train_csv)
        assert mv.isfile(val_csv)
        assert not mv.isfile(test_csv)

        train_part = mv.load_dsmd(train_csv)
        val_part = mv.load_dsmd(val_csv)
        assert len(train_part) == 18
        assert len(val_part) == 2
        return train_part, val_part

    # shuffle
    mv.split_dsmd_file(work_file, ratios)
    load_and_check_parts()

    # non shuffle: the content of each part is checked as well
    mv.split_dsmd_file(work_file, ratios, shuffle=False)
    train_part, val_part = load_and_check_parts()
    assert 'brain_001.dcm' in train_part
    assert 'brain_019.dcm' in val_part

    mv.rmtree(work_dir)
def load_rws_contour(filepath):
    """ Load contour annotations from an rws (JSON) label file.

    Args:
        filepath (str): path of the label file.

    Returns:
        (dict): 'height' and 'width' (values of 'imageHeight' and
            'imageWidth', None if absent), 'image_path' (the stored
            'imagePath' joined onto the label file's directory) and
            'shapes', a list of (label, points) tuples.
    """
    with open(filepath, 'r', encoding='utf-8') as label_file:
        content = json.load(label_file)

    # relative path from label file to relative path from cwd
    label_dir = mv.parentdir(filepath)
    image_path = mv.joinpath(label_dir, content['imagePath'])

    shapes = [(entry['label'], entry['points'])
              for entry in content['shapes']]

    return {
        'height': content.get('imageHeight'),
        'width': content.get('imageWidth'),
        'image_path': image_path,
        'shapes': shapes,
    }
def isdicomdir(path):
    """ Judge whether a given directory is a valid dicom directory.

    If given directory only contains dicoms (at least one dicom file),
    it is a dicom directory. Otherwise, it is not a dicom directory.

    Args:
        path (str): given directory path.

    Returns:
        (bool): True if given directory path is a dicom directory,
            otherwise False.
    """
    if not mv.isdir(path):
        return False

    file_names = mv.listdir(path)
    # an empty directory holds no dicom at all, so it does not qualify
    if not file_names:
        return False

    # every entry must be a dicom; stops at the first non-dicom entry
    return all(
        isdicom(mv.joinpath(path, file_name)) for file_name in file_names
    )
def gen_cls_ds_from_datafolder(
        in_dir, out_dir, auto_mkdirs=True, classnames=None):
    """ Generate classification dataset from DataFolder.

    This function will make a copy of each image in the DataFolder to the
    specified directory. Original DataFolder is left unchanged.

    Args:
        in_dir (str): DataFolder root directory.
        out_dir (str): directory to save all the images in DataFolder.
        auto_mkdirs (bool): If `out_dir` does not exist, whether to create
            it automatically.
        classnames (list[str]): names of specified classes to be collected.
            If not given, all classes are considered.

    Note:
        This function is expected to be used together with
        gen_cls_dsmd_file_from_datafolder().
        Filename of each image in DataFolder should be unique. Otherwise,
        A FileExistsError will be thrown.
        DataFolder is described in 'gen_cls_dsmd_file_from_datafolder()'.
    """
    assert mv.isdir(in_dir)

    # clean output directory
    # NOTE(review): only the *parent* of out_dir is created here; out_dir
    # itself is presumably (re)created by mv.empty_dir - confirm.
    if auto_mkdirs:
        mv.mkdirs(mv.parentdir(out_dir))
    # NOTE(review): any existing content of out_dir is removed, apparently
    # regardless of auto_mkdirs - confirm this is intended.
    mv.empty_dir(out_dir)

    # default: treat every entry of in_dir as a class directory
    if classnames is None:
        classnames = mv.listdir(in_dir)

    for classname in classnames:
        class_dir = mv.joinpath(in_dir, classname)
        assert mv.isdir(class_dir)
        # natural sort of the file names before copying
        filenames = natsorted(mv.listdir(class_dir))
        # non_overwrite=True: a file name already present in out_dir is
        # not overwritten (see the FileExistsError note above)
        mv.copyfiles(filenames, out_dir, class_dir, non_overwrite=True)
def __init__(self, cfg, mode, build_transform, image_loader):
    """ Set up the dataset for the given mode.

    The dataset source is selected by ``mode`` from cfg.DATA.TRAIN_DSMD,
    cfg.DATA.VAL_DSMD or cfg.DATA.TEST_DSMD. It is either a dsmd file
    (images are then looked up in cfg.DATA.IMAGE_DIR) or a directory
    containing test images.

    Args:
        cfg: configuration object providing the cfg.DATA.* entries above.
        mode: one of mv.ModeKey.TRAIN / VAL / TEST.
        build_transform (callable): called as
            ``build_transform(cfg, is_train)`` to create the transform.
        image_loader: stored as ``self.image_loader``.
    """
    self.is_train = (mode == mv.ModeKey.TRAIN)

    self.mode2dsmd = {
        mv.ModeKey.TRAIN: cfg.DATA.TRAIN_DSMD,
        mv.ModeKey.VAL: cfg.DATA.VAL_DSMD,
        mv.ModeKey.TEST: cfg.DATA.TEST_DSMD,
    }

    dsmd_path = self.mode2dsmd[mode]
    assert mv.isfile(dsmd_path) or mv.isdir(dsmd_path)

    if mv.isfile(dsmd_path):  # for a dsmd file
        self.dsmd = mv.load_dsmd(dsmd_path)
        self.filenames = list(self.dsmd.keys())
        self.filepaths = [
            mv.joinpath(cfg.DATA.IMAGE_DIR, filename)
            for filename in self.filenames
        ]
    else:  # for a directory containing test images
        self.dsmd = None
        # mv.listdir yields bare entry names, so they are joined with the
        # directory to obtain usable paths; filenames is set in both
        # branches so the attribute always exists
        self.filenames = mv.listdir(dsmd_path)
        self.filepaths = [
            mv.joinpath(dsmd_path, filename)
            for filename in self.filenames
        ]

    self.transform = build_transform(cfg, self.is_train)
    self.image_loader = image_loader
def test_has_duplicated_files():
    """find_duplicated_files reports groups of files with equal content."""
    work_dir = mv.joinpath(DATA_DIR, 'temporary_subdir')
    mv.mkdirs(work_dir)

    # non duplicated files case
    names = ['brain_001.dcm', 'brain_002.dcm', 'brain_003.dcm']
    mv.copyfiles(names, work_dir, DCM_DIR)
    assert len(mv.find_duplicated_files(work_dir)) == 0

    # duplicated files case: copy the first file under a second name
    first_copy = mv.joinpath(work_dir, 'dup_0.dcm')
    mv.non_overwrite_cp(mv.joinpath(DCM_DIR, names[0]), first_copy)
    groups = mv.find_duplicated_files(work_dir)
    assert len(groups) == 1
    assert mv.joinpath(work_dir, 'brain_001.dcm') in groups[0]
    assert mv.joinpath(work_dir, 'dup_0.dcm') in groups[0]

    # a copy of a second file adds a second group
    second_copy = mv.joinpath(work_dir, 'dup_1.dcm')
    mv.non_overwrite_cp(mv.joinpath(DCM_DIR, names[1]), second_copy)
    groups = mv.find_duplicated_files(work_dir)
    assert len(groups) == 2

    mv.rmtree(work_dir)
import math

import numpy as np
import torch
import medvision as mv
import pytest


DATA_DIR = mv.joinpath(mv.parentdir(__file__), 'data')
PNG_IMG_PATH = mv.joinpath(DATA_DIR, 'pngs', 'Blue-Ogi.png')
IM_GRAY = mv.imread(PNG_IMG_PATH, mv.ImreadMode.GRAY)
IM_RGB = mv.imread(PNG_IMG_PATH)


def assert_image_equal(a, b):
    """Assert that two images agree in shape, dtype and every pixel."""
    assert a.shape == b.shape
    assert a.dtype == b.dtype
    # compare in float32 so unsigned pixel types cannot wrap around
    lhs = a.astype(np.float32)
    rhs = b.astype(np.float32)
    largest_gap = np.abs(lhs - rhs).max()
    assert math.isclose(largest_gap, 0.0)


@pytest.mark.parametrize('img', [IM_GRAY, IM_RGB])
def test_make_np(img):
    """make_np converts a torch tensor back to the original image."""
    as_tensor = torch.from_numpy(img)
    assert_image_equal(img, mv.make_np(as_tensor))


@mv.nograd
def test_no_grad():
    """Results computed under the nograd decorator do not require grad."""
    source = torch.zeros(1, requires_grad=True)
    doubled = source * 2
    assert doubled.requires_grad is False
import numpy as np
import medvision as mv


# test fixtures: detection results, ground truth and class list
DATA_DIR = mv.joinpath(mv.parentdir(__file__), 'data')
DSMD_DET_DT = mv.joinpath(DATA_DIR, 'texts', 'dsmd_det_dt.csv')
DSMD_DET_GT = mv.joinpath(DATA_DIR, 'texts', 'dsmd_det_gt.csv')
DSMD_DET_C2L = mv.joinpath(DATA_DIR, 'texts', 'det_classes.csv')


def test_eval_det_a():
    """Evaluate the fixture detections against the fixture ground truth."""
    class2label = mv.load_c2l(DSMD_DET_C2L)
    dts = mv.load_dsmd(DSMD_DET_DT, DSMD_DET_C2L, mode='det')
    gts = mv.load_dsmd(DSMD_DET_GT, DSMD_DET_C2L, mode='det')

    det_metric = mv.eval_det(dts, gts, num_classes=len(class2label),
                             iou_thr=0.5)
    # only the first entry of the metric is checked
    ap, num_anns = det_metric[0]['ap'], det_metric[0]['num_gt_bboxes']
    assert 0.263 < ap < 0.264
    assert int(num_anns) == 546

    m = mv.eval_det4binarycls(dts, gts)
    assert 0.849 < m['accuracy'] < 0.850


def test_eval_det_b():
    """Build an empty detection list for every ground-truth entry."""
    gts = mv.load_dsmd(DSMD_DET_GT, DSMD_DET_C2L, mode='det')
    # keep only the values, dropping the keys
    gts = [value for key, value in gts.items()]
    dts = []
    # one empty detection list per ground-truth entry
    # NOTE(review): no evaluation or assertion follows in the visible
    # code - confirm the rest of this test against the full file.
    for img_id, _ in enumerate(gts):
        dts.append([])
from contextlib import contextmanager
import pytest
import medvision as mv


# test fixture directories
DATA_DIR = mv.joinpath(mv.parentdir(__file__), 'data')
DCM_DIR = mv.joinpath(DATA_DIR, 'dicoms')
PNG_DIR = mv.joinpath(DATA_DIR, 'pngs')


@contextmanager
def not_raises(exception):
    """Context manager that fails the test if ``exception`` is raised."""
    try:
        yield
    except exception:
        # NOTE(review): pytest.fail() raises by itself, so the leading
        # `raise` looks redundant - confirm before simplifying.
        raise pytest.fail("DID RAISE {0}".format(exception))


def test_mkdirs():
    """mkdirs tolerates an existing directory and creates a missing one."""
    # an already existing directory must not trigger FileExistsError
    with not_raises(FileExistsError):
        mv.mkdirs(DATA_DIR)

    path = mv.joinpath(DATA_DIR, 'temporary_subdir')
    mv.mkdirs(path)
    assert mv.isdir(path)
    mv.rmtree(path)


def test_copyfiles():
    # NOTE(review): only the setup of this test is visible here - confirm
    # the remaining steps against the full file.
    dst_dir = mv.joinpath(DATA_DIR, 'temporary_subdir')
    mv.mkdirs(dst_dir)
import pytest
import medvision as mv


# test fixtures
DATA_DIR = mv.joinpath(mv.parentdir(__file__), 'data')
DSMD_CLS_SL = mv.joinpath(DATA_DIR, 'texts', 'dsmd_cls_single_label.txt')
DSMD_CLS_ML = mv.joinpath(DATA_DIR, 'texts', 'dsmd_cls_multi_label.txt')
DF_DIR = mv.joinpath(DATA_DIR, 'datafolder')
DSMD_DF = mv.joinpath(DATA_DIR, 'texts', 'dsmd_datafolder.txt')
CLS2LBL = mv.joinpath(DATA_DIR, 'texts', 'class2label.txt')
DSMD_DET_GT = mv.joinpath(DATA_DIR, 'texts', 'dsmd_det_gt.csv')
DSMD_DET_C2L = mv.joinpath(DATA_DIR, 'texts', 'det_classes.csv')


def _dsmd_values_equal(x, y):
    """Compare two dsmd values, including (nested lists of) arrays."""
    if isinstance(x, (list, tuple)) and isinstance(y, (list, tuple)):
        return len(x) == len(y) and all(
            _dsmd_values_equal(i, j) for i, j in zip(x, y))

    # array-like values compare element-wise, so `==` alone cannot be used
    # as a truth value; require equal shapes and all elements equal
    shape_x = getattr(x, 'shape', None)
    shape_y = getattr(y, 'shape', None)
    if shape_x is not None or shape_y is not None:
        return shape_x == shape_y and bool((x == y).all())

    return x == y


def assert_equal_dsmds(a, b):
    """Assert that two dsmds hold the same keys, in order, with equal values.

    Args:
        a (dict): first dataset metadata.
        b (dict): second dataset metadata.
    """
    assert len(a) == len(b)
    for key1, key2 in zip(a, b):
        assert key1 == key2
        # compare a against b; comparing a[key1] with a[key2] would always
        # succeed once the keys are equal
        assert _dsmd_values_equal(a[key1], b[key2]), \
            'values differ for key {}'.format(key1)


@pytest.mark.parametrize('dsmd_file', [DSMD_CLS_SL, DSMD_CLS_ML])
def test_dsmd_io_cls(dsmd_file):
    """Round-trip a classification dsmd file through save_dsmd / load_dsmd."""
    tmp_dir = mv.joinpath(DATA_DIR, 'temporary_subdir')
    tmp_path = mv.joinpath(tmp_dir, 'tmp_dsmd.txt')

    dsmd_loaded = mv.load_dsmd(dsmd_file)
    mv.save_dsmd(tmp_path, dsmd_loaded)
    dsmd_reloaded = mv.load_dsmd(tmp_path)
    assert_equal_dsmds(dsmd_loaded, dsmd_reloaded)

    mv.rmtree(tmp_dir)
def gen_path(*paths):
    """Join the given path components onto DATA_DIR."""
    components = (DATA_DIR,) + paths
    return mv.joinpath(*components)
import math import numpy as np import pytest import medvision as mv DATA_DIR = mv.joinpath(mv.parentdir(__file__), 'data') DCM_PATH = mv.joinpath(DATA_DIR, 'dicoms', 'brain_001.dcm') def gen_path(*paths): return mv.joinpath(DATA_DIR, *paths) def assert_image_equal(a, b): assert a.shape == b.shape assert a.dtype == b.dtype diff = np.abs(a.astype(np.float32) - b.astype(np.float32)) assert math.isclose(diff.max(), 0.0) @pytest.mark.parametrize('given, expected', [(gen_path('dicoms', 'brain_001.dcm'), True), (gen_path('pngs', 'Blue-Ogi.png'), False), (gen_path('dicoms'), False), (gen_path('texts', 'null.txt'), False)]) def test_isdicom(given, expected): assert mv.isdicom(given) == expected @pytest.mark.parametrize('given, expected', [(gen_path('dicoms'), True),