Example #1
0
def batch_bdc2rws_contour(dcm_dir, bdc_dir, rws_dir, **kwargs):
    """ Batch-convert BDC contour annotations to rws annotations.

    Every title (filename without extension) present in both `dcm_dir`
    and `bdc_dir` is handed to `bdc2rws_contour()` together with the
    target path '<title>.json' inside `rws_dir`.

    Args:
        dcm_dir (str): dicom files directory.
        bdc_dir (str): bdc annotation files directory.
        rws_dir (str): rws annotation files directory. It is created
            through `mv.mkdirs()` before the conversion starts.
        **kwargs: forwarded unchanged to `bdc2rws_contour()`.

    N.B.
        A dicom and its annotation are paired by title and the paths are
        built as '<title>.dcm' and '<title>.txt', e.g. 123.dcm, 123.txt.
        When the two directories do not pair up one-to-one a warning is
        logged and only the titles found in both are converted.
    """
    mv.mkdirs(rws_dir)

    dcm_names = mv.listdir(dcm_dir)
    bdc_names = mv.listdir(bdc_dir)

    # titles that exist on both sides
    titles_of_dcms = set(mv.splitext(name)[0] for name in dcm_names)
    titles_of_bdcs = set(mv.splitext(name)[0] for name in bdc_names)
    file_titles = list(titles_of_dcms & titles_of_bdcs)

    # one-to-one pairing means all three counts agree
    paired_exactly = (
        len(dcm_names) == len(bdc_names) == len(file_titles))
    if not paired_exactly:
        logging.warning('dicoms & annotations do not exactly match')

    for title in tqdm(file_titles):
        bdc2rws_contour(
            mv.joinpath(dcm_dir, title + '.dcm'),
            mv.joinpath(bdc_dir, title + '.txt'),
            mv.joinpath(rws_dir, title + '.json'),
            **kwargs)
Example #2
0
def gen_cls_dsmd_file_from_datafolder(
        root_dir, c2l_path, dsmd_path, classnames=None):
    """ Write the class2label file and the dataset metadata file that
    describe a DataFolder, restricted to the requested classes.

    A DataFolder is a directory layout for image classification where
    each sub-directory holds the images of one class:
    -----------------------
    ├── class1
    │   ├── 1.png
    │   └── 2.png
    │   ...
    ├── class2
    │   ├── 3.png
    │   └── 4.png
    └── ...
    -----------------------

    Labels are the positions of the class names in `classnames`
    (0, 1, 2, ...). Each filename found in a class directory is mapped
    to the label of that class.

    Args:
        root_dir (str): root data directory containing all the images.
        c2l_path (str): file path to save class2label info.
        dsmd_path (str): file path to save dataset metadata file.
        classnames (list[str]): names of specified classes.
            If not given, every entry of `root_dir` is taken as a class.

    Raises:
        FileExistsError: if the same filename shows up more than once
            across the visited class directories.

    Note:
        This function is expected to be used together with
        'gen_cls_ds_from_datafolder()'.
        Nothing is saved when an error interrupts the scan, because both
        files are written only after all classes have been visited.
    """
    assert mv.isdir(root_dir)

    if classnames is None:
        classnames = mv.listdir(root_dir)

    file2label = {}
    name2label = {}

    for index, name in enumerate(classnames):
        name2label[name] = index

        subdir = mv.joinpath(root_dir, name)
        assert mv.isdir(subdir)

        for image_name in mv.listdir(subdir):
            # filenames are the metadata keys, so they must be unique
            if image_name in file2label:
                raise FileExistsError(
                    'filename {} already exists'.format(image_name))
            file2label[image_name] = index

    mv.save_dsmd(c2l_path, name2label)
    mv.save_dsmd(dsmd_path, file2label)
Example #3
0
def bdc2dsmd_det_2d(annot_dir,
                    image_dir=None,
                    class2label=None,
                    ignore_label_name=True,
                    replace_ext=lambda x: x):
    """ Collect BDC bounding-box annotations into a detection dsmd.

    For every file in `annot_dir` the bounding boxes returned by
    `load_bdc_dr_bbox()` are grouped per label into float32 arrays with
    4 columns. Files without any box keep an empty (0, 4) array for each
    class.

    Args:
        annot_dir (str): directory containing the annotation files.
        image_dir (str): not used by this function.
        class2label (dict): mapping from label name to label index. Its
            length gives the number of classes; when omitted a single
            class is assumed.
        ignore_label_name (bool): if True every box gets label 0,
            otherwise the label is looked up in `class2label`, which
            must then be provided.
        replace_ext (callable): maps an annotation filename to the key
            used in the resulting dsmd. Defaults to the identity.

    Returns:
        The result of `mv.make_dsmd()` applied to the collected mapping
        {key: [bboxes of class 0, bboxes of class 1, ...]}.

    N.B. annotation file name and image file name should be the same
    """
    num_classes = len(class2label) if class2label is not None else 1

    def name2label(name):
        # Return the label index itself. The previous lambda returned the
        # whole `class2label` mapping, which cannot index the class list.
        # NOTE(review): presumably load_bdc_dr_bbox() calls this with each
        # annotated label name -- confirm against its definition.
        return 0 if ignore_label_name else class2label[name]

    filenames = mv.listdir(annot_dir)
    empty_bboxes = np.zeros((0, 4), dtype=np.float32)
    # The shared empty array is never modified in place; entries are only
    # ever replaced by new arrays below.
    dsmd = {
        replace_ext(filename): [empty_bboxes] * num_classes
        for filename in filenames
    }
    for filename in filenames:
        annot_filepath = mv.joinpath(annot_dir, filename)
        bboxes = load_bdc_dr_bbox(annot_filepath, name2label)
        per_class = dsmd[replace_ext(filename)]
        for label, bbox in bboxes:
            bbox = np.array(bbox, dtype=np.float32).reshape(-1, 4)
            if per_class[label].shape[0] == 0:
                per_class[label] = bbox
            else:
                per_class[label] = np.append(per_class[label], bbox, axis=0)

    return mv.make_dsmd(dsmd)
Example #4
0
def test_copyfiles():
    """ Check `mv.copyfiles()` with and without overwrite protection.

    The temporary destination directory is removed in a `finally` clause
    so that a failing assertion does not leave it behind under DATA_DIR.
    """
    dst_dir = mv.joinpath(DATA_DIR, 'temporary_subdir')
    mv.mkdirs(dst_dir)

    try:
        src_paths = ['brain_001.dcm', 'brain_002.dcm']
        mv.copyfiles(src_paths, dst_dir, DCM_DIR)
        assert len(mv.listdir(dst_dir)) == 2

        # copying over existing files is allowed when not protected
        with not_raises(FileExistsError):
            mv.copyfiles(src_paths, dst_dir, DCM_DIR, non_overwrite=False)

        # ... and rejected when overwrite protection is on
        with pytest.raises(FileExistsError):
            mv.copyfiles(src_paths, dst_dir, DCM_DIR, non_overwrite=True)

        # emptying keeps the directory itself but none of its content
        mv.empty_dir(dst_dir)
        assert mv.isdir(dst_dir)
        assert len(mv.listdir(dst_dir)) == 0
    finally:
        mv.rmtree(dst_dir)
Example #5
0
    def experiment_exists(experiment):
        """ Tell whether `experiment` already has an entry under 'runs'.

        Each entry name in the 'runs' directory is split on '_' at most
        three times and its last piece is compared with `experiment`.

        Args:
            experiment (str): experiment name to look for.

        Returns:
            (bool): True if a matching entry exists, False otherwise
                (also when 'runs' is not a directory).
        """
        if mv.isdir('runs'):
            recorded = [
                entry.split('_', 3)[-1] for entry in mv.listdir('runs')
            ]
            return experiment in recorded

        return False
Example #6
0
def gen_cls_ds_from_datafolder(
        in_dir, out_dir, auto_mkdirs=True, classnames=None):
    """ Flatten a DataFolder into a single directory of images.

    The files of every selected class directory are copied into
    `out_dir`; the DataFolder itself is only read.

    Args:
        in_dir (str): DataFolder root directory.
        out_dir (str): directory receiving the copies. It is passed to
            `mv.empty_dir()` before anything is copied.
        auto_mkdirs (bool): whether to create the parent directory of
            `out_dir` with `mv.mkdirs()` beforehand.
        classnames (list[str]): names of specified classes to be
            collected. If not given, every entry of `in_dir` is taken as
            a class.

    Note:
        This function is expected to be used together with
        gen_cls_dsmd_file_from_datafolder(), whose docstring describes
        the DataFolder layout.
        Files are copied with `non_overwrite=True`, so filenames have to
        be unique across classes; otherwise a FileExistsError is thrown.
    """
    assert mv.isdir(in_dir)

    # prepare a clean output directory
    if auto_mkdirs:
        mv.mkdirs(mv.parentdir(out_dir))
    mv.empty_dir(out_dir)

    selected = mv.listdir(in_dir) if classnames is None else classnames

    for name in selected:
        src_dir = mv.joinpath(in_dir, name)
        assert mv.isdir(src_dir)
        mv.copyfiles(
            natsorted(mv.listdir(src_dir)),
            out_dir,
            src_dir,
            non_overwrite=True)
Example #7
0
def test_glob_file():
    """ Check `mv.glob()` in file mode, recursive and non-recursive. """
    # recursive search from the data root reaches all png files
    pngs_recursive = mv.glob(
        DATA_DIR, '*.png', mode=mv.GlobMode.FILE, recursive=True)
    assert len(pngs_recursive) == 16

    # no png file sits directly in the data root
    pngs_top_level = mv.glob(
        DATA_DIR, '*.png', mode=mv.GlobMode.FILE, recursive=False)
    assert len(pngs_top_level) == 0

    # without a pattern, every entry of the png directory is matched
    png_dir_files = mv.glob(
        PNG_DIR, mode=mv.GlobMode.FILE, recursive=False)
    assert len(png_dir_files) == len(mv.listdir(PNG_DIR))
Example #8
0
def batch_mask2rws(mask_dir, rws_dir, **kwargs):
    """ Batch-convert mask annotations to rws annotations.

    For every entry of `mask_dir`, `mask2rws()` is called with the mask
    path '<title>.png' and the target path '<title>.json' inside
    `rws_dir`, where the title is the entry name without its extension.

    Args:
        mask_dir (str): mask files directory.
        rws_dir (str): rws annotation files directory. It is created
            through `mv.mkdirs()` before the conversion starts.
        **kwargs: forwarded unchanged to `mask2rws()`.

    N.B. dicom file title should be exactly the same with mask file title.
    e.g. 123.dcm, 123.png
    """
    mv.mkdirs(rws_dir)

    titles = [mv.splitext(name)[0] for name in mv.listdir(mask_dir)]

    for title in tqdm(titles):
        mask2rws(
            mv.joinpath(mask_dir, title + '.png'),
            mv.joinpath(rws_dir, title + '.json'),
            **kwargs)
Example #9
0
def test_gen_cls_ds():
    """ Check dsmd file generation and dataset generation from a
    DataFolder.

    The temporary directory lives under DATA_DIR, so it is removed in a
    `finally` clause: a failing assertion must not leave generated files
    behind where other tests scanning DATA_DIR could pick them up.
    """
    tmp_dir = mv.joinpath(DATA_DIR, 'temporary_subdir')
    mv.mkdirs(tmp_dir)

    try:
        tmp_c2l_path = mv.joinpath(tmp_dir, 'tmp_c2l.txt')
        tmp_dsmd_path = mv.joinpath(tmp_dir, 'tmp_dsmd.txt')
        mv.gen_cls_dsmd_file_from_datafolder(
            DF_DIR, tmp_c2l_path, tmp_dsmd_path)

        # generated files must match the reference files
        dsmd = mv.load_dsmd(DSMD_DF)
        tmp_dsmd = mv.load_dsmd(tmp_dsmd_path)
        c2l = mv.load_dsmd(CLS2LBL)
        tmp_c2l = mv.load_dsmd(tmp_c2l_path)
        assert_equal_dsmds(dsmd, tmp_dsmd)
        assert_equal_dsmds(c2l, tmp_c2l)

        # the flattened dataset must contain the expected 8 files
        mv.empty_dir(tmp_dir)
        mv.gen_cls_ds_from_datafolder(DF_DIR, tmp_dir)
        assert len(mv.listdir(tmp_dir)) == 8
    finally:
        mv.rmtree(tmp_dir)
Example #10
0
def isdicomdir(path):
    """ Judge whether a given directory is a valid dicom directory.

    If given directory only contains dicoms (at least one dicom file),
    it is a dicom directory. Otherwise, it is not a dicom directory.
    In particular an empty directory is not a dicom directory.

    Args:
        path(str): given directory path.

    Returns:
        (bool): True if given directory path is a dicom directory,
                otherwise False (not a directory, empty, or containing
                an entry rejected by `isdicom()`).
    """
    if not mv.isdir(path):
        return False

    file_names = mv.listdir(path)
    # An empty directory has no dicom at all; without this guard the
    # check below would be vacuously true.
    if not file_names:
        return False

    return all(
        isdicom(mv.joinpath(path, file_name)) for file_name in file_names)
Example #11
0
    def __init__(self, cfg, mode, build_transform, image_loader):
        """ Set up the file list, transform and image loader for `mode`.

        The dsmd location is picked from `cfg.DATA` according to `mode`.
        It is either a dsmd file, whose keys are the image filenames
        located in `cfg.DATA.IMAGE_DIR`, or a directory that directly
        contains the images (no annotations, `self.dsmd` is None).

        Args:
            cfg: configuration object providing `DATA.TRAIN_DSMD`,
                `DATA.VAL_DSMD`, `DATA.TEST_DSMD` and `DATA.IMAGE_DIR`.
            mode: one of `mv.ModeKey.TRAIN`, `mv.ModeKey.VAL`,
                `mv.ModeKey.TEST`.
            build_transform (callable): called as
                `build_transform(cfg, is_train)`; its result is stored
                as `self.transform`.
            image_loader: stored as `self.image_loader`.
        """
        self.is_train = (mode == mv.ModeKey.TRAIN)

        self.mode2dsmd = {
            mv.ModeKey.TRAIN: cfg.DATA.TRAIN_DSMD,
            mv.ModeKey.VAL: cfg.DATA.VAL_DSMD,
            mv.ModeKey.TEST: cfg.DATA.TEST_DSMD,
        }
        dsmd_path = self.mode2dsmd[mode]
        assert mv.isfile(dsmd_path) or mv.isdir(dsmd_path)

        if mv.isfile(dsmd_path):  # for a dsmd file
            self.dsmd = mv.load_dsmd(dsmd_path)
            self.filenames = list(self.dsmd.keys())
            self.filepaths = [
                mv.joinpath(cfg.DATA.IMAGE_DIR, filename)
                for filename in self.filenames
            ]
        else:  # for a directory containing test images
            self.dsmd = None
            # Mirror the dsmd-file branch: keep the bare names in
            # `filenames` and store paths joined with the image directory
            # in `filepaths`, instead of the bare listing.
            self.filenames = mv.listdir(dsmd_path)
            self.filepaths = [
                mv.joinpath(dsmd_path, filename)
                for filename in self.filenames
            ]

        self.transform = build_transform(cfg, self.is_train)
        self.image_loader = image_loader