Code example #1
File: bdc2rws.py  Project: lemon126/medvision
def batch_bdc2rws_contour(dcm_dir, bdc_dir, rws_dir, **kwargs):
    """ Convert BDC format annotation to rws format.

    Args:
        dcm_dir (str): dicom files directory.
        bdc_dir (str): bdc annotation files directory.
        rws_dir (str): rws annotation files directory.

    N.B.
        The dicom file title should be exactly the same as the annotation
        file title, e.g. 123.dcm, 123.txt.
    """
    mv.mkdirs(rws_dir)
    dcm_filenames = mv.listdir(dcm_dir)
    bdc_filenames = mv.listdir(bdc_dir)

    dcm_titles = [mv.splitext(fn)[0] for fn in dcm_filenames]
    bdc_titles = [mv.splitext(fn)[0] for fn in bdc_filenames]
    file_titles = list(set(dcm_titles).intersection(set(bdc_titles)))

    if (len(dcm_filenames) != len(bdc_filenames)
            or len(file_titles) != len(dcm_filenames)):
        logging.warning('dicoms & annotations do not exactly match')

    for file_title in tqdm(file_titles):
        dcm_path = mv.joinpath(dcm_dir, file_title + '.dcm')
        bdc_path = mv.joinpath(bdc_dir, file_title + '.txt')
        rws_path = mv.joinpath(rws_dir, file_title + '.json')
        bdc2rws_contour(dcm_path, bdc_path, rws_path, **kwargs)
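A minimal usage sketch, assuming the library is imported as mv (as in the excerpt) and that the hypothetical directories below follow the docstring's naming convention (matching titles such as 123.dcm / 123.txt):

batch_bdc2rws_contour(
    dcm_dir='data/dicoms',      # contains e.g. 123.dcm
    bdc_dir='data/bdc_labels',  # contains e.g. 123.txt
    rws_dir='data/rws_labels',  # receives e.g. 123.json
)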
Code example #2
File: checkpoint.py  Project: lemon126/medvision
def save_checkpoint(model, path, optimizer=None, metadata=None):
    """ Save checkpoint to file.

    The checkpoint will have 3 fields: ``metadata``, ``state_dict`` and
    ``optimizer``.

    Args:
        model (Module): module whose params are to be saved.
        path (str): path to save the checkpoint file.
        optimizer ('Optimizer', optional): optimizer to be saved.
        metadata (dict, optional): metadata to be saved in checkpoint.
    """
    assert isinstance(metadata, (dict, type(None)))
    if metadata is None:
        metadata = {}

    mv.mkdirs(mv.parentdir(path))

    # if wrapped by nn.DataParallel, remove the wrapper
    if hasattr(model, 'module'):
        model = model.module

    # make a checkpoint
    checkpoint = {'state_dict': _weights_to_cpu(model.state_dict())}
    if optimizer is not None:
        checkpoint['optimizer'] = optimizer.state_dict()
    if metadata is not None:
        checkpoint['metadata'] = metadata

    torch.save(checkpoint, path)
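For symmetry, a hedged sketch of reading such a checkpoint back with plain PyTorch; the keys ('state_dict', 'optimizer', 'metadata') follow the function above, while the model, optimizer and path below are placeholders:

import torch
import torch.nn as nn

model = nn.Linear(8, 2)                                    # placeholder model
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)   # placeholder optimizer

# 'checkpoint.pth' is a hypothetical path produced by save_checkpoint above.
checkpoint = torch.load('checkpoint.pth', map_location='cpu')
model.load_state_dict(checkpoint['state_dict'])
if 'optimizer' in checkpoint:
    optimizer.load_state_dict(checkpoint['optimizer'])
metadata = checkpoint.get('metadata', {})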
Code example #3
File: test_util.py  Project: lemon126/medvision
def test_mkdirs():
    with not_raises(FileExistsError):
        mv.mkdirs(DATA_DIR)

    path = mv.joinpath(DATA_DIR, 'temporary_subdir')
    mv.mkdirs(path)
    assert mv.isdir(path)
    mv.rmtree(path)
Code example #4
def test_imread_imwrite(img):
    dst_dir = mv.joinpath(DATA_DIR, 'temporary_subdir')
    dst_path = mv.joinpath(dst_dir, mv.basename(PNG_IMG_PATH))
    mv.mkdirs(dst_dir)

    ret_val = mv.imwrite(img, dst_path)
    assert ret_val
    img_reloaded = mv.imread(dst_path, mv.ImreadMode.UNCHANGED)
    assert_image_equal(img, img_reloaded)

    mv.rmtree(dst_dir)
Code example #5
File: classification.py  Project: lemon126/medvision
def save_cls_dsmd(dsmd_path, data, auto_mkdirs=True):
    if auto_mkdirs:
        mv.mkdirs(mv.parentdir(dsmd_path))

    dsmd = mv.make_dsmd(data)
    with open(dsmd_path, 'w') as fd:
        for key, value in dsmd.items():
            if mv.isarrayinstance(value):  # handle multi-label case
                value = ','.join([str(entry) for entry in value])
            line = '%s,%s\n' % (str(key), str(value))
            fd.write(line)
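From the write loop, each record becomes one 'key,value' line (no space after the comma) and multi-label values are comma-joined. A small made-up illustration, assuming mv.make_dsmd simply orders the given mapping and treats a plain list as array-like:

save_cls_dsmd('data/cls_dsmd.csv', {'img_001.png': 0, 'img_002.png': [1, 2]})
# Expected content of data/cls_dsmd.csv:
# img_001.png,0
# img_002.png,1,2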
Code example #6
File: init_util.py  Project: lemon126/medvision
def init_logging(log_dir=None, config_file=None):
    if log_dir is None:
        log_dir = os.getcwd()

    if config_file is None:
        config_file = mv.joinpath(mv.parentdir(mv.parentdir(__file__)),
                                  'configs/default_log_config.yaml')

    with open(config_file, 'rt') as f:
        config = yaml.safe_load(f.read())
        config['handlers']['info_file_handler']['filename'] = \
            mv.joinpath(log_dir, 'info.log')
        config['handlers']['error_file_handler']['filename'] = \
            mv.joinpath(log_dir, 'error.log')
        mv.mkdirs(log_dir)
        logging.config.dictConfig(config)
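The code only assumes that the YAML file deserializes into a logging.config.dictConfig-compatible dict with handlers named info_file_handler and error_file_handler, each carrying a filename key. A minimal sketch of such a config, expressed here as a Python dict (the project's actual default_log_config.yaml may differ):

minimal_config = {
    'version': 1,
    'formatters': {
        'simple': {'format': '%(asctime)s %(levelname)s %(message)s'},
    },
    'handlers': {
        'info_file_handler': {
            'class': 'logging.FileHandler',
            'level': 'INFO',
            'formatter': 'simple',
            'filename': 'info.log',   # overwritten by init_logging
        },
        'error_file_handler': {
            'class': 'logging.FileHandler',
            'level': 'ERROR',
            'formatter': 'simple',
            'filename': 'error.log',  # overwritten by init_logging
        },
    },
    'root': {'level': 'INFO',
             'handlers': ['info_file_handler', 'error_file_handler']},
}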
Code example #7
File: test_util.py  Project: rouroulee/medvision
def test_copyfiles():
    dst_dir = mv.joinpath(DATA_DIR, 'temporary_subdir')
    mv.mkdirs(dst_dir)

    src_paths = ['brain_001.dcm', 'brain_002.dcm']
    mv.copyfiles(src_paths, dst_dir, DCM_DIR)
    assert len(mv.listdir(dst_dir)) == 2

    with not_raises(FileExistsError):
        mv.copyfiles(src_paths, dst_dir, DCM_DIR, non_overwrite=False)

    with pytest.raises(FileExistsError):
        mv.copyfiles(src_paths, dst_dir, DCM_DIR, non_overwrite=True)

    mv.empty_dir(dst_dir)
    assert mv.isdir(dst_dir)
    assert len(mv.listdir(dst_dir)) == 0
    mv.rmtree(dst_dir)
Code example #8
def batch_mask2rws(mask_dir, rws_dir, **kwargs):
    """ Convert mask format annotation to rws format.

    Args:
        mask_dir (str): mask files directory.
        rws_dir (str): rws annotation files directory.

    N.B. The dicom file title should be exactly the same as the mask file
    title, e.g. 123.dcm, 123.png.
    """
    mv.mkdirs(rws_dir)
    mask_filenames = mv.listdir(mask_dir)

    file_titles = [mv.splitext(fn)[0] for fn in mask_filenames]

    for file_title in tqdm(file_titles):
        mask_path = mv.joinpath(mask_dir, file_title + '.png')
        rws_path = mv.joinpath(rws_dir, file_title + '.json')
        mask2rws(mask_path, rws_path, **kwargs)
Code example #9
File: test_dataset.py  Project: lemon126/medvision
def test_gen_cls_ds():
    tmp_dir = mv.joinpath(DATA_DIR, 'temporary_subdir')
    mv.mkdirs(tmp_dir)
    tmp_c2l_path = mv.joinpath(tmp_dir, 'tmp_c2l.txt')
    tmp_dsmd_path = mv.joinpath(tmp_dir, 'tmp_dsmd.txt')
    mv.gen_cls_dsmd_file_from_datafolder(DF_DIR, tmp_c2l_path, tmp_dsmd_path)

    dsmd = mv.load_dsmd(DSMD_DF)
    tmp_dsmd = mv.load_dsmd(tmp_dsmd_path)
    c2l = mv.load_dsmd(CLS2LBL)
    tmp_c2l = mv.load_dsmd(tmp_c2l_path)
    assert_equal_dsmds(dsmd, tmp_dsmd)
    assert_equal_dsmds(c2l, tmp_c2l)

    mv.empty_dir(tmp_dir)
    mv.gen_cls_ds_from_datafolder(DF_DIR, tmp_dir)
    assert len(mv.listdir(tmp_dir)) == 8

    mv.rmtree(tmp_dir)
Code example #10
File: util.py  Project: rouroulee/medvision
def save_dsmd(dsmd, file_path, auto_mkdirs=True):
    """ Save dataset metadata to specified file.

    Args:
        dsmd (dict): dataset metadata.
        file_path (str): file path to save dataset metadata.
        auto_mkdirs (bool): If the parent folder of `file_path` does not exist,
            whether to create it automatically.
    """
    if auto_mkdirs:
        mv.mkdirs(mv.parentdir(file_path))

    ordered_dsmd = collections.OrderedDict(natsorted(dsmd.items(),
                                                     key=lambda t: t[0]))
    with open(file_path, 'w') as fd:
        for key, value in ordered_dsmd.items():
            if mv.isarrayinstance(value):  # for multi label case
                value = ', '.join([str(entry) for entry in value])
            line = '%s, %s\n' % (str(key), str(value))
            fd.write(line)
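Based on the write loop above (keys natsorted, array-like values joined with ', '), a small made-up illustration of the resulting file, assuming mv.isarrayinstance treats a plain list as array-like:

dsmd = {
    'brain_002.dcm': 1,        # single-label record
    'brain_001.dcm': [0, 1],   # multi-label record
}
save_dsmd(dsmd, 'data/dsmd.csv')
# Expected content of data/dsmd.csv:
# brain_001.dcm, 0, 1
# brain_002.dcm, 1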
Code example #11
File: test_dataset.py  Project: lemon126/medvision
def test_split_dsmd_file(dsmd_file):
    tmp_dir = mv.joinpath(DATA_DIR, 'temporary_subdir')
    tmp_path = mv.joinpath(tmp_dir, 'tmp_dsmd.txt')
    mv.mkdirs(tmp_dir)
    mv.cp(dsmd_file, tmp_path)

    datasplit = {'train': 0.9, 'val': 0.1, 'test': 0.0}

    # shuffle
    mv.split_dsmd_file(tmp_path, datasplit)
    train_dsmd_file_path = mv.joinpath(tmp_dir, 'train.csv')
    val_dsmd_file_path = mv.joinpath(tmp_dir, 'val.csv')
    test_dsmd_file_path = mv.joinpath(tmp_dir, 'test.csv')
    assert mv.isfile(train_dsmd_file_path)
    assert mv.isfile(val_dsmd_file_path)
    assert not mv.isfile(test_dsmd_file_path)

    train_dsmd = mv.load_dsmd(train_dsmd_file_path)
    val_dsmd = mv.load_dsmd(val_dsmd_file_path)
    assert len(train_dsmd) == 18
    assert len(val_dsmd) == 2

    # non shuffle
    mv.split_dsmd_file(tmp_path, datasplit, shuffle=False)
    train_dsmd_file_path = mv.joinpath(tmp_dir, 'train.csv')
    val_dsmd_file_path = mv.joinpath(tmp_dir, 'val.csv')
    test_dsmd_file_path = mv.joinpath(tmp_dir, 'test.csv')
    assert mv.isfile(train_dsmd_file_path)
    assert mv.isfile(val_dsmd_file_path)
    assert not mv.isfile(test_dsmd_file_path)

    train_dsmd = mv.load_dsmd(train_dsmd_file_path)
    val_dsmd = mv.load_dsmd(val_dsmd_file_path)
    assert len(train_dsmd) == 18
    assert len(val_dsmd) == 2
    assert 'brain_001.dcm' in train_dsmd
    assert 'brain_019.dcm' in val_dsmd

    mv.rmtree(tmp_dir)
Code example #12
def imwrite(file_path, img, auto_mkdirs=True):
    """ Save image to specified file.

    Args:
        file_path (str): specified file path to save to.
        img (ndarray): image array to be written.
        auto_mkdirs (bool): If the parent folder of `file_path` does not exist,
            whether to create it automatically.

    Returns:
        (bool): whether the image was saved successfully.

    Note:
        If the given image is a color image, it should be in RGB format.
    """
    if auto_mkdirs:
        mv.mkdirs(mv.parentdir(file_path))

    if img.ndim == 3:
        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

    return cv2.imwrite(file_path, img)
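A short usage sketch for the signature defined here (file_path first, then img); the array is a placeholder RGB image, the output path is hypothetical, and the parent directory is created automatically because auto_mkdirs defaults to True:

import numpy as np

rgb_img = np.zeros((64, 64, 3), dtype=np.uint8)
rgb_img[..., 0] = 255                        # pure red in RGB order
ok = imwrite('output/red.png', rgb_img)      # converted to BGR before cv2.imwrite
assert ok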
Code example #13
File: test_util.py  Project: lemon126/medvision
def test_has_duplicated_files():
    dst_dir = mv.joinpath(DATA_DIR, 'temporary_subdir')
    mv.mkdirs(dst_dir)

    # non duplicated files case
    src_paths = ['brain_001.dcm', 'brain_002.dcm', 'brain_003.dcm']
    mv.copyfiles(src_paths, dst_dir, DCM_DIR)
    assert len(mv.find_duplicated_files(dst_dir)) == 0

    # duplicated files case
    mv.non_overwrite_cp(mv.joinpath(DCM_DIR, src_paths[0]),
                        mv.joinpath(dst_dir, 'dup_0.dcm'))
    duplicated_files = mv.find_duplicated_files(dst_dir)
    assert len(duplicated_files) == 1
    assert (mv.joinpath(dst_dir, 'brain_001.dcm') in duplicated_files[0]
            and mv.joinpath(dst_dir, 'dup_0.dcm') in duplicated_files[0])

    mv.non_overwrite_cp(mv.joinpath(DCM_DIR, src_paths[1]),
                        mv.joinpath(dst_dir, 'dup_1.dcm'))
    duplicated_files = mv.find_duplicated_files(dst_dir)
    assert len(duplicated_files) == 2

    mv.rmtree(dst_dir)
Code example #14
def save_det_dsmd(dsmd_path, data, class2label, auto_mkdirs=True):
    """ Save dataset metadata to specified file.

    Args:
        dsmd_path (str): file path to save dataset metadata.
        data (dict): dataset metadata, refer to 'load_dsmd'.
        class2label (str or dict): class-to-label file or class2label dict.
        auto_mkdirs (bool): If the parent folder of `file_path` does not
            exist, whether to create it automatically.
    """
    if auto_mkdirs:
        mv.mkdirs(mv.parentdir(dsmd_path))

    # get label->class mapping
    if isinstance(class2label, str):
        class2label = mv.load_c2l(class2label)
    label2class = {value: key for key, value in class2label.items()}

    # write dataset metadata loop
    dsmd = mv.make_dsmd(data)
    with open(dsmd_path, 'w') as fd:
        for key, value in dsmd.items():
            _write_record(fd, key, value, label2class)
Code example #15
File: util.py  Project: rouroulee/medvision
def gen_cls_ds_from_datafolder(
        in_dir, out_dir, auto_mkdirs=True, classnames=None):
    """ Generate classification dataset from DataFolder.

    This function copies each image in the DataFolder into the specified
    directory. The original DataFolder is left unchanged.

    Args:
        in_dir (str): DataFolder root directory.
        out_dir (str): directory to save all the images in DataFolder.
        auto_mkdirs (bool): If `out_dir` does not exist, whether to create
            it automatically.
        classnames (list[str]): names of specified classes to be collected.
            If not given, all classes are considered.

    Note:
        This function is expected to be used together with
        gen_cls_dsmd_file_from_datafolder().
        The filename of each image in the DataFolder should be unique;
        otherwise, a FileExistsError will be thrown.
        The DataFolder format is described in
        'gen_cls_dsmd_file_from_datafolder()'.
    """
    assert mv.isdir(in_dir)

    # clean output directory
    if auto_mkdirs:
        mv.mkdirs(mv.parentdir(out_dir))
    mv.empty_dir(out_dir)

    if classnames is None:
        classnames = mv.listdir(in_dir)

    for classname in classnames:
        class_dir = mv.joinpath(in_dir, classname)
        assert mv.isdir(class_dir)
        filenames = natsorted(mv.listdir(class_dir))
        mv.copyfiles(filenames, out_dir, class_dir, non_overwrite=True)
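A hedged illustration of the DataFolder layout this function expects, inferred from the loop above (one subdirectory per class, uniquely named images); all directory and file names below are made up:

# datafolder/
#   cat/
#     cat_001.png
#     cat_002.png
#   dog/
#     dog_001.png
#     dog_002.png
gen_cls_ds_from_datafolder('datafolder', 'flat_images')
# flat_images/ now contains the four images copied flat; a duplicated
# filename across classes would raise FileExistsError because copyfiles
# is called with non_overwrite=True.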
Code example #16
File: runner.py  Project: lemon126/medvision
    def __init__(self,
                 mode,
                 model,
                 batch_processor,
                 train_dataloader=None,
                 val_dataloader=None,
                 optimizer=None,
                 work_dir=None,
                 max_epochs=10000):
        """ A training helper for PyTorch.

        Args:
            model (`torch.nn.Module`): The model to be run.
            mode ('ModeKey'): running mode.
            batch_processor (callable): A callable method that processes a data
                batch. The interface of this method should be
                `batch_processor(model, data, train_mode) -> dict`
            train_dataloader ('DataLoader'): train data loader.
            val_dataloader ('DataLoader'): validation data loader.
            optimizer (dict or `Optimizer`): If it is a dict, runner will
                construct an optimizer according to it.
            work_dir (str, optional): The working directory to save
                checkpoints, logs and other outputs.
            max_epochs (int): Total training epochs.
        """
        assert isinstance(mode, mv.ModeKey)
        assert isinstance(model, torch.nn.Module)
        assert callable(batch_processor)
        assert isinstance(optimizer, (str, torch.optim.Optimizer))
        assert isinstance(work_dir, str) or work_dir is None
        assert isinstance(max_epochs, int)

        self.mode = mode
        self.epoch_runner = getattr(self, mode.value)
        self.model = model
        self.batch_processor = batch_processor
        self.train_dataloader = train_dataloader
        self.val_dataloader = val_dataloader
        self.optimizer = self.build_optimizer(optimizer)

        # create work_dir
        self.work_dir = mv.abspath(work_dir if work_dir is not None else '.')
        mv.mkdirs(self.work_dir)

        # init TensorboardX visualizer and dataloader
        if mode == mv.ModeKey.TRAIN:
            experiment = mv.basename(self.work_dir)
            self.visualizer = mv.TensorboardVisualizer(experiment)
            self.dataloader = self.train_dataloader
        else:
            self.visualizer = None
            self.dataloader = self.val_dataloader

        # init hooks and average meter
        self._hooks = []
        self.average_meter = AverageMeter()

        # init loop parameters
        self._epoch = 0
        self._max_epochs = max_epochs if mode == mv.ModeKey.TRAIN else 1
        self._inner_iter = 0
        self._iter = 0
        self._max_iters = 0

        # get model name from model class
        if hasattr(self.model, 'module'):
            self._model_name = self.model.module.__class__.__name__
        else:
            self._model_name = self.model.__class__.__name__
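A hedged construction sketch for this initializer. The class name Runner is assumed from the file name runner.py, `import medvision as mv` is assumed from the excerpts above, and the batch_processor stub only follows the interface stated in the docstring (`batch_processor(model, data, train_mode) -> dict`):

import torch
import torch.nn as nn
import medvision as mv   # assumed import alias


def my_batch_processor(model, data, train_mode):
    # Placeholder: run the model on a batch and return a dict of losses.
    images, labels = data
    outputs = model(images)
    loss = nn.functional.cross_entropy(outputs, labels)
    return {'loss': loss}


model = nn.Linear(16, 2)                                   # placeholder model
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

runner = Runner(                      # class name is an assumption
    mode=mv.ModeKey.TRAIN,
    model=model,
    batch_processor=my_batch_processor,
    train_dataloader=None,            # a real torch DataLoader in practice
    optimizer=optimizer,
    work_dir='work_dirs/demo',
    max_epochs=10,
)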