Example #1
import shutil

import pytest
from radio import CTImagesMaskedBatch
from radio.dataset import FilesIndex, Dataset, Pipeline


# pytest yield-fixture: setup before `yield`, teardown after (the decorator
# is assumed from this pattern; it is not shown on the source page).
@pytest.fixture
def batch_gen(dicom_dataset):
    # The dicom_dataset argument is a fixture dependency that guarantees the
    # generated DICOM data in './dicom' exists before this fixture runs.
    dicom_index = FilesIndex(path='./dicom/*', dirs=True)
    dicom_dataset = Dataset(dicom_index, batch_class=CTImagesMaskedBatch)

    create_blosc_dataset = dicom_dataset >> (
        Pipeline()
        .load(fmt='dicom')
        .dump(dst='./blosc', fmt='blosc',
              components=('images', 'origin', 'spacing'))
    )
    create_blosc_dataset.run(4)
    blosc_index = FilesIndex(path='./blosc/*', dirs=True)
    blosc_dataset = Dataset(blosc_index, batch_class=CTImagesMaskedBatch)
    yield blosc_dataset.gen_batch(2, n_epochs=None)
    print("Cleaning up generated blosc data...")
    shutil.rmtree('./blosc')
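
For clarity, a minimal sketch of a test that could consume this fixture; the test name and assertion are illustrative, not part of the original suite.

def test_blosc_batches(batch_gen):
    # batch_gen yields an endless generator of batches of size 2.
    batch = next(batch_gen).load(fmt='blosc')
    assert len(batch) == 2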
Example #2

import shutil

import pytest
from radio import CTImagesMaskedBatch
from radio.dataset import FilesIndex, Dataset
from radio.pipelines import split_dump


@pytest.fixture
def crops_datasets(dicom_dataset, nodules, histo):
    pipeline = split_dump('./cancer', './ncancer', nodules, histo,
                          fmt='dicom', spacing=(1.7, 1.0, 1.0),
                          shape=(128, 128, 128), order=3,
                          padding='reflect', crop_size=(32, 64, 64))

    pipeline = (dicom_dataset >> pipeline)

    pipeline.next_batch(2)

    cancer_idx = FilesIndex(path='./cancer/*', dirs=True)
    ncancer_idx = FilesIndex(path='./ncancer/*', dirs=True)

    cancer_set = Dataset(cancer_idx, batch_class=CTImagesMaskedBatch)
    ncancer_set = Dataset(ncancer_idx, batch_class=CTImagesMaskedBatch)

    yield cancer_set, ncancer_set

    shutil.rmtree('./cancer')
    shutil.rmtree('./ncancer')
Example #3

import os
import shutil

import pytest
from radio import CTImagesMaskedBatch
from radio.dataset import FilesIndex, Dataset


@pytest.fixture
def dicom_dataset():
    if os.path.exists('./dicom'):
        shutil.rmtree('./dicom')

    # generate_dicom_scans is a helper from radio's test suite that writes
    # synthetic DICOM scans into the given directory.
    generate_dicom_scans('./dicom')

    index = FilesIndex(path='./dicom/*', dirs=True)
    dataset = Dataset(index, batch_class=CTImagesMaskedBatch)
    yield dataset
    print("Cleaning up generated dicom data...")
    shutil.rmtree('./dicom')
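
The crops_datasets fixture above also expects nodules and histo fixtures that are not shown on this page. A hedged sketch of a minimal nodules fixture, using the LUNA16-style annotation columns that radio's fetch_nodules_info expects (all values are placeholders):

import pandas as pd
import pytest

@pytest.fixture
def nodules(dicom_dataset):
    # One placeholder nodule per generated scan, centered at the world origin.
    return pd.DataFrame({
        'seriesuid': list(dicom_dataset.indices),
        'coordX': 0.0,
        'coordY': 0.0,
        'coordZ': 0.0,
        'diameter_mm': 8.0,
    })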
Example #4
    def test_blosc_dump_sync(self, batch_with_nodules_and_masks, sync):
        _ = batch_with_nodules_and_masks.dump(dst='./dumped_blosc',  # noqa: F841
                                              fmt='blosc', sync=sync)
        blosc_index = FilesIndex(path='./dumped_blosc/*', dirs=True)
        blosc_dataset = Dataset(blosc_index, batch_class=CTImagesMaskedBatch)
        assert len(blosc_dataset) == len(batch_with_nodules_and_masks)

        batch = (  # noqa: F841
            blosc_dataset
            .next_batch(len(batch_with_nodules_and_masks))
            .load(fmt='blosc', sync=sync)
        )

        shutil.rmtree('./dumped_blosc')
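
The sync flag received by this test is presumably supplied by pytest parametrization on the surrounding class; a sketch of the assumed decorator:

    @pytest.mark.parametrize('sync', [False, True])
    def test_blosc_dump_sync(self, batch_with_nodules_and_masks, sync):
        ...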
Example #5
    def test_dicom_dump(self, batch_with_nodules_and_masks):
        _ = batch_with_nodules_and_masks.dump(dst='./dumped_dicoms',  # noqa: F841
                                              fmt='dicom')
        dicom_index = FilesIndex(path='./dumped_dicoms/*', dirs=True)
        dicom_dataset = Dataset(dicom_index, batch_class=CTImagesMaskedBatch)
        assert len(dicom_dataset) == len(batch_with_nodules_and_masks)

        batch = (  # noqa: F841
            dicom_dataset
            .next_batch(len(batch_with_nodules_and_masks))
            .load(fmt='dicom')
        )

        shutil.rmtree('./dumped_dicoms')
Example #6

def test_create_crops(dicom_dataset, nodules):
    # create_crops, get_config and the module-level config come from the
    # surrounding test module; they are not shown in this snippet.
    create_crops(dicom_dataset,
                 'dicom',
                 nodules,
                 None,
                 './test_crops',
                 config=get_config(config))

    cancer_idx = FilesIndex(path='./test_crops/original/cancer/*', dirs=True)
    ncancer_idx = FilesIndex(path='./test_crops/original/ncancer/*', dirs=True)

    cancer_set = Dataset(cancer_idx, batch_class=CTImagesMaskedBatch)
    ncancer_set = Dataset(ncancer_idx, batch_class=CTImagesMaskedBatch)

    assert len(cancer_set) != 0 and len(ncancer_set) != 0

    _ = (Pipeline(dataset=cancer_set).load(fmt='blosc',
                                           sync=True).next_batch(2))

    _ = (Pipeline(dataset=ncancer_set).load(fmt='blosc',
                                            sync=True).next_batch(2))

    shutil.rmtree('./test_crops')
Example #7
import sys

import pandas as pd

sys.path.append('../../')
from radio import CTImagesMaskedBatch as CTIMB
from radio.dataset import Dataset, Pipeline, FilesIndex, F, V, B, C, Config, L
from radio.dataset.research import Research, Option, KV
from radio.dataset.models.tf import UNet, VNet

# paths to scans in blosc and annotations-table
PATH_SCANS = './blosc/*'
PATH_ANNOTS = './annotations.csv'

# directory for saving models
MODELS_DIR = './trained_models/'

# dataset and annotations-table
index = FilesIndex(path=PATH_SCANS, dirs=True, no_ext=False)
dataset = Dataset(index=index, batch_class=CTIMB)
dataset.split(0.9, shuffle=120)
nodules = pd.read_csv(PATH_ANNOTS)


def train(batch,
          model='net',
          minibatch_size=8,
          mode='max',
          depth=6,
          stride=2,
          start=0,
          channels=3):
    """ Custom train method. Train a model in minibatches of xips, fetch loss and prediction.
    """
    # training components
Example #8
    def add_dataset(self, path):
        # `ds` is assumed to be radio's dataset module (imported elsewhere,
        # e.g. `import radio.dataset as ds`), with Dataset and
        # CTImagesDicomBatch imported in the surrounding module.
        dicom_ix = ds.FilesIndex(path=os.path.join(path, '*'), dirs=True)
        dicom_dataset = Dataset(index=dicom_ix, batch_class=CTImagesDicomBatch)

        print(f"Dataset length: {len(dicom_dataset)}")
        self.full_pipe = dicom_dataset >> self.full_pipe
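
A minimal sketch of the kind of class this method presumably belongs to; the class name and constructor are assumptions, and only the full_pipe attribute is implied by the method itself.

import radio.dataset as ds
from radio.dataset import Dataset

class DicomPipelineRunner:
    def __init__(self, pipeline):
        # Template pipeline; add_dataset binds a concrete dataset to it.
        self.full_pipe = pipeline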
Example #9
import pickle as pkl

import radio
from radio import CTImagesMaskedBatch
from radio.dataset import FilesIndex, Dataset

# NPCMR_GLOB, MERGED_NODULES_PATH and HISTO_PATH are assumed to be defined
# earlier in the original script.
NODULE_CONFIDENCE_THRESHOLD = 0.02
TRAIN_SHARE = 0.9
CANCEROUS_CROPS_PATH = '/notebooks/data/CT/npcmr_crops/train/cancerous'
NONCANCEROUS_CROPS_PATH = '/notebooks/data/CT/npcmr_crops/train/noncancerous'

# read df containing info about nodules on scans
dataset_info = (radio.annotation.read_dataset_info(NPCMR_GLOB,
                                                   index_col='seriesid',
                                                   filter_by_min_spacing=True,
                                                   load_origin=False))

# set up Index and Dataset for npcmr
ct_index = FilesIndex(dataset_info.index.values,
                      paths=dict(dataset_info.loc[:, 'ScanPath']),
                      dirs=True)
ct_dataset = Dataset(ct_index, batch_class=CTImagesMaskedBatch)

# read dumped annots
with open(MERGED_NODULES_PATH, 'rb') as file:
    merged = pkl.load(file)

# filter nodules by confidences
filtered = merged[merged.confidence > NODULE_CONFIDENCE_THRESHOLD]

ct_dataset.cv_split(TRAIN_SHARE)

# read histo of nodules locs
with open(HISTO_PATH, 'rb') as file:
    histo = pkl.load(file)
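
The filtered annotations, the location histogram and the CV split are presumably fed into radio's split_dump pipeline, as in the crops_datasets fixture earlier. A hedged sketch (the format, spacing, shapes and the .train subset attribute are assumptions mirroring that fixture):

from radio.pipelines import split_dump

crops_pipeline = split_dump(CANCEROUS_CROPS_PATH, NONCANCEROUS_CROPS_PATH,
                            filtered, histo, fmt='blosc',
                            spacing=(1.7, 1.0, 1.0), shape=(128, 128, 128),
                            crop_size=(32, 64, 64))
(ct_dataset.train >> crops_pipeline).run(4)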

# preprocessing, augmentation and model-training pipelines
import numpy as np
import pandas as pd
from radio import CTImagesMaskedBatch
from radio.dataset import FilesIndex, Dataset, Pipeline, F
from radio.models import DilatedNoduleNet
from radio.models.tf.losses import tversky_loss

nodules_df = pd.read_csv('/path/to/annotations.csv')
luna_index = FilesIndex(path='/path/to/LunaDataset/*.mhd', no_ext=True)
luna_dataset = Dataset(index=luna_index, batch_class=CTImagesMaskedBatch)

preprocessing = (Pipeline()
                 .load(fmt='raw')
                 .unify_spacing(shape=(384, 512, 512), spacing=(3.5, 2.0, 2.0))
                 .fetch_nodules_info(nodules_df)
                 .create_mask()
                 .normalize_hu())
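
A quick, illustrative sanity check of the preprocessing pipeline, following the dataset >> pipeline pattern used throughout these examples:

# Pull a couple of preprocessed scans to verify the pipeline runs end to end.
batch = (luna_dataset >> preprocessing).next_batch(2)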

spacing_randomizer = lambda *args: 0.2 * np.random.uniform(size=3) + [3.5, 2.0, 2.0]
augmentation = (Pipeline()
                .sample_nodules(nodule_size=(48, 76, 76))
                .rotate(random=True, angle=30, mask=True)
                .unify_spacing(spacing=F(spacing_randomizer), shape=(32, 64, 64)))


vnet_config = {'loss': tversky_loss,
               'inputs': dict(images={'shape': (32, 64, 64, 1)},
                              labels={'name': 'targets', 'shape': (32, 64, 64, 1)})}
vnet_config['input_block/inputs'] = 'images'
CTIMB = CTImagesMaskedBatch  # short alias used in the feed_dict below

model_training = (Pipeline()
                  .init_model(name='vnet', model_class=DilatedNoduleNet, config=vnet_config)
                  .train_model(name='vnet', feed_dict={'images': F(CTIMB.unpack, component='images'),