def batch_gen(dicom_dataset):
    """Yield a batch generator over a blosc copy of the scans in ``./dicom``.

    The scans found under ``./dicom/*`` are loaded as dicom and dumped to
    ``./blosc`` (components ``images``, ``origin`` and ``spacing``). A dataset
    indexed over ``./blosc/*`` is then built and its
    ``gen_batch(2, n_epochs=None)`` generator is yielded.

    Parameters
    ----------
    dicom_dataset
        Not read directly: the dicom dataset is re-indexed from ``./dicom``.
        Presumably requested so that the dicom data exists before this
        generator runs — TODO confirm against the fixture wiring.

    Yields
    ------
    The object returned by ``blosc_dataset.gen_batch(2, n_epochs=None)``.

    Notes
    -----
    ``./blosc`` is removed when the generator finishes *or* fails, so a
    partially written dump never leaks into subsequent runs.
    """
    try:
        # Re-index the dicom scans under a local name instead of shadowing
        # the ``dicom_dataset`` argument.
        source_index = FilesIndex(path='./dicom/*', dirs=True)
        source_dataset = Dataset(source_index, batch_class=CTImagesMaskedBatch)

        create_blosc_dataset = source_dataset >> (
            Pipeline()
            .load(fmt='dicom')
            .dump(dst='./blosc', fmt='blosc',
                  components=('images', 'origin', 'spacing'))
        )
        create_blosc_dataset.run(4)

        blosc_index = FilesIndex(path='./blosc/*', dirs=True)
        blosc_dataset = Dataset(blosc_index, batch_class=CTImagesMaskedBatch)
        yield blosc_dataset.gen_batch(2, n_epochs=None)
    finally:
        print("Cleaning up generated blosc data...")
        # ignore_errors: the directory may not exist if the dump failed
        # early, and cleanup must not mask the original exception.
        shutil.rmtree('./blosc', ignore_errors=True)
def crops_datasets(dicom_dataset, nodules, histo):
    """Yield datasets of cancerous and non-cancerous crops dumped to disk.

    A ``split_dump`` pipeline is attached to ``dicom_dataset`` and executed
    for a single batch of size 2, writing crops to ``./cancer`` and
    ``./ncancer``. Both directories are then indexed and wrapped in datasets.

    Parameters
    ----------
    dicom_dataset
        Dataset the ``split_dump`` pipeline is attached to via ``>>``.
    nodules
        Nodules annotation passed through to ``split_dump``.
    histo
        Histogram argument passed through to ``split_dump``.

    Yields
    ------
    tuple
        ``(cancer_set, ncancer_set)`` datasets indexed over ``./cancer/*``
        and ``./ncancer/*`` respectively.

    Notes
    -----
    Both crop directories are removed when the generator finishes or fails;
    removal of one directory does not depend on the other succeeding.
    """
    crop_dirs = ('./cancer', './ncancer')
    try:
        pipeline = split_dump('./cancer', './ncancer', nodules, histo,
                              fmt='dicom', spacing=(1.7, 1.0, 1.0),
                              shape=(128, 128, 128), order=3,
                              padding='reflect', crop_size=(32, 64, 64))
        pipeline = (dicom_dataset >> pipeline)
        # One batch is enough to populate both crop directories.
        pipeline.next_batch(2)

        cancer_idx = FilesIndex(path='./cancer/*', dirs=True)
        ncancer_idx = FilesIndex(path='./ncancer/*', dirs=True)
        cancer_set = Dataset(cancer_idx, batch_class=CTImagesMaskedBatch)
        ncancer_set = Dataset(ncancer_idx, batch_class=CTImagesMaskedBatch)
        yield cancer_set, ncancer_set
    finally:
        # ignore_errors: a directory may be missing if the dump failed, and
        # cleanup must not mask the original exception.
        for crop_dir in crop_dirs:
            shutil.rmtree(crop_dir, ignore_errors=True)
def dicom_dataset():
    """Yield a dataset over freshly generated dicom scans in ``./dicom``.

    Any pre-existing ``./dicom`` directory is removed first, then
    ``generate_dicom_scans('./dicom')`` is called and the resulting
    sub-directories are indexed.

    Yields
    ------
    Dataset
        Dataset over ``./dicom/*`` using ``CTImagesMaskedBatch``.

    Notes
    -----
    ``./dicom`` is removed when the generator finishes or fails, so a
    partially generated directory is never left behind.
    """
    # Start from a clean slate: stale scans would end up in the index.
    if os.path.exists('./dicom'):
        shutil.rmtree('./dicom')

    try:
        generate_dicom_scans('./dicom')
        index = FilesIndex(path='./dicom/*', dirs=True)
        dataset = Dataset(index, batch_class=CTImagesMaskedBatch)
        yield dataset
    finally:
        print("Cleaning up generated dicom data...")
        # ignore_errors: the directory may not exist if generation failed
        # early, and cleanup must not mask the original exception.
        shutil.rmtree('./dicom', ignore_errors=True)
def test_blosc_dump_sync(self, batch_with_nodules_and_masks, sync):
    """Dump a batch to blosc and load it back.

    Checks that the number of dumped items equals the batch length and that
    the dumped items can be loaded again with ``fmt='blosc'``.

    Parameters
    ----------
    batch_with_nodules_and_masks
        Batch to dump; its length is the expected number of dumped items.
    sync
        Forwarded as ``sync=`` to both ``dump`` and ``load``.

    Notes
    -----
    ``./dumped_blosc`` is removed even when an assertion or the load fails,
    so stale dumps cannot affect the item count of a later run.
    """
    dump_dir = './dumped_blosc'
    expected_len = len(batch_with_nodules_and_masks)
    try:
        batch_with_nodules_and_masks.dump(dst=dump_dir, fmt='blosc', sync=sync)

        blosc_index = FilesIndex(path='./dumped_blosc/*', dirs=True)
        blosc_dataset = Dataset(blosc_index, batch_class=CTImagesMaskedBatch)
        assert len(blosc_dataset) == expected_len

        # The result is not inspected: the load only has to succeed.
        blosc_dataset.next_batch(expected_len).load(fmt='blosc', sync=sync)
    finally:
        shutil.rmtree(dump_dir, ignore_errors=True)
def test_dicom_dump(self, batch_with_nodules_and_masks):
    """Dump a batch to dicom and load it back.

    Checks that the number of dumped items equals the batch length and that
    the dumped items can be loaded again with ``fmt='dicom'``.

    Parameters
    ----------
    batch_with_nodules_and_masks
        Batch to dump; its length is the expected number of dumped items.

    Notes
    -----
    ``./dumped_dicoms`` is removed even when an assertion or the load fails,
    so stale dumps cannot affect the item count of a later run.
    """
    dump_dir = './dumped_dicoms'
    expected_len = len(batch_with_nodules_and_masks)
    try:
        batch_with_nodules_and_masks.dump(dst=dump_dir, fmt='dicom')

        dicom_index = FilesIndex(path='./dumped_dicoms/*', dirs=True)
        dicom_dataset = Dataset(dicom_index, batch_class=CTImagesMaskedBatch)
        assert len(dicom_dataset) == expected_len

        # The result is not inspected: the load only has to succeed.
        dicom_dataset.next_batch(expected_len).load(fmt='dicom')
    finally:
        shutil.rmtree(dump_dir, ignore_errors=True)
def test_create_crops(dicom_dataset, nodules):
    """Run ``create_crops`` and check both crop datasets are usable.

    Crops are written under ``./test_crops``; the test then indexes
    ``original/cancer`` and ``original/ncancer``, asserts that both are
    non-empty and loads one batch of size 2 from each with ``fmt='blosc'``.

    Parameters
    ----------
    dicom_dataset
        Source dataset passed to ``create_crops``.
    nodules
        Nodules annotation passed to ``create_crops``.

    Notes
    -----
    ``./test_crops`` is removed even when ``create_crops``, an assertion or
    a load fails.
    """
    try:
        create_crops(dicom_dataset, 'dicom', nodules, None, './test_crops',
                     config=get_config(config))

        cancer_idx = FilesIndex(path='./test_crops/original/cancer/*', dirs=True)
        ncancer_idx = FilesIndex(path='./test_crops/original/ncancer/*', dirs=True)
        cancer_set = Dataset(cancer_idx, batch_class=CTImagesMaskedBatch)
        ncancer_set = Dataset(ncancer_idx, batch_class=CTImagesMaskedBatch)

        # Separate asserts so a failure names the empty dataset.
        assert len(cancer_set) != 0, "no cancerous crops were created"
        assert len(ncancer_set) != 0, "no non-cancerous crops were created"

        # Results are not inspected: the loads only have to succeed.
        Pipeline(dataset=cancer_set).load(fmt='blosc', sync=True).next_batch(2)
        Pipeline(dataset=ncancer_set).load(fmt='blosc', sync=True).next_batch(2)
    finally:
        shutil.rmtree('./test_crops', ignore_errors=True)
# Extend the import path with the directory two levels above the current
# working directory (presumably so that `radio` resolves from a source
# checkout — confirm).
sys.path.append('../../')
from radio import CTImagesMaskedBatch as CTIMB
from radio.dataset import Dataset, Pipeline, FilesIndex, F, V, B, C, Config, L
from radio.dataset.research import Research, Option, KV
from radio.dataset.models.tf import UNet, VNet

# Glob matching the scan directories stored in blosc format, and the path to
# the annotations table (CSV).
PATH_SCANS = './blosc/*'
PATH_ANNOTS = './annotations.csv'

# Directory for saving trained models.
MODELS_DIR = './trained_models/'

# Build the index over scan directories (dirs=True) and wrap it in a dataset
# producing CTIMB batches.
index = FilesIndex(path=PATH_SCANS, dirs=True, no_ext=False)
dataset = Dataset(index=index, batch_class=CTIMB)
# NOTE(review): presumably a 90% train share with a fixed shuffle seed of
# 120 — confirm against the Dataset.split documentation.
dataset.split(0.9, shuffle=120)
# Annotations table read into a DataFrame.
nodules = pd.read_csv(PATH_ANNOTS)


# NOTE(review): only the signature and docstring of `train` are visible in
# this excerpt; its body is not documented here.
def train(batch, model='net', minibatch_size=8, mode='max', depth=6, stride=2, start=0, channels=3):
    """ Custom train method. Train a model in minibatches of xips, fetch loss and prediction. """
    # training components
def add_dataset(self, path):
    """Attach the scans found under ``path`` to ``self.full_pipe``.

    Every entry matching ``<path>/*`` is indexed as a directory, wrapped in
    a dataset of ``CTImagesDicomBatch`` and prepended to the existing
    pipeline with ``>>``. The dataset length is printed for reference.

    Parameters
    ----------
    path
        Directory whose children are indexed.
    """
    scan_dirs_pattern = os.path.join(path, '*')
    index = ds.FilesIndex(path=scan_dirs_pattern, dirs=True)
    dicom_dataset = Dataset(index=index, batch_class=CTImagesDicomBatch)

    print(f"Dataset length: {len(dicom_dataset)}")

    # Rebind the pipeline to the newly built dataset.
    self.full_pipe = dicom_dataset >> self.full_pipe
# Nodules whose confidence is not strictly above this value are dropped.
NODULE_CONFIDENCE_THRESHOLD = 0.02
# Value passed to ct_dataset.cv_split below.
TRAIN_SHARE = 0.9
# Crop directories; presumably the destinations of the split-dump pipeline
# set up after this section — confirm.
CANCEROUS_CROPS_PATH = '/notebooks/data/CT/npcmr_crops/train/cancerous'
NONCANCEROUS_CROPS_PATH = '/notebooks/data/CT/npcmr_crops/train/noncancerous'

# Read the table describing the scans matched by NPCMR_GLOB, indexed by
# 'seriesid'. Its 'ScanPath' column is used below to locate each scan.
dataset_info = (radio.annotation.read_dataset_info(NPCMR_GLOB,
                                                   index_col='seriesid',
                                                   filter_by_min_spacing=True,
                                                   load_origin=False))

# Set up the index and dataset for npcmr: index items are the table's index
# values, each mapped to its 'ScanPath' entry.
ct_index = FilesIndex(dataset_info.index.values,
                      paths=dict(dataset_info.loc[:, 'ScanPath']),
                      dirs=True)
ct_dataset = Dataset(ct_index, batch_class=CTImagesMaskedBatch)

# Read the previously dumped (pickled) merged annotations.
# NOTE(review): pickle executes arbitrary code on load — only use with
# trusted files.
with open(MERGED_NODULES_PATH, 'rb') as file:
    merged = pkl.load(file)

# Keep only nodules whose confidence exceeds the threshold.
filtered = merged[merged.confidence > NODULE_CONFIDENCE_THRESHOLD]

# NOTE(review): presumably splits the dataset into train/test parts with
# TRAIN_SHARE going to train — confirm against the cv_split documentation.
ct_dataset.cv_split(TRAIN_SHARE)

# Read the pickled histogram of nodule locations (same pickle caveat as
# above).
with open(HISTO_PATH, 'rb') as file:
    histo = pkl.load(file)

# split dump pipeline
import pandas as pd from radio import CTImagesMaskedBatch from radio.dataset import FilesIndex, Dataset, Pipeline, F from radio.models import DilatedNoduleNet from radio.models.tf.losses import tversky_loss nodules_df = pd.read_csv('/path/to/annotations.csv') luna_index = FilesIndex(path='/path/to/LunaDataset/*.mhd', no_ext=True) luna_dataset = Dataset(index=luna_index, batch_class=CTImagesMaskedBatch) preprocessing = (Pipeline() .load(fmt='raw') .unify_spacing(shape=(384, 512, 512), spacing=(3.5, 2.0, 2.0))) .fetch_nodules_info(nodules_df) .create_mask() .normalize_hu()) spacing_randomizer = lambda *args: 0.2 * np.random.uniform(size=3) + [3.5, 2.0, 2.0] augmentation = (Pipeline() .sample_nodules(nodule_size=(48, 76, 76)) .rotate(random=True, angle=30, mask=True) .unify_spacing(spacing=F(spacing_randomizer), shape=(32, 64, 64))) vnet_config = {'loss': tversky_loss, 'inputs': dict(images={'shape': (32, 64, 64, 1)}, labels={'name': 'targets', 'shape': (32, 64, 64, 1)})} vnet_config['input_block/inputs'] = 'images' model_training = (Pipeline() .init_model(name='vnet', model_class=DilatedNoduleNet, config=vnet_config) .train_model(name='vnet', feed_dict={'images': F(CTIMB.unpack, component='images'),