def load_pipeline(nodules_df):
    pipeline = (Pipeline()
                .load(fmt='blosc',
                      components=['spacing', 'origin', 'images', 'segmentation'])
                .fetch_nodules_info_malignancy(nodules_df)
                .create_mask())  # creates mask component with nodules
    return pipeline
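A minimal usage sketch, assuming a Dataset of blosc crops named crops_dataset and the `>>` linking used throughout these examples (both names are illustrative, not from the source):

# hypothetical usage of load_pipeline; crops_dataset and nodules_df are assumed to exist
pipeline = load_pipeline(nodules_df)
line = crops_dataset >> pipeline
batch = line.next_batch(batch_size=4, shuffle=False)  # batch now carries images, masks, spacing and origin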
Example #2
def nodules(dicom_dataset):
    pipeline = dicom_dataset >> (Pipeline().init_variable(
        'nodules_list', []).load(fmt='dicom').update_variable(
            'nodules_list', F(generate_nodules), mode='a'))
    pipeline.run(batch_size=2)

    all_nodules = pd.concat(
        [df for df in pipeline.get_variable('nodules_list') if len(df) > 0])
    return all_nodules
    def test_combine_datasets(self, crops_datasets, batch_sizes, components):
        pipeline = (
            Pipeline()
            .load(fmt='blosc', components=components)
            .normalize_hu()
        )
        combine_pipeline = combine_datasets(crops_datasets, batch_sizes, pipeline)
        _ = combine_pipeline.next_batch(4)  # noqa: F841
Example #4
def test_create_crops(dicom_dataset, nodules):
    create_crops(dicom_dataset,
                 'dicom',
                 nodules,
                 None,
                 './test_crops',
                 config=get_config(config))

    cancer_idx = FilesIndex(path='./test_crops/original/cancer/*', dirs=True)
    ncancer_idx = FilesIndex(path='./test_crops/original/ncancer/*', dirs=True)

    cancer_set = Dataset(cancer_idx, batch_class=CTImagesMaskedBatch)
    ncancer_set = Dataset(ncancer_idx, batch_class=CTImagesMaskedBatch)

    assert len(cancer_set) != 0 and len(ncancer_set) != 0

    _ = (Pipeline(dataset=cancer_set).load(fmt='blosc',
                                           sync=True).next_batch(2))

    _ = (Pipeline(dataset=ncancer_set).load(fmt='blosc',
                                            sync=True).next_batch(2))

    shutil.rmtree('./test_crops')
Example #5
def batch_gen(dicom_dataset):
    dicom_index = FilesIndex(path='./dicom/*', dirs=True)
    dicom_dataset = Dataset(dicom_index, batch_class=CTImagesMaskedBatch)

    create_blosc_dataset = dicom_dataset >> (
        Pipeline()
        .load(fmt='dicom')
        .dump(dst='./blosc', fmt='blosc',
              components=('images', 'origin', 'spacing'))
    )
    create_blosc_dataset.run(4)
    blosc_index = FilesIndex(path='./blosc/*', dirs=True)
    blosc_dataset = Dataset(blosc_index, batch_class=CTImagesMaskedBatch)
    yield blosc_dataset.gen_batch(2, n_epochs=None)
    print("Cleaning up generated blosc data...")
    shutil.rmtree('./blosc')
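A short sketch of how a test might consume the generator yielded by this fixture (the test body below is an assumption; only the fixture itself is from the source):

def test_blosc_batch_size(batch_gen):
    # pull one two-scan batch from the generator created by the fixture above
    batch = next(batch_gen)
    assert len(batch) == 2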
Example #6
    def __init__(self, cf, classifier, segmentator):

        self.cf = cf
        self.full_pipe = (Pipeline()
            .init_variable('segm_mask')
            .init_variable('conf_mask')
            .load(fmt='dicom')
            .unify_spacing(shape=(500, 300, 300), spacing=(1.0, 1.0, 1.0),
                           method='pil-simd', padding='constant')
            #.call(crop_img)
            .normalize_hu()
            .call(check_metrics)
            .predict_on_scan(
                model=lambda x: torch.nn.functional.softmax(
                    torch.from_numpy(classifier.predict(x)))[..., 1],
                crop_shape=[64, 64, 64],
                strides=[55, 55, 55],
                batch_size=4,
                data_format="channels_first",
                model_type="callable",
                targets_mode="classification",
                show_progress=False)
            #.binarize_mask(threshold=0.7)
            .update_variable('conf_mask', B('masks'))
            .predict_on_scan(
                model=segmentator.predict,
                crop_shape=(64, 64, 64),
                strides=(55, 55, 55),
                batch_size=4,
                data_format="channels_first",
                model_type="callable",
                targets_mode="segmentation",
                show_progress=False)
            .binarize_mask(threshold=0.1)
            .update_variable('segm_mask', B('masks'))
            .load(fmt='ndarray', masks=V('segm_mask') * V('conf_mask'))
            .fetch_nodules_from_mask()
            .call(check_overlap)
            .call(process_nodules))
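A hedged usage sketch of the pipeline built above (predictor, an instance of this class, and scan_dataset, a Dataset of DICOM scans, are assumed names, not from the source):

line = scan_dataset >> predictor.full_pipe
batch = line.next_batch(batch_size=1)        # runs classifier and segmentator on one scan
conf_mask = line.get_variable('conf_mask')   # classifier confidences stored above
segm_mask = line.get_variable('segm_mask')   # binarized segmentation stored above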
flatten = cnn.get_layer(name="flatten_1")
inputs = [K.learning_phase()] + cnn.inputs
_flat_out = K.function(inputs, [flatten.output])


def flat_out_f(X):
    # the leading 0 sets K.learning_phase() to 0, i.e. test mode
    return _flat_out([0] + [X])
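For reference, flat_out_f is called further below with a batch of images unpacked in channels_last format; a minimal sketch of that call (images_array is an assumed placeholder name):

features = flat_out_f(images_array)   # images_array: CNN input batch, channels_last
flat_features = features[0]           # per-sample features from the "flatten_1" layer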


# define names for folders
crops_folder = '../../../ResultingData/NoduleCrops'  # server path
pre_savepath = '../../../ResultingData/NoduleFeatures'  # change these lines into pe

# make dataset and pass the dataset to the pipeline
pipeline_load = Pipeline().load(fmt='blosc',
                                components=['spacing', 'origin', 'images'])
dataset = make_dataset(os.path.join(crops_folder, '*'))
sample_line = (dataset >> pipeline_load)

# for each scan in the batch, load the scan and compute features from it; each batch is then saved
for i in range(int(np.ceil(len(dataset) / 5))):
    cbatch = sample_line.next_batch(batch_size=5,
                                    drop_last=False,
                                    shuffle=True)
    cim = cbatch.unpack(component='images', data_format='channels_last')
    features = flat_out_f(cim)
    for j in range(len(cim)):
        feat = features[0][j]
        totalpath = cbatch.index.get_fullpath(cbatch.indices[j])
        splits = totalpath.split(os.sep)
        savepath = pre_savepath + '/' + splits[-1]
def load_pipeline():
    pipeline = (Pipeline().load(fmt='dicom'))
    return pipeline
    plt.title(testname)
    plt.ylim((0,0.5))
    plt.savefig(savepath+'/Losses.png')

# create datasets for cancer/non-cancer and training/testing
cancer_testset = make_dataset(val_cancer_folder)
cancer_trainset = make_dataset(cancer_folder)


# make lists for the losses
losslist = []
test_losslist = []


# create pipeline to load images and pass the dataset structures to the pipeline
pipeline_load = (Pipeline()
                 .load(fmt='blosc', components=['spacing', 'origin', 'images', 'masks'])
                 .loadMalignancy())


# get training and testing pipelines with data
sample_cancer_train = (cancer_trainset >> pipeline_load)
sample_cancer_test = (cancer_testset >> pipeline_load)

# use a separate pipeline for evaluation so that all images are still used for training itself
sample_cancer_train_eval = (cancer_trainset >> pipeline_load)


# training parameters
n_epochs = 2  # number of epochs for the cancer training set; the others continue until this one has finished
cancer_batchsize = 20
ncancer_batchsize = 20  # total batch size is cancer + ncancer batch size
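A hedged sketch of one training step implied by these parameters (sample_ncancer_train and the numpy import are assumptions; unpack with channels_last follows the usage elsewhere in these examples):

# assumed step: draw one cancer and one non-cancer batch and stack their images
cancer_batch = sample_cancer_train.next_batch(batch_size=cancer_batchsize, shuffle=True)
ncancer_batch = sample_ncancer_train.next_batch(batch_size=ncancer_batchsize, shuffle=True)
images = np.concatenate([cancer_batch.unpack(component='images', data_format='channels_last'),
                         ncancer_batch.unpack(component='images', data_format='channels_last')])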
Example #10
    return loss


def save_model(batch, pipeline, model='net'):
    """ Function for saving model.
    """
    model = pipeline.get_model_by_name(model)
    name = model.__class__.__name__
    model.save(MODELS_DIR + name)


# root, train, test pipelines
root_pipeline = (Pipeline().load(
    fmt='blosc', components=[
        'images', 'spacing', 'origin'
    ]).fetch_nodules_info(nodules=nodules).create_mask().run(batch_size=4,
                                                             shuffle=True,
                                                             n_epochs=None,
                                                             prefetch=3,
                                                             lazy=True))
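Because run(..., lazy=True) only stores the run parameters, here is a hedged sketch of how the root and train pipelines are typically combined and executed (pipeline addition with `+` and the luna_dataset name are assumptions):

train_flow = luna_dataset >> (root_pipeline + train_pipeline)
train_flow.run()   # uses the batch_size / shuffle / n_epochs / prefetch stored by the lazy run above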

train_pipeline = (Pipeline().init_variables(
    ['loss', 'predictions']).init_model('dynamic', C('model'), 'net',
                                        C('model_config')).call(train))

test_pipeline = (Pipeline().init_variables(['loss', 'predictions']).call(
    save_model, pipeline=C('train_pipeline')).import_model(
        'net', C('train_pipeline')).predict_model('net',
                                                  fetches='output_sigmoid',
                                                  feed_dict={
                                                      'images': B('nimages'),
                                                      'masks': B('nmasks'),
Example #11
    #        np.save(os.path.join(path,'trainindex.npy'),luna_dataset.train.indices)
    #        np.save(os.path.join(path,'testindex.npy'), luna_dataset.test.indices)
    #
    #        #give them separate names
    #        dataset_val=luna_dataset.test
    #        dataset_train=luna_dataset.train
    #
    #-----------------------------------------------------------------------

    #make pipeline to load and segment, saves segmentations in masks
    load_and_segment = (
        Pipeline().load(fmt='raw').get_lung_mask(
            rad=15).unify_spacing_withmask(
                shape=(400, 512, 512),
                spacing=(2.0, 1.0, 1.0),
                padding='constant')  #equalizes the spacings 
        #from both images and mask
        .normalize_hu(
            min_hu=-1200, max_hu=600
        )  #clips the HU values and linearly rescales them, values from grt team
        .apply_lung_mask(padding=170))

    ## uncomment this for validation data
    lunaline_test = (luna_dataset >> load_and_segment.dump(
        dst=LUNA_test,
        components=['spacing', 'origin', 'images', 'segmentation']))

    batch_size = 1
    for i in range(np.ceil(len(luna_dataset) / batch_size).astype(int)):
        batch = lunaline_test.next_batch(batch_size=batch_size,
                                         shuffle=False,
                                         n_epochs=1)
Example #12
from CTImagesCustomBatch import CTImagesCustomBatch as CTICB  #custom batch class
import os
import CTsliceViewer as slices

nodules_path = 'C:/Users/linde/OneDrive - TU Eindhoven/TUE/Afstuderen/CSVFILES/AnnotatiesPim/nodule_data_adapted.xlsx'
data_path = 'D:/OnlyConv'

# get nodule info; the dtype preserves the leading zeros so that the folder names and this name stay equal; if numbers are used this is not necessary
nodules_utrecht = pd.read_excel(nodules_path, dtype={'PatientID': str})

#make pipeline to load, and get the annotations
load_and_preprocess = (
    Pipeline().load(fmt='dicom').fetch_nodules_info_general(
        nodules_utrecht)  # loads nodule information into the batch
    .create_mask().sample_nodules(batch_size=None,
                                  nodule_size=(16, 32, 32),
                                  share=(1.0),
                                  variance=(0, 0, 0),
                                  data='Utrecht'))

#create filesindex to iterate over all files
folder_path = os.path.join(data_path, '*')
scan_index = FilesIndex(path=folder_path, dirs=True)
scan_dataset = ds.Dataset(index=scan_index, batch_class=CTICB)

# pass the dataset to the pipeline and get a batch through it; for the next batch, run the second command again
line = (scan_dataset >> load_and_preprocess)
batch = line.next_batch(batch_size=1)

slices.multi_slice_viewer(
    batch.images)
def load_line():    
    return Pipeline().load(fmt='blosc', components=['spacing', 'origin', 'images'])
    train_folder = path + SaveFolder + sub + 'training'

    folderlist = [test_folder, train_folder]

    for folder in folderlist:
        if not os.path.exists(folder):
            os.makedirs(folder)

    #this pipeline does the preprocessing and gets the ground truth for the image
    preprocessing = (
        Pipeline().load(fmt='blosc',
                        components=[
                            'spacing', 'origin', 'images', 'masks'
                        ]).unify_spacing_withmask(
                            shape=(400, 512, 512),
                            spacing=(2.0, 1.0, 1.0),
                            padding='constant')  #equalizes the spacings 
        .normalize_hu(
            min_hu=-1200, max_hu=600
        )  #clips the HU values and linearly rescales them, values from grt team
        .apply_lung_mask(padding=170)  # possibly also remove bone here
        .fetch_nodules_info(nodules_df).create_mask())
    #  .predict_on_scans(cnn,strides=(8,16,16), crop_shape=(16,32,32), targets_mode='classification', model_type='keras'))
    #                              .create_mask()
    #

    preprocess_line_test = (
        luna_dataset_test >> preprocessing.dump(dst=test_folder))
    preprocess_line_train = (
        luna_dataset_train >> preprocessing.dump(dst=train_folder))

    for i in range(len(luna_dataset_test)):
Example #15
from radio import dataset as ds
import CTsliceViewer as slice
from radio.dataset import Pipeline
#from memory_profiler import profile
#import gc
import numpy as np
from CTImagesCustomBatch import CTImagesCustomBatch as CTICB

#load data
Path = 'C:/Users/linde/Documents/PreprocessedImages1008/Spacing(2x1x1)/*'

luna_index = ds.FilesIndex(path=Path, dirs=True, sort=True)
luna_dataset = ds.Dataset(index=luna_index, batch_class=CTICB)

#create pipeline to load images, and spacing & origin information
pipeline_load = (Pipeline().load(
    fmt='blosc', components=['spacing', 'origin', 'images', 'segmentation']))

# give the dataset to the pipeline and run it per batch
load_line = luna_dataset >> pipeline_load

#create lists for middle slices of masks and images, and list for index numbers
list_of_masks = []
list_of_im = []
list_of_indices = []

# for each batch, obtain the middle slice of the image and the mask, and store the index in a list
batch_size = 1
for i in range(int(np.ceil(len(luna_dataset) / batch_size))):
    batch = load_line.next_batch(batch_size=batch_size, shuffle=False)
    arrayIm, arrayMask = batch.get_middle_slices(
    )  #function returns middle slices of batch
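    # Hedged continuation sketch: the snippet is truncated here, so the bookkeeping below is an assumption
    list_of_im.append(arrayIm)
    list_of_masks.append(arrayMask)
    list_of_indices.append(batch.indices[0])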
Example #16
def eval_on_images(path,
                   cnn,
                   nodules_df,
                   crop_size=np.array([16, 32, 32]),
                   step_size=np.array([8, 8, 8]),
                   saveImages=False,
                   savepath='path'):
    #get data
    luna_index_test = ds.FilesIndex(path=path, dirs=True,
                                    sort=True)  # preparing indexing structure
    luna_dataset_test = ds.Dataset(index=luna_index_test, batch_class=CTICB)

    if not os.path.exists(savepath):
        os.makedirs(savepath)

    #this pipeline does the preprocessing and gets the ground truth for the image
    preprocessing = (Pipeline().load(
        fmt='blosc',
        components=['spacing', 'origin', 'images'
                    ]).fetch_nodules_info_Utrecht(nodules_df).create_mask())

    preprocess_line = (luna_dataset_test >> preprocessing)

    #possible thresholds
    treshold_list = [
        0.1, 0.2, 0.3, 0.4, 0.5, 0.7, 0.8, 0.85, 0.9, 0.93, 0.95, 0.97, 0.99,
        0.995, 0.998, 0.999, 0.9995, 0.9998, 0.9999, 1
    ]

    FalsePositiveList = np.zeros(
        [len(luna_dataset_test, ),
         len(treshold_list)])
    SensitivityList = np.zeros([len(luna_dataset_test, ), len(treshold_list)])
    TrueDetectedList = np.zeros([len(luna_dataset_test, ), len(treshold_list)])
    MissedDetectedList = np.zeros(
        [len(luna_dataset_test, ),
         len(treshold_list)])

    # define crop and step size, and the batch size in which the prediction should be made

    batch_size = 20

    folder = savepath + 'Image_Data'
    if not os.path.exists(folder):
        os.makedirs(folder)

    folder_files = savepath + 'Image_evaluation'
    if not os.path.exists(folder_files):
        os.makedirs(folder_files)
    index_list = []
    for k in range(len(luna_dataset_test)):
        start_time = time.clock()
        batch = preprocess_line.next_batch(batch_size=1, shuffle=False)
        if os.path.isdir(
                batch.index.get_fullpath(batch.index.indices[0]) +
                '/segmentation'):
            print(
                batch.index.get_fullpath(batch.index.indices[0]) +
                '/segmentation')
            batch.load(fmt='blosc', components=['segmentation'])
        im_index = batch.indices
        index_list.append(str(im_index))

        # crop images to the bounding box to avoid classifying too much, if a segmentation is present
        if batch.segmentation is not None:
            zmin, zmax, ymin, ymax, xmin, xmax = bbox2_3D(batch.segmentation)
            segmentation = batch.segmentation[zmin:zmax, ymin:ymax,
                                              xmin:xmax]  #extract segmentation

            bounding_im = batch.images[zmin:zmax, ymin:ymax, xmin:xmax]
            bounding_mask = batch.masks[zmin:zmax, ymin:ymax, xmin:xmax]

        else:
            bounding_im = batch.images
            bounding_mask = batch.masks
            segmentation = None

        # pad the images to ensure correct patch extraction at boundaries
        bounding_im_pad, bounding_mask_pad = pad_for_Prediction(
            bounding_im, bounding_mask, crop_size, step_size)

        #make empty array for prediction
        size = bounding_im.shape
        prediction_size = np.ceil(size / step_size).astype(
            int)  #make sure all pixels got a mini-box
        prediction_map = np.zeros(prediction_size)

        start_pred_time = time.clock()

        #get prediction map of image
        prediction_map = get_prediction_map(cnn, bounding_im_pad,
                                            prediction_map, step_size,
                                            crop_size, batch_size)
        #cast prediction map to same size as prediction image
        prediction_im = get_prediction_image(bounding_im, prediction_map,
                                             step_size)

        if segmentation is not None:

            prediction_im = prediction_im * segmentation  #all predictions outside segmentation are not relevant
        #save predicted images
        if saveImages == True:
            np.save(folder + '/' + 'prediction_im' + str(k), prediction_im)
            np.save(folder + '/' + 'bounding_im' + str(k), bounding_im)
            np.save(folder + '/' + 'bounding_mask' + str(k), bounding_mask)
        # np.save(folder+'/'+  'bounding_irrel'+str(k), bounding_segm)

        # determine whether the predictions are correct
        for i in range(len(treshold_list)):
            treshold = treshold_list[i]
            TrueDetected, MissedDetected, FalsePositive = verify_predictions(
                prediction_im, bounding_mask, treshold, FP_correction=False)
            Sensitivity = calc_Sensitivity(TrueDetected, MissedDetected)
            SensitivityList[k, i] = Sensitivity
            FalsePositiveList[k, i] = FalsePositive
            TrueDetectedList[k, i] = TrueDetected
            MissedDetectedList[k, i] = MissedDetected

        print((time.clock() - start_pred_time) / 60)
        print("--- %s minutes ---" % ((time.clock() - start_time) / 60))

    cor_tresh = np.sum(TrueDetectedList, 0)
    mis_tresh = np.sum(MissedDetectedList, 0)
    sens_tresh = cor_tresh / (cor_tresh + mis_tresh)
    fp_tresh = np.mean(FalsePositiveList, 0)

    #save all files
    np.save(folder_files + '/FPlist', FalsePositiveList)
    np.save(folder_files + '/SensList', SensitivityList)
    np.save(folder_files + '/TrueDetected', TrueDetectedList)
    np.save(folder_files + '/MissedDetected', MissedDetectedList)

    np.save(folder_files + '/sens_tresh', sens_tresh)
    np.save(folder_files + '/fp_tresh', fp_tresh)

    # write seriesuid to excel file
    df = pd.DataFrame({'series': index_list})
    writer = pd.ExcelWriter(folder_files + '/seriesUID.xlsx',
                            engine='xlsxwriter')
    df.to_excel(writer, index=False, header=True)

    # write FROC table (threshold, sensitivity, false positives) to excel file
    df_2 = pd.DataFrame({
        'treshold': treshold_list,
        'sensitivity': sens_tresh,
        'false positives': fp_tresh
    })
    writer = pd.ExcelWriter(folder_files + '/FROC.xlsx', engine='xlsxwriter')
    df_2.to_excel(writer, index=False, header=True)

    # calculate overall sensitivity and fp_rate
    total_correct_detected = np.sum(TrueDetectedList, 0)
    total_nodules = total_correct_detected + np.sum(MissedDetectedList, 0)
    Sensitivity = np.divide(total_correct_detected, total_nodules)
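bbox2_3D is used above but not shown in these examples; a minimal sketch of what such a helper conventionally computes (the tight bounds of the nonzero voxels of a 3D mask), matching the zmin, zmax, ymin, ymax, xmin, xmax unpacking above. This is an assumption, not the original implementation:

def bbox2_3D(mask):
    """Sketch of an assumed helper: bounds of the nonzero region of a 3D array."""
    z = np.any(mask, axis=(1, 2))
    y = np.any(mask, axis=(0, 2))
    x = np.any(mask, axis=(0, 1))
    zmin, zmax = np.where(z)[0][[0, -1]]
    ymin, ymax = np.where(y)[0][[0, -1]]
    xmin, xmax = np.where(x)[0][[0, -1]]
    return zmin, zmax, ymin, ymax, xmin, xmax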
def eval_on_images(path,cnn, nodules_df, nodules_eval, crop_size = np.array([16,32,32]), step_size = np.array([8,8,8]),FPminingNeed=False,saveImages=False):

    #get data
    luna_index_test = ds.FilesIndex(path=path, dirs=True)      # preparing indexing structure
    luna_dataset_test = ds.Dataset(index=luna_index_test, batch_class=CTICB)
        

    # replace negative diameters in irrelevant findings with a small diameter of 3 mm
    nodules_eval['diameter_mm']=nodules_eval['diameter_mm'].replace(-1,3)
    
    #this pipeline does the preprocessing and gets the ground truth for the image
    preprocessing = (Pipeline()
                                  .load(fmt='blosc', components=['spacing', 'origin', 'images','segmentation']) 
                                  .fetch_nodules_info(nodules_df)
                                  .create_mask()
                                  .fetch_nodules_info(nodules_eval ,update=True))
    
    preprocess_line=(luna_dataset_test >> preprocessing) 
    
    #possible thresholds
    treshold_list=[  0.5, 0.7, 0.8, 0.85, 0.9, 0.93, 0.95, 0.97,0.99,0.995,0.998,0.999, 0.9995,0.9998, 0.9999,1]


    FalsePositiveList=np.zeros([len(luna_dataset_test, ), len(treshold_list)])
    SensitivityList=np.zeros([len(luna_dataset_test, ), len(treshold_list)])
    TrueDetectedList=np.zeros([len(luna_dataset_test, ), len(treshold_list)])
    MissedDetectedList=np.zeros([len(luna_dataset_test, ), len(treshold_list)])
    FPminingNeed=False
    FPminingList=[]
    
    # define batch size in which the prediction should be made
    batch_size=20
    
    folder='Image_Data'
    if not os.path.exists(folder):
        os.makedirs(folder)
        
    folder_files='Image_evaluation'   
    if not os.path.exists(folder_files):
        os.makedirs(folder_files)                 
    index_list=[]
    
    for k in range(len(luna_dataset_test)):
        start_time = time.clock()
        batch=preprocess_line.next_batch(batch_size=1)
        im_index=batch.indices
        index_list.append(str(im_index))
        
        # crop images to the bounding box to avoid classifying too much
        zmin,zmax,ymin,ymax,xmin,xmax=bbox2_3D(batch.segmentation)
        segmentation=batch.segmentation[zmin:zmax,ymin:ymax,xmin:xmax] #extract segmentation
    

        batch.create_mask_irrelevant() #when segmentation is no longer needed, put irrelevant findings into this component
        
        # cut all images to the bounding box of the segmentation to reduce computational load
        bounding_im=batch.images[zmin:zmax,ymin:ymax,xmin:xmax]
        bounding_mask=batch.masks[zmin:zmax,ymin:ymax,xmin:xmax]
        bounding_segm=batch.segmentation[zmin:zmax,ymin:ymax,xmin:xmax]
        
        
        # pad the images to ensure correct patch extraction at boundaries
        bounding_im_pad,bounding_mask_pad,bounding_segm_pad=pad_for_Prediction(bounding_im, bounding_mask,bounding_segm, crop_size,step_size)
        
        #make empty array for prediction
        size=bounding_im.shape
        prediction_size=np.ceil(size/step_size).astype(int) #make sure all pixels got a mini-box
        prediction_map=np.zeros(prediction_size)
        
        start_pred_time=time.clock()
            
        #get prediction map of image
        prediction_map=get_prediction_map(cnn,bounding_im_pad, prediction_map, step_size, crop_size,batch_size)
        #cast prediction map to same size as prediction image
        prediction_im=get_prediction_image(bounding_im, prediction_map, step_size)
        
        prediction_im=prediction_im * segmentation #all predictions outside segmentation are not relevant
        #save predicted images
        if saveImages==True:
           np.save(folder+'/'+  'prediction_im'+str(k), prediction_im)
           np.save(folder+'/'+  'bounding_im'+str(k), bounding_im)
           np.save(folder+'/'+ 'bounding_mask'+str(k), bounding_mask)
           np.save(folder+'/'+  'bounding_irrel'+str(k), bounding_segm)
        
        # determine whether the predictions are correct
        for i in range(len(treshold_list)):
           treshold= treshold_list[i]
           TrueDetected, MissedDetected, FalsePositive=verify_predictions(prediction_im, bounding_mask,bounding_segm, treshold)
           Sensitivity=calc_Sensitivity(TrueDetected,MissedDetected)
           SensitivityList[k,i]=Sensitivity
           FalsePositiveList[k,i]=FalsePositive
           TrueDetectedList[k,i]=TrueDetected
           MissedDetectedList[k,i]=MissedDetected

        # do false positive mining if wanted
        # if FPminingNeed == True:
        #     detections = measure.regionprops(label_prediction)
        #     FPminingList = FPmining(detections, correct_labels, batch, zmin, ymin, xmin, FPminingList)

        print( (time.clock()-start_pred_time)/60)
        print("--- %s minutes ---" % ((time.clock() - start_time)/60))
     
    #determine number of detected / missed nodules
    cor_tresh=np.sum(TrueDetectedList,0)
    mis_tresh=np.sum(MissedDetectedList,0)
    sens_tresh=cor_tresh/(cor_tresh+mis_tresh)
    fp_tresh=np.mean(FalsePositiveList,0)   
    
    #save all files
    np.save(folder_files + '/FPlist',FalsePositiveList  )
    np.save(folder_files + '/SensList', SensitivityList)
    np.save(folder_files + '/TrueDetected',TrueDetectedList    )
    np.save(folder_files + '/MissedDetected', MissedDetectedList) 

    np.save(folder_files + '/sens_tresh',sens_tresh)
    np.save(folder_files + '/fp_tresh',fp_tresh)

    # write seriesuid to excel file
    df=pd.DataFrame({'series':index_list})
    writer = pd.ExcelWriter(folder_files+'/seriesUID.xlsx', engine='xlsxwriter')
    df.to_excel(writer, index=False,header=True)

    # write FROC table (threshold, sensitivity, false positives) to excel file
    df_2=pd.DataFrame({'treshold':treshold_list, 'sensitivity':sens_tresh, 'false positives': fp_tresh})
    writer = pd.ExcelWriter(folder_files+'/FROC.xlsx', engine='xlsxwriter')
    df_2.to_excel(writer, index=False,header=True)

    # calculate overall sensitivity and fp_rate
    total_correct_detected=np.sum(TrueDetectedList,0)
    total_nodules=total_correct_detected + np.sum(MissedDetectedList,0)
    Sensitivity=np.divide(total_correct_detected,total_nodules)
    fp_rate=np.mean(FalsePositiveList,0)
    
    
    #save sensitivity to txt file
    names  = np.array(['Sensitivity:','FalsePositives:'])
    floats = np.array([Sensitivity[8], fp_rate[8] ])
    
    ab = np.zeros(names.size, dtype=[('var1', 'U20'), ('var2', float)])
    ab['var1'] = names
    ab['var2'] = floats
    
    np.savetxt(folder_files + '/accuracy.txt', ab, fmt="%18s %10.3f")

    #write false positive list to file
    if FPminingNeed==True:
        a=pd.DataFrame(FPminingList)
        writer = pd.ExcelWriter(folder_files+'/FalsePositiveMiningList.xlsx', engine='xlsxwriter')
        a.columns = ["seriesuid", "coordX", "coordY", "coordZ"]
        a.to_excel(writer, index=False,header=True)
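calc_Sensitivity is also not shown in these examples; given sens_tresh = cor_tresh / (cor_tresh + mis_tresh) above, a plausible sketch (an assumption, not the original implementation):

def calc_Sensitivity(TrueDetected, MissedDetected):
    # fraction of ground-truth nodules that were detected at this threshold
    total = TrueDetected + MissedDetected
    return TrueDetected / total if total > 0 else 0.0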
luna_index_train = FilesIndex(path=path, no_ext=True)  # preparing indexing structure

ixs = np.array(['1.3.6.1.4.1.14519.5.2.1.6279.6001.750792629100457382099842515038'])


two_scans_dataset = ds.Dataset(index=luna_index_train.create_subset(ixs), batch_class=CTICB)
luna_dataset_train = ds.Dataset(index=luna_index_train, batch_class=CTICB)


nodules_malignancy=pd.read_excel('C:/Users/s120116/OneDrive - TU Eindhoven/TUE/Afstuderen/CSVFILES/all_info_averaged_observer_corrected2.xlsx')

pipeline = (Pipeline()
            .load(fmt='raw')
            .fetch_nodules_info(nodules_df_2)
            .create_mask())


def load_pipeline(nodules_df):
    pipeline = (Pipeline()
                .load(fmt='blosc', components=['spacing', 'origin', 'images', 'segmentation'])
                .fetch_nodules_info_malignancy(nodules_df)
                .create_mask())  # creates mask component with nodules
    return pipeline

cancer_cropline = load_pipeline(nodules_malignancy)

cancer_train = (two_scans_dataset >> load_and_segment)
Example #19
import numpy as np  # needed for the spacing_randomizer below
import pandas as pd
from radio import CTImagesMaskedBatch
from radio.dataset import FilesIndex, Dataset, Pipeline, F
from radio.models import DilatedNoduleNet
from radio.models.tf.losses import tversky_loss

nodules_df = pd.read_csv('/path/to/annotations.csv')
luna_index = FilesIndex(path='/path/to/LunaDataset/*.mhd', no_ext=True)
luna_dataset = Dataset(index=luna_index, batch_class=CTImagesMaskedBatch)

preprocessing = (Pipeline()
                 .load(fmt='raw')
                 .unify_spacing(shape=(384, 512, 512), spacing=(3.5, 2.0, 2.0))
                 .fetch_nodules_info(nodules_df)
                 .create_mask()
                 .normalize_hu())

spacing_randomizer = lambda *args: 0.2 * np.random.uniform(size=3) + [3.5, 2.0, 2.0]
augmentation = (Pipeline()                 
                .sample_nodules(nodule_size=(48, 76, 76))
                .rotate(random=True, angle=30, mask=True)
                .unify_spacing(spacing=F(spacing_randomizer), shape=(32, 64, 64)))


vnet_config = {'loss': tversky_loss,
               'inputs': dict(images={'shape': (32, 64, 64, 1)},
                              labels={'name': 'targets', 'shape': (32, 64, 64, 1)})}
vnet_config['input_block/inputs'] = 'images'
model_training = (Pipeline()
                  .init_model(name='vnet', model_class=DilatedNoduleNet, config=vnet_config)
                  .train_model(name='vnet', feed_dict={'images': F(CTIMB.unpack, component='images'),
# from each dicom folder, add one file to the FilesIndex. This makes sure that with the next batch,
# the next dicom scan is loaded and not the next slice (file)
fileList = []
for i in range(1, 3):  #from 1 to number of scans
    number = '00' + str(i)
    path = 'C:/Users/linde/Documents/DAta/DATA/Use/' + number + '/conventional'
    fileList.append(path + '/' + os.listdir(path)[0])

#set up dataset structure
luna_index = FilesIndex(path=fileList, no_ext=False,
                        sort=True)  # preparing indexing structure

luna_dataset = ds.Dataset(index=luna_index, batch_class=CTImagesCustomBatch)

#load pipeline
load_LUNA = (Pipeline().load(fmt='dicom').get_lung_mask(rad=10))

lunaline = luna_dataset >> load_LUNA.dump(
    dst=save_folder, components=['spacing', 'origin', 'images', 'masks'])

#get next batch
list_int = []
i = 0
while True:
    try:
        batch = lunaline.next_batch(batch_size=1, shuffle=False, n_epochs=1)
        im_array = batch.images
        [values, count] = np.unique(im_array, return_counts=True)
        list_int.append([batch.index.indices, values, count])
        i = i + 1
def load_pipeline(nodules_df):
    pipeline=   (Pipeline()
            .load(fmt='blosc', components=['spacing', 'origin', 'images','segmentation'])
#makes folder for all savings
LUNA_val='C:/Users/s120116/Documents/Preprocessed_Images/'+subset+' - split/validate' 
LUNA_train= 'C:/Users/s120116/Documents/Preprocessed_Images/'+subset+' - split/training' 


luna_index = FilesIndex(path=LUNA_MASK, no_ext=True) 


ixs = np.array([

'1.3.6.1.4.1.14519.5.2.1.6279.6001.228511122591230092662900221600'])
fix_ds = ds.Dataset(index=luna_index.create_subset(ixs), batch_class=CTImagesCustomBatch) 

 #make pipeline to load and segment, saves segmentations in masks
load_and_segment     = (Pipeline()
                        .load(fmt='raw')
                        .get_lung_mask(rad=15))
                    #  .unify_spacing_withmask(shape=(400,512,512), spacing=(2.0,1.0,1.0),padding='constant') #equalizes the spacings 
                              #from both images and mask
                     # .normalize_hu(min_hu=-1200, max_hu=600) #clips the HU values and linearly rescales them, values from grt team
                      #.apply_lung_mask(padding=170))


#pass training dataset through pipeline
lunaline_train = (fix_ds >> load_and_segment)  # .dump(dst=LUNA_train, components=['spacing', 'origin', 'images', 'segmentation'])
batch = lunaline_train.next_batch(batch_size=1, shuffle=False, n_epochs=1)

batch.dump(dst='C:/Users/s120116/Documents/Preprocessed_Images/subset1 - split', components=['spacing', 'origin', 'images','segmentation']) 


if not os.path.exists(savepath):
    os.makedirs(savepath)

#create filesindex to iterate over all files
folder_path = os.path.join(data_path, '*')
scan_index = FilesIndex(path=folder_path, dirs=True)
scan_dataset = ds.Dataset(index=scan_index, batch_class=CTICB)

#to check index / dataset use: luna_index.indices or scan_dataset.index.indices
#should contain list of names of folders (for each scan a folder), names should be different for each scan

#make pipeline to load, equalize spacing and normalize the data
load_and_preprocess = (
    Pipeline().load(fmt='dicom')  #loads all slices from folder in dataset
    .unify_spacing(shape=(400, 512, 512),
                   spacing=(2.0, 1.0, 1.0),
                   padding='constant')  #equalizes the spacings
    .normalize_hu(min_hu=-1200,
                  max_hu=600)  #clips the HU values and linearly rescales them
)

##pass training dataset through pipeline
preprocessing_pipeline = (scan_dataset >> load_and_preprocess.dump(
    dst=savepath, components=['images', 'spacing', 'origin']))

#get scans one by one through the pipeline
for i in range(len(scan_dataset)):
    print('preprocessing scan nr: ' + str(i))
    batch = preprocessing_pipeline.next_batch(batch_size=1,
                                              shuffle=False,
                                              n_epochs=1,
                                              drop_last=False)
Example #24
if not os.path.exists(save_path):
    os.makedirs(save_path)

for string in ['PE']:  #still do PE

    path_cs = "C:/Users/linde/Documents/CS_PE_seperated/" + string + "/*"

    cs_index = FilesIndex(path=path_cs, dirs=True, sort=True)
    cs_dataset = ds.Dataset(index=cs_index, batch_class=CTImagesCustomBatch)

    #load and normalize these images
    load_and_normalize = (
        Pipeline().load(
            fmt='blosc',
            components=['spacing', 'origin', 'images']).unify_spacing(
                shape=(400, 512, 512),
                spacing=(2.0, 1.0, 1.0),
                padding='constant')  #equalizes the spacings 
        #from both images and mask
        .normalize_hu(min_hu=-1200, max_hu=600)
    )  #clips the HU values and linearly rescales them, values from grt team
    #  .apply_lung_mask(paddi

    Path = 'C:/Users/linde/Documents/PreprocessedImages1008CorrectConvs/Spacing(2x1x1)/0*'
    loadSegm = (Pipeline().load(fmt='blosc',
                                components=['segmentation', 'masks']))

    im_index = FilesIndex(path=Path, dirs=True)

    lunaline_train = (cs_dataset >> load_and_normalize)

    for i in range(len(cs_dataset)):
Example #25
#makes folder for all savings
if not os.path.exists(savepath_preprocess):
    os.makedirs(savepath_preprocess)
    

#create filesindex to iterate over all files
folder_path=os.path.join(data_path, '*')
scan_index=FilesIndex(path=folder_path,dirs=True)
scan_dataset = ds.Dataset(index=scan_index, batch_class=CTICB)

#to check index / dataset use: luna_index.indices or scan_dataset.index.indices
#should contain list of names of folders (for each scan a folder), names should be different for each scan    

#make pipeline to load, equalize spacing and normalize the data
load_and_preprocess     = (Pipeline()
                        .load(fmt='dicom') #loads all slices from folder in dataset
                        .unify_spacing(shape=(400,512,512), spacing=(2.0,1.0,1.0),padding='constant')#equalizes the spacings
                       .normalize_hu(min_hu=-1200, max_hu=600) #clips the HU values and linearly rescales them
                      )

##pass training dataset through pipeline
preprocessing_pipeline = (scan_dataset >> load_and_preprocess.dump(dst=savepath_preprocess, components=['images', 'spacing', 'origin']))


#get scans one by one through the pipeline
for i in range(len(scan_dataset)):
    print('preprocessing scan nr: ' + str(i))
    batch = preprocessing_pipeline.next_batch(batch_size=1, shuffle=False, n_epochs=1, drop_last=False)