def PredictCSV(modelloc, outdir, indir=settings.options.dbfile):
    """Run PredictNifti on every test-set row of a CSV database file.

    Args:
        modelloc: path of a saved keras model (deserialized via load_model).
        outdir:   output directory; predictions land in <outdir>/predictions.
        indir:    CSV database with 'dataid', 'image' and 'label' columns.
                  NOTE: the default is bound once, at import time, from
                  settings.options.dbfile.
    """
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # see issue #152
    logfileoutputdir = outdir
    savedir = logfileoutputdir + '/predictions'
    # os.makedirs replaces the previous os.system("mkdir -p ...") shell-out:
    # same effect (creates parents, tolerates existing dirs), no shell.
    os.makedirs(savedir, exist_ok=True)

    print('loading model from', modelloc)
    # custom losses/layers that must be registered for model deserialization
    customdict = {
        'dsc_l2': dsc_l2,
        'dsc_matlab': dsc_matlab,
        'dsc_matlab_l2': dsc_matlab_l2,
        'dsc_l2_liver': dsc_l2_liver,
        'dsc_l2_tumor': dsc_l2_tumor,
        'dsc_l2_background': dsc_l2_background,
        'ISTA': ISTA,
        'DepthwiseConv3D': DepthwiseConv3D,
    }
    loaded_model = load_model(modelloc, compile=False,
                              custom_objects=customdict)
    opt = GetOptimizer()
    lss, met = GetLoss()
    loaded_model.compile(loss=lss, metrics=met, optimizer=opt)

    with open(indir, 'r') as csvfile:
        myreader = csv.DictReader(csvfile, delimiter=',')
        for row in myreader:
            dataid = int(row['dataid'])
            # NOTE(review): test_index is not defined in this function; it is
            # presumably a module-level global populated elsewhere -- confirm.
            if dataid in test_index:
                imageloc = '%s/%s' % (settings.options.rootlocation,
                                      row['image'])
                segloc = '%s/%s' % (settings.options.rootlocation,
                                    row['label'])
                saveloc = savedir + '/pred-' + str(dataid)
                PredictNifti(loaded_model, saveloc, imageloc, segloc=segloc)
def PredictModel():
    """Segment settings.options.predictimage with a saved-weights U-net.

    Reads the weights path, image path and output name from settings.options;
    writes a float-probability volume and a thresholded integer segmentation
    as NIfTI files derived from the output name.

    Returns:
        The resized integer segmentation array, or None when any of the
        required options (predictmodel, predictimage, segmentation) is unset.
    """
    model = settings.options.predictmodel
    image = settings.options.predictimage
    outdir = settings.options.segmentation
    if model is not None and image is not None and outdir is not None:
        os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # see issue #152

        imagepredict = nib.load(image)
        imageheader = imagepredict.header
        # NOTE(review): get_data() is deprecated/removed in newer nibabel;
        # kept for compatibility with the version this project pins.
        numpypredict = imagepredict.get_data().astype(settings.IMG_DTYPE)
        assert numpypredict.shape[0:2] == (settings._globalexpectedpixel,
                                           settings._globalexpectedpixel)
        nslice = numpypredict.shape[2]
        # downsample to training resolution; transpose to (slice, y, x)
        resizepredict = skimage.transform.resize(
            numpypredict,
            (settings.options.trainingresample,
             settings.options.trainingresample, nslice),
            order=0,
            preserve_range=True,
            mode='constant').astype(settings.IMG_DTYPE).transpose(2, 1, 0)

        opt = GetOptimizer()
        lss, met = GetLoss()
        loaded_model = get_unet()
        loaded_model.compile(loss=lss, metrics=met, optimizer=opt)
        loaded_model.load_weights(model)

        segout_float = loaded_model.predict(resizepredict[..., np.newaxis])
        # channel axis stripped here, so segout_int is (slice, y, x)
        segout_int = (segout_float[..., 0] >=
                      settings.options.segthreshold).astype(settings.SEG_DTYPE)

        segout_float_resize = skimage.transform.resize(
            segout_float[..., 0],
            (nslice, settings._globalexpectedpixel,
             settings._globalexpectedpixel),
            order=0,
            preserve_range=True,
            mode='constant').transpose(2, 1, 0)
        segout_float_img = nib.Nifti1Image(segout_float_resize, None,
                                           header=imageheader)
        segout_float_img.to_filename(
            outdir.replace('.nii.gz', '-predtumorfloat.nii.gz'))

        # BUG FIX: segout_int is already 3-D; the original passed
        # segout_int[..., 0] here, which dropped a spatial axis and
        # resized a 2-D array into a 3-D target shape.
        segout_int_resize = skimage.transform.resize(
            segout_int,
            (nslice, settings._globalexpectedpixel,
             settings._globalexpectedpixel),
            order=0,
            preserve_range=True,
            mode='constant').transpose(2, 1, 0)
        segout_int_img = nib.Nifti1Image(segout_int_resize, None,
                                         header=imageheader)
        segout_int_img.to_filename(
            outdir.replace('.nii.gz', '-predtumorseg.nii.gz'))

        return segout_int_resize
def PredictModel(model=settings.options.predictmodel,
                 image=settings.options.predictimage,
                 imageheader=None,
                 outdir=settings.options.segmentation):
    """Segment a NIfTI image with a saved-weights U-net.

    Args:
        model:       path of saved model weights (default bound at import
                     time from settings.options).
        image:       path of the input NIfTI image.
        imageheader: unused here; preprocess.reorient supplies the header.
        outdir:      output filename template; '.nii' is replaced to derive
                     the names of the written volumes.

    Returns:
        (float_prediction, int_segmentation), both resized back to the
        original geometry, or None when a required argument is missing.
    """
    # 'is not None' (identity) is the idiomatic None test, not '!= None'
    if model is not None and image is not None and outdir is not None:
        os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # see issue #152

        numpypredict, origheader, _ = preprocess.reorient(image)
        assert numpypredict.shape[0:2] == (settings._globalexpectedpixel,
                                           settings._globalexpectedpixel)
        # resample to network resolution, then window + rescale HU values
        resizepredict = preprocess.resize_to_nn(numpypredict)
        resizepredict = preprocess.window(resizepredict,
                                          settings.options.hu_lb,
                                          settings.options.hu_ub)
        resizepredict = preprocess.rescale(resizepredict,
                                           settings.options.hu_lb,
                                           settings.options.hu_ub)

        opt = GetOptimizer()
        lss, met = GetLoss()
        loaded_model = get_unet()
        loaded_model.compile(loss=lss, metrics=met, optimizer=opt)
        loaded_model.load_weights(model)

        segout_float = loaded_model.predict(
            resizepredict[..., np.newaxis])[..., 0]
        segout_int = (segout_float >= settings.options.segthreshold).astype(
            settings.SEG_DTYPE)

        # also write the windowed input back out for visual QA
        segin_windowed = preprocess.resize_to_original(resizepredict)
        segin_windowed_img = nib.Nifti1Image(segin_windowed, None,
                                             header=origheader)
        segin_windowed_img.to_filename(
            outdir.replace('.nii', '-imgin-windowed.nii'))

        segout_float_resize = preprocess.resize_to_original(segout_float)
        segout_float_img = nib.Nifti1Image(segout_float_resize, None,
                                           header=origheader)
        segout_float_img.to_filename(outdir.replace('.nii', '-pred-float.nii'))

        segout_int_resize = preprocess.resize_to_original(segout_int)
        segout_int_img = nib.Nifti1Image(segout_int_resize, None,
                                         header=origheader)
        segout_int_img.to_filename(outdir.replace('.nii', '-pred-seg.nii'))

        return segout_float_resize, segout_int_resize
def PredictKFold(modelloc,
                 dbfile,
                 outdir,
                 kfolds=settings.options.kfolds,
                 idfold=settings.options.idfold,
                 saveloclist=None):
    """Predict every test image of one k-fold and print DSC-L2 metrics.

    Args:
        modelloc:    path of a saved keras model.
        dbfile:      CSV database path.  NOTE(review): currently unused --
                     GetSetupKfolds reads settings.options.dbfile instead;
                     confirm which is intended before relying on it.
        outdir:      root output dir; results go to
                     <outdir>/<kfolds>/<idfold>/predictions.
        kfolds, idfold: fold count / fold index (defaults bound at import).
        saveloclist: CSV (with header row) of dataid,image-npy,label-npy.
    """
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # see issue #152
    (train_index, test_index,
     valid_index) = GetSetupKfolds(settings.options.dbfile, kfolds, idfold)

    logfileoutputdir = '%s/%03d/%03d' % (outdir, kfolds, idfold)
    savedir = logfileoutputdir + '/predictions'
    os.system('mkdir -p ' + logfileoutputdir)
    os.system('mkdir -p ' + savedir)

    print('loading model from', modelloc)
    # custom objects needed to deserialize the model
    customdict = {
        'dsc_l2': dsc_l2,
        'ISTA': ISTA,
        'DepthwiseConv3D': DepthwiseConv3D,
    }
    loaded_model = load_model(modelloc, compile=False,
                              custom_objects=customdict)
    opt = GetOptimizer()
    lss, met = GetLoss()
    loaded_model.compile(loss=lss, metrics=met, optimizer=opt)

    print('loading data from ', saveloclist)
    # first row is the CSV header
    loclist = np.genfromtxt(saveloclist, delimiter=',', dtype='str')[1:]
    # BUG FIX: the original also built train/test sublists that were never
    # used, and overwrote valid_index with a list of CSV rows; removed.

    for idx in test_index:
        print('\nImage', idx)
        print('gathering image data...')
        this_img_subset = [row for row in loclist if int(row[0]) == idx]
        this_img_xlist = [row[1] for row in this_img_subset]
        this_img_ylist = [row[2] for row in this_img_subset]
        img_in = np.empty(
            (len(this_img_xlist), settings.options.trainingresample,
             settings.options.trainingresample, 1),
            dtype=settings.FLOAT_DTYPE)
        seg_in = np.empty(
            (len(this_img_ylist), settings.options.trainingresample,
             settings.options.trainingresample, 1),
            dtype=settings.SEG_DTYPE)
        for iii in range(len(this_img_xlist)):
            img_in[iii, ...] = np.load(this_img_xlist[iii])
            seg_in[iii, ...] = np.load(this_img_ylist[iii])

        print('creating generator and performing prediction...')
        this_img_generator = NpyDataPredictionGenerator(this_img_xlist,
                                                        this_img_ylist,
                                                        batch_size=8)
        this_img_predictions = loaded_model.predict_generator(
            this_img_generator)
        this_seg = (this_img_predictions > 0.5).astype(settings.SEG_DTYPE)
        print('generating largest connected component...')
        this_lcc = preprocess.largest_connected_component(
            this_seg[..., 0]).astype(settings.SEG_DTYPE)[..., np.newaxis]

        print('saving data to', savedir)
        this_seg_nifti = nib.Nifti1Image(this_seg[..., 0], None)
        this_seg_nifti.to_filename(savedir + '/pred-' + str(idx) + '-int.nii')
        this_out_nifti = nib.Nifti1Image(this_img_predictions[..., 0], None)
        this_out_nifti.to_filename(savedir + '/pred-' + str(idx) +
                                   '-float.nii')
        this_img_nifti = nib.Nifti1Image(img_in[..., 0], None)
        this_img_nifti.to_filename(savedir + '/img-' + str(idx) + '.nii')
        this_tru_nifti = nib.Nifti1Image(seg_in[..., 0], None)
        this_tru_nifti.to_filename(savedir + '/seg-' + str(idx) + '.nii')
        this_lcc_nifti = nib.Nifti1Image(this_lcc[..., 0], None)
        this_lcc_nifti.to_filename(savedir + '/pred-' + str(idx) + '-lcc.nii')

        print('calculating metrics...')
        print('+ \tDSC-L2 3D (float) :\t',
              dsc_l2_3D_npy(seg_in, this_img_predictions))
        print('+ \tDSC-L2 3D (int) :\t', dsc_l2_3D_npy(seg_in, this_seg))
        print('+ \tDSC-L2 3D LCC (int) :\t', dsc_l2_3D_npy(seg_in, this_lcc))
        print('+ \tDSC-L2 2D AVG (float) :\t',
              dsc_l2_2D_avg_npy(seg_in, this_img_predictions))
        print('+ \tDSC-L2 2D AVG (int) :\t',
              dsc_l2_2D_avg_npy(seg_in, this_seg))
def TrainModel(idfold=0):
    """Train a tumor-segmentation U-net on one k-fold of the slice database.

    Loads the memory-mapped slice database, selects the training fold,
    masks images with the liver label, trains with optional augmentation,
    then writes validation-set predictions as NIfTI files.

    Args:
        idfold: index of the k-fold held out for testing.

    Returns:
        Path of the model checkpoint produced by the callbacks.
    """
    from setupmodel import GetSetupKfolds, GetCallbacks, GetOptimizer, GetLoss
    from buildmodel import get_unet, thick_slices

    ###
    ### load data
    ###
    kfolds = settings.options.kfolds

    print('loading memory map db for large dataset')
    numpydatabase = np.load(settings._globalnpfile)
    (train_index, test_index) = GetSetupKfolds(settings.options.dbfile,
                                               kfolds, idfold)

    print('copy data subsets into memory...')
    axialbounds = numpydatabase['axialtumorbounds']
    dataidarray = numpydatabase['dataid']
    dbtrainindex = np.isin(dataidarray, train_index)
    dbtestindex = np.isin(dataidarray, test_index)
    subsetidx_train = np.all(np.vstack((axialbounds, dbtrainindex)), axis=0)
    subsetidx_test = np.all(np.vstack((axialbounds, dbtestindex)), axis=0)
    # sanity check: train + test slices must cover every in-bounds slice
    if np.sum(subsetidx_train) + np.sum(subsetidx_test) != min(
            np.sum(axialbounds), np.sum(dbtrainindex)):
        # BUG FIX: the original raised a bare string, which is itself a
        # TypeError in Python 3; raise a proper exception instead.
        raise ValueError("data error: slice numbers dont match")

    print('copy memory map from disk to RAM...')
    trainingsubset = numpydatabase[subsetidx_train]
    np.random.seed(seed=0)  # fixed seed: reproducible shuffle/split
    np.random.shuffle(trainingsubset)
    totnslice = len(trainingsubset)

    if settings.options.D3:
        # stack adjacent slices into thick pseudo-3D inputs
        x_data = trainingsubset['imagedata']
        y_data = trainingsubset['truthdata']
        x_train = thick_slices(x_data, settings.options.thickness)
        y_train = thick_slices(y_data, settings.options.thickness)
    else:
        x_train = trainingsubset['imagedata']
        y_train = trainingsubset['truthdata']

    # 90/10 train/validation split over the shuffled slices
    slicesplit = int(0.9 * totnslice)
    TRAINING_SLICES = slice(0, slicesplit)
    VALIDATION_SLICES = slice(slicesplit, totnslice)

    print("\nkfolds : ", kfolds)
    print("idfold : ", idfold)
    print("slices in kfold : ", totnslice)
    print("slices training : ", slicesplit)
    print("slices validation : ", totnslice - slicesplit)
    try:
        print("slices testing : ", len(numpydatabase[subsetidx_test]))
    except Exception:  # narrowed from a bare except
        print("slices testing : 0")

    ###
    ### data preprocessing : applying liver mask
    ###
    y_train_typed = y_train.astype(settings.SEG_DTYPE)
    liver_idx = y_train_typed > 0
    y_train_liver = np.zeros_like(y_train_typed)
    y_train_liver[liver_idx] = 1
    tumor_idx = y_train_typed > 1
    y_train_tumor = np.zeros_like(y_train_typed)
    y_train_tumor[tumor_idx] = 1
    # push non-liver voxels to a constant background value of -100
    x_masked = x_train * y_train_liver - 100.0 * (1.0 - y_train_liver)
    x_masked = x_masked.astype(settings.IMG_DTYPE)

    ###
    ### set up output, logging, and callbacks
    ###
    logfileoutputdir = '%s/%03d/%03d' % (settings.options.outdir, kfolds,
                                         idfold)
    os.system('mkdir -p ' + logfileoutputdir)
    os.system('mkdir -p ' + logfileoutputdir + '/nii')
    os.system('mkdir -p ' + logfileoutputdir + '/tumor')
    print("Output to\t", logfileoutputdir)

    ###
    ### create and run model
    ###
    opt = GetOptimizer()
    callbacks, modelloc = GetCallbacks(logfileoutputdir, "tumor")
    lss, met = GetLoss()
    model = get_unet()
    model.compile(loss=lss, metrics=met, optimizer=opt)
    print("\n\n\tlivermask training...\tModel parameters: {0:,}".format(
        model.count_params()))

    if settings.options.augment:
        train_datagen = ImageDataGenerator(
            brightness_range=[0.95, 1.05],
            width_shift_range=[-0.1, 0.1],
            height_shift_range=[-0.1, 0.1],
            horizontal_flip=True,
            vertical_flip=True,
            zoom_range=0.1,
            fill_mode='nearest',
        )
    else:
        train_datagen = ImageDataGenerator()
    test_datagen = ImageDataGenerator()

    if settings.options.D3:
        train_generator = train_datagen.flow(
            x_masked[TRAINING_SLICES, :, :, :, np.newaxis],
            y_train_tumor[TRAINING_SLICES, :, :, :, np.newaxis],
            batch_size=settings.options.trainingbatch)
        # BUG FIX: the validation generator previously sampled
        # TRAINING_SLICES (copy-paste from the branch above); it must use
        # VALIDATION_SLICES, as the 2-D branch below does.
        test_generator = test_datagen.flow(
            x_masked[VALIDATION_SLICES, :, :, :, np.newaxis],
            y_train_tumor[VALIDATION_SLICES, :, :, :, np.newaxis],
            batch_size=settings.options.validationbatch)
    else:
        train_generator = train_datagen.flow(
            x_masked[TRAINING_SLICES, :, :, np.newaxis],
            y_train_tumor[TRAINING_SLICES, :, :, np.newaxis],
            batch_size=settings.options.trainingbatch)
        test_generator = test_datagen.flow(
            x_masked[VALIDATION_SLICES, :, :, np.newaxis],
            y_train_tumor[VALIDATION_SLICES, :, :, np.newaxis],
            batch_size=settings.options.validationbatch)

    history_liver = model.fit_generator(
        train_generator,
        steps_per_epoch=slicesplit // settings.options.trainingbatch,
        validation_steps=(totnslice - slicesplit) //
        settings.options.validationbatch,
        epochs=settings.options.numepochs,
        validation_data=test_generator,
        callbacks=callbacks)

    ###
    ### make predicions on validation set
    ###
    print("\n\n\tapplying models...")
    if settings.options.D3:
        y_pred_float = model.predict(x_masked[VALIDATION_SLICES, :, :, :,
                                              np.newaxis])
    else:
        y_pred_float = model.predict(x_masked[VALIDATION_SLICES, :, :,
                                              np.newaxis])
    y_pred_seg = (y_pred_float[..., 0] >=
                  settings.options.segthreshold).astype(settings.SEG_DTYPE)

    print("\tsaving to file...")
    if settings.options.D3:
        trueinnii = nib.Nifti1Image(x_train[VALIDATION_SLICES, :, :, :], None)
        truesegnii = nib.Nifti1Image(y_train[VALIDATION_SLICES, :, :, :], None)
        truelivernii = nib.Nifti1Image(
            y_train_liver[VALIDATION_SLICES, :, :, :], None)
        truetumornii = nib.Nifti1Image(
            y_train_tumor[VALIDATION_SLICES, :, :, :], None)
    else:
        trueinnii = nib.Nifti1Image(x_train[VALIDATION_SLICES, :, :], None)
        truesegnii = nib.Nifti1Image(y_train[VALIDATION_SLICES, :, :], None)
        truelivernii = nib.Nifti1Image(y_train_liver[VALIDATION_SLICES, :, :],
                                       None)
        truetumornii = nib.Nifti1Image(y_train_tumor[VALIDATION_SLICES, :, :],
                                       None)
    predsegnii = nib.Nifti1Image(y_pred_seg, None)
    predfloatnii = nib.Nifti1Image(y_pred_float, None)
    trueinnii.to_filename(logfileoutputdir + '/nii/trueimg.nii.gz')
    # NOTE(review): 'truseg' (sic) kept as-is -- downstream tooling may
    # depend on the existing filename.
    truesegnii.to_filename(logfileoutputdir + '/nii/truseg.nii.gz')
    truelivernii.to_filename(logfileoutputdir + '/nii/trueliver.nii.gz')
    truetumornii.to_filename(logfileoutputdir + '/nii/truetumor.nii.gz')
    predsegnii.to_filename(logfileoutputdir + '/nii/predtumorseg.nii.gz')
    predfloatnii.to_filename(logfileoutputdir + '/nii/predtumorfloat.nii.gz')
    # BUG FIX: was "\done saving." -- '\d' is a literal backslash-d, the
    # intended newline escape is '\n'.
    print("\ndone saving.")

    return modelloc
def PredictKFold(modelloc,
                 dbfile,
                 outdir,
                 kfolds=settings.options.kfolds,
                 idfold=settings.options.idfold,
                 saveloclist=None):
    """Predict liver+tumor for every test image of one k-fold (ensemble).

    Loads a multi-channel model (channel 0 treated as liver, channel 1 as
    tumor here), masks the tumor prediction by the largest connected
    component of the liver prediction, saves NIfTI volumes, and prints
    DSC-L2 metrics.

    Args:
        modelloc:    path of a saved keras model.
        dbfile:      CSV database path.  NOTE(review): currently unused --
                     GetSetupKfolds reads settings.options.dbfile; confirm.
        outdir:      root output dir; results go to
                     <outdir>/<kfolds>/<idfold>/predictions.
        kfolds, idfold: fold count / fold index (defaults bound at import).
        saveloclist: CSV (with header row) of dataid,image-npy,label-npy.
    """
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # see issue #152
    (train_index, test_index,
     valid_index) = GetSetupKfolds(settings.options.dbfile, kfolds, idfold)

    logfileoutputdir = '%s/%03d/%03d' % (outdir, kfolds, idfold)
    savedir = logfileoutputdir + '/predictions'
    os.system('mkdir -p ' + logfileoutputdir)
    os.system('mkdir -p ' + savedir)

    print('loading model from', modelloc)
    customdict = {
        'dsc_l2': dsc_l2,
        'ISTA': ISTA,
        'DepthwiseConv3D': DepthwiseConv3D,
    }
    loaded_model = load_model(modelloc, compile=False,
                              custom_objects=customdict)
    opt = GetOptimizer()
    lss, met = GetLoss('ensemble')
    loaded_model.compile(loss=lss, metrics=met, optimizer=opt)
    loaded_model.summary()

    print('loading data from ', saveloclist)
    # first row is the CSV header
    loclist = np.genfromtxt(saveloclist, delimiter=',', dtype='str')[1:]
    # BUG FIX: the original also built train/test sublists that were never
    # used, and overwrote valid_index with a list of CSV rows; removed.

    for idx in test_index:
        print('\nImage', idx)
        print('gathering image data...')
        this_img_subset = [row for row in loclist if int(row[0]) == idx]
        this_img_xlist = [row[1] for row in this_img_subset]
        this_img_ylist = [row[2] for row in this_img_subset]
        img_in = np.empty(
            (len(this_img_xlist), settings.options.trainingresample,
             settings.options.trainingresample, 1),
            dtype=settings.FLOAT_DTYPE)
        seg_in = np.empty(
            (len(this_img_ylist), settings.options.trainingresample,
             settings.options.trainingresample, 1),
            dtype=settings.SEG_DTYPE)
        # one-hot labels: channel 1 = liver, channel 2 = tumor
        cat_in = np.empty(
            (len(this_img_ylist), settings.options.trainingresample,
             settings.options.trainingresample, 3),
            dtype=settings.FLOAT_DTYPE)
        for iii in range(len(this_img_xlist)):
            img_in[iii, ...] = np.load(this_img_xlist[iii])
            loaded_y = np.load(this_img_ylist[iii])
            seg_in[iii, ...] = loaded_y
            cat_in[iii, ...] = to_categorical(loaded_y, num_classes=3)

        print('creating generator and performing prediction...')
        this_img_generator = NpyDataGenerator_Prediction(this_img_xlist,
                                                         this_img_ylist,
                                                         batch_size=8)
        this_img_predictions = loaded_model.predict_generator(
            this_img_generator)
        this_pred_liver = this_img_predictions[..., 0]
        this_pred_tumor = this_img_predictions[..., 1]
        this_seg_liver = (this_pred_liver >= 0.5).astype(settings.SEG_DTYPE)
        this_seg_tumor = (this_pred_tumor >= 0.5).astype(settings.SEG_DTYPE)
        this_lcc_liver = preprocess.largest_connected_component(
            this_seg_liver).astype(settings.SEG_DTYPE)
        # restrict tumor output to the dominant liver component
        this_pred_tumor_masked = this_pred_tumor * this_lcc_liver
        this_seg_tumor_masked = this_seg_tumor * this_lcc_liver

        print('saving data to', savedir)
        # NOTE(review): some names below lack a '-' before the tissue tag
        # (e.g. 'pred-<idx>liver-float.nii'); kept byte-identical since
        # downstream scripts may match these names.
        save_nifti(this_pred_liver,
                   savedir + '/pred-' + str(idx) + 'liver-float.nii')
        save_nifti(this_pred_tumor,
                   savedir + '/pred-' + str(idx) + 'tumor-float.nii')
        save_nifti(this_lcc_liver,
                   savedir + '/pred-' + str(idx) + 'liver-lcc.nii')
        save_nifti(this_seg_tumor_masked,
                   savedir + '/pred-' + str(idx) + '-tumor-masked-int.nii')
        save_nifti(img_in[..., 0], savedir + '/img-' + str(idx) + '.nii')
        save_nifti(seg_in[..., 0], savedir + '/seg-' + str(idx) + '.nii')
        save_nifti(cat_in[..., 1], savedir + '/seg-' + str(idx) + '-liver.nii')
        save_nifti(cat_in[..., 2], savedir + '/seg-' + str(idx) + '-tumor.nii')

        print('calculating metrics...')
        print(
            ' \t---------------------------------------------------------------------------------'
        )
        print(
            '+ \tDSC-L2 3D LIVER LCC (int) :\t',
            dsc_l2_3D_npy(cat_in[..., 1, np.newaxis],
                          this_lcc_liver[..., np.newaxis]))
        print(
            '+ \tDSC-L2 3D TUMOR MASKED (float) :\t',
            dsc_l2_3D_npy(cat_in[..., 2, np.newaxis],
                          this_pred_tumor_masked[..., np.newaxis]))
        print(
            '+ \tDSC-L2 3D TUMOR MASKED (int) :\t',
            dsc_l2_3D_npy(cat_in[..., 2, np.newaxis],
                          this_seg_tumor_masked[..., np.newaxis]))
        print(
            ' \t---------------------------------------------------------------------------------'
        )
        print(
            '+ \tDSC-L2 3D LIVER (float) :\t',
            dsc_l2_3D_npy(cat_in[..., 1, np.newaxis],
                          this_pred_liver[..., np.newaxis]))
        print(
            '+ \tDSC-L2 3D LIVER (int) :\t',
            dsc_l2_3D_npy(cat_in[..., 1, np.newaxis],
                          this_seg_liver[..., np.newaxis]))
        print(
            '+ \tDSC-L2 3D TUMOR (float) :\t',
            dsc_l2_3D_npy(cat_in[..., 2, np.newaxis],
                          this_pred_tumor[..., np.newaxis]))
        print(
            '+ \tDSC-L2 3D TUMOR (int) :\t',
            dsc_l2_3D_npy(cat_in[..., 2, np.newaxis],
                          this_seg_tumor[..., np.newaxis]))
        print(
            ' \t---------------------------------------------------------------------------------'
        )
def TrainModel(idfold=0):
    """Train a liver-segmentation U-net on one k-fold of the slice database.

    Uses seed-paired ImageDataGenerators for image and mask so augmentation
    stays aligned, trains with an explicit validation fold, then writes
    validation-set predictions as NIfTI files.

    Args:
        idfold: index of the k-fold held out for testing.

    Returns:
        Path of the model checkpoint produced by the callbacks.
    """
    from setupmodel import GetSetupKfolds, GetCallbacks, GetOptimizer, GetLoss
    from buildmodel import get_unet

    ###
    ### set up output, logging, and callbacks
    ###
    kfolds = settings.options.kfolds
    logfileoutputdir = '%s/%03d/%03d' % (settings.options.outdir, kfolds,
                                         idfold)
    os.system('mkdir -p ' + logfileoutputdir)
    os.system('mkdir -p ' + logfileoutputdir + '/nii')
    os.system('mkdir -p ' + logfileoutputdir + '/liver')
    print("Output to\t", logfileoutputdir)

    ###
    ### load data
    ###
    print('loading memory map db for large dataset')
    numpydatabase = np.load(settings._globalnpfile)
    (train_index, test_index,
     valid_index) = GetSetupKfolds(settings.options.dbfile, kfolds, idfold)

    print('copy data subsets into memory...')
    axialbounds = numpydatabase['axialliverbounds']
    dataidarray = numpydatabase['dataid']
    dbtrainindex = np.isin(dataidarray, train_index)
    dbtestindex = np.isin(dataidarray, test_index)
    dbvalidindex = np.isin(dataidarray, valid_index)
    subsetidx_train = np.all(np.vstack((axialbounds, dbtrainindex)), axis=0)
    subsetidx_test = np.all(np.vstack((axialbounds, dbtestindex)), axis=0)
    subsetidx_valid = np.all(np.vstack((axialbounds, dbvalidindex)), axis=0)
    # sanity check: the three folds must cover every in-bounds slice
    if np.sum(subsetidx_train) + np.sum(subsetidx_test) + np.sum(
            subsetidx_valid) != min(np.sum(axialbounds), np.sum(dbtrainindex)):
        # BUG FIX: the original raised a bare string (TypeError in Py3)
        raise ValueError("data error: slice numbers dont match")

    print('copy memory map from disk to RAM...')
    trainingsubset = numpydatabase[subsetidx_train]
    validsubset = numpydatabase[subsetidx_valid]
    testsubset = numpydatabase[subsetidx_test]

    np.random.seed(seed=0)  # fixed seed: reproducible shuffle
    np.random.shuffle(trainingsubset)
    ntrainslices = len(trainingsubset)
    nvalidslices = len(validsubset)

    x_train = trainingsubset['imagedata']
    y_train = trainingsubset['truthdata']
    x_valid = validsubset['imagedata']
    y_valid = validsubset['truthdata']

    print("\nkfolds : ", kfolds)
    print("idfold : ", idfold)
    print("slices training : ", ntrainslices)
    print("slices validation : ", nvalidslices)

    ###
    ### data preprocessing : applying liver mask
    ###
    y_train_typed = y_train.astype(settings.SEG_DTYPE)
    y_train_liver = preprocess.livermask(y_train_typed)
    x_train_typed = x_train
    x_train_typed = preprocess.window(x_train_typed, settings.options.hu_lb,
                                      settings.options.hu_ub)
    x_train_typed = preprocess.rescale(x_train_typed, settings.options.hu_lb,
                                       settings.options.hu_ub)

    y_valid_typed = y_valid.astype(settings.SEG_DTYPE)
    y_valid_liver = preprocess.livermask(y_valid_typed)
    x_valid_typed = x_valid
    x_valid_typed = preprocess.window(x_valid_typed, settings.options.hu_lb,
                                      settings.options.hu_ub)
    x_valid_typed = preprocess.rescale(x_valid_typed, settings.options.hu_lb,
                                       settings.options.hu_ub)

    ###
    ### create and run model
    ###
    opt = GetOptimizer()
    callbacks, modelloc = GetCallbacks(logfileoutputdir, "liver")
    lss, met = GetLoss()
    model = get_unet()
    model.compile(loss=lss, metrics=met, optimizer=opt)
    print("\n\n\tlivermask training...\tModel parameters: {0:,}".format(
        model.count_params()))

    if settings.options.augment:
        train_datagen = ImageDataGenerator(
            brightness_range=[0.9, 1.1],
            preprocessing_function=preprocess.post_augment,
        )
        train_maskgen = ImageDataGenerator()
    else:
        train_datagen = ImageDataGenerator()
        train_maskgen = ImageDataGenerator()

    # identical fixed seed keeps the image flow and mask flow in lockstep
    sd = 2
    dataflow = train_datagen.flow(x_train_typed[..., np.newaxis],
                                  batch_size=settings.options.trainingbatch,
                                  seed=sd,
                                  shuffle=True)
    maskflow = train_maskgen.flow(y_train_liver[..., np.newaxis],
                                  batch_size=settings.options.trainingbatch,
                                  seed=sd,
                                  shuffle=True)
    train_generator = zip(dataflow, maskflow)

    valid_datagen = ImageDataGenerator()
    valid_maskgen = ImageDataGenerator()
    validdataflow = valid_datagen.flow(
        x_valid_typed[..., np.newaxis],
        batch_size=settings.options.validationbatch,
        seed=sd,
        shuffle=True)
    validmaskflow = valid_maskgen.flow(
        y_valid_liver[..., np.newaxis],
        batch_size=settings.options.validationbatch,
        seed=sd,
        shuffle=True)
    valid_generator = zip(validdataflow, validmaskflow)

    history_liver = model.fit_generator(
        train_generator,
        # BUG FIX: step counts must be integers; was float '/' division
        steps_per_epoch=ntrainslices // settings.options.trainingbatch,
        epochs=settings.options.numepochs,
        validation_data=valid_generator,
        callbacks=callbacks,
        shuffle=True,
        validation_steps=nvalidslices // settings.options.validationbatch,
    )

    ###
    ### make predicions on validation set
    ###
    print("\n\n\tapplying models...")
    y_pred_float = model.predict(x_valid_typed[..., np.newaxis])
    y_pred_seg = (y_pred_float[..., 0] >=
                  settings.options.segthreshold).astype(settings.SEG_DTYPE)

    print("\tsaving to file...")
    trueinnii = nib.Nifti1Image(x_valid, None)
    truesegnii = nib.Nifti1Image(y_valid, None)
    truelivernii = nib.Nifti1Image(y_valid_liver, None)
    predsegnii = nib.Nifti1Image(y_pred_seg, None)
    predfloatnii = nib.Nifti1Image(y_pred_float, None)
    trueinnii.to_filename(logfileoutputdir + '/nii/trueimg.nii.gz')
    truesegnii.to_filename(logfileoutputdir + '/nii/trueseg.nii.gz')
    truelivernii.to_filename(logfileoutputdir + '/nii/trueliver.nii.gz')
    # NOTE(review): these outputs are liver predictions but keep the
    # historical 'predtumor*' filenames; downstream tooling may depend on
    # them.
    predsegnii.to_filename(logfileoutputdir + '/nii/predtumorseg.nii.gz')
    predfloatnii.to_filename(logfileoutputdir + '/nii/predtumorfloat.nii.gz')
    # BUG FIX: was "\done saving." -- intended newline escape is '\n'.
    print("\ndone saving.")

    return modelloc
def TrainModel(idfold=0, saveloclist=None):
    """Train a liver or tumor U-net from per-slice .npy file lists.

    Args:
        idfold:      index of the k-fold held out for testing.
        saveloclist: optional CSV (with header) of dataid,image-npy,label-npy
                     rows; when None it is taken from settings.options.

    Returns:
        Path of the model checkpoint produced by the callbacks.

    Raises:
        ValueError: if neither settings.options.liver nor
            settings.options.tumor is set.
    """
    from setupmodel import GetSetupKfolds, GetCallbacks, GetOptimizer, GetLoss
    from buildmodel import get_unet
    os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'
    os.environ['HDF5_USE_FILE_LOCKING'] = 'FALSE'

    ###
    ### set up output, logging, and callbacks
    ###
    kfolds = settings.options.kfolds
    logfileoutputdir = '%s/%03d/%03d' % (settings.options.outdir, kfolds,
                                         idfold)
    os.system('mkdir -p ' + logfileoutputdir)
    os.system('mkdir -p ' + logfileoutputdir + '/nii')
    if settings.options.liver:
        os.system('mkdir -p ' + logfileoutputdir + '/liver')
    elif settings.options.tumor:
        os.system('mkdir -p ' + logfileoutputdir + '/tumor')
    else:
        print('need to choose one of {liver,tumor}')
        raise ValueError(
            'need to choose to perform liver or tumor segmentation')
    print("Output to\t", logfileoutputdir)

    ###
    ### load data
    ###
    (train_index, test_index,
     valid_index) = GetSetupKfolds(settings.options.dbfile, kfolds, idfold)

    # BUG FIX: the saveloclist parameter used to be unconditionally
    # overwritten from settings; honor a caller-supplied list and only fall
    # back to the configured defaults when it is None.
    if saveloclist is None:
        if settings.options.liver:
            saveloclist = settings.options.datafiles_liver
        else:
            # tumor is guaranteed by the guard above
            saveloclist = settings.options.datafiles_tumor

    # first row is the CSV header
    loclist = np.genfromtxt(saveloclist, delimiter=',', dtype='str')[1:]
    trainingsubset = [row for row in loclist if int(row[0]) in train_index]
    validsubset = [row for row in loclist if int(row[0]) in valid_index]

    ###
    ### create and run model
    ###
    opt = GetOptimizer()
    # NOTE(review): callbacks are tagged "liver" even for tumor training --
    # confirm whether the tag should follow settings.options.tumor.
    callbacks, modelloc = GetCallbacks(logfileoutputdir, "liver")
    lss, met = GetLoss()
    model = get_unet()
    model.summary()
    model.compile(loss=lss, metrics=met, optimizer=opt)
    print("\n\n\tlivermask training...\tModel parameters: {0:,}".format(
        model.count_params()))

    train_xlist = [row[1] for row in trainingsubset]
    train_ylist = [row[2] for row in trainingsubset]
    valid_xlist = [row[1] for row in validsubset]
    valid_ylist = [row[2] for row in validsubset]

    training_generator = NpyDataGenerator(train_xlist, train_ylist)
    validation_generator = NpyDataGenerator(valid_xlist, valid_ylist)
    history_liver = model.fit_generator(
        verbose=2 - int(settings.options.verbose),
        generator=training_generator,
        validation_data=validation_generator,
        use_multiprocessing=True,
        workers=8,
        epochs=settings.options.numepochs,
        callbacks=callbacks,
        shuffle=True,
    )

    return modelloc
def TrainModel(idfold=0):
    """Train a tumor-segmentation U-net on liver-masked slices of one k-fold.

    Windows/rescales HU values, masks images with the liver label (non-liver
    voxels pushed to -1), trains with seed-paired image/mask generators, then
    writes validation-set predictions as NIfTI files.  Large intermediates
    are del'd aggressively to limit peak RAM.

    Args:
        idfold: index of the k-fold held out for testing.

    Returns:
        Path of the model checkpoint produced by the callbacks.
    """
    from setupmodel import GetSetupKfolds, GetCallbacks, GetOptimizer, GetLoss
    from buildmodel import get_unet
    os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'

    ###
    ### load data
    ###
    kfolds = settings.options.kfolds
    logfileoutputdir = '%s/%03d/%03d' % (settings.options.outdir, kfolds,
                                         idfold)
    os.system('mkdir -p ' + logfileoutputdir)
    os.system('mkdir -p ' + logfileoutputdir + '/nii')
    os.system('mkdir -p ' + logfileoutputdir + '/tumor')
    print("Output to\t", logfileoutputdir)

    print('loading memory map db for large dataset')
    numpydatabase = np.load(settings._globalnpfile)
    (train_index, test_index,
     valid_index) = GetSetupKfolds(settings.options.dbfile, kfolds, idfold)

    print('copy data subsets into memory...')
    axialbounds = numpydatabase['axialtumorbounds']
    dataidarray = numpydatabase['dataid']
    dbtrainindex = np.isin(dataidarray, train_index)
    dbtestindex = np.isin(dataidarray, test_index)
    dbvalidindex = np.isin(dataidarray, valid_index)
    subsetidx_train = np.all(np.vstack((axialbounds, dbtrainindex)), axis=0)
    subsetidx_test = np.all(np.vstack((axialbounds, dbtestindex)), axis=0)
    subsetidx_valid = np.all(np.vstack((axialbounds, dbvalidindex)), axis=0)
    # sanity check: the three folds must cover every in-bounds slice
    if np.sum(subsetidx_train) + np.sum(subsetidx_test) + np.sum(
            subsetidx_valid) != min(np.sum(axialbounds), np.sum(dbtrainindex)):
        # BUG FIX: the original raised a bare string (TypeError in Py3)
        raise ValueError("data error: slice numbers dont match")

    print('copy memory map from disk to RAM...')
    trainingsubset = numpydatabase[subsetidx_train]
    validsubset = numpydatabase[subsetidx_valid]
    testsubset = numpydatabase[subsetidx_test]

    # release the memmap and index arrays before materializing slices
    del numpydatabase
    del axialbounds
    del dataidarray
    del dbtrainindex
    del dbtestindex
    del dbvalidindex
    del subsetidx_train
    del subsetidx_test
    del subsetidx_valid

    np.random.seed(seed=0)  # fixed seed: reproducible shuffle
    np.random.shuffle(trainingsubset)
    ntrainslices = len(trainingsubset)
    nvalidslices = len(validsubset)

    x_train = trainingsubset['imagedata']
    y_train = trainingsubset['truthdata']
    x_valid = validsubset['imagedata']
    y_valid = validsubset['truthdata']

    print('\nkfolds : ', kfolds)
    print("idfold : ", idfold)
    print("slices training : ", ntrainslices)
    print("slices validation : ", nvalidslices)

    ###
    ### data preprocessing : applying liver mask
    ###
    y_train_typed = y_train.astype(settings.SEG_DTYPE)
    y_train_liver = preprocess.livermask(y_train_typed)
    y_train_tumor = preprocess.tumormask(y_train_typed)
    x_train_typed = x_train
    x_train_typed = preprocess.window(x_train_typed, settings.options.hu_lb,
                                      settings.options.hu_ub)
    x_train_typed = preprocess.rescale(x_train_typed, settings.options.hu_lb,
                                       settings.options.hu_ub)
    # non-liver voxels pushed to the constant background value -1
    x_train_masked = x_train_typed * y_train_liver.astype(
        settings.IMG_DTYPE) - (1.0 - y_train_liver.astype(settings.IMG_DTYPE))

    y_valid_typed = y_valid.astype(settings.SEG_DTYPE)
    y_valid_liver = preprocess.livermask(y_valid_typed)
    y_valid_tumor = preprocess.tumormask(y_valid_typed)
    x_valid_typed = x_valid
    x_valid_typed = preprocess.window(x_valid_typed, settings.options.hu_lb,
                                      settings.options.hu_ub)
    x_valid_typed = preprocess.rescale(x_valid_typed, settings.options.hu_lb,
                                       settings.options.hu_ub)
    x_valid_masked = x_valid_typed * y_valid_liver.astype(
        settings.IMG_DTYPE) - (1.0 - y_valid_liver.astype(settings.IMG_DTYPE))

    ###
    ### create and run model
    ###
    opt = GetOptimizer()
    callbacks, modelloc = GetCallbacks(logfileoutputdir, "tumor")
    lss, met = GetLoss()
    model = get_unet()
    model.compile(loss=lss, metrics=met, optimizer=opt)
    print("\n\n\tlivermask training...\tModel parameters: {0:,}".format(
        model.count_params()))

    if settings.options.augment:
        train_datagen = ImageDataGenerator(
            brightness_range=[0.9, 1.1],
            fill_mode='nearest',
            preprocessing_function=preprocess.post_augment)
        train_maskgen = ImageDataGenerator()
    else:
        train_datagen = ImageDataGenerator()
        train_maskgen = ImageDataGenerator()
    test_datagen = ImageDataGenerator()

    # identical fixed seed keeps the image flow and mask flow in lockstep
    sd = 2
    dataflow = train_datagen.flow(x_train_masked[..., np.newaxis],
                                  batch_size=settings.options.trainingbatch,
                                  seed=sd,
                                  shuffle=True)
    maskflow = train_maskgen.flow(y_train_tumor[..., np.newaxis],
                                  batch_size=settings.options.trainingbatch,
                                  seed=sd,
                                  shuffle=True)
    train_generator = zip(dataflow, maskflow)

    valid_datagen = ImageDataGenerator()
    valid_maskgen = ImageDataGenerator()
    valid_dataflow = valid_datagen.flow(
        x_valid_masked[..., np.newaxis],
        batch_size=settings.options.validationbatch,
        seed=sd,
        shuffle=True)
    valid_maskflow = valid_maskgen.flow(
        y_valid_tumor[..., np.newaxis],
        batch_size=settings.options.validationbatch,
        seed=sd,
        shuffle=True)
    valid_generator = zip(valid_dataflow, valid_maskflow)

    history_tumor = model.fit_generator(
        train_generator,
        # BUG FIX: step counts must be integers; was float '/' division
        steps_per_epoch=ntrainslices // settings.options.trainingbatch,
        epochs=settings.options.numepochs,
        validation_data=valid_generator,
        callbacks=callbacks,
        shuffle=True,
        validation_steps=nvalidslices // settings.options.validationbatch,
    )

    # training data no longer needed; free before the prediction pass
    del x_train
    del y_train
    del x_train_typed
    del y_train_typed
    del y_train_liver
    del y_train_tumor
    del x_train_masked

    ###
    ### make predicions on validation set
    ###
    print("\n\n\tapplying models...")
    y_pred_float = model.predict(x_valid_masked[..., np.newaxis])
    y_pred_seg = (y_pred_float[..., 0] >=
                  settings.options.segthreshold).astype(settings.SEG_DTYPE)

    print("\tsaving to file...")
    trueinnii = nib.Nifti1Image(x_valid, None)
    truesegnii = nib.Nifti1Image(y_valid, None)
    truelivernii = nib.Nifti1Image(y_valid_liver, None)
    truetumornii = nib.Nifti1Image(y_valid_tumor, None)
    windownii = nib.Nifti1Image(x_valid_typed, None)
    maskednii = nib.Nifti1Image(x_valid_masked, None)
    predsegnii = nib.Nifti1Image(y_pred_seg, None)
    predfloatnii = nib.Nifti1Image(y_pred_float, None)
    trueinnii.to_filename(logfileoutputdir + '/nii/trueimg.nii.gz')
    truesegnii.to_filename(logfileoutputdir + '/nii/trueseg.nii.gz')
    truetumornii.to_filename(logfileoutputdir + '/nii/truetumor.nii.gz')
    truelivernii.to_filename(logfileoutputdir + '/nii/trueliver.nii.gz')
    windownii.to_filename(logfileoutputdir + '/nii/windowedimg.nii.gz')
    maskednii.to_filename(logfileoutputdir + '/nii/masked.nii.gz')
    predsegnii.to_filename(logfileoutputdir + '/nii/predtumorseg.nii.gz')
    predfloatnii.to_filename(logfileoutputdir + '/nii/predtumorfloat.nii.gz')

    del x_valid
    del y_valid
    del x_valid_typed
    del y_valid_typed
    del y_valid_liver
    del y_valid_tumor
    del x_valid_masked
    # BUG FIX: was "\done saving." -- intended newline escape is '\n'.
    print("\ndone saving.")

    return modelloc
def TrainModel(idfold=0):
    """Train a level-set (LSE) liver-mask model for one k-fold.

    Loads the global numpy slice database, restricts it to this fold's
    training/testing ids, builds a two-channel input (image + level-set
    initialization u0), trains ``get_lse_G``, and writes validation
    predictions as NIfTI files under the fold's output directory.

    Parameters
    ----------
    idfold : int
        Index of the k-fold to train (0-based).

    Returns
    -------
    str
        Path to the saved model checkpoint (from ``GetCallbacks``).
    """
    from setupmodel import GetSetupKfolds, GetCallbacks, GetOptimizer, GetLoss
    from buildmodel import get_lse, get_lsn, get_lse_G

    ###
    ### load data
    ###
    kfolds = settings.options.kfolds
    print('loading memory map db for large dataset')
    numpydatabase = np.load(settings._globalnpfile)
    (train_index, test_index) = GetSetupKfolds(settings.options.dbfile,
                                               kfolds, idfold)

    print('copy data subsets into memory...')
    axialbounds = numpydatabase['axialliverbounds']
    dataidarray = numpydatabase['dataid']
    dbtrainindex = np.isin(dataidarray, train_index)
    dbtestindex = np.isin(dataidarray, test_index)
    # keep only slices that are inside the liver bounds AND in this fold
    subsetidx_train = np.all(np.vstack((axialbounds, dbtrainindex)), axis=0)
    subsetidx_test = np.all(np.vstack((axialbounds, dbtestindex)), axis=0)
    print(np.sum(subsetidx_train))
    print(np.sum(subsetidx_test))
    print(np.sum(axialbounds))
    print(np.sum(dbtrainindex))

    print('copy memory map from disk to RAM...')
    trainingsubset = numpydatabase[subsetidx_train]
    np.random.seed(seed=0)  # fixed seed so the 90/10 split is reproducible
    np.random.shuffle(trainingsubset)
    totnslice = len(trainingsubset)
    x_train = trainingsubset['imagedata']
    y_train = trainingsubset['truthdata']
    x_train_dx = trainingsubset['image_dx']
    x_train_dy = trainingsubset['image_dy']
    x_train_dz = trainingsubset['image_dz']
    # per-slice voxel spacings stacked into shape (nslices, 3)
    x_train_dims = np.transpose(np.vstack((x_train_dx, x_train_dy, x_train_dz)))

    # 90/10 train/validation split over the shuffled slices
    slicesplit = int(0.9 * totnslice)
    TRAINING_SLICES = slice(0, slicesplit)
    VALIDATION_SLICES = slice(slicesplit, totnslice)

    print("\nkfolds : ", kfolds)
    print("idfold : ", idfold)
    print("slices in kfold : ", totnslice)
    print("slices training : ", slicesplit)
    print("slices validation : ", totnslice - slicesplit)
    try:
        print("slices testing : ", len(numpydatabase[subsetidx_test]))
    except Exception:  # FIX: was a bare except; keep best-effort reporting
        print("slices testing : 0")

    ###
    ### data preprocessing : liver mask labels (any label > 0 is liver)
    ###
    y_train_typed = y_train.astype(settings.SEG_DTYPE)
    liver_idx = y_train_typed > 0
    y_train_liver = np.zeros_like(y_train_typed)
    y_train_liver[liver_idx] = 1

    ###
    ### initialize u0 (the level-set initialization channel)
    ###
    if settings.options.randinit:
        x_init = np.random.uniform(size=(totnslice, settings._nx, settings._ny))
    elif settings.options.circleinit:
        # square seed region of half-width `rad` centered at (y_center, x_center)
        x_center = int(settings._ny * 0.25)
        y_center = int(settings._nx * 0.67)
        rad = 20
        x_init = np.zeros((totnslice, settings._nx, settings._ny))
        for xxx in range(2 * rad):
            xloc = x_center - rad + xxx
            for yyy in range(2 * rad):
                yloc = y_center - rad + yyy
                x_init[:, yloc, xloc] = 1.0
    else:
        x_init = np.ones((totnslice, settings._nx, settings._ny))

    ###
    ### set up output, logging, and callbacks
    ###
    logfileoutputdir = '%s/%03d/%03d' % (settings.options.outdir, kfolds, idfold)
    os.system('mkdir -p ' + logfileoutputdir)
    os.system('mkdir -p ' + logfileoutputdir + '/nii')
    os.system('mkdir -p ' + logfileoutputdir + '/liver')
    print("Output to\t", logfileoutputdir)

    ###
    ### create and run model
    ###
    opt = GetOptimizer()
    callbacks, modelloc = GetCallbacks(logfileoutputdir, "liver")
    lss, met = GetLoss()
    # alternative architectures: get_lse(settings.options.nt), get_lsn(...)
    model = get_lse_G(settings.options.nt)
    model.compile(loss=lss, metrics=met, optimizer=opt)
    print("\n\n\tlivermask training...\tModel parameters: {0:,}".format(
        model.count_params()))

    # two-channel input [image, u0]; spacing tensor reshaped to (n, 3, 1, 1)
    x_in = np.concatenate((x_train[TRAINING_SLICES, :, :, np.newaxis],
                           x_init[TRAINING_SLICES, :, :, np.newaxis]), axis=-1)
    x_val = np.concatenate((x_train[VALIDATION_SLICES, :, :, np.newaxis],
                            x_init[VALIDATION_SLICES, :, :, np.newaxis]), axis=-1)
    dim_in = x_train_dims[TRAINING_SLICES, :, np.newaxis, np.newaxis]
    dim_val = x_train_dims[VALIDATION_SLICES, :, np.newaxis, np.newaxis]
    print(x_train.shape)
    print(x_in.shape)
    print(x_val.shape)
    print(dim_in.shape)
    print(dim_val.shape)

    if settings.options.augment:
        train_datagen = ImageDataGenerator(
            brightness_range=[0.95, 1.05],
            width_shift_range=[-0.1, 0.1],
            height_shift_range=[-0.1, 0.1],
            horizontal_flip=True,
            vertical_flip=True,
            zoom_range=0.1,
            fill_mode='nearest',
        )
    else:
        train_datagen = ImageDataGenerator()
    test_datagen = ImageDataGenerator()
    # NOTE(review): train_datagen/test_datagen are currently unused — training
    # goes through model.fit below rather than a generator pipeline, so the
    # augmentation options have no effect here. TODO: confirm intent.

    print(x_in.shape)
    print(dim_in.shape)
    print(len([x_in, dim_in]))
    print(model.summary())
    history = model.fit(x=[x_in, dim_in],
                        y=y_train_liver[TRAINING_SLICES, :, :, np.newaxis],
                        # FIX: validation_data must be a tuple (x, y) per the
                        # Keras API; a 2-element list is rejected by newer tf.keras
                        validation_data=([x_val, dim_val],
                                         y_train_liver[VALIDATION_SLICES, :, :, np.newaxis]),
                        callbacks=callbacks,
                        batch_size=settings.options.trainingbatch,
                        epochs=settings.options.numepochs)

    ###
    ### make predictions on validation set
    ###
    print("\n\n\tapplying models...")
    y_pred_float = model.predict(
        [x_val, x_train_dims[VALIDATION_SLICES, :, np.newaxis, np.newaxis]])
    y_pred_seg = (y_pred_float[..., 0] >=
                  settings.options.segthreshold).astype(settings.SEG_DTYPE)

    print("\tsaving to file...")
    trueinnii = nib.Nifti1Image(x_train[VALIDATION_SLICES, :, :], None)
    truesegnii = nib.Nifti1Image(y_train[VALIDATION_SLICES, :, :], None)
    truelivernii = nib.Nifti1Image(y_train_liver[VALIDATION_SLICES, :, :], None)
    predsegnii = nib.Nifti1Image(y_pred_seg, None)
    predfloatnii = nib.Nifti1Image(y_pred_float, None)
    trueinnii.to_filename(logfileoutputdir + '/nii/trueimg.nii.gz')
    # NOTE(review): 'truseg' spelling kept for backward compatibility with any
    # downstream consumers, although other code paths write 'trueseg'.
    truesegnii.to_filename(logfileoutputdir + '/nii/truseg.nii.gz')
    truelivernii.to_filename(logfileoutputdir + '/nii/trueliver.nii.gz')
    predsegnii.to_filename(logfileoutputdir + '/nii/predseg.nii.gz')
    predfloatnii.to_filename(logfileoutputdir + '/nii/predfloat.nii.gz')
    print("\tdone saving.")  # FIX: was "\done saving." (invalid escape)
    return modelloc
def TrainModel(idfold=0, saveloclist=None):
    """Train the liver -> tumor -> ensemble cascade for one k-fold.

    Three stages:
      1. train a liver-mask U-net on the liver datafile list;
      2. train a tumor U-net stacked on the liver model, using true masks;
      3. fine-tune both jointly ("tuning" stage) on tumor slices.
    Each stage gets its own callbacks/checkpoint subdirectory.

    Parameters
    ----------
    idfold : int
        Index of the k-fold to train.
    saveloclist : str, optional
        NOTE(review): dead parameter — it is unconditionally overwritten
        from ``settings.options.datafiles_liver`` / ``datafiles_tumor``
        below. Kept only for backward compatibility with existing callers.

    Returns
    -------
    str
        Checkpoint path of the LAST stage prepared by ``GetCallbacks``
        (the "tuning" stage), since ``modelloc`` is reassigned per stage.
    """
    from setupmodel import GetSetupKfolds, GetCallbacks, GetOptimizer, GetLoss
    from buildmodel import get_unet_liver, get_unet_tumor, get_unet_ensemble
    os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'
    os.environ['HDF5_USE_FILE_LOCKING'] = 'FALSE'

    ###
    ### set up output, logging, callbacks, and k-folds
    ###
    kfolds = settings.options.kfolds
    logfileoutputdir = '%s/%03d/%03d' % (settings.options.outdir, kfolds, idfold)
    os.system('mkdir -p ' + logfileoutputdir)
    # os.system('mkdir -p ' + logfileoutputdir + '/nii')
    os.system('mkdir -p ' + logfileoutputdir + '/liver')
    os.system('mkdir -p ' + logfileoutputdir + '/tumor')
    os.system('mkdir -p ' + logfileoutputdir + '/tuning')
    os.system('mkdir -p ' + logfileoutputdir + '/ensemble')
    print("Output to\t", logfileoutputdir)

    (train_index, test_index, valid_index) = GetSetupKfolds(
        settings.options.dbfile, kfolds, idfold)
    # Keras verbosity: 1 = progress bar, 2 = one line per epoch
    if settings.options.verbose:
        vb = 1
    else:
        vb = 2

    ########
    ### TRAIN LIVER MODEL
    ########
    saveloclist = settings.options.datafiles_liver
    # CSV rows: dataid, image-npy-path, label-npy-path (skip header row)
    loclist = np.genfromtxt(saveloclist, delimiter=',', dtype='str')[1:]
    trainingsubset = [row for row in loclist if int(row[0]) in train_index]
    testingsubset = [row for row in loclist if int(row[0]) in test_index]  # currently unused
    validsubset = [row for row in loclist if int(row[0]) in valid_index]

    opt = GetOptimizer()
    callbacks, modelloc = GetCallbacks(logfileoutputdir, "liver")
    lss, met = GetLoss('liver')
    liver_model = get_unet_liver()
    liver_model.summary()
    liver_model.compile(loss=lss, metrics=met, optimizer=opt)
    print("\n\n#\t liver model training...\tModel parameters: {0:,}".format(
        liver_model.count_params()))

    train_xlist = [row[1] for row in trainingsubset]
    train_ylist = [row[2] for row in trainingsubset]
    valid_xlist = [row[1] for row in validsubset]
    valid_ylist = [row[2] for row in validsubset]
    training_generator = NpyDataGenerator_Liver(train_xlist, train_ylist)
    validation_generator = NpyDataGenerator_Liver(valid_xlist, valid_ylist)
    history_liver = liver_model.fit_generator(
        verbose=vb,
        generator=training_generator,
        validation_data=validation_generator,
        use_multiprocessing=True,
        workers=16,
        epochs=settings.options.numepochs,
        callbacks=callbacks,
        shuffle=True,
    )

    ########
    ### TRAIN TUMOR MODEL WITH TRUE MASK DATA
    ########
    saveloclist = settings.options.datafiles_tumor
    loclist = np.genfromtxt(saveloclist, delimiter=',', dtype='str')[1:]
    trainingsubset = [row for row in loclist if int(row[0]) in train_index]
    testingsubset = [row for row in loclist if int(row[0]) in test_index]  # currently unused
    validsubset = [row for row in loclist if int(row[0]) in valid_index]

    opt = GetOptimizer()
    callbacks, modelloc = GetCallbacks(logfileoutputdir, "tumor")
    lss, met = GetLoss('tumor')
    tumor_model = get_unet_tumor(liver_model)
    tumor_model.summary()
    tumor_model.compile(loss=lss, metrics=met, optimizer=opt)
    print("\n\n#\t tumor model training...\tModel parameters: {0:,}".format(
        tumor_model.count_params()))

    train_xlist = [row[1] for row in trainingsubset]
    train_ylist = [row[2] for row in trainingsubset]
    valid_xlist = [row[1] for row in validsubset]
    valid_ylist = [row[2] for row in validsubset]
    training_generator = NpyDataGenerator_Tumor(train_xlist, train_ylist)
    validation_generator = NpyDataGenerator_Tumor(valid_xlist, valid_ylist)
    # FIX: was assigned to history_liver, shadowing the stage-1 history
    history_tumor = tumor_model.fit_generator(
        verbose=vb,
        generator=training_generator,
        validation_data=validation_generator,
        use_multiprocessing=True,
        workers=16,
        epochs=settings.options.numepochs,
        callbacks=callbacks,
        shuffle=True,
    )

    ########
    ### TUNE TUMOR MODEL WITH LIVER MODEL MASKS
    ########
    saveloclist = settings.options.datafiles_tumor
    loclist = np.genfromtxt(saveloclist, delimiter=',', dtype='str')[1:]
    trainingsubset = [row for row in loclist if int(row[0]) in train_index]
    testingsubset = [row for row in loclist if int(row[0]) in test_index]  # currently unused
    validsubset = [row for row in loclist if int(row[0]) in valid_index]

    opt = GetOptimizer(tuning=True)
    callbacks, modelloc = GetCallbacks(logfileoutputdir, "tuning")
    lss, met = GetLoss('ensemble')
    ensemble = get_unet_ensemble(liver_model, tumor_model,
                                 tune_liver=True, tune_tumor=True)
    ensemble.compile(loss=lss, metrics=met, optimizer=opt)
    ensemble.summary()
    print("\n\n#\t ensemble model on tumor slices...\tModel parameters: {0:,}".
          format(ensemble.count_params()))

    train_xlist = [row[1] for row in trainingsubset]
    train_ylist = [row[2] for row in trainingsubset]
    valid_xlist = [row[1] for row in validsubset]
    valid_ylist = [row[2] for row in validsubset]
    training_generator = NpyDataGenerator_Ensemble(train_xlist, train_ylist)
    validation_generator = NpyDataGenerator_Ensemble(valid_xlist, valid_ylist)
    # FIX: was assigned to history_liver, shadowing the stage-1 history
    history_tune = ensemble.fit_generator(
        verbose=vb,
        generator=training_generator,
        validation_data=validation_generator,
        use_multiprocessing=True,
        workers=16,
        epochs=settings.options.numepochs // 2,  # fine-tune for half the epochs
        callbacks=callbacks,
        shuffle=True,
    )

    # Disabled fourth stage: fine-tune the full ensemble on liver slices.
    '''
    ########
    ### TUNE ENSEMBLE MODEL
    ########
    saveloclist = settings.options.datafiles_liver
    loclist = np.genfromtxt(saveloclist, delimiter=',', dtype='str')[1:]
    trainingsubset = [row for row in loclist if int(row[0]) in train_index]
    testingsubset = [row for row in loclist if int(row[0]) in test_index]
    validsubset = [row for row in loclist if int(row[0]) in valid_index]

    opt = GetOptimizer(tuning=True)
    callbacks, modelloc = GetCallbacks(logfileoutputdir, "ensemble")
    lss, met = GetLoss('ensemble')
    for lyr in ensemble.layers:
        lyr.trainable = True
    ensemble.compile(loss=lss, metrics=met, optimizer=opt)
    print("\n\n#\t ensemble model on liver slices...\tModel parameters: {0:,}".format(ensemble.count_params()))

    train_xlist = [row[1] for row in trainingsubset]
    train_ylist = [row[2] for row in trainingsubset]
    valid_xlist = [row[1] for row in validsubset]
    valid_ylist = [row[2] for row in validsubset]
    training_generator = NpyDataGenerator_Ensemble(train_xlist, train_ylist)
    validation_generator = NpyDataGenerator_Ensemble(valid_xlist, valid_ylist)
    history_ensemble = ensemble.fit_generator(
        verbose=vb,
        generator=training_generator,
        validation_data=validation_generator,
        use_multiprocessing=True,
        workers=16,
        epochs=settings.options.numepochs//2,
        callbacks=callbacks,
        shuffle=True,
    )
    '''
    return modelloc
def TrainModel(idfold=0):
    """Train a liver-mask U-net for one k-fold (2-D, 2.5-D, or 3-D input).

    Slices come from the global memmap database; images are HU-windowed and
    rescaled, optionally stacked into thick slices (``options.D3`` /
    ``options.D25``), trained through paired (optionally augmented)
    ImageDataGenerators, and validation predictions are written as NIfTI
    files under the fold's output directory.

    Parameters
    ----------
    idfold : int
        Index of the k-fold to train (0-based).

    Returns
    -------
    str
        Path to the saved model checkpoint (from ``GetCallbacks``).

    Raises
    ------
    ValueError
        If the train/test/valid slice partition does not cover the
        expected number of liver-bounded slices.
    """
    from setupmodel import GetSetupKfolds, GetCallbacks, GetOptimizer, GetLoss
    from buildmodel import get_unet, thick_slices, unthick_slices, unthick

    ###
    ### set up output, logging and callbacks
    ###
    kfolds = settings.options.kfolds
    logfileoutputdir = '%s/%03d/%03d' % (settings.options.outdir, kfolds, idfold)
    os.system('mkdir -p ' + logfileoutputdir)
    os.system('mkdir -p ' + logfileoutputdir + '/nii')
    os.system('mkdir -p ' + logfileoutputdir + '/liver')
    print("Output to\t", logfileoutputdir)

    ###
    ### load data
    ###
    print('loading memory map db for large dataset')
    numpydatabase = np.load(settings._globalnpfile)
    (train_index, test_index, valid_index) = GetSetupKfolds(
        settings.options.dbfile, kfolds, idfold)

    print('copy data subsets into memory...')
    axialbounds = numpydatabase['axialliverbounds']
    dataidarray = numpydatabase['dataid']
    dbtrainindex = np.isin(dataidarray, train_index)
    dbtestindex = np.isin(dataidarray, test_index)
    dbvalidindex = np.isin(dataidarray, valid_index)
    # keep only slices inside the liver bounds AND in the respective split
    subsetidx_train = np.all(np.vstack((axialbounds, dbtrainindex)), axis=0)
    subsetidx_test = np.all(np.vstack((axialbounds, dbtestindex)), axis=0)
    subsetidx_valid = np.all(np.vstack((axialbounds, dbvalidindex)), axis=0)
    nselected = (np.sum(subsetidx_train) + np.sum(subsetidx_test)
                 + np.sum(subsetidx_valid))
    print(nselected)
    print(min(np.sum(axialbounds), np.sum(dbtrainindex)))
    # NOTE(review): the right-hand side of this sanity check looks odd
    # (min of total liver slices vs. training slices) — verify the intended
    # invariant; behavior preserved here.
    if nselected != min(np.sum(axialbounds), np.sum(dbtrainindex)):
        # FIX: was `raise("data error: ...")` — raising a str is a TypeError
        # in Python 3; exceptions must derive from BaseException.
        raise ValueError("data error: slice numbers dont match")

    print('copy memory map from disk to RAM...')
    trainingsubset = numpydatabase[subsetidx_train]
    validsubset = numpydatabase[subsetidx_valid]
    testsubset = numpydatabase[subsetidx_test]
    ntrainslices = len(trainingsubset)
    nvalidslices = len(validsubset)

    if settings.options.D3:
        # 3-D: stack both images and labels into thick slices, then shuffle
        x_data = trainingsubset['imagedata']
        y_data = trainingsubset['truthdata']
        x_valid = validsubset['imagedata']
        y_valid = validsubset['truthdata']
        x_train = thick_slices(x_data, settings.options.thickness,
                               trainingsubset['dataid'], train_index)
        y_train = thick_slices(y_data, settings.options.thickness,
                               trainingsubset['dataid'], train_index)
        x_valid = thick_slices(x_valid, settings.options.thickness,
                               validsubset['dataid'], valid_index)
        y_valid = thick_slices(y_valid, settings.options.thickness,
                               validsubset['dataid'], valid_index)
        np.random.seed(seed=0)  # reproducible shuffle
        train_shuffle = np.random.permutation(x_train.shape[0])
        valid_shuffle = np.random.permutation(x_valid.shape[0])
        x_train = x_train[train_shuffle, ...]
        y_train = y_train[train_shuffle, ...]
        x_valid = x_valid[valid_shuffle, ...]
        y_valid = y_valid[valid_shuffle, ...]
    elif settings.options.D25:
        # 2.5-D: thick image stacks, but single-slice (thickness 1) labels
        x_data = trainingsubset['imagedata']
        y_data = trainingsubset['truthdata']
        x_valid = validsubset['imagedata']
        y_valid = validsubset['truthdata']
        x_train = thick_slices(x_data, settings.options.thickness,
                               trainingsubset['dataid'], train_index)
        x_valid = thick_slices(x_valid, settings.options.thickness,
                               validsubset['dataid'], valid_index)
        y_train = thick_slices(y_data, 1, trainingsubset['dataid'], train_index)
        y_valid = thick_slices(y_valid, 1, validsubset['dataid'], valid_index)
        np.random.seed(seed=0)  # reproducible shuffle
        train_shuffle = np.random.permutation(x_train.shape[0])
        valid_shuffle = np.random.permutation(x_valid.shape[0])
        x_train = x_train[train_shuffle, ...]
        y_train = y_train[train_shuffle, ...]
        x_valid = x_valid[valid_shuffle, ...]
        y_valid = y_valid[valid_shuffle, ...]
    else:
        # plain 2-D slices
        np.random.seed(seed=0)
        np.random.shuffle(trainingsubset)
        x_train = trainingsubset['imagedata']
        y_train = trainingsubset['truthdata']
        x_valid = validsubset['imagedata']
        y_valid = validsubset['truthdata']

    print("\nkfolds : ", kfolds)
    print("idfold : ", idfold)
    print("slices training : ", ntrainslices)
    print("slices validation : ", nvalidslices)
    try:
        print("slices testing : ", len(testsubset))
    except Exception:  # FIX: was a bare except; keep best-effort reporting
        print("slices testing : 0")

    ###
    ### data preprocessing : liver-mask labels, HU window + rescale
    ###
    y_train_typed = y_train.astype(settings.SEG_DTYPE)
    y_train_liver = preprocess.livermask(y_train_typed)
    x_train_typed = x_train
    x_train_typed = preprocess.window(x_train_typed, settings.options.hu_lb,
                                      settings.options.hu_ub)
    x_train_typed = preprocess.rescale(x_train_typed, settings.options.hu_lb,
                                       settings.options.hu_ub)

    y_valid_typed = y_valid.astype(settings.SEG_DTYPE)
    y_valid_liver = preprocess.livermask(y_valid_typed)
    x_valid_typed = x_valid
    x_valid_typed = preprocess.window(x_valid_typed, settings.options.hu_lb,
                                      settings.options.hu_ub)
    x_valid_typed = preprocess.rescale(x_valid_typed, settings.options.hu_lb,
                                       settings.options.hu_ub)

    ###
    ### create and run model
    ###
    opt = GetOptimizer()
    callbacks, modelloc = GetCallbacks(logfileoutputdir, "liver")
    lss, met = GetLoss()
    model = get_unet()
    model.compile(loss=lss, metrics=met, optimizer=opt)
    print("\n\n\tlivermask training...\tModel parameters: {0:,}".format(
        model.count_params()))

    # paired image/mask generators; same options so flows stay aligned
    if settings.options.D3:
        if settings.options.augment:
            train_datagen = ImageDataGenerator3D(
                brightness_range=[0.9, 1.1],
                width_shift_range=[-0.1, 0.1],
                height_shift_range=[-0.1, 0.1],
                horizontal_flip=True,
                vertical_flip=True,
                zoom_range=0.1,
                fill_mode='nearest',
                preprocessing_function=preprocess.post_augment,
            )
            train_maskgen = ImageDataGenerator3D()
        else:
            train_datagen = ImageDataGenerator3D()
            train_maskgen = ImageDataGenerator3D()
        valid_datagen = ImageDataGenerator3D()
        valid_maskgen = ImageDataGenerator3D()
    else:
        if settings.options.augment:
            train_datagen = ImageDataGenerator2D(
                brightness_range=[0.9, 1.1],
                width_shift_range=[-0.1, 0.1],
                height_shift_range=[-0.1, 0.1],
                horizontal_flip=True,
                vertical_flip=True,
                zoom_range=0.1,
                fill_mode='nearest',
                preprocessing_function=preprocess.post_augment,
            )
            train_maskgen = ImageDataGenerator2D()
        else:
            train_datagen = ImageDataGenerator2D()
            train_maskgen = ImageDataGenerator2D()
        valid_datagen = ImageDataGenerator2D()
        valid_maskgen = ImageDataGenerator2D()

    sd = 2  # arbitrary but fixed seed so image and mask flows stay in sync
    if settings.options.D25:
        # 2.5-D arrays already carry a channel axis from the thick stacking
        dataflow = train_datagen.flow(x_train_typed,
                                      batch_size=settings.options.trainingbatch,
                                      seed=sd, shuffle=True)
        maskflow = train_maskgen.flow(y_train_liver,
                                      batch_size=settings.options.trainingbatch,
                                      seed=sd, shuffle=True)
        validdataflow = valid_datagen.flow(x_valid_typed,
                                           batch_size=settings.options.validationbatch,
                                           seed=sd, shuffle=True)
        validmaskflow = valid_maskgen.flow(y_valid_liver,
                                           batch_size=settings.options.validationbatch,
                                           seed=sd, shuffle=True)
    else:
        dataflow = train_datagen.flow(x_train_typed[..., np.newaxis],
                                      batch_size=settings.options.trainingbatch,
                                      seed=sd, shuffle=True)
        maskflow = train_maskgen.flow(y_train_liver[..., np.newaxis],
                                      batch_size=settings.options.trainingbatch,
                                      seed=sd, shuffle=True)
        validdataflow = valid_datagen.flow(x_valid_typed[..., np.newaxis],
                                           batch_size=settings.options.validationbatch,
                                           seed=sd, shuffle=True)
        validmaskflow = valid_maskgen.flow(y_valid_liver[..., np.newaxis],
                                           batch_size=settings.options.validationbatch,
                                           seed=sd, shuffle=True)
    train_generator = zip(dataflow, maskflow)
    valid_generator = zip(validdataflow, validmaskflow)

    history_liver = model.fit_generator(
        train_generator,
        steps_per_epoch=ntrainslices // settings.options.trainingbatch,
        validation_steps=nvalidslices // settings.options.validationbatch,
        epochs=settings.options.numepochs,
        validation_data=valid_generator,
        callbacks=callbacks,
        shuffle=True)

    ###
    ### make predictions on validation set
    ###
    print("\n\n\tapplying models...")
    if settings.options.D25:
        y_pred_float = model.predict(x_valid_typed)[..., 0]
    else:
        y_pred_float = model.predict(x_valid_typed[..., np.newaxis])[..., 0]
    y_pred_seg = (y_pred_float >=
                  settings.options.segthreshold).astype(settings.SEG_DTYPE)

    if settings.options.D3:
        # undo the thick-slice stacking before saving whole volumes
        x_valid = unthick(x_valid, settings.options.thickness,
                          validsubset['dataid'], valid_index)
        y_valid = unthick(y_valid, settings.options.thickness,
                          validsubset['dataid'], valid_index)
        y_valid_liver = unthick(y_valid_liver, settings.options.thickness,
                                validsubset['dataid'], valid_index)
        y_pred_float = unthick(y_pred_float, settings.options.thickness,
                               validsubset['dataid'], valid_index)
        y_pred_seg = unthick(y_pred_seg, settings.options.thickness,
                             validsubset['dataid'], valid_index)

    print("\tsaving to file...")
    trueinnii = nib.Nifti1Image(x_valid, None)
    truesegnii = nib.Nifti1Image(y_valid, None)
    truelivernii = nib.Nifti1Image(y_valid_liver, None)
    predsegnii = nib.Nifti1Image(y_pred_seg, None)
    predfloatnii = nib.Nifti1Image(y_pred_float, None)
    trueinnii.to_filename(logfileoutputdir + '/nii/trueimg.nii.gz')
    # NOTE(review): 'truseg' spelling kept for backward compatibility with any
    # downstream consumers, although other code paths write 'trueseg'.
    truesegnii.to_filename(logfileoutputdir + '/nii/truseg.nii.gz')
    truelivernii.to_filename(logfileoutputdir + '/nii/trueliver.nii.gz')
    predsegnii.to_filename(logfileoutputdir + '/nii/predtumorseg.nii.gz')
    predfloatnii.to_filename(logfileoutputdir + '/nii/predtumorfloat.nii.gz')
    print("\tdone saving.")  # FIX: was "t\done saving." (transposed escape)
    return modelloc