def populateTestResults(models, inputfolder, outputfilename, cvsplits=3):
    """Ensemble the holdout (test) predictions across the CV splits and report
    per-scan AUC and test-retest ICC, each with confidence intervals."""
    output = []
    for model in models:
        df = None
        for cv in range(cvsplits):
            df1 = pd.read_csv(fr"{inputfolder}\{model}_1_{cv}\predictions.csv", index_col=0)
            df2 = pd.read_csv(fr"{inputfolder}\{model}_2_{cv}\predictions.csv", index_col=0)
            # keep only the holdout phases; validation rows are handled in populateCrossValAUC
            df1 = df1[df1["Phase"] != "val"]
            df2 = df2[df2["Phase"] != "val"]
            df1 = df1.rename(columns={'Pred': fr'Pred_{cv}'})
            df2 = df2.rename(columns={'Pred': fr'Pred_{cv}'})
            # merging scan 1 against scan 2 suffixes the prediction columns with _x/_y
            dfcv = df1.merge(df2, on=["FileName", "True", "Phase"])
            df = dfcv if df is None else df.merge(dfcv, on=["FileName", "True", "Phase"])
        # ensemble by summing the predictions over all CV splits
        # (generalizes the original hardcoded sum over three splits)
        df["Pred_x"] = sum(df[fr"Pred_{cv}_x"] for cv in range(cvsplits))
        df["Pred_y"] = sum(df[fr"Pred_{cv}_y"] for cv in range(cvsplits))
        istest = (df["Phase"] == "test1") | (df["Phase"] == "test2")
        ppred1 = df[istest]["Pred_x"].values
        ppred2 = df[istest]["Pred_y"].values
        ppred1_auc = df[df["Phase"] == "test1"]["Pred_x"].values
        ppred2_auc = df[df["Phase"] == "test2"]["Pred_y"].values
        ptrue1 = df[df["Phase"] == "test1"]["True"].values
        ptrue2 = df[df["Phase"] == "test2"]["True"].values
        picc31, pci1, pci2 = getIcc31(ppred1, ppred2)
        p1auc, p1c1, p1c2 = getAUC(ptrue1, ppred1_auc)
        p2auc, p2c1, p2c2 = getAUC(ptrue2, ppred2_auc)
        output.append((model, p1auc, p1c1, p1c2, p2auc, p2c1, p2c2, picc31, pci1, pci2))
    df = pd.DataFrame(output, columns=[
        "Model", "AUC1", "AUC1-CI1", "AUC1-CI2",
        "AUC2", "AUC2-CI1", "AUC2-CI2",
        "ICC", "ICC-CI1", "ICC-CI2",
    ])
    DataUtil.mkdir(fr"outputs\results")
    df.to_csv(fr"outputs\results\{outputfilename}_holdout.csv", index=None)
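# getAUC and getIcc31 are defined elsewhere in the repo. As a hedged sketch
# (not the repo's actual implementation), an AUC helper with a bootstrap 95%
# confidence interval compatible with the calls above might look like this,
# assuming scikit-learn is available:
import numpy as np
from sklearn.metrics import roc_auc_score


def getAUC_sketch(true, pred, nboot=1000, seed=0):
    """Return (AUC, lower 95% CI bound, upper 95% CI bound) via bootstrapping."""
    true = np.asarray(true)
    pred = np.asarray(pred)
    auc = roc_auc_score(true, pred)
    rng = np.random.RandomState(seed)
    boots = []
    for _ in range(nboot):
        idx = rng.randint(0, len(true), len(true))
        if len(np.unique(true[idx])) < 2:
            continue  # skip resamples containing a single class
        boots.append(roc_auc_score(true[idx], pred[idx]))
    lo, hi = np.percentile(boots, [2.5, 97.5])
    return auc, lo, hi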
def evaluate_segmentation_cv(scan, dataset):
    """
    Evaluates lesion detection and segmentation performance on the
    cross-validation set.

    scan    : test/retest scan index (1 or 2)
    dataset : b-value setting or dataset name

    returns : a tuple (mean segmentation Dice score,
                       standard deviation of segmentation Dice,
                       # hits (lesions detected), # misses, # false positives)
    """
    dices = []
    h1 = 0
    m1 = 0
    f1 = 0
    for cv in range(3):
        segpath = fr"outputs\segmentations\{dataset}\{scan}_{cv}"
        splitspathname = fr"{dataset}_{scan}_{cv}"
        splitspath = fr"outputs\splits\{splitspathname}.json"
        splitsdict = DataUtil.readJson(splitspath)
        samples = splitsdict.items()
        valcases = [x[0] for x in samples if x[1] == "val"]
        for case in valcases:
            probs = sitk.ReadImage(fr"{segpath}\{case}\prob.nii.gz")
            probs = DataUtil.convert2binary(probs)
            probs = removeSmallLesions(probs)
            gt = sitk.ReadImage(fr"{segpath}\{case}\gt.nii.gz")
            # drop label 1 from the ground truth so only the remaining
            # lesion labels are evaluated
            gt = sitk.GetArrayFromImage(gt)
            gt[gt == 1] = 0
            gt = sitk.GetImageFromArray(gt)
            gt = DataUtil.convert2binary(gt)
            dice1, hits1, misses1, fps1 = get_dice_repeatability(gt, probs)
            dices.append(dice1)
            h1 += hits1
            m1 += misses1
            f1 += fps1
    # flatten the per-case Dice lists, skipping cases with no detected lesions
    dices = [y for x in dices if x is not None for y in x]
    return (np.mean(dices), np.std(dices), h1, m1, f1)
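# removeSmallLesions is defined elsewhere in the repo. A minimal sketch of
# what such a filter might look like, assuming a minimum lesion size in
# voxels (the cutoff below is a hypothetical value, not the repo's):
import SimpleITK as sitk


def removeSmallLesions_sketch(binary, min_voxels=10):
    """Drop connected components smaller than min_voxels from a binary mask."""
    cc = sitk.ConnectedComponent(binary)
    # RelabelComponent sorts labels by size and discards those below the cutoff
    relabeled = sitk.RelabelComponent(cc, minimumObjectSize=min_voxels)
    return sitk.Cast(relabeled > 0, sitk.sitkUInt8)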
def populateCrossValAUC(models, inputfolder, outputfilename, cvsplits=3):
    """Pool the validation predictions across the CV splits and report the
    per-scan AUC (with confidence interval) for each model."""
    output = []
    columns = [
        "Model",
        "AUC-Scan1", "CI1-Scan1", "CI2-Scan1",
        "AUC-Scan2", "CI1-Scan2", "CI2-Scan2",
    ]
    for model in models:
        pred1, true1, pred2, true2 = [], [], [], []
        for cv in range(cvsplits):
            df1 = pd.read_csv(fr"{inputfolder}\{model}_1_{cv}\predictions.csv", index_col=0)
            df2 = pd.read_csv(fr"{inputfolder}\{model}_2_{cv}\predictions.csv", index_col=0)
            df1 = df1[df1["Phase"] == "val"]
            df2 = df2[df2["Phase"] == "val"]
            pred1.extend(df1["Pred"].values)
            true1.extend(df1["True"].values)
            pred2.extend(df2["Pred"].values)
            true2.extend(df2["True"].values)
        a1, c11, c12 = getAUC(true1, pred1)
        a2, c21, c22 = getAUC(true2, pred2)
        output.append((model, a1, c11, c12, a2, c21, c22))
    df = pd.DataFrame(output, columns=columns)
    DataUtil.mkdir(fr"outputs\results")
    df.to_csv(fr"outputs\results\{outputfilename}_cv.csv", index=None)
def getAugmentedData(folderpath, modality, nosamples=None):
    """
    folderpath : path to the folder containing the image and masks
    modality   : T2W/ADC
    nosamples  : number of augmented samples to generate (None = no augmentation)
    """
    folderpath = Path(folderpath)
    try:
        found = next(folderpath.glob(fr"{modality}*"))
    except StopIteration:
        raise FileNotFoundError(fr"No {modality} image found in {folderpath}")
    # recover the full file extension; suffixes handles both ".nii" and ".nii.gz"
    ext = "".join(found.suffixes).lstrip(".")
    img = sitk.ReadImage(str(folderpath.joinpath(fr"{modality}.{ext}")))
    pm = sitk.ReadImage(str(folderpath.joinpath(fr"PM.{ext}")))
    pm = DataUtil.convert2binary(pm)
    ls = sitk.ReadImage(str(folderpath.joinpath(fr"LS.{ext}")))
    ret = []
    orgimg, augs = _getAugmentedData(img, [pm, ls], nosamples)
    ret.append(orgimg)
    if augs is not None:
        ret.extend(augs)
    return ret
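# _getAugmentedData is defined elsewhere in the repo and its augmentation
# types are not shown here. A minimal, hedged sketch under the same
# (img, masks, nosamples) contract, using small random rotations as an
# assumed stand-in augmentation:
import numpy as np
import SimpleITK as sitk


def _getAugmentedData_sketch(img, masks, nosamples):
    """Return ((img, masks), [(aug img, aug masks), ...]) or ((img, masks), None)."""
    if nosamples is None:
        return (img, masks), None
    augs = []
    for _ in range(nosamples):
        angle = np.random.uniform(-np.pi / 18, np.pi / 18)  # +/- 10 degrees, assumed range
        tfm = sitk.Euler3DTransform()
        tfm.SetRotation(0, 0, angle)
        center = img.TransformContinuousIndexToPhysicalPoint(
            [(sz - 1) / 2.0 for sz in img.GetSize()])
        tfm.SetCenter(center)
        # linear interpolation for the image, nearest neighbour for the masks
        aimg = sitk.Resample(img, img, tfm, sitk.sitkLinear, 0.0)
        amasks = [sitk.Resample(m, m, tfm, sitk.sitkNearestNeighbor, 0) for m in masks]
        augs.append((aimg, amasks))
    return (img, masks), augs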
class algorithmUtil():
    def __init__(self):
        # obtain the data-processing utility
        self.dUtil = DataUtil()
        self.dUtil.getDataWithDate()

    def waverec(self, key):
        # build a four-level wavelet decomposition of the series
        y = np.array(self.dUtil.Data[key][1][1:])
        coeffs = pywt.wavedec(y, 'db4', level=4)  # [cA4, cD4, cD3, cD2, cD1]

        def _band(k):
            # reconstruct the signal from a single coefficient band,
            # zeroing out all the others
            kept = [c if i == k else np.zeros_like(c) for i, c in enumerate(coeffs)]
            return pywt.waverec(kept, 'db4')

        ya4, yd4, yd3, yd2, yd1 = (_band(k) for k in range(5))
        # return the original series followed by the per-band reconstructions
        return [y, ya4, yd4, yd3, yd2, yd1]

    def dataEvaluation(self, timestep):
        '''
        Takes a time series and evaluates it, checking whether a trend is
        present; if a trend exists, an ARIMA model is the right choice.
        :param timestep:
        :return:
        '''
        pass

    def predictResult(self, timestep):
        """
        Forecast the time series. Visualizing the data first showed an
        overall periodic trend, so we compute the period of each frequency
        band and combine them. If there is no fluctuation from the start,
        the period can essentially be treated as infinite (i.e. simply
        predict the average of all values below 10).
        :param timestep:
        :return model: includes the period and the base time series
        """
        return ARIMAUtil.furtureCast(timestep, 15)
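# Usage sketch for the band-wise reconstruction above: the per-band
# reconstructions from a 4-level db4 decomposition sum back (up to float
# error) to the original signal, since waverec is linear in the coefficients.
# The signal below is synthetic, for illustration only.
import numpy as np
import pywt

y = np.sin(np.linspace(0, 8 * np.pi, 256)) + 0.1 * np.random.randn(256)
coeffs = pywt.wavedec(y, 'db4', level=4)  # [cA4, cD4, cD3, cD2, cD1]
bands = []
for k in range(len(coeffs)):
    kept = [c if i == k else np.zeros_like(c) for i, c in enumerate(coeffs)]
    bands.append(pywt.waverec(kept, 'db4'))
print(np.allclose(sum(bands)[:len(y)], y))  # True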
if __name__ == "__main__":
    # the b-value set/dataset for which the segmentations have to be saved
    bset = "cspca"
    outputfolder = fr"outputs\segmentations\{bset}"
    cases = []
    # collect case names from both scans (test and retest) for this b-value set
    splitspathname = fr"{bset}_1_0"
    splitspath = fr"outputs\splits\{splitspathname}.json"
    splitsdict = DataUtil.readJson(splitspath)
    cases.extend(list(splitsdict.keys()))
    splitspathname = fr"{bset}_2_0"
    splitspath = fr"outputs\splits\{splitspathname}.json"
    splitsdict = DataUtil.readJson(splitspath)
    cases.extend(list(splitsdict.keys()))
    modality = 'ADC'
    # loop over the test-retest scans and the cross-validation folds
    for scan in range(1, 3):
        for cv in range(3):
            print(scan, cv)
def save_resampled_volumes(testcases, dataset, scan):
    """Resample the image, masks, and probability map of both scans of each
    test case to 1 mm isotropic spacing, writing the results to a reg subfolder."""
    spacing = (1, 1, 1)
    for case in testcases:
        print(case)
        case1 = case
        case2 = case.replace("Scan1", "Scan2")
        for cv in range(3):
            for c in (case1, case2):
                inputfolder = fr"outputs\segmentations\{dataset}\{scan}_{cv}\{c}"
                outputfolder = fr"{inputfolder}\reg"
                DataUtil.mkdir(outputfolder)
                img = sitk.ReadImage(fr"{inputfolder}\img.nii.gz")
                origin = img.GetOrigin()
                # align mask/probability metadata with the image before resampling
                pm = copy_parameters(sitk.ReadImage(fr"{inputfolder}\pm.nii.gz"), img)
                gt = copy_parameters(sitk.ReadImage(fr"{inputfolder}\gt.nii.gz"), img)
                probs = copy_parameters(sitk.ReadImage(fr"{inputfolder}\prob.nii.gz"), img)
                # linear interpolation for the image, nearest neighbour for masks
                img = DataUtil.resampleimage(img, spacing, origin,
                                             interpolator=sitk.sitkLinear)
                gt = DataUtil.resampleimage(gt, spacing, origin,
                                            interpolator=sitk.sitkNearestNeighbor)
                pm = DataUtil.resampleimage(pm, spacing, origin,
                                            interpolator=sitk.sitkNearestNeighbor)
                probs = DataUtil.resampleimage(probs, spacing, origin,
                                               interpolator=sitk.sitkNearestNeighbor)
                sitk.WriteImage(img, fr"{outputfolder}\img.nii.gz")
                sitk.WriteImage(gt, fr"{outputfolder}\gt.nii.gz")
                sitk.WriteImage(pm, fr"{outputfolder}\pm.nii.gz")
                sitk.WriteImage(probs, fr"{outputfolder}\prob.nii.gz")
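# DataUtil.resampleimage and copy_parameters are defined elsewhere in the
# repo. A minimal sketch of what they likely do, assuming resampling to a
# target spacing while preserving the physical extent of the image:
import SimpleITK as sitk


def resampleimage_sketch(img, spacing, origin, interpolator=sitk.sitkLinear):
    """Resample img onto a grid with the given spacing and origin."""
    oldsize = img.GetSize()
    oldspacing = img.GetSpacing()
    # keep the same physical extent: new size = old size * old/new spacing
    newsize = [int(round(osz * osp / nsp))
               for osz, osp, nsp in zip(oldsize, oldspacing, spacing)]
    return sitk.Resample(img, newsize, sitk.Transform(), interpolator,
                         origin, spacing, img.GetDirection(), 0,
                         img.GetPixelID())


def copy_parameters_sketch(img, ref):
    """Copy origin, spacing, and direction from ref onto img."""
    img.CopyInformation(ref)
    return img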
def _save_registered_volumes(testcases, dataset, scan):
    """Affinely register the other scan onto this scan's image with elastix,
    then warp its ground truth and probability map with transformix."""
    for case in testcases:
        print(case)
        if scan == 1:
            case1 = case
            case2 = case.replace("Scan1", "Scan2")
        else:
            case2 = case
            case1 = case.replace("Scan1", "Scan2")
        # note: the original read `1 if scan == 2 else 1`, which always
        # evaluated to 1; the other scan of scan 1 is scan 2
        other_scan = 1 if scan == 2 else 2
        for cv in range(3):
            regfolder1 = fr"outputs\segmentations\{dataset}\{scan}_{cv}\{case1}\reg"
            regfolder2 = fr"outputs\segmentations\{dataset}\{scan}_{cv}\{case2}\reg"
            outputfolder = regfolder1
            segpath1 = fr"{regfolder1}\img.nii.gz"
            segpath2 = fr"{regfolder2}\img.nii.gz"
            pmpath1 = fr"{regfolder1}\pm.nii.gz"
            pmpath2 = fr"{regfolder2}\pm.nii.gz"
            gtpath2 = fr"{regfolder2}\gt.nii.gz"
            probpath2 = fr"{regfolder2}\prob.nii.gz"
            # register the moving image (other scan) onto the fixed image
            cmd1 = "elastix -f {} -fMask {} -m {} -mMask {} -p affineTest.txt -out {}".format(
                segpath1, pmpath1, segpath2, pmpath2, outputfolder)
            subprocess.call(['cmd', '/c', cmd1])
            # derive a mask transform: zeroth-order B-spline and nearest-neighbour
            # resampling so that warped masks stay binary
            with open(fr"{outputfolder}\TransformParameters.0.txt", "r") as infile:
                trans = infile.read()
            trans = trans + "\n" + "(FinalBSplineInterpolationOrder 0)"
            trans = trans.replace(
                '(ResampleInterpolator "FinalLinearInterpolator")',
                '(ResampleInterpolator "FinalNearestNeighborInterpolator")')
            with open(fr"{outputfolder}\TransformParametersMASK.0.txt", "w") as outfile:
                outfile.writelines(trans)
            DataUtil.mkdir(fr"{outputfolder}\img{other_scan}")
            DataUtil.mkdir(fr"{outputfolder}\gt{other_scan}")
            DataUtil.mkdir(fr"{outputfolder}\prob{other_scan}")
            # apply the transforms to the image, ground truth, and probability map
            cmd2 = "transformix -in {} -tp {}\\TransformParameters.0.txt -out {}\\img{}".format(
                segpath2, outputfolder, outputfolder, other_scan)
            subprocess.call(['cmd', '/c', cmd2])
            cmd2 = "transformix -in {} -tp {}\\TransformParametersMASK.0.txt -out {}\\gt{}".format(
                gtpath2, outputfolder, outputfolder, other_scan)
            subprocess.call(['cmd', '/c', cmd2])
            cmd3 = "transformix -in {} -tp {}\\TransformParametersMASK.0.txt -out {}\\prob{}".format(
                probpath2, outputfolder, outputfolder, other_scan)
            subprocess.call(['cmd', '/c', cmd3])
def save_registered_volumes(testcases, dataset):
    save_resampled_volumes(testcases, dataset, 1)
    save_resampled_volumes(testcases, dataset, 2)
    _save_registered_volumes(testcases, dataset, 1)
    _save_registered_volumes(testcases, dataset, 2)


if __name__ == "__main__":
    # the b-value set/dataset for which the images have to be registered
    dataset = 'cspca'
    # collect the file names of the holdout cases for the dataset
    splitspathname = fr"{dataset}_1_0"
    splitspath = fr"outputs\splits\{splitspathname}.json"
    splitsdict = DataUtil.readJson(splitspath)
    samples = splitsdict.items()
    testcases = [x[0] for x in samples if x[1] == "test"]
    save_registered_volumes(testcases, dataset)
def main():
    for scan in range(1, 3):
        for cv in range(3):
            # modality of the imaging data; the images are saved under this
            # name, e.g. ADC.nii, T2W.nii
            modality = 'ADC'
            # number of augmented samples to generate per training case
            nosamples = 10
            # size of the final output patches
            newsize2D = 96
            # path to the splits dictionary mapping case/file name to phase
            # (train/val/test)
            splitspathname = fr"cspca_{scan}_{cv}"
            splitspath = fr"outputs\splits\{splitspathname}.json"
            splitsdict = DataUtil.readJson(splitspath)
            cases = list(splitsdict.keys())
            # create an empty hdf5 file to store the patches
            createHDF5(splitspathname, splitsdict, newsize2D)
            casenames = {"train": [], "val": [], "test": []}
            # minimum and maximum intensity used for normalization; clipping
            # to this range suppresses image artifacts
            _min = 0
            _max = 3000
            # read each volume, extract patches, and store them in hdf5 format
            for j, name in enumerate(cases):
                dataset = name.split("_")[0]
                sb = Path(fr"..\Data\{dataset}\{modality}\1_Original_Organized_merged\{name}")
                name = sb.stem
                print(name, float(j) / len(cases))
                phase = splitsdict[name]
                # only training cases are augmented
                if phase == "train":
                    ret = getAugmentedData(sb, modality, nosamples=nosamples)
                else:
                    ret = getAugmentedData(sb, modality, nosamples=None)
                for k, aug in enumerate(ret):
                    augimg = aug[0]
                    augpm = aug[1][0]
                    augls = aug[1][1]
                    augpm = sitk.BinaryDilate(augpm, 2, sitk.sitkBall)
                    # add the patches to the hdf5 file
                    cnt = addToHDF5(augimg, augpm, augls, phase, splitspathname,
                                    _min, _max, newsize2D)
                    # collect the case/file name of every stored slice
                    casename = name if k == 0 else fr"{name}_A{k}"
                    for slno in range(cnt):
                        casenames[phase].append(fr"{casename}_{slno}")
            # save the file names in the same hdf5 files
            outputfolder = fr"outputs\hdf5\{splitspathname}"
            for phase in ["train", "test", "val"]:
                hdf5_file = tables.open_file(fr'{outputfolder}\{phase}.h5', mode='a')
                hdf5_file.create_array(hdf5_file.root, fr'names', casenames[phase])
                hdf5_file.close()
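# createHDF5 and addToHDF5 are defined elsewhere in the repo. A minimal,
# hedged sketch of the PyTables pattern they likely follow (one extendable
# array per phase; the array names "img"/"mask" are assumptions, and the
# output folder is expected to exist already):
import numpy as np
import tables


def createHDF5_sketch(splitspathname, newsize2D):
    outputfolder = fr"outputs\hdf5\{splitspathname}"
    for phase in ["train", "val", "test"]:
        with tables.open_file(fr"{outputfolder}\{phase}.h5", mode="w") as h5:
            h5.create_earray(h5.root, "img", tables.Float32Atom(),
                             shape=(0, newsize2D, newsize2D))
            h5.create_earray(h5.root, "mask", tables.UInt8Atom(),
                             shape=(0, newsize2D, newsize2D))


def append_patch_sketch(splitspathname, phase, img2d, mask2d):
    """Append one 2D patch and its mask to the phase's extendable arrays."""
    outputfolder = fr"outputs\hdf5\{splitspathname}"
    with tables.open_file(fr"{outputfolder}\{phase}.h5", mode="a") as h5:
        h5.root.img.append(img2d[np.newaxis, ...].astype(np.float32))
        h5.root.mask.append(mask2d[np.newaxis, ...].astype(np.uint8))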
def train_and_test(modelName, embedding_size, wordDim, maxlen, wordVec):
    print("loading data...")
    dataUtil = DataUtil()
    (wordMap, wordVec, train_arg1, train_arg2, train_label,
     dev_arg1, dev_arg2, dev_label,
     test_arg1, test_arg2, test_label) = dataUtil.loadData(rel, wordDim, wordVec)
    print(len(train_arg1), 'train sequences')
    print(len(dev_arg1), 'dev sequences')
    print(len(test_arg1), 'test sequences')

    print('Pad sequences (samples x time)')
    train_arg1 = sequence.pad_sequences(train_arg1, maxlen=maxlen, padding='post', truncating='post')
    train_arg2 = sequence.pad_sequences(train_arg2, maxlen=maxlen, padding='post', truncating='post')
    test_arg1 = sequence.pad_sequences(test_arg1, maxlen=maxlen, padding='post', truncating='post')
    test_arg2 = sequence.pad_sequences(test_arg2, maxlen=maxlen, padding='post', truncating='post')
    dev_arg1 = sequence.pad_sequences(dev_arg1, maxlen=maxlen, padding='post', truncating='post')
    dev_arg2 = sequence.pad_sequences(dev_arg2, maxlen=maxlen, padding='post', truncating='post')

    print('Build model...')
    model = buildBranchModel(modelName, wordVec, embedding_size, wordDim, maxlen)
    optimizer = Adagrad(lr=0.01, epsilon=1e-06)
    model.compile(loss='binary_crossentropy', metrics=[ut.f_score], optimizer=optimizer)
    print("use model:", modelName)
    print(rel + " vs others.")

    print('Train...')
    bestDevF = 0
    devBestTestF = 0
    devBestTestAcc = 0
    bestTestF = 0
    bestTestAcc = 0
    # a fresh DataUtil instance is used to resample the training data each epoch
    dataUtil = DataUtil()
    for each in range(nb_epoch):
        model.fit([train_arg1, train_arg2], train_label, batch_size=batch_size,
                  nb_epoch=1, validation_data=([dev_arg1, dev_arg2], dev_label))
        devResult = model.predict_classes([dev_arg1, dev_arg2], batch_size=batch_size, verbose=1)
        df_measure, dpre, drecall, dacc = dr_evaluate(dev_label, devResult)
        print("[" + str(each) + '] dev F-measure:' + str(df_measure) + " dev acc:" + str(dacc))
        result = model.predict_classes([test_arg1, test_arg2], batch_size=batch_size, verbose=1)
        f_measure, pre, recall, acc = dr_evaluate(test_label, result)
        if f_measure > bestTestF:
            bestTestF = f_measure
            bestTestAcc = acc
        # track the test score at the epoch with the best dev score
        if bestDevF < df_measure:
            bestDevF = df_measure
            devBestTestF = f_measure
            devBestTestAcc = acc
        print("test f:" + str(f_measure) + " acc:" + str(acc) +
              " bestF:" + str(bestTestF) + " bestAcc:" + str(bestTestAcc) +
              " devBestTestF:" + str(devBestTestF) + " devBestTestAcc:" + str(devBestTestAcc))
        # resample the training data for the next epoch
        (train_arg1, train_arg2, train_label) = dataUtil.load_traindata(rel, wordMap)
        train_arg1 = sequence.pad_sequences(train_arg1, maxlen=maxlen, padding='post', truncating='post')
        train_arg2 = sequence.pad_sequences(train_arg2, maxlen=maxlen, padding='post', truncating='post')

    print("*" * 70)
    print("*" * 70)
    print("use model:", modelName + " mode:" + mode)
    print(rel + " vs others.")
    print("dev bestF:" + str(bestDevF) + " testF:" + str(devBestTestF) +
          " testAcc:" + str(devBestTestAcc))
    print("best test: f_score " + str(bestTestF) + " bestTestAcc " + str(bestTestAcc))
    print("*" * 70)
    print("*" * 70)
    return devBestTestF, devBestTestAcc
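# dr_evaluate is defined elsewhere in the repo. A minimal sketch of the
# binary F-measure/precision/recall/accuracy it appears to return, in that
# order (not the repo's actual implementation):
import numpy as np


def dr_evaluate_sketch(y_true, y_pred):
    y_true = np.asarray(y_true).ravel()
    y_pred = np.asarray(y_pred).ravel()
    tp = np.sum((y_pred == 1) & (y_true == 1))
    fp = np.sum((y_pred == 1) & (y_true == 0))
    fn = np.sum((y_pred == 0) & (y_true == 1))
    pre = tp / (tp + fp) if tp + fp else 0.0
    rec = tp / (tp + fn) if tp + fn else 0.0
    f = 2 * pre * rec / (pre + rec) if pre + rec else 0.0
    acc = np.mean(y_pred == y_true)
    return f, pre, rec, acc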
def evaluate_segmentation_performance_repeatability_holdout(testcases, dataset):
    """
    Evaluates lesion detection and segmentation performance on the holdout
    set, as well as the repeatability of lesion segmentation in terms of the
    Dice similarity coefficient.

    testcases : file names of the holdout cases
    dataset   : b-value setting or dataset name

    returns : a tuple of
        (mean, std) of Dice for the first scan,
        (mean, std) of Dice for the second scan,
        (mean, std) of Dice between the scans (repeatability),
        (# hits, # misses, # false positives) for scan 1,
        (# hits, # misses, # false positives) for scan 2,
        (# hits, # misses, # false positives) between the scan 1 and scan 2
        predictions (agreements and disagreements between the networks).
    """
    dices = []
    h1 = h2 = h3 = 0
    f1 = f2 = f3 = 0
    m1 = m2 = m3 = 0
    for case in testcases:
        probs1 = None
        probs2 = None
        # drop label 1 from the ground truth so only the remaining lesion
        # labels are evaluated
        gtpath1 = fr"outputs\segmentations\{dataset}\1_0\{case}\gt.nii.gz"
        gt1 = sitk.ReadImage(gtpath1)
        gt1 = sitk.GetArrayFromImage(gt1)
        gt1[gt1 == 1] = 0
        gt1 = sitk.GetImageFromArray(gt1)
        gt1 = DataUtil.convert2binary(gt1)
        # accumulate the binary predictions of the three CV models (voting)
        for cv in range(3):
            probpath1 = fr"outputs\segmentations\{dataset}\1_{cv}\{case}\prob.nii.gz"
            probpath2 = fr"outputs\segmentations\{dataset}\2_{cv}\{case}\prob.nii.gz"
            probs1_ = DataUtil.convert2binary(sitk.ReadImage(probpath1))
            probs2_ = DataUtil.convert2binary(sitk.ReadImage(probpath2))
            probs1 = probs1_ if probs1 is None else sitk.Add(probs1, probs1_)
            probs2 = probs2_ if probs2 is None else sitk.Add(probs2, probs2_)
        probs1 = filterSegmentation(probs1)
        probs2 = filterSegmentation(probs2)
        probs1 = removeSmallLesions(probs1)
        probs2 = removeSmallLesions(probs2)
        dice1, hits1, misses1, fps1 = get_dice_repeatability(gt1, probs1)
        dice2, hits2, misses2, fps2 = get_dice_repeatability(gt1, probs2)
        dice3, hits3, misses3, fps3 = get_dice_repeatability(probs1, probs2)
        dices.append((dice1, dice2, dice3))
        h1 += hits1; m1 += misses1; f1 += fps1
        h2 += hits2; m2 += misses2; f2 += fps2
        h3 += hits3; m3 += misses3; f3 += fps3
    dice1, dice2, dice3 = zip(*dices)
    dice1 = [y for x in dice1 if x is not None for y in x]
    dice2 = [y for x in dice2 if x is not None for y in x]
    dice3 = [y for x in dice3 if x is not None for y in x]
    print(np.mean(dice1), np.std(dice1))
    print(np.mean(dice2), np.std(dice2))
    print(np.mean(dice3), np.std(dice3))
    print(h1, m1, f1)
    print(h2, m2, f2)
    print(h3, m3, f3)
    # note: the original returned (h3, m3 + f3) here, which appears to be a
    # typo given the pattern of the other two tuples
    return ((np.mean(dice1), np.std(dice1)),
            (np.mean(dice2), np.std(dice2)),
            (np.mean(dice3), np.std(dice3)),
            (h1, m1, f1), (h2, m2, f2), (h3, m3, f3))
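# get_dice_repeatability is defined elsewhere in the repo. A hedged sketch
# of a plausible per-lesion variant: for each reference lesion, a Dice score
# against the overlapping predicted components (a "hit"), plus miss and
# false-positive counts. The matching rule here is an assumption.
import numpy as np
import SimpleITK as sitk


def get_dice_repeatability_sketch(ref, pred):
    """Return (list of per-lesion Dice or None, # hits, # misses, # false positives)."""
    refcc = sitk.GetArrayFromImage(sitk.ConnectedComponent(ref))
    predcc = sitk.GetArrayFromImage(sitk.ConnectedComponent(pred))
    dices, hits, misses = [], 0, 0
    matched = set()
    for lbl in range(1, refcc.max() + 1):
        r = refcc == lbl
        overlap = np.unique(predcc[r])
        overlap = overlap[overlap > 0]
        if overlap.size == 0:
            misses += 1  # reference lesion with no overlapping prediction
            continue
        hits += 1
        matched.update(overlap.tolist())
        p = np.isin(predcc, overlap)
        dices.append(2.0 * np.logical_and(r, p).sum() / (r.sum() + p.sum()))
    # predicted components that overlap no reference lesion are false positives
    fps = len(set(range(1, predcc.max() + 1)) - matched)
    return (dices if dices else None), hits, misses, fps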