def collect(annotations_filename, annotations_excluded_filename, seriesuids_filename, results_filename):
    """Load annotations, restricted to the series that appear in the results file.

    Reads the four CSV files with ``csvTools.readCSV``, keeps the annotation
    header row plus every annotation row whose first column matches a
    series UID found in the second column of the results file, and builds
    the nodule annotations with ``collectNoduleAnnotations``.

    Only the first 100000 non-header result rows contribute series UIDs.

    @param annotations_filename: CSV file with the nodule annotations
    @param annotations_excluded_filename: CSV file with the excluded annotations
    @param seriesuids_filename: CSV file whose first column lists the series UIDs
    @param results_filename: CSV file with the CAD results
    @return: tuple ``(allNodules, seriesUIDs, results)``
    """
    annotations = csvTools.readCSV(annotations_filename)
    annotations_excluded = csvTools.readCSV(annotations_excluded_filename)
    results = csvTools.readCSV(results_filename)
    seriesUIDs_csv = csvTools.readCSV(seriesuids_filename)

    # Series UIDs seen in the results file. A set gives O(1) membership tests
    # in the filter below (the values are CSV cells, presumably strings).
    max_result_rows = 100000
    result_seriesuids = set()
    rows_taken = 0
    for row in results:
        if row[1] == 'seriesuid':
            # header row of the results file
            continue
        if rows_taken < max_result_rows:
            result_seriesuids.add(row[1])
            rows_taken += 1

    # Keep the annotation header row and the rows of series present in the results.
    annotations = [
        row for row in annotations
        if row[0] == 'seriesuid' or row[0] in result_seriesuids
    ]

    seriesUIDs = [row[0] for row in seriesUIDs_csv]

    allNodules = collectNoduleAnnotations(annotations, annotations_excluded, seriesUIDs)
    return (allNodules, seriesUIDs, results)
def collect(annotations_filename, annotations_excluded_filename, seriesuids_filename):
    """Read the annotation CSV files and build the per-series nodule annotations.

    @param annotations_filename: CSV file with the nodule annotations
    @param annotations_excluded_filename: CSV file with the excluded annotations
    @param seriesuids_filename: CSV file whose first column lists the series UIDs
    @return: tuple ``(allNodules, seriesUIDs)``
    """
    annotation_rows = csvTools.readCSV(annotations_filename)
    excluded_rows = csvTools.readCSV(annotations_excluded_filename)
    uid_rows = csvTools.readCSV(seriesuids_filename)

    # The series UID is the first column of every row.
    seriesUIDs = [uid_row[0] for uid_row in uid_rows]

    allNodules = collectNoduleAnnotations(annotation_rows, excluded_rows, seriesUIDs)
    return (allNodules, seriesUIDs)
def collect(annotations_filename, seriesuids_filename):
    """Read the ground-truth annotation CSV and the series UID list.

    @param annotations_filename: CSV file with the ground-truth annotations
    @param seriesuids_filename: CSV file whose first column lists the series UIDs
    @return: tuple ``(allNodules, seriesUIDs)``
    """
    # Ground-truth annotation rows.
    gt_rows = csvTools.readCSV(annotations_filename)
    # Rows of the ground-truth series (CT image) name list.
    uid_rows = csvTools.readCSV(seriesuids_filename)

    # Flatten the CSV content into a list with one CT image name per element.
    seriesUIDs = [uid_row[0] for uid_row in uid_rows]

    # All nodules; per the original comment they are indexed by CT image name.
    allNodules = collectNoduleAnnotations(gt_rows, seriesUIDs)
    return (allNodules, seriesUIDs)
def collect(annotations_filename, annotations_excluded_filename, seriesuids_filename):
    """Load the annotation files and assemble the nodule annotations per series.

    @param annotations_filename: CSV file with the nodule annotations
    @param annotations_excluded_filename: CSV file with the excluded annotations
    @param seriesuids_filename: CSV file whose first column lists the series UIDs
    @return: tuple ``(allNodules, seriesUIDs)``
    """
    included = csvTools.readCSV(annotations_filename)
    excluded = csvTools.readCSV(annotations_excluded_filename)
    uid_table = csvTools.readCSV(seriesuids_filename)

    # First column of each row holds the series UID.
    seriesUIDs = []
    seriesUIDs.extend(entry[0] for entry in uid_table)

    return (collectNoduleAnnotations(included, excluded, seriesUIDs), seriesUIDs)
def evaluateCAD(seriesUIDs, results_filename, outputDir, allNodules, CADSystemName,
                maxNumberOfCADMarks=-1, performBootstrapping=False,
                numberOfBootstrapSamples=1000, confidence=0.95):
    '''
    function to evaluate a CAD algorithm

    Matches the CAD marks in the results file against the nodule annotations,
    writes the analysis files into outputDir, computes the FROC curve and,
    when there is at least one included nodule, saves a FROC plot.

    @param seriesUIDs: list of the seriesUIDs of the cases to be processed
    @param results_filename: file with results
    @param outputDir: output directory
    @param allNodules: dictionary with all nodule annotations of all cases, keys of the dictionary are the seriesuids
    @param CADSystemName: name of the CAD system, to be used in filenames and on FROC curve
    @param maxNumberOfCADMarks: when > 0 and a case has more marks than this,
        only marks whose probability is strictly above the probability at
        index maxNumberOfCADMarks of the descending-sorted list are kept
        (at most maxNumberOfCADMarks of them)
    @param performBootstrapping: when true, also call computeFROC_bootstrap
        and write/plot its mean, lower and upper curves
    @param numberOfBootstrapSamples: forwarded to computeFROC_bootstrap
    @param confidence: forwarded to computeFROC_bootstrap
    @return: tuple (fps, sens, thresholds, fps_bs_itp, sens_bs_mean,
        sens_bs_lb, sens_bs_up); the last four are None when
        performBootstrapping is false
    '''
    # The analysis file is written at the start and again after matching, so
    # it stays open across both phases; `with` guarantees it is closed.
    with open(os.path.join(outputDir, 'CADAnalysis.txt'), 'w') as nodOutputfile:
        nodOutputfile.write("\n")
        nodOutputfile.write((60 * "*") + "\n")
        nodOutputfile.write("CAD Analysis: %s\n" % CADSystemName)
        nodOutputfile.write((60 * "*") + "\n")
        nodOutputfile.write("\n")

        results = csvTools.readCSV(results_filename)

        allCandsCAD = {}

        for seriesuid in seriesUIDs:
            # collect candidates from result file
            nodules = {}
            header = results[0]

            i = 0
            for result in results[1:]:
                nodule_seriesuid = result[header.index(seriesuid_label)]

                if seriesuid == nodule_seriesuid:
                    nodule = getNodule(result, header)
                    nodule.candidateID = i
                    nodules[nodule.candidateID] = nodule
                    i += 1

            # number of CAD marks, only keep most suspicious marks
            if maxNumberOfCADMarks > 0 and len(nodules) > maxNumberOfCADMarks:
                # make a list of all probabilities, sorted from large to small
                probs = sorted((float(noduletemp.CADprobability)
                                for noduletemp in nodules.values()),
                               reverse=True)
                probThreshold = probs[maxNumberOfCADMarks]
                nodules2 = {}
                for keytemp, noduletemp in nodules.items():
                    if len(nodules2) >= maxNumberOfCADMarks:
                        break
                    # strict comparison: marks tied with the threshold are dropped
                    if float(noduletemp.CADprobability) > probThreshold:
                        nodules2[keytemp] = noduletemp

                nodules = nodules2

            allCandsCAD[seriesuid] = nodules

        # --- iterate over all cases (seriesUIDs) and determine how
        # often a nodule annotation is not covered by a candidate

        # initialize some variables to be used in the loop
        candTPs = 0
        candFPs = 0
        candFNs = 0
        candTNs = 0
        totalNumberOfCands = 0
        totalNumberOfNodules = 0
        doubleCandidatesIgnored = 0
        irrelevantCandidates = 0
        minProbValue = -1000000000.0  # stand-in probability for missed nodules
        FROCGTList = []
        FROCProbList = []
        FPDivisorList = []
        excludeList = []
        FROCtoNoduleMap = []
        ignoredCADMarksList = []

        # open output files
        nodNoCandPath = os.path.join(
            outputDir, "nodulesWithoutCandidate_%s.txt" % CADSystemName)
        with open(nodNoCandPath, 'w') as nodNoCandFile:
            # -- loop over the cases
            for seriesuid in seriesUIDs:
                # get the candidates for this case
                candidates = allCandsCAD.get(seriesuid, {})

                # add to the total number of candidates
                totalNumberOfCands += len(candidates)

                # make a copy in which items will be deleted
                candidates2 = candidates.copy()

                # get the nodule annotations on this case
                try:
                    noduleAnnots = allNodules[seriesuid]
                except KeyError:
                    noduleAnnots = []

                # - loop over the nodule annotations
                for noduleAnnot in noduleAnnots:
                    # increment the number of nodules
                    if noduleAnnot.state == "Included":
                        totalNumberOfNodules += 1

                    x = float(noduleAnnot.coordX)
                    y = float(noduleAnnot.coordY)
                    z = float(noduleAnnot.coordZ)

                    # 2. Check if the nodule annotation is covered by a candidate
                    # A nodule is marked as detected when the center of mass of the candidate is within a distance R of
                    # the center of the nodule. In order to ensure that the CAD mark is displayed within the nodule on the
                    # CT scan, we set R to be the radius of the nodule size.
                    diameter = float(noduleAnnot.diameter_mm)
                    if diameter < 0.0:
                        diameter = 10.0
                    radiusSquared = pow((diameter / 2.0), 2.0)

                    found = False
                    noduleMatches = []
                    for key, candidate in candidates.items():
                        x2 = float(candidate.coordX)
                        y2 = float(candidate.coordY)
                        z2 = float(candidate.coordZ)
                        # squared distance, compared against the squared radius
                        dist = math.pow(x - x2, 2.) + math.pow(y - y2, 2.) + math.pow(z - z2, 2.)
                        if dist < radiusSquared:
                            if noduleAnnot.state == "Included":
                                found = True
                                noduleMatches.append(candidate)
                                if key not in candidates2:
                                    print(
                                        "This is strange: CAD mark %s detected two nodules! Check for overlapping nodule annotations, SeriesUID: %s, nodule Annot ID: %s"
                                        % (str(candidate.id), seriesuid, str(noduleAnnot.id)))
                                else:
                                    del candidates2[key]
                            elif noduleAnnot.state == "Excluded":  # an excluded nodule
                                if bOtherNodulesAsIrrelevant:
                                    # delete marks on excluded nodules so they don't count as false positives
                                    if key in candidates2:
                                        irrelevantCandidates += 1
                                        ignoredCADMarksList.append(
                                            "%s,%s,%s,%s,%s,%s,%.9f" %
                                            (seriesuid, -1, candidate.coordX,
                                             candidate.coordY, candidate.coordZ,
                                             str(candidate.id),
                                             float(candidate.CADprobability)))
                                        del candidates2[key]

                    if len(noduleMatches) > 1:  # double detection
                        doubleCandidatesIgnored += (len(noduleMatches) - 1)

                    if noduleAnnot.state == "Included":
                        # only include it for FROC analysis if it is included
                        # otherwise, the candidate will not be counted as FP, but ignored in the
                        # analysis since it has been deleted from the nodules2 vector of candidates
                        if found:
                            # append the sample with the highest probability for the FROC analysis
                            # (max() keeps the first of equal probabilities)
                            bestMatch = max(noduleMatches,
                                            key=lambda c: float(c.CADprobability))
                            maxProb = float(bestMatch.CADprobability)

                            FROCGTList.append(1.0)
                            FROCProbList.append(maxProb)
                            FPDivisorList.append(seriesuid)
                            excludeList.append(False)
                            FROCtoNoduleMap.append(
                                "%s,%s,%s,%s,%s,%.9f,%s,%.9f" %
                                (seriesuid, noduleAnnot.id, noduleAnnot.coordX,
                                 noduleAnnot.coordY, noduleAnnot.coordZ,
                                 float(noduleAnnot.diameter_mm),
                                 str(bestMatch.id), maxProb))
                            candTPs += 1
                        else:
                            candFNs += 1
                            # append a positive sample with the lowest probability, such that this is added in the FROC analysis
                            FROCGTList.append(1.0)
                            FROCProbList.append(minProbValue)
                            FPDivisorList.append(seriesuid)
                            excludeList.append(True)
                            FROCtoNoduleMap.append(
                                "%s,%s,%s,%s,%s,%.9f,%s,%s" %
                                (seriesuid, noduleAnnot.id, noduleAnnot.coordX,
                                 noduleAnnot.coordY, noduleAnnot.coordZ,
                                 float(noduleAnnot.diameter_mm), int(-1), "NA"))
                            nodNoCandFile.write(
                                "%s,%s,%s,%s,%s,%.9f,%s\n" %
                                (seriesuid, noduleAnnot.id, noduleAnnot.coordX,
                                 noduleAnnot.coordY, noduleAnnot.coordZ,
                                 float(noduleAnnot.diameter_mm), str(-1)))

                # add all false positives to the vectors
                for key, candidate3 in candidates2.items():
                    candFPs += 1
                    FROCGTList.append(0.0)
                    FROCProbList.append(float(candidate3.CADprobability))
                    FPDivisorList.append(seriesuid)
                    excludeList.append(False)
                    FROCtoNoduleMap.append(
                        "%s,%s,%s,%s,%s,%s,%.9f" %
                        (seriesuid, -1, candidate3.coordX, candidate3.coordY,
                         candidate3.coordZ, str(candidate3.id),
                         float(candidate3.CADprobability)))

        # Sanity check only: the message is logged but execution continues.
        if not (len(FROCGTList) == len(FROCProbList)
                and len(FROCGTList) == len(FPDivisorList)
                and len(FROCGTList) == len(FROCtoNoduleMap)
                and len(FROCGTList) == len(excludeList)):
            nodOutputfile.write(
                "Length of FROC vectors not the same, this should never happen! Aborting..\n")

        nodOutputfile.write("Candidate detection results:\n")
        nodOutputfile.write(" True positives: %d\n" % candTPs)
        nodOutputfile.write(" False positives: %d\n" % candFPs)
        nodOutputfile.write(" False negatives: %d\n" % candFNs)
        nodOutputfile.write(" True negatives: %d\n" % candTNs)
        nodOutputfile.write(" Total number of candidates: %d\n" % totalNumberOfCands)
        nodOutputfile.write(" Total number of nodules: %d\n" % totalNumberOfNodules)

        nodOutputfile.write(" Ignored candidates on excluded nodules: %d\n" % irrelevantCandidates)
        nodOutputfile.write(
            " Ignored candidates which were double detections on a nodule: %d\n"
            % doubleCandidatesIgnored)
        if int(totalNumberOfNodules) == 0:
            nodOutputfile.write(" Sensitivity: 0.0\n")
        else:
            nodOutputfile.write(" Sensitivity: %.9f\n" %
                                (float(candTPs) / float(totalNumberOfNodules)))
        nodOutputfile.write(" Average number of candidates per scan: %.9f\n" %
                            (float(totalNumberOfCands) / float(len(seriesUIDs))))

    # compute FROC
    fps, sens, thresholds = computeFROC(FROCGTList, FROCProbList,
                                        len(seriesUIDs), excludeList)

    if performBootstrapping:
        fps_bs_itp, sens_bs_mean, sens_bs_lb, sens_bs_up = computeFROC_bootstrap(
            FROCGTList, FROCProbList, FPDivisorList, seriesUIDs, excludeList,
            numberOfBootstrapSamples=numberOfBootstrapSamples,
            confidence=confidence)

    # Write FROC curve
    with open(os.path.join(outputDir, "froc_%s.txt" % CADSystemName), 'w') as f:
        for i in range(len(sens)):
            f.write("%.9f,%.9f,%.9f\n" % (fps[i], sens[i], thresholds[i]))

    # Write FROC vectors to disk as well
    with open(os.path.join(outputDir, "froc_gt_prob_vectors_%s.csv" % CADSystemName), 'w') as f:
        for i in range(len(FROCGTList)):
            f.write("%d,%.9f\n" % (FROCGTList[i], FROCProbList[i]))

    fps_itp = np.linspace(FROC_minX, FROC_maxX, num=10001)
    sens_itp = np.interp(fps_itp, fps, sens)

    # Accumulate the interpolated sensitivity near 0.125, 0.25, ..., 8 false
    # positives per scan and print the sum divided by 7.
    frvvlu = 0
    nxth = 0.125
    for fp, ss in zip(fps_itp, sens_itp):
        if abs(fp - nxth) < 3e-4:
            frvvlu += ss
            nxth *= 2
        if abs(nxth - 16) < 1e-5:
            break
    print(frvvlu / 7, nxth)
    # NOTE(review): exact float equality against the linspace grid; any term
    # whose FP rate is not exactly on the grid selects nothing - confirm.
    print(sens_itp[fps_itp == 0.125] + sens_itp[fps_itp == 0.25]
          + sens_itp[fps_itp == 0.5] + sens_itp[fps_itp == 1]
          + sens_itp[fps_itp == 2] + sens_itp[fps_itp == 4]
          + sens_itp[fps_itp == 8])

    if performBootstrapping:
        # Write mean, lower, and upper bound curves to disk
        with open(os.path.join(outputDir, "froc_%s_bootstrapping.csv" % CADSystemName), 'w') as f:
            f.write("FPrate,Sensivity[Mean],Sensivity[Lower bound],Sensivity[Upper bound]\n")
            for i in range(len(fps_bs_itp)):
                f.write("%.9f,%.9f,%.9f,%.9f\n" %
                        (fps_bs_itp[i], sens_bs_mean[i], sens_bs_lb[i], sens_bs_up[i]))
    else:
        fps_bs_itp = None
        sens_bs_mean = None
        sens_bs_lb = None
        sens_bs_up = None

    # create FROC graphs
    if int(totalNumberOfNodules) > 0:
        plt.figure()
        ax = plt.gca()
        clr = 'b'
        plt.plot(fps_itp, sens_itp, color=clr, label="%s" % CADSystemName, lw=2)
        if performBootstrapping:
            plt.plot(fps_bs_itp, sens_bs_mean, color=clr, ls='--')
            plt.plot(fps_bs_itp, sens_bs_lb, color=clr, ls=':')  # , label = "lb")
            plt.plot(fps_bs_itp, sens_bs_up, color=clr, ls=':')  # , label = "ub")
            ax.fill_between(fps_bs_itp, sens_bs_lb, sens_bs_up, facecolor=clr, alpha=0.05)
        xmin = FROC_minX
        xmax = FROC_maxX
        plt.xlim(xmin, xmax)
        plt.ylim(0.5, 1)
        plt.xlabel('Average number of false positives per scan')
        plt.ylabel('Sensitivity')
        plt.legend(loc='lower right')
        plt.title('FROC performance - %s' % (CADSystemName))

        if bLogPlot:
            plt.xscale('log', basex=2)
            ax.xaxis.set_major_formatter(
                FixedFormatter([0.125, 0.25, 0.5, 1, 2, 4, 8]))

        # set your ticks manually
        ax.xaxis.set_ticks([0.125, 0.25, 0.5, 1, 2, 4, 8])
        ax.yaxis.set_ticks(np.arange(0.5, 1, 0.1))
        plt.grid(b=True, which='both')
        plt.tight_layout()

        plt.savefig(os.path.join(outputDir, "froc_%s.png" % CADSystemName),
                    bbox_inches=0, dpi=300)

    return (fps, sens, thresholds, fps_bs_itp, sens_bs_mean, sens_bs_lb,
            sens_bs_up)
def evaluateCAD(seriesUIDs, results_filename, outputDir, allNodules, CADSystemName,
                maxNumberOfCADMarks=-1, performBootstrapping=False,
                numberOfBootstrapSamples=1000, confidence=0.95):
    '''
    function to evaluate a CAD algorithm

    Matches the predicted marks in the results file against the ground-truth
    nodule annotations, writes the analysis files into outputDir, computes
    the FROC curve, prints the average sensitivity over seven false-positive
    rates and, when there is at least one included nodule, saves a FROC plot.

    @param seriesUIDs: list with the names of all test-set CT images
    @param results_filename: the submitted csv file, *.csv
    @param outputDir: path of the folder that receives the FROC results
    @param allNodules: dictionary of all ground-truth nodules, indexed by image name
    @param CADSystemName: name of the system, used e.g. as a file-name prefix
    @param maxNumberOfCADMarks: maximum number of marks allowed per CT image;
        only applied when > 0
    @param performBootstrapping: when true, also call computeFROC_bootstrap
        and write/plot its mean, lower and upper curves
    @param numberOfBootstrapSamples: forwarded to computeFROC_bootstrap
    @param confidence: forwarded to computeFROC_bootstrap
    @return: tuple (fps, sens, thresholds, fps_bs_itp, sens_bs_mean,
        sens_bs_lb, sens_bs_up); the last four are None when
        performBootstrapping is false
    '''
    # The analysis file is written at the start and again after matching, so
    # it stays open across both phases; `with` guarantees it is closed.
    with open(os.path.join(outputDir, 'CADAnalysis.txt'), 'w') as nodOutputfile:
        nodOutputfile.write("\n")
        nodOutputfile.write((60 * "*") + "\n")
        nodOutputfile.write("CAD Analysis: %s\n" % CADSystemName)
        nodOutputfile.write((60 * "*") + "\n")
        nodOutputfile.write("\n")

        results = csvTools.readCSV(results_filename)  # rows of the submitted csv file

        allCandsCAD = {}

        for seriesuid in seriesUIDs:  # for every test image ID
            # collect candidates from result file
            nodules = {}
            header = results[0]  # first row of the csv file: the header

            i = 0
            for result in results[1:]:  # for every predicted mark
                # image name this mark belongs to
                nodule_seriesuid = result[header.index(seriesuid_label)]

                if seriesuid == nodule_seriesuid:  # mark belongs to the current image
                    nodule = getNodule(result, header)
                    nodule.candidateID = i
                    nodules[nodule.candidateID] = nodule  # all marks of this image
                    i += 1

            # If an image has more marks than the limit, rank them by score
            # and keep only the top maxNumberOfCADMarks.
            if maxNumberOfCADMarks > 0 and len(nodules) > maxNumberOfCADMarks:
                # make a list of all probabilities, sorted from large to small
                probs = sorted((float(noduletemp.CADprobability)
                                for noduletemp in nodules.values()),
                               reverse=True)
                probThreshold = probs[maxNumberOfCADMarks]
                nodules2 = {}
                for keytemp, noduletemp in nodules.items():
                    if len(nodules2) >= maxNumberOfCADMarks:
                        break
                    # strict comparison: marks tied with the threshold are dropped
                    if float(noduletemp.CADprobability) > probThreshold:
                        nodules2[keytemp] = noduletemp

                nodules = nodules2

            print('adding candidates: ' + seriesuid)
            allCandsCAD[seriesuid] = nodules  # marks indexed by image name

        # --- iterate over all cases (seriesUIDs) and determine how
        # often a nodule annotation is not covered by a candidate

        # initialize some variables to be used in the loop
        candTPs = 0
        candFPs = 0
        candFNs = 0
        candTNs = 0
        totalNumberOfCands = 0
        totalNumberOfNodules = 0
        doubleCandidatesIgnored = 0
        irrelevantCandidates = 0
        minProbValue = -1000000000.0  # stand-in probability for missed nodules
        FROCGTList = []
        FROCProbList = []
        FPDivisorList = []
        excludeList = []
        FROCtoNoduleMap = []
        ignoredCADMarksList = []

        # open output files
        nodNoCandPath = os.path.join(
            outputDir, "nodulesWithoutCandidate_%s.txt" % CADSystemName)
        with open(nodNoCandPath, 'w') as nodNoCandFile:
            # -- loop over the cases
            for seriesuid in seriesUIDs:  # for every CT image
                # get the candidates (predicted marks) for this case
                candidates = allCandsCAD.get(seriesuid, {})

                # total number of predicted marks
                totalNumberOfCands += len(candidates)

                # make a copy in which items will be deleted
                candidates2 = candidates.copy()

                # get the ground-truth nodule annotations on this case
                try:
                    noduleAnnots = allNodules[seriesuid]
                except KeyError:
                    noduleAnnots = []

                # - loop over the nodule annotations
                for noduleAnnot in noduleAnnots:
                    # count the ground-truth annotations that take part in the evaluation
                    if noduleAnnot.state == "Included":
                        totalNumberOfNodules += 1

                    # ground-truth coordinates
                    x = float(noduleAnnot.coordX)
                    y = float(noduleAnnot.coordY)
                    z = float(noduleAnnot.coordZ)

                    # 2. Check if the nodule annotation is covered by a candidate
                    # A nodule is marked as detected when the center of mass of the candidate is within a distance R of
                    # the center of the nodule. In order to ensure that the CAD mark is displayed within the nodule on the
                    # CT scan, we set R to be the radius of the nodule size.
                    diameter = float(noduleAnnot.diameter_mm)  # ground-truth diameter
                    if diameter < 0.0:
                        diameter = 5
                    radiusSquared = pow((diameter / 2.0), 2.0)  # squared ground-truth radius

                    found = False
                    noduleMatches = []
                    for key, candidate in candidates.items():  # every predicted mark
                        # predicted coordinates
                        x2 = float(candidate.coordX)
                        y2 = float(candidate.coordY)
                        z2 = float(candidate.coordZ)
                        # squared distance between prediction and ground truth
                        dist = math.pow(x - x2, 2.) + math.pow(y - y2, 2.) + math.pow(z - z2, 2.)
                        if dist < radiusSquared:  # prediction lies inside the nodule radius
                            if noduleAnnot.state == "Included":  # annotation is evaluated
                                found = True
                                noduleMatches.append(candidate)
                                if key not in candidates2:
                                    print(
                                        "This is strange: CAD mark %s detected two nodules! Check for overlapping nodule annotations, SeriesUID: %s, nodule Annot ID: %s"
                                        % (str(candidate.id), seriesuid, str(noduleAnnot.id)))
                                else:
                                    # matched marks are not false positives
                                    del candidates2[key]
                            elif noduleAnnot.state == "Excluded":  # an excluded nodule
                                if bOtherNodulesAsIrrelevant:
                                    # delete marks on excluded nodules so they don't count as false positives
                                    if key in candidates2:
                                        irrelevantCandidates += 1
                                        ignoredCADMarksList.append(
                                            "%s,%s,%s,%s,%s,%s,%.9f" %
                                            (seriesuid, -1, candidate.coordX,
                                             candidate.coordY, candidate.coordZ,
                                             str(candidate.id),
                                             float(candidate.CADprobability)))
                                        del candidates2[key]

                    # Two or more predictions hit the same ground-truth nodule:
                    # count the surplus ones as ignored double detections.
                    if len(noduleMatches) > 1:
                        doubleCandidatesIgnored += (len(noduleMatches) - 1)

                    if noduleAnnot.state == "Included":
                        # only include it for FROC analysis if it is included
                        # otherwise, the candidate will not be counted as FP, but ignored in the
                        # analysis since it has been deleted from the nodules2 vector of candidates
                        if found:
                            # append the sample with the highest probability for the FROC analysis
                            # (max() keeps the first of equal probabilities)
                            bestMatch = max(noduleMatches,
                                            key=lambda c: float(c.CADprobability))
                            maxProb = float(bestMatch.CADprobability)

                            FROCGTList.append(1.0)
                            FROCProbList.append(maxProb)
                            FPDivisorList.append(seriesuid)
                            excludeList.append(False)
                            FROCtoNoduleMap.append(
                                "%s,%s,%s,%s,%s,%.9f,%s,%.9f" %
                                (seriesuid, noduleAnnot.id, noduleAnnot.coordX,
                                 noduleAnnot.coordY, noduleAnnot.coordZ,
                                 float(noduleAnnot.diameter_mm),
                                 str(bestMatch.id), maxProb))
                            candTPs += 1
                        else:
                            candFNs += 1
                            # append a positive sample with the lowest probability, such that this is added in the FROC analysis
                            FROCGTList.append(1.0)
                            FROCProbList.append(minProbValue)
                            FPDivisorList.append(seriesuid)
                            excludeList.append(True)
                            FROCtoNoduleMap.append(
                                "%s,%s,%s,%s,%s,%.9f,%s,%s" %
                                (seriesuid, noduleAnnot.id, noduleAnnot.coordX,
                                 noduleAnnot.coordY, noduleAnnot.coordZ,
                                 float(noduleAnnot.diameter_mm), int(-1), "NA"))
                            nodNoCandFile.write(
                                "%s,%s,%s,%s,%s,%.9f,%s\n" %
                                (seriesuid, noduleAnnot.id, noduleAnnot.coordX,
                                 noduleAnnot.coordY, noduleAnnot.coordZ,
                                 float(noduleAnnot.diameter_mm), str(-1)))

                # add all false positives to the vectors
                for key, candidate3 in candidates2.items():
                    candFPs += 1
                    FROCGTList.append(0.0)
                    FROCProbList.append(float(candidate3.CADprobability))
                    FPDivisorList.append(seriesuid)
                    excludeList.append(False)
                    FROCtoNoduleMap.append(
                        "%s,%s,%s,%s,%s,%s,%.9f" %
                        (seriesuid, -1, candidate3.coordX, candidate3.coordY,
                         candidate3.coordZ, str(candidate3.id),
                         float(candidate3.CADprobability)))

        # Sanity check only: the message is logged but execution continues.
        if not (len(FROCGTList) == len(FROCProbList)
                and len(FROCGTList) == len(FPDivisorList)
                and len(FROCGTList) == len(FROCtoNoduleMap)
                and len(FROCGTList) == len(excludeList)):
            nodOutputfile.write(
                "Length of FROC vectors not the same, this should never happen! Aborting..\n")

        nodOutputfile.write("Candidate detection results:\n")
        nodOutputfile.write(" True positives: %d\n" % candTPs)
        nodOutputfile.write(" False positives: %d\n" % candFPs)
        nodOutputfile.write(" False negatives: %d\n" % candFNs)
        nodOutputfile.write(" True negatives: %d\n" % candTNs)
        nodOutputfile.write(" Total number of candidates: %d\n" % totalNumberOfCands)
        nodOutputfile.write(" Total number of nodules: %d\n" % totalNumberOfNodules)

        nodOutputfile.write(" Ignored candidates on excluded nodules: %d\n" % irrelevantCandidates)
        nodOutputfile.write(
            " Ignored candidates which were double detections on a nodule: %d\n"
            % doubleCandidatesIgnored)
        if int(totalNumberOfNodules) == 0:
            nodOutputfile.write(" Sensitivity: 0.0\n")
        else:
            nodOutputfile.write(" Sensitivity: %.9f\n" %
                                (float(candTPs) / float(totalNumberOfNodules)))
        nodOutputfile.write(" Average number of candidates per scan: %.9f\n" %
                            (float(totalNumberOfCands) / float(len(seriesUIDs))))

    # compute FROC
    fps, sens, thresholds = computeFROC(FROCGTList, FROCProbList,
                                        len(seriesUIDs), excludeList)

    if performBootstrapping:
        fps_bs_itp, sens_bs_mean, sens_bs_lb, sens_bs_up = computeFROC_bootstrap(
            FROCGTList, FROCProbList, FPDivisorList, seriesUIDs, excludeList,
            numberOfBootstrapSamples=numberOfBootstrapSamples,
            confidence=confidence)

    # Write FROC curve
    with open(os.path.join(outputDir, "froc_%s.txt" % CADSystemName), 'w') as f:
        for i in range(len(sens)):
            f.write("%.9f,%.9f,%.9f\n" % (fps[i], sens[i], thresholds[i]))

    # Write FROC vectors to disk as well
    with open(os.path.join(outputDir, "froc_gt_prob_vectors_%s.csv" % CADSystemName), 'w') as f:
        for i in range(len(FROCGTList)):
            f.write("%d,%.9f\n" % (FROCGTList[i], FROCProbList[i]))

    fps_itp = np.linspace(FROC_minX, FROC_maxX, num=10001)
    sens_itp = np.interp(fps_itp, fps, sens)

    # Sum the interpolated sensitivity at the grid point just below each of
    # the seven reference false-positive rates, then average over 7.
    # NOTE(review): both comparisons are strict, so a level that coincides
    # exactly with a grid point (e.g. an endpoint of the linspace) is never
    # counted although the sum is still divided by 7 - confirm intent.
    fpLevels = (("0.125:", 0.125), ("0.25:", 0.25), ("0.5:", 0.5),
                ("1:", 1), ("2:", 2), ("4:", 4), ("8:", 8))
    sum_sensitivity = 0
    for idx in range(len(fps_itp) - 1):
        for label, level in fpLevels:
            if fps_itp[idx] < level and fps_itp[idx + 1] > level:
                print(label, sens_itp[idx])
                sum_sensitivity += sens_itp[idx]
    ave_sensitivity = sum_sensitivity / 7.0
    # %d would truncate the [0, 1] score to an integer, so format as a float.
    print("final score is %.9f" % ave_sensitivity)

    if performBootstrapping:
        # Write mean, lower, and upper bound curves to disk
        with open(os.path.join(outputDir, "froc_%s_bootstrapping.csv" % CADSystemName), 'w') as f:
            f.write("FPrate,Sensivity[Mean],Sensivity[Lower bound],Sensivity[Upper bound]\n")
            for i in range(len(fps_bs_itp)):
                f.write("%.9f,%.9f,%.9f,%.9f\n" %
                        (fps_bs_itp[i], sens_bs_mean[i], sens_bs_lb[i], sens_bs_up[i]))
    else:
        fps_bs_itp = None
        sens_bs_mean = None
        sens_bs_lb = None
        sens_bs_up = None

    # create FROC graphs
    if int(totalNumberOfNodules) > 0:
        plt.figure()
        ax = plt.gca()
        clr = 'b'
        plt.plot(fps_itp, sens_itp, color=clr, label="%s" % CADSystemName, lw=2)
        if performBootstrapping:
            plt.plot(fps_bs_itp, sens_bs_mean, color=clr, ls='--')
            plt.plot(fps_bs_itp, sens_bs_lb, color=clr, ls=':')  # , label = "lb")
            plt.plot(fps_bs_itp, sens_bs_up, color=clr, ls=':')  # , label = "ub")
            ax.fill_between(fps_bs_itp, sens_bs_lb, sens_bs_up, facecolor=clr, alpha=0.05)
        xmin = FROC_minX
        xmax = FROC_maxX
        plt.xlim(xmin, xmax)
        plt.ylim(0, 1)
        plt.xlabel('Average number of false positives per scan')
        plt.ylabel('Sensitivity')
        plt.legend(loc='lower right')
        plt.title('FROC performance - %s' % (CADSystemName))

        if bLogPlot:
            plt.xscale('log', basex=2)
            ax.xaxis.set_major_formatter(
                FixedFormatter([0.125, 0.25, 0.5, 1, 2, 4, 8]))

        # set your ticks manually
        ax.xaxis.set_ticks([0.125, 0.25, 0.5, 1, 2, 4, 8])
        ax.yaxis.set_ticks(np.arange(0, 1.1, 0.1))
        plt.grid(b=True, which='both')
        plt.tight_layout()

        plt.savefig(os.path.join(outputDir, "froc_%s.png" % CADSystemName),
                    bbox_inches=0, dpi=300)

    return (fps, sens, thresholds, fps_bs_itp, sens_bs_mean, sens_bs_lb,
            sens_bs_up)
def evaluateCAD(seriesUIDs, results_filename, outputDir, allNodules, CADSystemName,
                maxNumberOfCADMarks=-1, performBootstrapping=False,
                numberOfBootstrapSamples=1000, confidence=0.95):
    '''
    function to evaluate a CAD algorithm

    Matches the CAD marks in the results file against the nodule annotations,
    writes the analysis files into outputDir, computes the FROC curve and,
    when there is at least one included nodule, saves a FROC plot.

    @param seriesUIDs: list of the seriesUIDs of the cases to be processed
    @param results_filename: file with results
    @param outputDir: output directory
    @param allNodules: dictionary with all nodule annotations of all cases, keys of the dictionary are the seriesuids
    @param CADSystemName: name of the CAD system, to be used in filenames and on FROC curve
    @param maxNumberOfCADMarks: when > 0 and a case has more marks than this,
        only marks whose probability is strictly above the probability at
        index maxNumberOfCADMarks of the descending-sorted list are kept
        (at most maxNumberOfCADMarks of them)
    @param performBootstrapping: when true, also call computeFROC_bootstrap
        and write/plot its mean, lower and upper curves
    @param numberOfBootstrapSamples: forwarded to computeFROC_bootstrap
    @param confidence: forwarded to computeFROC_bootstrap
    @return: tuple (fps, sens, thresholds, fps_bs_itp, sens_bs_mean,
        sens_bs_lb, sens_bs_up); the last four are None when
        performBootstrapping is false
    '''
    # The analysis file is written at the start and again after matching, so
    # it stays open across both phases; `with` guarantees it is closed.
    with open(os.path.join(outputDir, 'CADAnalysis.txt'), 'w') as nodOutputfile:
        nodOutputfile.write("\n")
        nodOutputfile.write((60 * "*") + "\n")
        nodOutputfile.write("CAD Analysis: %s\n" % CADSystemName)
        nodOutputfile.write((60 * "*") + "\n")
        nodOutputfile.write("\n")

        results = csvTools.readCSV(results_filename)

        allCandsCAD = {}

        for seriesuid in seriesUIDs:
            # collect candidates from the results file
            nodules = {}
            header = results[0]

            i = 0
            for result in results[1:]:
                nodule_seriesuid = result[header.index(seriesuid_label)]

                if seriesuid == nodule_seriesuid:
                    nodule = getNodule(result, header)
                    nodule.candidateID = i
                    nodules[nodule.candidateID] = nodule
                    i += 1

            # number of CAD marks: only keep the most suspicious marks
            if maxNumberOfCADMarks > 0 and len(nodules) > maxNumberOfCADMarks:
                # list all probabilities, sorted from large to small
                probs = sorted((float(noduletemp.CADprobability)
                                for noduletemp in nodules.values()),
                               reverse=True)
                probThreshold = probs[maxNumberOfCADMarks]
                nodules2 = {}
                for keytemp, noduletemp in nodules.items():
                    if len(nodules2) >= maxNumberOfCADMarks:
                        break
                    # strict comparison: marks tied with the threshold are dropped
                    if float(noduletemp.CADprobability) > probThreshold:
                        nodules2[keytemp] = noduletemp

                nodules = nodules2

            print('adding candidates: ' + seriesuid)
            allCandsCAD[seriesuid] = nodules

        # Iterate over all cases (seriesUIDs) and determine how often a
        # nodule annotation is not covered by a candidate.

        # initialize some variables to be used in the loop
        candTPs = 0
        candFPs = 0
        candFNs = 0
        candTNs = 0
        totalNumberOfCands = 0
        totalNumberOfNodules = 0
        doubleCandidatesIgnored = 0
        irrelevantCandidates = 0
        minProbValue = -1000000000.0  # stand-in probability for missed nodules
        FROCGTList = []
        FROCProbList = []
        FPDivisorList = []
        excludeList = []
        FROCtoNoduleMap = []
        ignoredCADMarksList = []

        # open output files
        nodNoCandPath = os.path.join(
            outputDir, "nodulesWithoutCandidate_%s.txt" % CADSystemName)
        with open(nodNoCandPath, 'w') as nodNoCandFile:
            # -- loop over the cases
            for seriesuid in seriesUIDs:
                # get the candidates for this case
                candidates = allCandsCAD.get(seriesuid, {})

                # add to the total number of candidates
                totalNumberOfCands += len(candidates)

                # make a copy in which items will be deleted
                candidates2 = candidates.copy()

                # get the nodule annotations on this case
                try:
                    noduleAnnots = allNodules[seriesuid]
                except KeyError:
                    noduleAnnots = []

                # - loop over the nodule annotations
                for noduleAnnot in noduleAnnots:
                    # increment the number of nodules
                    if noduleAnnot.state == "Included":
                        totalNumberOfNodules += 1

                    x = float(noduleAnnot.coordX)
                    y = float(noduleAnnot.coordY)
                    z = float(noduleAnnot.coordZ)

                    # 2. Check if the nodule annotation is covered by a candidate
                    # A nodule is marked as detected when the center of mass of the candidate is within a distance R of
                    # the center of the nodule. In order to ensure that the CAD mark is displayed within the nodule on the
                    # CT scan, we set R to be the radius of the nodule size.
                    diameter = float(noduleAnnot.diameter_mm)
                    if diameter < 0.0:
                        diameter = 10.0
                    radiusSquared = pow((diameter / 2.0), 2.0)

                    found = False
                    noduleMatches = []
                    for key, candidate in candidates.items():
                        x2 = float(candidate.coordX)
                        y2 = float(candidate.coordY)
                        z2 = float(candidate.coordZ)
                        # squared distance, compared against the squared radius
                        dist = math.pow(x - x2, 2.) + math.pow(y - y2, 2.) + math.pow(z - z2, 2.)
                        if dist < radiusSquared:
                            if noduleAnnot.state == "Included":
                                found = True
                                noduleMatches.append(candidate)
                                if key not in candidates2:
                                    print(
                                        "This is strange: CAD mark %s detected two nodules! Check for overlapping nodule annotations, SeriesUID: %s, nodule Annot ID: %s"
                                        % (str(candidate.id), seriesuid, str(noduleAnnot.id)))
                                else:
                                    del candidates2[key]
                            elif noduleAnnot.state == "Excluded":  # an excluded nodule
                                if bOtherNodulesAsIrrelevant:
                                    # delete marks on excluded nodules so they don't count as false positives
                                    if key in candidates2:
                                        irrelevantCandidates += 1
                                        ignoredCADMarksList.append(
                                            "%s,%s,%s,%s,%s,%s,%.9f" %
                                            (seriesuid, -1, candidate.coordX,
                                             candidate.coordY, candidate.coordZ,
                                             str(candidate.id),
                                             float(candidate.CADprobability)))
                                        del candidates2[key]

                    if len(noduleMatches) > 1:  # double detection
                        doubleCandidatesIgnored += (len(noduleMatches) - 1)

                    if noduleAnnot.state == "Included":
                        # only include it for FROC analysis if it is included
                        # otherwise, the candidate will not be counted as FP, but ignored in the
                        # analysis since it has been deleted from the nodules2 vector of candidates
                        if found:
                            # append the sample with the highest probability for the FROC analysis
                            # (max() keeps the first of equal probabilities)
                            bestMatch = max(noduleMatches,
                                            key=lambda c: float(c.CADprobability))
                            maxProb = float(bestMatch.CADprobability)

                            FROCGTList.append(1.0)
                            FROCProbList.append(maxProb)
                            FPDivisorList.append(seriesuid)
                            excludeList.append(False)
                            FROCtoNoduleMap.append(
                                "%s,%s,%s,%s,%s,%.9f,%s,%.9f" %
                                (seriesuid, noduleAnnot.id, noduleAnnot.coordX,
                                 noduleAnnot.coordY, noduleAnnot.coordZ,
                                 float(noduleAnnot.diameter_mm),
                                 str(bestMatch.id), maxProb))
                            candTPs += 1
                        else:
                            candFNs += 1
                            # append a positive sample with the lowest probability, such that this is added in the FROC analysis
                            FROCGTList.append(1.0)
                            FROCProbList.append(minProbValue)
                            FPDivisorList.append(seriesuid)
                            excludeList.append(True)
                            FROCtoNoduleMap.append(
                                "%s,%s,%s,%s,%s,%.9f,%s,%s" %
                                (seriesuid, noduleAnnot.id, noduleAnnot.coordX,
                                 noduleAnnot.coordY, noduleAnnot.coordZ,
                                 float(noduleAnnot.diameter_mm), int(-1), "NA"))
                            nodNoCandFile.write(
                                "%s,%s,%s,%s,%s,%.9f,%s\n" %
                                (seriesuid, noduleAnnot.id, noduleAnnot.coordX,
                                 noduleAnnot.coordY, noduleAnnot.coordZ,
                                 float(noduleAnnot.diameter_mm), str(-1)))

                # add all false positives (remaining unmatched marks) to the vectors
                for key, candidate3 in candidates2.items():
                    candFPs += 1
                    FROCGTList.append(0.0)
                    FROCProbList.append(float(candidate3.CADprobability))
                    FPDivisorList.append(seriesuid)
                    excludeList.append(False)
                    FROCtoNoduleMap.append(
                        "%s,%s,%s,%s,%s,%s,%.9f" %
                        (seriesuid, -1, candidate3.coordX, candidate3.coordY,
                         candidate3.coordZ, str(candidate3.id),
                         float(candidate3.CADprobability)))

        # Sanity check only: the message is logged but execution continues.
        if not (len(FROCGTList) == len(FROCProbList)
                and len(FROCGTList) == len(FPDivisorList)
                and len(FROCGTList) == len(FROCtoNoduleMap)
                and len(FROCGTList) == len(excludeList)):
            nodOutputfile.write(
                "Length of FROC vectors not the same, this should never happen! Aborting..\n")

        nodOutputfile.write("Candidate detection results:\n")
        nodOutputfile.write(" True positives: %d\n" % candTPs)
        nodOutputfile.write(" False positives: %d\n" % candFPs)
        nodOutputfile.write(" False negatives: %d\n" % candFNs)
        nodOutputfile.write(" True negatives: %d\n" % candTNs)
        nodOutputfile.write(" Total number of candidates: %d\n" % totalNumberOfCands)
        nodOutputfile.write(" Total number of nodules: %d\n" % totalNumberOfNodules)

        nodOutputfile.write(" Ignored candidates on excluded nodules: %d\n" % irrelevantCandidates)
        nodOutputfile.write(
            " Ignored candidates which were double detections on a nodule: %d\n"
            % doubleCandidatesIgnored)
        if int(totalNumberOfNodules) == 0:
            nodOutputfile.write(" Sensitivity: 0.0\n")
        else:
            nodOutputfile.write(" Sensitivity: %.9f\n" %
                                (float(candTPs) / float(totalNumberOfNodules)))
        nodOutputfile.write(" Average number of candidates per scan: %.9f\n" %
                            (float(totalNumberOfCands) / float(len(seriesUIDs))))

    # compute FROC
    fps, sens, thresholds = computeFROC(FROCGTList, FROCProbList,
                                        len(seriesUIDs), excludeList)

    if performBootstrapping:
        fps_bs_itp, sens_bs_mean, sens_bs_lb, sens_bs_up = computeFROC_bootstrap(
            FROCGTList, FROCProbList, FPDivisorList, seriesUIDs, excludeList,
            numberOfBootstrapSamples=numberOfBootstrapSamples,
            confidence=confidence)

    # Write FROC curve
    with open(os.path.join(outputDir, "froc_%s.txt" % CADSystemName), 'w') as f:
        for i in range(len(sens)):
            f.write("%.9f,%.9f,%.9f\n" % (fps[i], sens[i], thresholds[i]))

    # Write FROC vectors to disk as well
    with open(os.path.join(outputDir, "froc_gt_prob_vectors_%s.csv" % CADSystemName), 'w') as f:
        for i in range(len(FROCGTList)):
            f.write("%d,%.9f\n" % (FROCGTList[i], FROCProbList[i]))

    fps_itp = np.linspace(FROC_minX, FROC_maxX, num=10001)
    sens_itp = np.interp(fps_itp, fps, sens)

    if performBootstrapping:
        # Write mean, lower, and upper bound curves to disk
        with open(os.path.join(outputDir, "froc_%s_bootstrapping.csv" % CADSystemName), 'w') as f:
            f.write("FPrate,Sensivity[Mean],Sensivity[Lower bound],Sensivity[Upper bound]\n")
            for i in range(len(fps_bs_itp)):
                f.write("%.9f,%.9f,%.9f,%.9f\n" %
                        (fps_bs_itp[i], sens_bs_mean[i], sens_bs_lb[i], sens_bs_up[i]))
    else:
        fps_bs_itp = None
        sens_bs_mean = None
        sens_bs_lb = None
        sens_bs_up = None

    # create FROC graphs
    if int(totalNumberOfNodules) > 0:
        plt.figure()
        ax = plt.gca()
        clr = 'b'
        plt.plot(fps_itp, sens_itp, color=clr, label="%s" % CADSystemName, lw=2)
        if performBootstrapping:
            plt.plot(fps_bs_itp, sens_bs_mean, color=clr, ls='--')
            plt.plot(fps_bs_itp, sens_bs_lb, color=clr, ls=':')  # , label = "lb")
            plt.plot(fps_bs_itp, sens_bs_up, color=clr, ls=':')  # , label = "ub")
            ax.fill_between(fps_bs_itp, sens_bs_lb, sens_bs_up, facecolor=clr, alpha=0.05)
        xmin = FROC_minX
        xmax = FROC_maxX
        plt.xlim(xmin, xmax)
        plt.ylim(0, 1)
        plt.xlabel('Average number of false positives per scan')
        plt.ylabel('Sensitivity')
        plt.legend(loc='lower right')
        plt.title('FROC performance - %s' % (CADSystemName))

        if bLogPlot:
            plt.xscale('log', basex=2)
            ax.xaxis.set_major_formatter(
                FixedFormatter([0.125, 0.25, 0.5, 1, 2, 4, 8]))

        # set your ticks manually
        ax.xaxis.set_ticks([0.125, 0.25, 0.5, 1, 2, 4, 8])
        ax.yaxis.set_ticks(np.arange(0, 1.1, 0.1))
        plt.grid(b=True, which='both')
        plt.tight_layout()

        plt.savefig(os.path.join(outputDir, "froc_%s.png" % CADSystemName),
                    bbox_inches=0, dpi=300)

    return (fps, sens, thresholds, fps_bs_itp, sens_bs_mean, sens_bs_lb,
            sens_bs_up)
def _collectCADCandidates(results, seriesUIDs, maxNumberOfCADMarks):
    """Group the CAD marks of a results table per seriesuid.

    @param results: rows of the results CSV; results[0] is the header row
    @param seriesUIDs: list of the seriesUIDs of the cases to be processed
    @param maxNumberOfCADMarks: when > 0, cases with more marks than this are
        reduced to the marks whose probability is strictly greater than the
        probability at index maxNumberOfCADMarks of the descending-sorted list
    @return: dictionary seriesuid -> {candidateID: nodule}
    """
    allCandsCAD = {}
    if not seriesUIDs:
        return allCandsCAD

    header = results[0]
    rows = results[1:]

    # Bucket the rows once instead of rescanning the whole table for every
    # case. Row order inside a bucket is the file order, so candidate IDs are
    # the same as with a per-case scan.
    rowsBySeries = {}
    if rows:
        seriesuidColumn = header.index(seriesuid_label)
        for row in rows:
            rowsBySeries.setdefault(row[seriesuidColumn], []).append(row)

    for seriesuid in seriesUIDs:
        nodules = {}
        for candidateID, result in enumerate(rowsBySeries.get(seriesuid, [])):
            nodule = getNodule(result, header)
            nodule.candidateID = candidateID
            nodules[candidateID] = nodule

        # number of CAD marks, only keep the most suspicious marks
        if maxNumberOfCADMarks > 0 and len(nodules) > maxNumberOfCADMarks:
            probs = sorted([float(nodule.CADprobability) for nodule in nodules.values()],
                           reverse=True)  # sort from large to small
            probThreshold = probs[maxNumberOfCADMarks]
            kept = {}
            for key, nodule in nodules.items():
                if len(kept) >= maxNumberOfCADMarks:
                    break
                if float(nodule.CADprobability) > probThreshold:
                    kept[key] = nodule
            nodules = kept

        print('adding candidates: ' + seriesuid)
        allCandsCAD[seriesuid] = nodules

    return allCandsCAD


def _matchCandidatesToNodules(seriesUIDs, allNodules, allCandsCAD, nodNoCandFile):
    """Match CAD marks against nodule annotations and build the FROC vectors.

    A nodule is marked as detected when the center of a candidate is within a
    distance R of the center of the nodule, where R is the nodule radius
    (a negative diameter is replaced by 10.0 before halving).

    @param seriesUIDs: list of the seriesUIDs of the cases to be processed
    @param allNodules: dictionary seriesuid -> list of nodule annotations
    @param allCandsCAD: dictionary seriesuid -> {candidateID: candidate}
    @param nodNoCandFile: writable file object; one line is written for every
        included nodule that has no matching candidate
    @return: dictionary with the counters and the parallel FROC lists
    """
    candTPs = 0
    candFPs = 0
    candFNs = 0
    candTNs = 0  # never incremented; reported as-is in the summary
    totalNumberOfCands = 0
    totalNumberOfNodules = 0
    doubleCandidatesIgnored = 0
    irrelevantCandidates = 0
    # Sentinel probability given to missed nodules so that they are still
    # present as positives in the FROC vectors.
    minProbValue = -1000000000.0
    FROCGTList = []
    FROCProbList = []
    FPDivisorList = []
    excludeList = []
    FROCtoNoduleMap = []
    ignoredCADMarksList = []

    # -- loop over the cases
    for seriesuid in seriesUIDs:
        # get the candidates for this case
        candidates = allCandsCAD.get(seriesuid, {})

        # add to the total number of candidates
        totalNumberOfCands += len(candidates)

        # copy from which matched / ignored marks are deleted; whatever is
        # left at the end of the case is a false positive
        candidates2 = candidates.copy()

        # get the nodule annotations on this case
        noduleAnnots = allNodules.get(seriesuid, [])

        # - loop over the nodule annotations
        for noduleAnnot in noduleAnnots:
            included = noduleAnnot.state == "Included"
            excluded = noduleAnnot.state == "Excluded"

            # increment the number of nodules
            if included:
                totalNumberOfNodules += 1

            x = float(noduleAnnot.coordX)
            y = float(noduleAnnot.coordY)
            z = float(noduleAnnot.coordZ)

            diameter = float(noduleAnnot.diameter_mm)
            if diameter < 0.0:
                diameter = 10.0
            radiusSquared = pow((diameter / 2.0), 2.0)

            noduleMatches = []
            for key, candidate in candidates.items():
                x2 = float(candidate.coordX)
                y2 = float(candidate.coordY)
                z2 = float(candidate.coordZ)
                # squared distance, compared against the squared radius
                dist = math.pow(x - x2, 2.) + math.pow(y - y2, 2.) + math.pow(z - z2, 2.)
                if not dist < radiusSquared:
                    continue

                if included:
                    noduleMatches.append(candidate)
                    if key not in candidates2:
                        print("This is strange: CAD mark %s detected two nodules! Check for overlapping nodule annotations, SeriesUID: %s, nodule Annot ID: %s" % (str(candidate.id), seriesuid, str(noduleAnnot.id)))
                    else:
                        del candidates2[key]
                elif excluded:  # an excluded nodule
                    # delete marks on excluded nodules so they don't count as false positives
                    if bOtherNodulesAsIrrelevant and key in candidates2:
                        irrelevantCandidates += 1
                        ignoredCADMarksList.append("%s,%s,%s,%s,%s,%s,%.9f" % (seriesuid, -1, candidate.coordX, candidate.coordY, candidate.coordZ, str(candidate.id), float(candidate.CADprobability)))
                        del candidates2[key]

            if len(noduleMatches) > 1:  # double detection
                doubleCandidatesIgnored += (len(noduleMatches) - 1)

            # Only included nodules enter the FROC analysis. Marks on other
            # nodules were removed from candidates2 above (if configured), so
            # they are ignored rather than counted as false positives.
            if not included:
                continue

            if noduleMatches:
                # use the match with the highest probability for the FROC
                # analysis (first one wins on ties)
                best = max(noduleMatches, key=lambda cand: float(cand.CADprobability))
                maxProb = float(best.CADprobability)

                FROCGTList.append(1.0)
                FROCProbList.append(maxProb)
                FPDivisorList.append(seriesuid)
                excludeList.append(False)
                FROCtoNoduleMap.append("%s,%s,%s,%s,%s,%.9f,%s,%.9f" % (seriesuid, noduleAnnot.id, noduleAnnot.coordX, noduleAnnot.coordY, noduleAnnot.coordZ, float(noduleAnnot.diameter_mm), str(best.id), maxProb))
                candTPs += 1
            else:
                candFNs += 1
                # append a positive sample with the lowest probability, such that this is added in the FROC analysis
                FROCGTList.append(1.0)
                FROCProbList.append(minProbValue)
                FPDivisorList.append(seriesuid)
                excludeList.append(True)
                FROCtoNoduleMap.append("%s,%s,%s,%s,%s,%.9f,%s,%s" % (seriesuid, noduleAnnot.id, noduleAnnot.coordX, noduleAnnot.coordY, noduleAnnot.coordZ, float(noduleAnnot.diameter_mm), int(-1), "NA"))
                nodNoCandFile.write("%s,%s,%s,%s,%s,%.9f,%s\n" % (seriesuid, noduleAnnot.id, noduleAnnot.coordX, noduleAnnot.coordY, noduleAnnot.coordZ, float(noduleAnnot.diameter_mm), str(-1)))

        # add all false positives to the vectors
        for key, candidate3 in candidates2.items():
            candFPs += 1
            FROCGTList.append(0.0)
            FROCProbList.append(float(candidate3.CADprobability))
            FPDivisorList.append(seriesuid)
            excludeList.append(False)
            FROCtoNoduleMap.append("%s,%s,%s,%s,%s,%s,%.9f" % (seriesuid, -1, candidate3.coordX, candidate3.coordY, candidate3.coordZ, str(candidate3.id), float(candidate3.CADprobability)))

    return {
        'candTPs': candTPs,
        'candFPs': candFPs,
        'candFNs': candFNs,
        'candTNs': candTNs,
        'totalNumberOfCands': totalNumberOfCands,
        'totalNumberOfNodules': totalNumberOfNodules,
        'doubleCandidatesIgnored': doubleCandidatesIgnored,
        'irrelevantCandidates': irrelevantCandidates,
        'FROCGTList': FROCGTList,
        'FROCProbList': FROCProbList,
        'FPDivisorList': FPDivisorList,
        'excludeList': excludeList,
        'FROCtoNoduleMap': FROCtoNoduleMap,
        'ignoredCADMarksList': ignoredCADMarksList,
    }


def _plotFROC(outputDir, CADSystemName, fps_itp, sens_itp, bootstrapCurves=None):
    """Draw the FROC curve and save it as froc_<CADSystemName>.png in outputDir.

    @param fps_itp: x values (false positives per scan) of the curve
    @param sens_itp: y values (sensitivity) of the curve
    @param bootstrapCurves: None, or a tuple
        (fps_bs_itp, sens_bs_mean, sens_bs_lb, sens_bs_up) that is drawn as
        mean / lower / upper curves with a shaded band in between
    """
    plt.figure()
    ax = plt.gca()
    clr = 'b'
    plt.plot(fps_itp, sens_itp, color=clr, label="%s" % CADSystemName, lw=2)
    if bootstrapCurves is not None:
        fps_bs_itp, sens_bs_mean, sens_bs_lb, sens_bs_up = bootstrapCurves
        plt.plot(fps_bs_itp, sens_bs_mean, color=clr, ls='--')
        plt.plot(fps_bs_itp, sens_bs_lb, color=clr, ls=':')  # , label = "lb")
        plt.plot(fps_bs_itp, sens_bs_up, color=clr, ls=':')  # , label = "ub")
        ax.fill_between(fps_bs_itp, sens_bs_lb, sens_bs_up, facecolor=clr, alpha=0.05)
    plt.xlim(FROC_minX, FROC_maxX)
    plt.ylim(0, 1)
    plt.xlabel('Average number of false positives per scan')
    plt.ylabel('Sensitivity')
    plt.legend(loc='lower right')
    plt.title('FROC performance - %s' % (CADSystemName))

    if bLogPlot:
        # NOTE(review): newer Matplotlib releases spell this keyword `base`;
        # `basex` is kept for the Matplotlib generation this file targets.
        plt.xscale('log', basex=2)
        ax.xaxis.set_major_formatter(FixedFormatter([0.125, 0.25, 0.5, 1, 2, 4, 8]))

    # set your ticks manually
    ax.xaxis.set_ticks([0.125, 0.25, 0.5, 1, 2, 4, 8])
    ax.yaxis.set_ticks(np.arange(0, 1.1, 0.1))
    # The on/off flag is passed positionally because its keyword name differs
    # between Matplotlib versions.
    plt.grid(True, which='both')
    plt.tight_layout()

    plt.savefig(os.path.join(outputDir, "froc_%s.png" % CADSystemName), bbox_inches=0, dpi=300)


def evaluateCAD(seriesUIDs, results_filename, outputDir, allNodules, CADSystemName, maxNumberOfCADMarks=-1,
                performBootstrapping=False, numberOfBootstrapSamples=1000, confidence=0.95):
    '''
    function to evaluate a CAD algorithm

    Reads the CAD marks from results_filename, matches them against the nodule
    annotations, and writes into outputDir:
      - CADAnalysis.txt: summary of the candidate detection results
      - nodulesWithoutCandidate_<CADSystemName>.txt: included nodules that no
        candidate matched
      - froc_<CADSystemName>.txt: FROC curve (fps, sensitivity, threshold)
      - froc_gt_prob_vectors_<CADSystemName>.csv: ground truth / probability
        vectors the FROC curve was computed from
      - froc_<CADSystemName>_bootstrapping.csv: only when performBootstrapping
      - froc_<CADSystemName>.png: only when at least one included nodule exists

    @param seriesUIDs: list of the seriesUIDs of the cases to be processed
    @param results_filename: file with results
    @param outputDir: output directory
    @param allNodules: dictionary with all nodule annotations of all cases, keys of the dictionary are the seriesuids
    @param CADSystemName: name of the CAD system, to be used in filenames and on FROC curve
    @param maxNumberOfCADMarks: when > 0, limits the number of CAD marks kept per case
    @param performBootstrapping: when true, computeFROC_bootstrap is run as well
    @param numberOfBootstrapSamples: forwarded to computeFROC_bootstrap
    @param confidence: forwarded to computeFROC_bootstrap
    @return: tuple (fps, sens, thresholds, fps_bs_itp, sens_bs_mean, sens_bs_lb, sens_bs_up);
             the last four entries are None when performBootstrapping is false

    NOTE: an empty seriesUIDs list raises ZeroDivisionError when the average
    number of candidates per scan is computed.
    '''
    # Both report files are context-managed so that they are flushed and
    # closed even if the evaluation raises.
    with open(os.path.join(outputDir, 'CADAnalysis.txt'), 'w') as nodOutputfile:
        nodOutputfile.write("\n")
        nodOutputfile.write((60 * "*") + "\n")
        nodOutputfile.write("CAD Analysis: %s\n" % CADSystemName)
        nodOutputfile.write((60 * "*") + "\n")
        nodOutputfile.write("\n")

        # collect candidates from result file
        results = csvTools.readCSV(results_filename)
        allCandsCAD = _collectCADCandidates(results, seriesUIDs, maxNumberOfCADMarks)

        # --- iterate over all cases (seriesUIDs) and determine how
        # often a nodule annotation is not covered by a candidate
        noCandPath = os.path.join(outputDir, "nodulesWithoutCandidate_%s.txt" % CADSystemName)
        with open(noCandPath, 'w') as nodNoCandFile:
            stats = _matchCandidatesToNodules(seriesUIDs, allNodules, allCandsCAD, nodNoCandFile)

        FROCGTList = stats['FROCGTList']
        FROCProbList = stats['FROCProbList']
        FPDivisorList = stats['FPDivisorList']
        excludeList = stats['excludeList']
        totalNumberOfNodules = stats['totalNumberOfNodules']
        totalNumberOfCands = stats['totalNumberOfCands']

        # Sanity check only: the message is logged but processing continues.
        vectorLengths = [len(FROCGTList), len(FROCProbList), len(FPDivisorList),
                         len(stats['FROCtoNoduleMap']), len(excludeList)]
        if len(set(vectorLengths)) != 1:
            nodOutputfile.write("Length of FROC vectors not the same, this should never happen! Aborting..\n")

        nodOutputfile.write("Candidate detection results:\n")
        nodOutputfile.write(" True positives: %d\n" % stats['candTPs'])
        nodOutputfile.write(" False positives: %d\n" % stats['candFPs'])
        nodOutputfile.write(" False negatives: %d\n" % stats['candFNs'])
        nodOutputfile.write(" True negatives: %d\n" % stats['candTNs'])
        nodOutputfile.write(" Total number of candidates: %d\n" % totalNumberOfCands)
        nodOutputfile.write(" Total number of nodules: %d\n" % totalNumberOfNodules)

        nodOutputfile.write(" Ignored candidates on excluded nodules: %d\n" % stats['irrelevantCandidates'])
        nodOutputfile.write(" Ignored candidates which were double detections on a nodule: %d\n" % stats['doubleCandidatesIgnored'])
        if int(totalNumberOfNodules) == 0:
            nodOutputfile.write(" Sensitivity: 0.0\n")
        else:
            nodOutputfile.write(" Sensitivity: %.9f\n" % (float(stats['candTPs']) / float(totalNumberOfNodules)))
        nodOutputfile.write(" Average number of candidates per scan: %.9f\n" % (float(totalNumberOfCands) / float(len(seriesUIDs))))

    # compute FROC
    fps, sens, thresholds = computeFROC(FROCGTList, FROCProbList, len(seriesUIDs), excludeList)

    if performBootstrapping:
        fps_bs_itp, sens_bs_mean, sens_bs_lb, sens_bs_up = computeFROC_bootstrap(
            FROCGTList, FROCProbList, FPDivisorList, seriesUIDs, excludeList,
            numberOfBootstrapSamples=numberOfBootstrapSamples, confidence=confidence)

    # Write FROC curve
    with open(os.path.join(outputDir, "froc_%s.txt" % CADSystemName), 'w') as f:
        for fp, sensitivity, threshold in zip(fps, sens, thresholds):
            f.write("%.9f,%.9f,%.9f\n" % (fp, sensitivity, threshold))

    # Write FROC vectors to disk as well
    with open(os.path.join(outputDir, "froc_gt_prob_vectors_%s.csv" % CADSystemName), 'w') as f:
        for groundTruth, probability in zip(FROCGTList, FROCProbList):
            f.write("%d,%.9f\n" % (groundTruth, probability))

    # resample the curve on a fixed grid of false-positive rates for plotting
    fps_itp = np.linspace(FROC_minX, FROC_maxX, num=10001)
    sens_itp = np.interp(fps_itp, fps, sens)

    if performBootstrapping:
        # Write mean, lower, and upper bound curves to disk
        with open(os.path.join(outputDir, "froc_%s_bootstrapping.csv" % CADSystemName), 'w') as f:
            f.write("FPrate,Sensivity[Mean],Sensivity[Lower bound],Sensivity[Upper bound]\n")
            for fp, mean, lower, upper in zip(fps_bs_itp, sens_bs_mean, sens_bs_lb, sens_bs_up):
                f.write("%.9f,%.9f,%.9f,%.9f\n" % (fp, mean, lower, upper))
        bootstrapCurves = (fps_bs_itp, sens_bs_mean, sens_bs_lb, sens_bs_up)
    else:
        fps_bs_itp = None
        sens_bs_mean = None
        sens_bs_lb = None
        sens_bs_up = None
        bootstrapCurves = None

    # create FROC graphs
    if int(totalNumberOfNodules) > 0:
        _plotFROC(outputDir, CADSystemName, fps_itp, sens_itp, bootstrapCurves)

    return (fps, sens, thresholds, fps_bs_itp, sens_bs_mean, sens_bs_lb, sens_bs_up)