Esempio n. 1
0
def collect(annotations_filename,annotations_excluded_filename,seriesuids_filename,results_filename):
    '''
    Load the annotation, exclusion, seriesuid and result CSV files and build
    the nodule annotations, restricted to the scans that occur in the results.

    @param annotations_filename: CSV file with the nodule annotations
    @param annotations_excluded_filename: CSV file with the excluded annotations
    @param seriesuids_filename: CSV file whose first column holds the seriesuids
    @param results_filename: CSV file with the CAD results; column 1 is read as the seriesuid
    @return: tuple (allNodules, seriesUIDs, results) where allNodules is whatever
             collectNoduleAnnotations returns and results are the raw result rows
    '''
    annotations          = csvTools.readCSV(annotations_filename)
    annotations_excluded = csvTools.readCSV(annotations_excluded_filename)
    results              = csvTools.readCSV(results_filename)
    seriesUIDs_csv       = csvTools.readCSV(seriesuids_filename)

    # seriesuids of the first 100000 non-header result rows. A set keeps the
    # membership test in the annotation filter below O(1) per row.
    seriesuid_results = set()
    counter = 0
    for row in results:
        if row[1] == 'seriesuid':
            continue
        if counter >= 100000:
            break
        seriesuid_results.add(row[1])
        counter += 1

    # keep the header row and every annotation whose scan occurs in the results
    annotations = [
        row for row in annotations
        if row[0] == 'seriesuid' or row[0] in seriesuid_results
    ]

    seriesUIDs = [seriesUID[0] for seriesUID in seriesUIDs_csv]

    allNodules = collectNoduleAnnotations(annotations, annotations_excluded, seriesUIDs)

    return (allNodules, seriesUIDs, results)
def collect(annotations_filename, annotations_excluded_filename, seriesuids_filename):
    """Read the three CSV inputs and assemble the nodule annotations.

    Returns a tuple ``(allNodules, seriesUIDs)``: the result of
    ``collectNoduleAnnotations`` and the list built from the first column
    of the seriesuid file.
    """
    included_rows = csvTools.readCSV(annotations_filename)
    excluded_rows = csvTools.readCSV(annotations_excluded_filename)
    uid_rows = csvTools.readCSV(seriesuids_filename)

    # only the first column of every row carries the seriesuid
    uids = [uid_row[0] for uid_row in uid_rows]

    nodules = collectNoduleAnnotations(included_rows, excluded_rows, uids)
    return nodules, uids
def collect(annotations_filename, seriesuids_filename):
    """Read the ground-truth annotations and the list of scan names.

    Returns a tuple ``(allNodules, seriesUIDs)``: the result of
    ``collectNoduleAnnotations`` for the annotation rows, and the list
    built from the first column of the seriesuid file.
    """
    # ground-truth annotation rows
    gt_rows = csvTools.readCSV(annotations_filename)
    # rows of the ground-truth scan-name list
    uid_rows = csvTools.readCSV(seriesuids_filename)

    # flatten the CSV rows into a plain list, one CT scan name per element
    uids = [uid_row[0] for uid_row in uid_rows]

    nodules = collectNoduleAnnotations(gt_rows, uids)
    return nodules, uids
def collect(annotations_filename,annotations_excluded_filename,seriesuids_filename):
    """Load annotations, excluded annotations and seriesuids from CSV files.

    The seriesuids are taken from the first column of the seriesuid file.
    Returns ``(allNodules, seriesUIDs)``, where ``allNodules`` is the
    result of ``collectNoduleAnnotations``.
    """
    rows_by_source = [
        csvTools.readCSV(annotations_filename),
        csvTools.readCSV(annotations_excluded_filename),
        csvTools.readCSV(seriesuids_filename),
    ]
    annotation_rows, excluded_rows, uid_rows = rows_by_source

    uid_list = []
    uid_list.extend(entry[0] for entry in uid_rows)

    all_nodules = collectNoduleAnnotations(annotation_rows, excluded_rows, uid_list)
    return all_nodules, uid_list
def evaluateCAD(seriesUIDs,
                results_filename,
                outputDir,
                allNodules,
                CADSystemName,
                maxNumberOfCADMarks=-1,
                performBootstrapping=False,
                numberOfBootstrapSamples=1000,
                confidence=0.95):
    '''
    function to evaluate a CAD algorithm

    Matches the CAD marks of the results file against the nodule annotations,
    writes a text summary and the FROC data into outputDir, prints the average
    sensitivity at 1/8, 1/4, 1/2, 1, 2, 4 and 8 false positives per scan and,
    when at least one included nodule exists, saves the FROC plot as PNG.

    @param seriesUIDs: list of the seriesUIDs of the cases to be processed
    @param results_filename: file with results
    @param outputDir: output directory
    @param allNodules: dictionary with all nodule annotations of all cases, keys of the dictionary are the seriesuids
    @param CADSystemName: name of the CAD system, to be used in filenames and on FROC curve
    @param maxNumberOfCADMarks: when > 0, upper bound on the number of CAD marks kept per case
    @param performBootstrapping: when True, bootstrapped FROC curves are computed, written and plotted as well
    @param numberOfBootstrapSamples: forwarded to computeFROC_bootstrap
    @param confidence: forwarded to computeFROC_bootstrap
    @return: tuple (fps, sens, thresholds, fps_bs_itp, sens_bs_mean, sens_bs_lb, sens_bs_up);
             the last four entries are None when performBootstrapping is False
    '''

    analysisPath = os.path.join(outputDir, 'CADAnalysis.txt')

    # Write the report header right away. The file is re-opened in append mode
    # for the summary further down, so no handle stays open in between.
    with open(analysisPath, 'w') as nodOutputfile:
        nodOutputfile.write("\n")
        nodOutputfile.write((60 * "*") + "\n")
        nodOutputfile.write("CAD Analysis: %s\n" % CADSystemName)
        nodOutputfile.write((60 * "*") + "\n")
        nodOutputfile.write("\n")

    results = csvTools.readCSV(results_filename)
    header = results[0]
    resultRows = results[1:]

    # Bucket the result rows per case in a single pass over the file instead
    # of rescanning every row (and re-resolving the column) for every case.
    # Rows of cases that are not in seriesUIDs are dropped.
    rowsPerSeries = dict((seriesuid, []) for seriesuid in seriesUIDs)
    if resultRows:
        seriesuidColumn = header.index(seriesuid_label)
        for result in resultRows:
            bucket = rowsPerSeries.get(result[seriesuidColumn])
            if bucket is not None:
                bucket.append(result)

    allCandsCAD = {}

    for seriesuid in seriesUIDs:

        # collect candidates from result file; candidate IDs number the marks
        # of one case in file order
        nodules = {}
        for i, result in enumerate(rowsPerSeries[seriesuid]):
            nodule = getNodule(result, header)
            nodule.candidateID = i
            nodules[nodule.candidateID] = nodule

        if maxNumberOfCADMarks > 0 and len(nodules) > maxNumberOfCADMarks:
            # number of CAD marks, only keep most suspicious marks: the
            # threshold is the probability of the first mark beyond the limit
            # and only marks strictly above it are kept, so ties with the
            # threshold can leave fewer than maxNumberOfCADMarks marks.
            probs = sorted((float(noduletemp.CADprobability)
                            for noduletemp in nodules.values()),
                           reverse=True)  # sort from large to small
            probThreshold = probs[maxNumberOfCADMarks]
            nodules2 = {}
            for keytemp, noduletemp in nodules.items():
                if len(nodules2) >= maxNumberOfCADMarks:
                    break
                if float(noduletemp.CADprobability) > probThreshold:
                    nodules2[keytemp] = noduletemp

            nodules = nodules2

        allCandsCAD[seriesuid] = nodules

    # --- iterate over all cases (seriesUIDs) and determine how
    # often a nodule annotation is not covered by a candidate

    # initialize some variables to be used in the loop
    candTPs = 0
    candFPs = 0
    candFNs = 0
    candTNs = 0  # never incremented below, always reported as 0
    totalNumberOfCands = 0
    totalNumberOfNodules = 0
    doubleCandidatesIgnored = 0
    irrelevantCandidates = 0
    minProbValue = -1000000000.0  # probability assigned to missed nodules
    FROCGTList = []
    FROCProbList = []
    FPDivisorList = []
    excludeList = []
    FROCtoNoduleMap = []
    ignoredCADMarksList = []

    nodNoCandPath = os.path.join(
        outputDir, "nodulesWithoutCandidate_%s.txt" % CADSystemName)

    with open(nodNoCandPath, 'w') as nodNoCandFile:
        # -- loop over the cases
        for seriesuid in seriesUIDs:
            # get the candidates for this case
            candidates = allCandsCAD.get(seriesuid, {})

            # add to the total number of candidates
            totalNumberOfCands += len(candidates)

            # make a copy in which items will be deleted; whatever is left
            # after all annotations were handled counts as false positive
            candidates2 = candidates.copy()

            # get the nodule annotations on this case
            try:
                noduleAnnots = allNodules[seriesuid]
            except KeyError:
                noduleAnnots = []

            # - loop over the nodule annotations
            for noduleAnnot in noduleAnnots:
                isIncluded = noduleAnnot.state == "Included"

                # increment the number of nodules
                if isIncluded:
                    totalNumberOfNodules += 1

                x = float(noduleAnnot.coordX)
                y = float(noduleAnnot.coordY)
                z = float(noduleAnnot.coordZ)

                # 2. Check if the nodule annotation is covered by a candidate
                # A nodule is marked as detected when the center of mass of the candidate is within a distance R of
                # the center of the nodule. In order to ensure that the CAD mark is displayed within the nodule on the
                # CT scan, we set R to be the radius of the nodule size.
                diameter = float(noduleAnnot.diameter_mm)
                if diameter < 0.0:
                    diameter = 10.0  # fallback for negative diameters
                radiusSquared = pow((diameter / 2.0), 2.0)

                noduleMatches = []
                for key, candidate in candidates.items():
                    x2 = float(candidate.coordX)
                    y2 = float(candidate.coordY)
                    z2 = float(candidate.coordZ)
                    # squared distance, compared against the squared radius
                    dist = (math.pow(x - x2, 2.) + math.pow(y - y2, 2.) +
                            math.pow(z - z2, 2.))
                    if dist >= radiusSquared:
                        continue

                    if isIncluded:
                        noduleMatches.append(candidate)
                        if key not in candidates2:
                            print(
                                "This is strange: CAD mark %s detected two nodules! Check for overlapping nodule annotations, SeriesUID: %s, nodule Annot ID: %s"
                                % (str(candidate.id), seriesuid,
                                   str(noduleAnnot.id)))
                        else:
                            del candidates2[key]
                    elif (noduleAnnot.state == "Excluded"
                          and bOtherNodulesAsIrrelevant
                          and key in candidates2):
                        # delete marks on excluded nodules so they don't count as false positives
                        irrelevantCandidates += 1
                        ignoredCADMarksList.append(
                            "%s,%s,%s,%s,%s,%s,%.9f" %
                            (seriesuid, -1, candidate.coordX,
                             candidate.coordY, candidate.coordZ,
                             str(candidate.id),
                             float(candidate.CADprobability)))
                        del candidates2[key]

                if len(noduleMatches) > 1:  # double detection
                    doubleCandidatesIgnored += (len(noduleMatches) - 1)

                if not isIncluded:
                    # only include it for FROC analysis if it is included
                    # otherwise, the candidate will not be counted as FP, but ignored in the
                    # analysis since it has been deleted from the nodules2 vector of candidates
                    continue

                if noduleMatches:
                    # append the sample with the highest probability for the
                    # FROC analysis; remember which candidate it was so the
                    # map entry describes that candidate and not merely the
                    # last one that matched
                    bestCandidate = None
                    maxProb = None
                    for match in noduleMatches:
                        prob = float(match.CADprobability)
                        if maxProb is None or prob > maxProb:
                            maxProb = prob
                            bestCandidate = match

                    FROCGTList.append(1.0)
                    FROCProbList.append(float(maxProb))
                    FPDivisorList.append(seriesuid)
                    excludeList.append(False)
                    FROCtoNoduleMap.append(
                        "%s,%s,%s,%s,%s,%.9f,%s,%.9f" %
                        (seriesuid, noduleAnnot.id, noduleAnnot.coordX,
                         noduleAnnot.coordY, noduleAnnot.coordZ,
                         float(noduleAnnot.diameter_mm),
                         str(bestCandidate.id),
                         float(bestCandidate.CADprobability)))
                    candTPs += 1
                else:
                    candFNs += 1
                    # append a positive sample with the lowest probability, such that this is added in the FROC analysis
                    FROCGTList.append(1.0)
                    FROCProbList.append(minProbValue)
                    FPDivisorList.append(seriesuid)
                    excludeList.append(True)
                    FROCtoNoduleMap.append(
                        "%s,%s,%s,%s,%s,%.9f,%s,%s" %
                        (seriesuid, noduleAnnot.id, noduleAnnot.coordX,
                         noduleAnnot.coordY, noduleAnnot.coordZ,
                         float(noduleAnnot.diameter_mm), -1, "NA"))
                    nodNoCandFile.write(
                        "%s,%s,%s,%s,%s,%.9f,%s\n" %
                        (seriesuid, noduleAnnot.id, noduleAnnot.coordX,
                         noduleAnnot.coordY, noduleAnnot.coordZ,
                         float(noduleAnnot.diameter_mm), str(-1)))

            # add all false positives to the vectors
            for candidate3 in candidates2.values():
                candFPs += 1
                FROCGTList.append(0.0)
                FROCProbList.append(float(candidate3.CADprobability))
                FPDivisorList.append(seriesuid)
                excludeList.append(False)
                FROCtoNoduleMap.append(
                    "%s,%s,%s,%s,%s,%s,%.9f" %
                    (seriesuid, -1, candidate3.coordX, candidate3.coordY,
                     candidate3.coordZ, str(candidate3.id),
                     float(candidate3.CADprobability)))

    with open(analysisPath, 'a') as nodOutputfile:
        # NOTE: despite the wording of the message the evaluation continues
        if not (len(FROCGTList) == len(FROCProbList)
                and len(FROCGTList) == len(FPDivisorList)
                and len(FROCGTList) == len(FROCtoNoduleMap)
                and len(FROCGTList) == len(excludeList)):
            nodOutputfile.write(
                "Length of FROC vectors not the same, this should never happen! Aborting..\n"
            )

        nodOutputfile.write("Candidate detection results:\n")
        nodOutputfile.write("    True positives: %d\n" % candTPs)
        nodOutputfile.write("    False positives: %d\n" % candFPs)
        nodOutputfile.write("    False negatives: %d\n" % candFNs)
        nodOutputfile.write("    True negatives: %d\n" % candTNs)
        nodOutputfile.write("    Total number of candidates: %d\n" %
                            totalNumberOfCands)
        nodOutputfile.write("    Total number of nodules: %d\n" %
                            totalNumberOfNodules)

        nodOutputfile.write(
            "    Ignored candidates on excluded nodules: %d\n" %
            irrelevantCandidates)
        nodOutputfile.write(
            "    Ignored candidates which were double detections on a nodule: %d\n"
            % doubleCandidatesIgnored)
        if int(totalNumberOfNodules) == 0:
            nodOutputfile.write("    Sensitivity: 0.0\n")
        else:
            nodOutputfile.write(
                "    Sensitivity: %.9f\n" %
                (float(candTPs) / float(totalNumberOfNodules)))
        nodOutputfile.write(
            "    Average number of candidates per scan: %.9f\n" %
            (float(totalNumberOfCands) / float(len(seriesUIDs))))

    # compute FROC
    fps, sens, thresholds = computeFROC(FROCGTList, FROCProbList,
                                        len(seriesUIDs), excludeList)

    if performBootstrapping:
        fps_bs_itp, sens_bs_mean, sens_bs_lb, sens_bs_up = computeFROC_bootstrap(
            FROCGTList,
            FROCProbList,
            FPDivisorList,
            seriesUIDs,
            excludeList,
            numberOfBootstrapSamples=numberOfBootstrapSamples,
            confidence=confidence)

    # Write FROC curve
    with open(os.path.join(outputDir, "froc_%s.txt" % CADSystemName),
              'w') as f:
        for i in range(len(sens)):
            f.write("%.9f,%.9f,%.9f\n" % (fps[i], sens[i], thresholds[i]))

    # Write FROC vectors to disk as well
    with open(
            os.path.join(outputDir,
                         "froc_gt_prob_vectors_%s.csv" % CADSystemName),
            'w') as f:
        for i in range(len(FROCGTList)):
            f.write("%d,%.9f\n" % (FROCGTList[i], FROCProbList[i]))

    # resampled curve, used for plotting
    fps_itp = np.linspace(FROC_minX, FROC_maxX, num=10001)
    sens_itp = np.interp(fps_itp, fps, sens)

    # Average sensitivity at the seven operating points. Interpolating the
    # FROC curve directly at these false-positive rates does not depend on
    # one of the resampled grid nodes happening to lie (almost) exactly on
    # an operating point.
    operatingPoints = [0.125, 0.25, 0.5, 1, 2, 4, 8]
    frvvlu = float(np.sum(np.interp(operatingPoints, fps, sens)))
    print(frvvlu / len(operatingPoints))  # average sensitivity
    print(frvvlu)  # sum of the seven sensitivities

    if performBootstrapping:
        # Write mean, lower, and upper bound curves to disk
        with open(
                os.path.join(outputDir,
                             "froc_%s_bootstrapping.csv" % CADSystemName),
                'w') as f:
            f.write(
                "FPrate,Sensivity[Mean],Sensivity[Lower bound],Sensivity[Upper bound]\n"
            )
            for i in range(len(fps_bs_itp)):
                f.write("%.9f,%.9f,%.9f,%.9f\n" %
                        (fps_bs_itp[i], sens_bs_mean[i], sens_bs_lb[i],
                         sens_bs_up[i]))
    else:
        fps_bs_itp = None
        sens_bs_mean = None
        sens_bs_lb = None
        sens_bs_up = None

    # create FROC graphs
    if int(totalNumberOfNodules) > 0:
        plt.figure()
        ax = plt.gca()
        clr = 'b'
        plt.plot(fps_itp,
                 sens_itp,
                 color=clr,
                 label="%s" % CADSystemName,
                 lw=2)
        if performBootstrapping:
            plt.plot(fps_bs_itp, sens_bs_mean, color=clr, ls='--')
            plt.plot(fps_bs_itp, sens_bs_lb, color=clr,
                     ls=':')  # , label = "lb")
            plt.plot(fps_bs_itp, sens_bs_up, color=clr,
                     ls=':')  # , label = "ub")
            ax.fill_between(fps_bs_itp,
                            sens_bs_lb,
                            sens_bs_up,
                            facecolor=clr,
                            alpha=0.05)
        plt.xlim(FROC_minX, FROC_maxX)
        plt.ylim(0.5, 1)
        plt.xlabel('Average number of false positives per scan')
        plt.ylabel('Sensitivity')
        plt.legend(loc='lower right')
        plt.title('FROC performance - %s' % (CADSystemName))

        if bLogPlot:
            plt.xscale('log', basex=2)
            ax.xaxis.set_major_formatter(
                FixedFormatter([0.125, 0.25, 0.5, 1, 2, 4, 8]))

        # set your ticks manually
        ax.xaxis.set_ticks([0.125, 0.25, 0.5, 1, 2, 4, 8])
        ax.yaxis.set_ticks(np.arange(0.5, 1, 0.1))
        plt.grid(b=True, which='both')
        plt.tight_layout()

        plt.savefig(os.path.join(outputDir, "froc_%s.png" % CADSystemName),
                    bbox_inches=0,
                    dpi=300)

    return (fps, sens, thresholds, fps_bs_itp, sens_bs_mean, sens_bs_lb,
            sens_bs_up)
def evaluateCAD(seriesUIDs,
                results_filename,
                outputDir,
                allNodules,
                CADSystemName,
                maxNumberOfCADMarks=-1,
                performBootstrapping=False,
                numberOfBootstrapSamples=1000,
                confidence=0.95):
    '''
    function to evaluate a CAD algorithm

    Matches the CAD marks of the results file against the ground-truth nodule
    annotations, writes a text summary and the FROC data into outputDir,
    prints the sensitivity at 1/8, 1/4, 1/2, 1, 2, 4 and 8 false positives per
    scan together with their average and, when at least one included nodule
    exists, saves the FROC plot as PNG.

    @param seriesUIDs: list with the names (seriesuids) of all test-set CT scans
    @param results_filename: the submitted result file, *.csv
    @param outputDir: path of the folder that receives the FROC results
    @param allNodules: dictionary with all ground-truth nodules, indexed by scan name
    @param CADSystemName: name of the system, used in the output file names and on the plot
    @param maxNumberOfCADMarks: when > 0, upper bound on the number of marks kept per CT scan
    @param performBootstrapping: when True, bootstrapped FROC curves are computed, written and plotted as well
    @param numberOfBootstrapSamples: forwarded to computeFROC_bootstrap
    @param confidence: forwarded to computeFROC_bootstrap
    @return: tuple (fps, sens, thresholds, fps_bs_itp, sens_bs_mean, sens_bs_lb, sens_bs_up);
             the last four entries are None when performBootstrapping is False
    '''

    analysisPath = os.path.join(outputDir, 'CADAnalysis.txt')

    # Write the report header right away. The file is re-opened in append mode
    # for the summary further down, so no handle stays open in between.
    with open(analysisPath, 'w') as nodOutputfile:
        nodOutputfile.write("\n")
        nodOutputfile.write((60 * "*") + "\n")
        nodOutputfile.write("CAD Analysis: %s\n" % CADSystemName)
        nodOutputfile.write((60 * "*") + "\n")
        nodOutputfile.write("\n")

    results = csvTools.readCSV(results_filename)  # rows of the submitted csv file
    header = results[0]  # first row of the csv file, the column names
    resultRows = results[1:]

    # Bucket the result rows per scan in a single pass over the file instead
    # of rescanning every row (and re-resolving the column) for every scan.
    # Rows of scans that are not in seriesUIDs are dropped.
    rowsPerSeries = dict((seriesuid, []) for seriesuid in seriesUIDs)
    if resultRows:
        seriesuidColumn = header.index(seriesuid_label)
        for result in resultRows:
            bucket = rowsPerSeries.get(result[seriesuidColumn])
            if bucket is not None:
                bucket.append(result)

    allCandsCAD = {}

    for seriesuid in seriesUIDs:  # for every test scan

        # collect candidates from result file; candidate IDs number the marks
        # of one scan in file order
        nodules = {}
        for i, result in enumerate(rowsPerSeries[seriesuid]):
            nodule = getNodule(result, header)
            nodule.candidateID = i
            nodules[nodule.candidateID] = nodule

        if maxNumberOfCADMarks > 0 and len(nodules) > maxNumberOfCADMarks:
            # A scan with more marks than allowed only keeps the highest
            # scoring ones: the threshold is the probability of the first
            # mark beyond the limit and only marks strictly above it are
            # kept, so ties with the threshold can leave fewer than
            # maxNumberOfCADMarks marks.
            probs = sorted((float(noduletemp.CADprobability)
                            for noduletemp in nodules.values()),
                           reverse=True)  # sort from large to small
            probThreshold = probs[maxNumberOfCADMarks]
            nodules2 = {}
            for keytemp, noduletemp in nodules.items():
                if len(nodules2) >= maxNumberOfCADMarks:
                    break
                if float(noduletemp.CADprobability) > probThreshold:
                    nodules2[keytemp] = noduletemp

            nodules = nodules2

        print('adding candidates: ' + seriesuid)
        allCandsCAD[seriesuid] = nodules  # candidate dictionary per scan name

    # --- iterate over all cases (seriesUIDs) and determine how
    # often a nodule annotation is not covered by a candidate

    # initialize some variables to be used in the loop
    candTPs = 0
    candFPs = 0
    candFNs = 0
    candTNs = 0  # never incremented below, always reported as 0
    totalNumberOfCands = 0
    totalNumberOfNodules = 0
    doubleCandidatesIgnored = 0
    irrelevantCandidates = 0
    minProbValue = -1000000000.0  # probability assigned to missed nodules
    FROCGTList = []
    FROCProbList = []
    FPDivisorList = []
    excludeList = []
    FROCtoNoduleMap = []
    ignoredCADMarksList = []

    nodNoCandPath = os.path.join(
        outputDir, "nodulesWithoutCandidate_%s.txt" % CADSystemName)

    with open(nodNoCandPath, 'w') as nodNoCandFile:
        # -- loop over the cases
        for seriesuid in seriesUIDs:  # for every CT scan
            # get the candidates (predicted marks) for this case
            candidates = allCandsCAD.get(seriesuid, {})

            # add to the total number of predicted marks
            totalNumberOfCands += len(candidates)

            # make a copy in which items will be deleted; whatever is left
            # after all annotations were handled counts as false positive
            candidates2 = candidates.copy()

            # get the ground-truth nodule annotations on this case
            try:
                noduleAnnots = allNodules[seriesuid]
            except KeyError:
                noduleAnnots = []

            # - loop over the nodule annotations
            for noduleAnnot in noduleAnnots:
                # only "Included" annotations take part in the scoring
                isIncluded = noduleAnnot.state == "Included"

                # increment the number of ground-truth nodules
                if isIncluded:
                    totalNumberOfNodules += 1

                # ground-truth coordinates
                x = float(noduleAnnot.coordX)
                y = float(noduleAnnot.coordY)
                z = float(noduleAnnot.coordZ)

                # 2. Check if the nodule annotation is covered by a candidate
                # A nodule is marked as detected when the center of mass of the candidate is within a distance R of
                # the center of the nodule. In order to ensure that the CAD mark is displayed within the nodule on the
                # CT scan, we set R to be the radius of the nodule size.
                diameter = float(noduleAnnot.diameter_mm)
                if diameter < 0.0:
                    diameter = 5  # fallback for negative diameters
                radiusSquared = pow((diameter / 2.0), 2.0)

                noduleMatches = []
                for key, candidate in candidates.items():
                    # predicted coordinates
                    x2 = float(candidate.coordX)
                    y2 = float(candidate.coordY)
                    z2 = float(candidate.coordZ)
                    # squared distance between prediction and ground truth,
                    # compared against the squared radius
                    dist = (math.pow(x - x2, 2.) + math.pow(y - y2, 2.) +
                            math.pow(z - z2, 2.))
                    if dist >= radiusSquared:
                        continue

                    if isIncluded:
                        noduleMatches.append(candidate)
                        if key not in candidates2:
                            print(
                                "This is strange: CAD mark %s detected two nodules! Check for overlapping nodule annotations, SeriesUID: %s, nodule Annot ID: %s"
                                % (str(candidate.id), seriesuid,
                                   str(noduleAnnot.id)))
                        else:
                            # the mark hit a nodule, so it is no false positive
                            del candidates2[key]
                    elif (noduleAnnot.state == "Excluded"
                          and bOtherNodulesAsIrrelevant
                          and key in candidates2):
                        # delete marks on excluded nodules so they don't count as false positives
                        irrelevantCandidates += 1
                        ignoredCADMarksList.append(
                            "%s,%s,%s,%s,%s,%s,%.9f" %
                            (seriesuid, -1, candidate.coordX,
                             candidate.coordY, candidate.coordZ,
                             str(candidate.id),
                             float(candidate.CADprobability)))
                        del candidates2[key]

                # several marks on the same nodule: all but one are surplus
                # and only counted here
                if len(noduleMatches) > 1:
                    doubleCandidatesIgnored += (len(noduleMatches) - 1)

                if not isIncluded:
                    # only include it for FROC analysis if it is included
                    # otherwise, the candidate will not be counted as FP, but ignored in the
                    # analysis since it has been deleted from the nodules2 vector of candidates
                    continue

                if noduleMatches:
                    # append the sample with the highest probability for the
                    # FROC analysis; remember which candidate it was so the
                    # map entry describes that candidate and not merely the
                    # last one that matched
                    bestCandidate = None
                    maxProb = None
                    for match in noduleMatches:
                        prob = float(match.CADprobability)
                        if maxProb is None or prob > maxProb:
                            maxProb = prob
                            bestCandidate = match

                    FROCGTList.append(1.0)
                    FROCProbList.append(float(maxProb))
                    FPDivisorList.append(seriesuid)
                    excludeList.append(False)
                    FROCtoNoduleMap.append(
                        "%s,%s,%s,%s,%s,%.9f,%s,%.9f" %
                        (seriesuid, noduleAnnot.id, noduleAnnot.coordX,
                         noduleAnnot.coordY, noduleAnnot.coordZ,
                         float(noduleAnnot.diameter_mm),
                         str(bestCandidate.id),
                         float(bestCandidate.CADprobability)))
                    candTPs += 1
                else:
                    candFNs += 1
                    # append a positive sample with the lowest probability, such that this is added in the FROC analysis
                    FROCGTList.append(1.0)
                    FROCProbList.append(minProbValue)
                    FPDivisorList.append(seriesuid)
                    excludeList.append(True)
                    FROCtoNoduleMap.append(
                        "%s,%s,%s,%s,%s,%.9f,%s,%s" %
                        (seriesuid, noduleAnnot.id, noduleAnnot.coordX,
                         noduleAnnot.coordY, noduleAnnot.coordZ,
                         float(noduleAnnot.diameter_mm), -1, "NA"))
                    nodNoCandFile.write(
                        "%s,%s,%s,%s,%s,%.9f,%s\n" %
                        (seriesuid, noduleAnnot.id, noduleAnnot.coordX,
                         noduleAnnot.coordY, noduleAnnot.coordZ,
                         float(noduleAnnot.diameter_mm), str(-1)))

            # add all false positives to the vectors
            for candidate3 in candidates2.values():
                candFPs += 1
                FROCGTList.append(0.0)
                FROCProbList.append(float(candidate3.CADprobability))
                FPDivisorList.append(seriesuid)
                excludeList.append(False)
                FROCtoNoduleMap.append(
                    "%s,%s,%s,%s,%s,%s,%.9f" %
                    (seriesuid, -1, candidate3.coordX, candidate3.coordY,
                     candidate3.coordZ, str(candidate3.id),
                     float(candidate3.CADprobability)))

    with open(analysisPath, 'a') as nodOutputfile:
        # NOTE: despite the wording of the message the evaluation continues
        if not (len(FROCGTList) == len(FROCProbList)
                and len(FROCGTList) == len(FPDivisorList)
                and len(FROCGTList) == len(FROCtoNoduleMap)
                and len(FROCGTList) == len(excludeList)):
            nodOutputfile.write(
                "Length of FROC vectors not the same, this should never happen! Aborting..\n"
            )

        nodOutputfile.write("Candidate detection results:\n")
        nodOutputfile.write("    True positives: %d\n" % candTPs)
        nodOutputfile.write("    False positives: %d\n" % candFPs)
        nodOutputfile.write("    False negatives: %d\n" % candFNs)
        nodOutputfile.write("    True negatives: %d\n" % candTNs)
        nodOutputfile.write("    Total number of candidates: %d\n" %
                            totalNumberOfCands)
        nodOutputfile.write("    Total number of nodules: %d\n" %
                            totalNumberOfNodules)

        nodOutputfile.write(
            "    Ignored candidates on excluded nodules: %d\n" %
            irrelevantCandidates)
        nodOutputfile.write(
            "    Ignored candidates which were double detections on a nodule: %d\n"
            % doubleCandidatesIgnored)
        if int(totalNumberOfNodules) == 0:
            nodOutputfile.write("    Sensitivity: 0.0\n")
        else:
            nodOutputfile.write(
                "    Sensitivity: %.9f\n" %
                (float(candTPs) / float(totalNumberOfNodules)))
        nodOutputfile.write(
            "    Average number of candidates per scan: %.9f\n" %
            (float(totalNumberOfCands) / float(len(seriesUIDs))))

    # compute FROC
    fps, sens, thresholds = computeFROC(FROCGTList, FROCProbList,
                                        len(seriesUIDs), excludeList)

    if performBootstrapping:
        fps_bs_itp, sens_bs_mean, sens_bs_lb, sens_bs_up = computeFROC_bootstrap(
            FROCGTList,
            FROCProbList,
            FPDivisorList,
            seriesUIDs,
            excludeList,
            numberOfBootstrapSamples=numberOfBootstrapSamples,
            confidence=confidence)

    # Write FROC curve
    with open(os.path.join(outputDir, "froc_%s.txt" % CADSystemName),
              'w') as f:
        for i in range(len(sens)):
            f.write("%.9f,%.9f,%.9f\n" % (fps[i], sens[i], thresholds[i]))

    # Write FROC vectors to disk as well
    with open(
            os.path.join(outputDir,
                         "froc_gt_prob_vectors_%s.csv" % CADSystemName),
            'w') as f:
        for i in range(len(FROCGTList)):
            f.write("%d,%.9f\n" % (FROCGTList[i], FROCProbList[i]))

    # resampled curve, used for plotting
    fps_itp = np.linspace(FROC_minX, FROC_maxX, num=10001)
    sens_itp = np.interp(fps_itp, fps, sens)

    # Sensitivity at the seven operating points and their average.
    # Interpolating the FROC curve directly at these false-positive rates
    # also covers operating points that coincide with a node of the resampled
    # grid (e.g. its first or last node), which a strict "between two
    # neighbouring nodes" search skips while still dividing by seven.
    operatingPoints = [0.125, 0.25, 0.5, 1, 2, 4, 8]
    sensAtOperatingPoints = np.interp(operatingPoints, fps, sens)
    sum_sensitivity = 0
    for point, pointSensitivity in zip(operatingPoints,
                                       sensAtOperatingPoints):
        print("%s: %s" % (point, pointSensitivity))
        sum_sensitivity += pointSensitivity
    ave_sensitivity = sum_sensitivity / 7.0
    # %f instead of %d: the score lies in [0, 1] and %d would truncate it
    print("final score is %f" % ave_sensitivity)

    if performBootstrapping:
        # Write mean, lower, and upper bound curves to disk
        with open(
                os.path.join(outputDir,
                             "froc_%s_bootstrapping.csv" % CADSystemName),
                'w') as f:
            f.write(
                "FPrate,Sensivity[Mean],Sensivity[Lower bound],Sensivity[Upper bound]\n"
            )
            for i in range(len(fps_bs_itp)):
                f.write("%.9f,%.9f,%.9f,%.9f\n" %
                        (fps_bs_itp[i], sens_bs_mean[i], sens_bs_lb[i],
                         sens_bs_up[i]))
    else:
        fps_bs_itp = None
        sens_bs_mean = None
        sens_bs_lb = None
        sens_bs_up = None

    # create FROC graphs
    if int(totalNumberOfNodules) > 0:
        plt.figure()
        ax = plt.gca()
        clr = 'b'
        plt.plot(fps_itp,
                 sens_itp,
                 color=clr,
                 label="%s" % CADSystemName,
                 lw=2)
        if performBootstrapping:
            plt.plot(fps_bs_itp, sens_bs_mean, color=clr, ls='--')
            plt.plot(fps_bs_itp, sens_bs_lb, color=clr,
                     ls=':')  # , label = "lb")
            plt.plot(fps_bs_itp, sens_bs_up, color=clr,
                     ls=':')  # , label = "ub")
            ax.fill_between(fps_bs_itp,
                            sens_bs_lb,
                            sens_bs_up,
                            facecolor=clr,
                            alpha=0.05)
        plt.xlim(FROC_minX, FROC_maxX)
        plt.ylim(0, 1)
        plt.xlabel('Average number of false positives per scan')
        plt.ylabel('Sensitivity')
        plt.legend(loc='lower right')
        plt.title('FROC performance - %s' % (CADSystemName))

        if bLogPlot:
            plt.xscale('log', basex=2)
            ax.xaxis.set_major_formatter(
                FixedFormatter([0.125, 0.25, 0.5, 1, 2, 4, 8]))

        # set your ticks manually
        ax.xaxis.set_ticks([0.125, 0.25, 0.5, 1, 2, 4, 8])
        ax.yaxis.set_ticks(np.arange(0, 1.1, 0.1))
        plt.grid(b=True, which='both')
        plt.tight_layout()

        plt.savefig(os.path.join(outputDir, "froc_%s.png" % CADSystemName),
                    bbox_inches=0,
                    dpi=300)

    return (fps, sens, thresholds, fps_bs_itp, sens_bs_mean, sens_bs_lb,
            sens_bs_up)
Esempio n. 7
0
def evaluateCAD(seriesUIDs,
                results_filename,
                outputDir,
                allNodules,
                CADSystemName,
                maxNumberOfCADMarks=-1,
                performBootstrapping=False,
                numberOfBootstrapSamples=1000,
                confidence=0.95):
    '''
    Evaluate a CAD algorithm against the nodule annotations.

    The CAD marks are read from the results file, matched per case against
    the annotations, and the resulting hit/miss vectors are passed to
    computeFROC. The following files are written to outputDir:
    CADAnalysis.txt, nodulesWithoutCandidate_<name>.txt, froc_<name>.txt,
    froc_gt_prob_vectors_<name>.csv, froc_<name>_bootstrapping.csv (only with
    bootstrapping) and froc_<name>.png (only if there are included nodules).

    @param seriesUIDs: list of the seriesUIDs of the cases to be processed
    @param results_filename: file with results
    @param outputDir: output directory
    @param allNodules: dictionary with all nodule annotations of all cases,
                       keys of the dictionary are the seriesuids
    @param CADSystemName: name of the CAD system, to be used in filenames
                          and on the FROC curve
    @param maxNumberOfCADMarks: when positive, a case with more marks than
                                this keeps only the marks whose probability
                                is strictly greater than that of the mark
                                ranked just past the limit
    @param performBootstrapping: when true, computeFROC_bootstrap is run as
                                 well and its curves are written and plotted
    @param numberOfBootstrapSamples: forwarded to computeFROC_bootstrap
    @param confidence: forwarded to computeFROC_bootstrap
    @return: tuple (fps, sens, thresholds, fps_bs_itp, sens_bs_mean,
             sens_bs_lb, sens_bs_up); the last four entries are None when
             performBootstrapping is false
    '''

    # initialize the counters and FROC vectors filled in while matching
    candTPs = 0
    candFPs = 0
    candFNs = 0
    candTNs = 0
    totalNumberOfCands = 0
    totalNumberOfNodules = 0
    doubleCandidatesIgnored = 0
    irrelevantCandidates = 0
    minProbValue = -1000000000.0  # stand-in score for missed nodules
    FROCGTList = []
    FROCProbList = []
    FPDivisorList = []
    excludeList = []
    FROCtoNoduleMap = []
    ignoredCADMarksList = []

    # The with-blocks make sure both report files are flushed and closed,
    # also when an exception interrupts the evaluation.
    with open(os.path.join(outputDir, 'CADAnalysis.txt'),
              'w') as nodOutputfile:
        nodOutputfile.write("\n")
        nodOutputfile.write((60 * "*") + "\n")
        nodOutputfile.write("CAD Analysis: %s\n" % CADSystemName)
        nodOutputfile.write((60 * "*") + "\n")
        nodOutputfile.write("\n")

        results = csvTools.readCSV(results_filename)
        header = results[0]

        # Group the result rows per series in a single pass instead of
        # rescanning the complete results table for every case.
        resultsBySeries = {}
        resultRows = results[1:]
        if resultRows:
            seriesuidColumn = header.index(seriesuid_label)
            for result in resultRows:
                resultsBySeries.setdefault(result[seriesuidColumn],
                                           []).append(result)

        allCandsCAD = {}
        for seriesuid in seriesUIDs:
            # collect candidates from the result file; candidate IDs count up
            # per case in the order the rows appear
            nodules = {}
            for i, result in enumerate(resultsBySeries.get(seriesuid, [])):
                nodule = getNodule(result, header)
                nodule.candidateID = i
                nodules[nodule.candidateID] = nodule

            if maxNumberOfCADMarks > 0 and len(nodules) > maxNumberOfCADMarks:
                # too many CAD marks: only keep the most suspicious ones
                probs = [float(noduletemp.CADprobability)
                         for noduletemp in nodules.values()]
                probs.sort(reverse=True)  # sort from large to small
                probThreshold = probs[maxNumberOfCADMarks]
                nodules2 = {}
                for keytemp, noduletemp in nodules.items():
                    if len(nodules2) >= maxNumberOfCADMarks:
                        break
                    if float(noduletemp.CADprobability) > probThreshold:
                        nodules2[keytemp] = noduletemp
                nodules = nodules2

            print('adding candidates: ' + seriesuid)
            allCandsCAD[seriesuid] = nodules

        # --- iterate over all cases (seriesUIDs) and determine how often a
        # nodule annotation is not covered by a candidate
        with open(
                os.path.join(outputDir,
                             "nodulesWithoutCandidate_%s.txt" % CADSystemName),
                'w') as nodNoCandFile:
            for seriesuid in seriesUIDs:
                # get the candidates for this case
                candidates = allCandsCAD.get(seriesuid, {})

                # add to the total number of candidates
                totalNumberOfCands += len(candidates)

                # make a copy in which items will be deleted; whatever is
                # left after the annotation loop is a false positive
                candidates2 = candidates.copy()

                # get the nodule annotations on this case
                try:
                    noduleAnnots = allNodules[seriesuid]
                except KeyError:
                    noduleAnnots = []

                # - loop over the nodule annotations
                for noduleAnnot in noduleAnnots:
                    isIncluded = noduleAnnot.state == "Included"
                    if isIncluded:
                        totalNumberOfNodules += 1

                    x = float(noduleAnnot.coordX)
                    y = float(noduleAnnot.coordY)
                    z = float(noduleAnnot.coordZ)

                    # Check if the nodule annotation is covered by a
                    # candidate. A nodule is marked as detected when the
                    # center of mass of the candidate is within a distance R
                    # of the center of the nodule. In order to ensure that
                    # the CAD mark is displayed within the nodule on the CT
                    # scan, we set R to be the radius of the nodule size.
                    diameter = float(noduleAnnot.diameter_mm)
                    if diameter < 0.0:
                        diameter = 10.0  # fallback for negative diameters
                    radiusSquared = pow((diameter / 2.0), 2.0)

                    noduleMatches = []
                    for key, candidate in candidates.items():
                        x2 = float(candidate.coordX)
                        y2 = float(candidate.coordY)
                        z2 = float(candidate.coordZ)
                        dist = (math.pow(x - x2, 2.) + math.pow(y - y2, 2.) +
                                math.pow(z - z2, 2.))
                        if not dist < radiusSquared:
                            continue
                        if isIncluded:
                            noduleMatches.append(candidate)
                            if key in candidates2:
                                del candidates2[key]
                            else:
                                # already consumed by an earlier annotation
                                print(
                                    "This is strange: CAD mark %s detected two nodules! Check for overlapping nodule annotations, SeriesUID: %s, nodule Annot ID: %s"
                                    % (str(candidate.id), seriesuid,
                                       str(noduleAnnot.id)))
                        elif (noduleAnnot.state == "Excluded"
                              and bOtherNodulesAsIrrelevant
                              and key in candidates2):
                            # delete marks on excluded nodules so they don't
                            # count as false positives
                            irrelevantCandidates += 1
                            ignoredCADMarksList.append(
                                "%s,%s,%s,%s,%s,%s,%.9f" %
                                (seriesuid, -1, candidate.coordX,
                                 candidate.coordY, candidate.coordZ,
                                 str(candidate.id),
                                 float(candidate.CADprobability)))
                            del candidates2[key]

                    if len(noduleMatches) > 1:  # double detection
                        doubleCandidatesIgnored += (len(noduleMatches) - 1)

                    # Only included nodules enter the FROC analysis; marks on
                    # excluded nodules were already dropped from candidates2
                    # above, so they are neither hits nor false positives.
                    if not isIncluded:
                        continue

                    if noduleMatches:
                        # use the best-scoring matching mark for the FROC
                        # analysis; ties keep the first one encountered
                        bestCandidate = None
                        maxProb = None
                        for candidate in noduleMatches:
                            prob = float(candidate.CADprobability)
                            if (maxProb is None) or (prob > maxProb):
                                maxProb = prob
                                bestCandidate = candidate

                        FROCGTList.append(1.0)
                        FROCProbList.append(maxProb)
                        FPDivisorList.append(seriesuid)
                        excludeList.append(False)
                        # record the mark that supplied maxProb, not merely
                        # the last mark that happened to match
                        FROCtoNoduleMap.append(
                            "%s,%s,%s,%s,%s,%.9f,%s,%.9f" %
                            (seriesuid, noduleAnnot.id, noduleAnnot.coordX,
                             noduleAnnot.coordY, noduleAnnot.coordZ,
                             float(noduleAnnot.diameter_mm),
                             str(bestCandidate.id), maxProb))
                        candTPs += 1
                    else:
                        candFNs += 1
                        # append a positive sample with the lowest
                        # probability, such that the miss is part of the
                        # FROC vectors (flagged in excludeList)
                        FROCGTList.append(1.0)
                        FROCProbList.append(minProbValue)
                        FPDivisorList.append(seriesuid)
                        excludeList.append(True)
                        FROCtoNoduleMap.append(
                            "%s,%s,%s,%s,%s,%.9f,%s,%s" %
                            (seriesuid, noduleAnnot.id, noduleAnnot.coordX,
                             noduleAnnot.coordY, noduleAnnot.coordZ,
                             float(noduleAnnot.diameter_mm), int(-1), "NA"))
                        nodNoCandFile.write(
                            "%s,%s,%s,%s,%s,%.9f,%s\n" %
                            (seriesuid, noduleAnnot.id, noduleAnnot.coordX,
                             noduleAnnot.coordY, noduleAnnot.coordZ,
                             float(noduleAnnot.diameter_mm), str(-1)))

                # add all false positives to the vectors
                for key, candidate3 in candidates2.items():
                    candFPs += 1
                    FROCGTList.append(0.0)
                    FROCProbList.append(float(candidate3.CADprobability))
                    FPDivisorList.append(seriesuid)
                    excludeList.append(False)
                    FROCtoNoduleMap.append(
                        "%s,%s,%s,%s,%s,%s,%.9f" %
                        (seriesuid, -1, candidate3.coordX, candidate3.coordY,
                         candidate3.coordZ, str(candidate3.id),
                         float(candidate3.CADprobability)))

        # Sanity check on the parallel vectors. Despite the wording of the
        # message, it is only logged; processing continues afterwards.
        if not (len(FROCGTList) == len(FROCProbList)
                and len(FROCGTList) == len(FPDivisorList)
                and len(FROCGTList) == len(FROCtoNoduleMap)
                and len(FROCGTList) == len(excludeList)):
            nodOutputfile.write(
                "Length of FROC vectors not the same, this should never happen! Aborting..\n"
            )

        nodOutputfile.write("Candidate detection results:\n")
        nodOutputfile.write("    True positives: %d\n" % candTPs)
        nodOutputfile.write("    False positives: %d\n" % candFPs)
        nodOutputfile.write("    False negatives: %d\n" % candFNs)
        nodOutputfile.write("    True negatives: %d\n" % candTNs)
        nodOutputfile.write("    Total number of candidates: %d\n" %
                            totalNumberOfCands)
        nodOutputfile.write("    Total number of nodules: %d\n" %
                            totalNumberOfNodules)

        nodOutputfile.write(
            "    Ignored candidates on excluded nodules: %d\n" %
            irrelevantCandidates)
        nodOutputfile.write(
            "    Ignored candidates which were double detections on a nodule: %d\n"
            % doubleCandidatesIgnored)
        if int(totalNumberOfNodules) == 0:
            nodOutputfile.write("    Sensitivity: 0.0\n")
        else:
            nodOutputfile.write(
                "    Sensitivity: %.9f\n" %
                (float(candTPs) / float(totalNumberOfNodules)))
        nodOutputfile.write(
            "    Average number of candidates per scan: %.9f\n" %
            (float(totalNumberOfCands) / float(len(seriesUIDs))))

    # compute FROC
    fps, sens, thresholds = computeFROC(FROCGTList, FROCProbList,
                                        len(seriesUIDs), excludeList)

    if performBootstrapping:
        fps_bs_itp, sens_bs_mean, sens_bs_lb, sens_bs_up = computeFROC_bootstrap(
            FROCGTList,
            FROCProbList,
            FPDivisorList,
            seriesUIDs,
            excludeList,
            numberOfBootstrapSamples=numberOfBootstrapSamples,
            confidence=confidence)
    else:
        fps_bs_itp = None
        sens_bs_mean = None
        sens_bs_lb = None
        sens_bs_up = None

    # write the FROC curve
    with open(os.path.join(outputDir, "froc_%s.txt" % CADSystemName),
              'w') as f:
        for i in range(len(sens)):
            f.write("%.9f,%.9f,%.9f\n" % (fps[i], sens[i], thresholds[i]))

    # write the FROC vectors to disk as well
    with open(
            os.path.join(outputDir,
                         "froc_gt_prob_vectors_%s.csv" % CADSystemName),
            'w') as f:
        for i in range(len(FROCGTList)):
            f.write("%d,%.9f\n" % (FROCGTList[i], FROCProbList[i]))

    # resample the curve on a fixed grid of false-positive rates
    fps_itp = np.linspace(FROC_minX, FROC_maxX, num=10001)
    sens_itp = np.interp(fps_itp, fps, sens)

    if performBootstrapping:
        # write mean, lower, and upper bound curves to disk
        with open(
                os.path.join(outputDir,
                             "froc_%s_bootstrapping.csv" % CADSystemName),
                'w') as f:
            f.write(
                "FPrate,Sensivity[Mean],Sensivity[Lower bound],Sensivity[Upper bound]\n"
            )
            for i in range(len(fps_bs_itp)):
                f.write("%.9f,%.9f,%.9f,%.9f\n" %
                        (fps_bs_itp[i], sens_bs_mean[i], sens_bs_lb[i],
                         sens_bs_up[i]))

    # create the FROC graph
    if int(totalNumberOfNodules) > 0:
        plt.figure()
        ax = plt.gca()
        clr = 'b'
        plt.plot(fps_itp,
                 sens_itp,
                 color=clr,
                 label="%s" % CADSystemName,
                 lw=2)
        if performBootstrapping:
            plt.plot(fps_bs_itp, sens_bs_mean, color=clr, ls='--')
            plt.plot(fps_bs_itp, sens_bs_lb, color=clr, ls=':')
            plt.plot(fps_bs_itp, sens_bs_up, color=clr, ls=':')
            ax.fill_between(fps_bs_itp,
                            sens_bs_lb,
                            sens_bs_up,
                            facecolor=clr,
                            alpha=0.05)
        plt.xlim(FROC_minX, FROC_maxX)
        plt.ylim(0, 1)
        plt.xlabel('Average number of false positives per scan')
        plt.ylabel('Sensitivity')
        plt.legend(loc='lower right')
        plt.title('FROC performance - %s' % (CADSystemName))

        if bLogPlot:
            # NOTE(review): 'basex' here and 'b' in plt.grid below are the
            # keyword spellings of older matplotlib releases; confirm the
            # targeted matplotlib version before upgrading.
            plt.xscale('log', basex=2)
            ax.xaxis.set_major_formatter(
                FixedFormatter([0.125, 0.25, 0.5, 1, 2, 4, 8]))

        # set your ticks manually
        ax.xaxis.set_ticks([0.125, 0.25, 0.5, 1, 2, 4, 8])
        ax.yaxis.set_ticks(np.arange(0, 1.1, 0.1))
        plt.grid(b=True, which='both')
        plt.tight_layout()

        plt.savefig(os.path.join(outputDir, "froc_%s.png" % CADSystemName),
                    bbox_inches=0,
                    dpi=300)

    return (fps, sens, thresholds, fps_bs_itp, sens_bs_mean, sens_bs_lb,
            sens_bs_up)
def evaluateCAD(seriesUIDs, results_filename, outputDir, allNodules, CADSystemName, maxNumberOfCADMarks=-1,
                performBootstrapping=False, numberOfBootstrapSamples=1000, confidence=0.95):
    '''
    function to evaluate a CAD algorithm

    The CAD marks are read from the results file, matched per case against
    the annotations, and the resulting hit/miss vectors are passed to
    computeFROC. The following files are written to outputDir:
    CADAnalysis.txt, nodulesWithoutCandidate_<name>.txt, froc_<name>.txt,
    froc_gt_prob_vectors_<name>.csv, froc_<name>_bootstrapping.csv (only with
    bootstrapping) and froc_<name>.png (only if there are included nodules).

    @param seriesUIDs: list of the seriesUIDs of the cases to be processed
    @param results_filename: file with results
    @param outputDir: output directory
    @param allNodules: dictionary with all nodule annotations of all cases, keys of the dictionary are the seriesuids
    @param CADSystemName: name of the CAD system, to be used in filenames and on FROC curve
    @param maxNumberOfCADMarks: when positive, a case with more marks than this keeps only the marks whose
                                probability is strictly greater than that of the mark ranked just past the limit
    @param performBootstrapping: when true, computeFROC_bootstrap is run as well and its curves are written and plotted
    @param numberOfBootstrapSamples: forwarded to computeFROC_bootstrap
    @param confidence: forwarded to computeFROC_bootstrap
    @return: tuple (fps, sens, thresholds, fps_bs_itp, sens_bs_mean, sens_bs_lb, sens_bs_up);
             the last four entries are None when performBootstrapping is false
    '''

    # initialize the counters and FROC vectors filled in while matching
    candTPs = 0
    candFPs = 0
    candFNs = 0
    candTNs = 0
    totalNumberOfCands = 0
    totalNumberOfNodules = 0
    doubleCandidatesIgnored = 0
    irrelevantCandidates = 0
    minProbValue = -1000000000.0  # stand-in score for missed nodules
    FROCGTList = []
    FROCProbList = []
    FPDivisorList = []
    excludeList = []
    FROCtoNoduleMap = []
    ignoredCADMarksList = []

    # The with-blocks make sure both report files are flushed and closed,
    # also when an exception interrupts the evaluation.
    with open(os.path.join(outputDir, 'CADAnalysis.txt'), 'w') as nodOutputfile:
        nodOutputfile.write("\n")
        nodOutputfile.write((60 * "*") + "\n")
        nodOutputfile.write("CAD Analysis: %s\n" % CADSystemName)
        nodOutputfile.write((60 * "*") + "\n")
        nodOutputfile.write("\n")

        results = csvTools.readCSV(results_filename)
        header = results[0]

        # Group the result rows per series in a single pass instead of
        # rescanning the complete results table for every case.
        resultsBySeries = {}
        resultRows = results[1:]
        if resultRows:
            seriesuidColumn = header.index(seriesuid_label)
            for result in resultRows:
                resultsBySeries.setdefault(result[seriesuidColumn], []).append(result)

        allCandsCAD = {}
        for seriesuid in seriesUIDs:
            # collect candidates from result file; candidate IDs count up
            # per case in the order the rows appear
            nodules = {}
            for i, result in enumerate(resultsBySeries.get(seriesuid, [])):
                nodule = getNodule(result, header)
                nodule.candidateID = i
                nodules[nodule.candidateID] = nodule

            if maxNumberOfCADMarks > 0 and len(nodules) > maxNumberOfCADMarks:
                # too many CAD marks: only keep the most suspicious ones
                probs = [float(noduletemp.CADprobability) for noduletemp in nodules.values()]
                probs.sort(reverse=True)  # sort from large to small
                probThreshold = probs[maxNumberOfCADMarks]
                nodules2 = {}
                for keytemp, noduletemp in nodules.items():
                    if len(nodules2) >= maxNumberOfCADMarks:
                        break
                    if float(noduletemp.CADprobability) > probThreshold:
                        nodules2[keytemp] = noduletemp
                nodules = nodules2

            print('adding candidates: ' + seriesuid)
            allCandsCAD[seriesuid] = nodules

        # --- iterate over all cases (seriesUIDs) and determine how
        # often a nodule annotation is not covered by a candidate
        with open(os.path.join(outputDir, "nodulesWithoutCandidate_%s.txt" % CADSystemName), 'w') as nodNoCandFile:
            for seriesuid in seriesUIDs:
                # get the candidates for this case
                candidates = allCandsCAD.get(seriesuid, {})

                # add to the total number of candidates
                totalNumberOfCands += len(candidates)

                # make a copy in which items will be deleted; whatever is
                # left after the annotation loop is a false positive
                candidates2 = candidates.copy()

                # get the nodule annotations on this case
                try:
                    noduleAnnots = allNodules[seriesuid]
                except KeyError:
                    noduleAnnots = []

                # - loop over the nodule annotations
                for noduleAnnot in noduleAnnots:
                    isIncluded = noduleAnnot.state == "Included"
                    if isIncluded:
                        totalNumberOfNodules += 1

                    x = float(noduleAnnot.coordX)
                    y = float(noduleAnnot.coordY)
                    z = float(noduleAnnot.coordZ)

                    # Check if the nodule annotation is covered by a candidate.
                    # A nodule is marked as detected when the center of mass of the candidate is within a
                    # distance R of the center of the nodule. In order to ensure that the CAD mark is
                    # displayed within the nodule on the CT scan, we set R to be the radius of the nodule size.
                    diameter = float(noduleAnnot.diameter_mm)
                    if diameter < 0.0:
                        diameter = 10.0  # fallback for negative diameters
                    radiusSquared = pow((diameter / 2.0), 2.0)

                    noduleMatches = []
                    for key, candidate in candidates.items():
                        x2 = float(candidate.coordX)
                        y2 = float(candidate.coordY)
                        z2 = float(candidate.coordZ)
                        dist = math.pow(x - x2, 2.) + math.pow(y - y2, 2.) + math.pow(z - z2, 2.)
                        if not dist < radiusSquared:
                            continue
                        if isIncluded:
                            noduleMatches.append(candidate)
                            if key in candidates2:
                                del candidates2[key]
                            else:
                                # already consumed by an earlier annotation
                                print("This is strange: CAD mark %s detected two nodules! Check for overlapping nodule annotations, SeriesUID: %s, nodule Annot ID: %s" % (str(candidate.id), seriesuid, str(noduleAnnot.id)))
                        elif noduleAnnot.state == "Excluded" and bOtherNodulesAsIrrelevant and key in candidates2:
                            # delete marks on excluded nodules so they don't count as false positives
                            irrelevantCandidates += 1
                            ignoredCADMarksList.append("%s,%s,%s,%s,%s,%s,%.9f" % (seriesuid, -1, candidate.coordX, candidate.coordY, candidate.coordZ, str(candidate.id), float(candidate.CADprobability)))
                            del candidates2[key]

                    if len(noduleMatches) > 1:  # double detection
                        doubleCandidatesIgnored += (len(noduleMatches) - 1)

                    # Only included nodules enter the FROC analysis; marks on
                    # excluded nodules were already dropped from candidates2
                    # above, so they are neither hits nor false positives.
                    if not isIncluded:
                        continue

                    if noduleMatches:
                        # use the best-scoring matching mark for the FROC
                        # analysis; ties keep the first one encountered
                        bestCandidate = None
                        maxProb = None
                        for candidate in noduleMatches:
                            prob = float(candidate.CADprobability)
                            if (maxProb is None) or (prob > maxProb):
                                maxProb = prob
                                bestCandidate = candidate

                        FROCGTList.append(1.0)
                        FROCProbList.append(maxProb)
                        FPDivisorList.append(seriesuid)
                        excludeList.append(False)
                        # record the mark that supplied maxProb, not merely
                        # the last mark that happened to match
                        FROCtoNoduleMap.append("%s,%s,%s,%s,%s,%.9f,%s,%.9f" % (seriesuid, noduleAnnot.id, noduleAnnot.coordX, noduleAnnot.coordY, noduleAnnot.coordZ, float(noduleAnnot.diameter_mm), str(bestCandidate.id), maxProb))
                        candTPs += 1
                    else:
                        candFNs += 1
                        # append a positive sample with the lowest probability, such that the miss
                        # is part of the FROC vectors (flagged in excludeList)
                        FROCGTList.append(1.0)
                        FROCProbList.append(minProbValue)
                        FPDivisorList.append(seriesuid)
                        excludeList.append(True)
                        FROCtoNoduleMap.append("%s,%s,%s,%s,%s,%.9f,%s,%s" % (seriesuid, noduleAnnot.id, noduleAnnot.coordX, noduleAnnot.coordY, noduleAnnot.coordZ, float(noduleAnnot.diameter_mm), int(-1), "NA"))
                        nodNoCandFile.write("%s,%s,%s,%s,%s,%.9f,%s\n" % (seriesuid, noduleAnnot.id, noduleAnnot.coordX, noduleAnnot.coordY, noduleAnnot.coordZ, float(noduleAnnot.diameter_mm), str(-1)))

                # add all false positives to the vectors
                for key, candidate3 in candidates2.items():
                    candFPs += 1
                    FROCGTList.append(0.0)
                    FROCProbList.append(float(candidate3.CADprobability))
                    FPDivisorList.append(seriesuid)
                    excludeList.append(False)
                    FROCtoNoduleMap.append("%s,%s,%s,%s,%s,%s,%.9f" % (seriesuid, -1, candidate3.coordX, candidate3.coordY, candidate3.coordZ, str(candidate3.id), float(candidate3.CADprobability)))

        # Sanity check on the parallel vectors. Despite the wording of the
        # message, it is only logged; processing continues afterwards.
        if not (len(FROCGTList) == len(FROCProbList) and len(FROCGTList) == len(FPDivisorList) and len(FROCGTList) == len(FROCtoNoduleMap) and len(FROCGTList) == len(excludeList)):
            nodOutputfile.write("Length of FROC vectors not the same, this should never happen! Aborting..\n")

        nodOutputfile.write("Candidate detection results:\n")
        nodOutputfile.write("    True positives: %d\n" % candTPs)
        nodOutputfile.write("    False positives: %d\n" % candFPs)
        nodOutputfile.write("    False negatives: %d\n" % candFNs)
        nodOutputfile.write("    True negatives: %d\n" % candTNs)
        nodOutputfile.write("    Total number of candidates: %d\n" % totalNumberOfCands)
        nodOutputfile.write("    Total number of nodules: %d\n" % totalNumberOfNodules)

        nodOutputfile.write("    Ignored candidates on excluded nodules: %d\n" % irrelevantCandidates)
        nodOutputfile.write("    Ignored candidates which were double detections on a nodule: %d\n" % doubleCandidatesIgnored)
        if int(totalNumberOfNodules) == 0:
            nodOutputfile.write("    Sensitivity: 0.0\n")
        else:
            nodOutputfile.write("    Sensitivity: %.9f\n" % (float(candTPs) / float(totalNumberOfNodules)))
        nodOutputfile.write("    Average number of candidates per scan: %.9f\n" % (float(totalNumberOfCands) / float(len(seriesUIDs))))

    # compute FROC
    fps, sens, thresholds = computeFROC(FROCGTList, FROCProbList, len(seriesUIDs), excludeList)

    if performBootstrapping:
        fps_bs_itp, sens_bs_mean, sens_bs_lb, sens_bs_up = computeFROC_bootstrap(
            FROCGTList, FROCProbList, FPDivisorList, seriesUIDs, excludeList,
            numberOfBootstrapSamples=numberOfBootstrapSamples, confidence=confidence)
    else:
        fps_bs_itp = None
        sens_bs_mean = None
        sens_bs_lb = None
        sens_bs_up = None

    # Write FROC curve
    with open(os.path.join(outputDir, "froc_%s.txt" % CADSystemName), 'w') as f:
        for i in range(len(sens)):
            f.write("%.9f,%.9f,%.9f\n" % (fps[i], sens[i], thresholds[i]))

    # Write FROC vectors to disk as well
    with open(os.path.join(outputDir, "froc_gt_prob_vectors_%s.csv" % CADSystemName), 'w') as f:
        for i in range(len(FROCGTList)):
            f.write("%d,%.9f\n" % (FROCGTList[i], FROCProbList[i]))

    # resample the curve on a fixed grid of false-positive rates
    fps_itp = np.linspace(FROC_minX, FROC_maxX, num=10001)
    sens_itp = np.interp(fps_itp, fps, sens)

    if performBootstrapping:
        # Write mean, lower, and upper bound curves to disk
        with open(os.path.join(outputDir, "froc_%s_bootstrapping.csv" % CADSystemName), 'w') as f:
            f.write("FPrate,Sensivity[Mean],Sensivity[Lower bound],Sensivity[Upper bound]\n")
            for i in range(len(fps_bs_itp)):
                f.write("%.9f,%.9f,%.9f,%.9f\n" % (fps_bs_itp[i], sens_bs_mean[i], sens_bs_lb[i], sens_bs_up[i]))

    # create FROC graphs
    if int(totalNumberOfNodules) > 0:
        plt.figure()
        ax = plt.gca()
        clr = 'b'
        plt.plot(fps_itp, sens_itp, color=clr, label="%s" % CADSystemName, lw=2)
        if performBootstrapping:
            plt.plot(fps_bs_itp, sens_bs_mean, color=clr, ls='--')
            plt.plot(fps_bs_itp, sens_bs_lb, color=clr, ls=':')
            plt.plot(fps_bs_itp, sens_bs_up, color=clr, ls=':')
            ax.fill_between(fps_bs_itp, sens_bs_lb, sens_bs_up, facecolor=clr, alpha=0.05)
        plt.xlim(FROC_minX, FROC_maxX)
        plt.ylim(0, 1)
        plt.xlabel('Average number of false positives per scan')
        plt.ylabel('Sensitivity')
        plt.legend(loc='lower right')
        plt.title('FROC performance - %s' % (CADSystemName))

        if bLogPlot:
            # NOTE(review): 'basex' here and 'b' in plt.grid below are the
            # keyword spellings of older matplotlib releases; confirm the
            # targeted matplotlib version before upgrading.
            plt.xscale('log', basex=2)
            ax.xaxis.set_major_formatter(FixedFormatter([0.125, 0.25, 0.5, 1, 2, 4, 8]))

        # set your ticks manually
        ax.xaxis.set_ticks([0.125, 0.25, 0.5, 1, 2, 4, 8])
        ax.yaxis.set_ticks(np.arange(0, 1.1, 0.1))
        plt.grid(b=True, which='both')
        plt.tight_layout()

        plt.savefig(os.path.join(outputDir, "froc_%s.png" % CADSystemName), bbox_inches=0, dpi=300)

    return (fps, sens, thresholds, fps_bs_itp, sens_bs_mean, sens_bs_lb, sens_bs_up)