Python readCSV Examples

Programming Language: Python

Namespace/Package Name: tools.csvTools

Method/Function: readCSV

Examples at hotexamples.com: 8

Python readCSV - 8 examples found. These are the top rated real world Python examples of tools.csvTools.readCSV extracted from open source projects. You can rate examples to help us improve the quality of examples.

Example #1

Show file

def collect(annotations_filename,annotations_excluded_filename,seriesuids_filename,results_filename):
    """Load the evaluation inputs and keep only annotations of cases that have results.

    Four CSV files are read through ``csvTools.readCSV``.  Annotation rows are
    kept when they are the header row (first column equal to ``'seriesuid'``)
    or when their first column matches a seriesuid found in the second column
    of the results file.  Only the first 100000 non-header result rows
    contribute seriesuids to that filter.

    @param annotations_filename: CSV file with the nodule annotations
    @param annotations_excluded_filename: CSV file with the excluded annotations
    @param seriesuids_filename: CSV file whose first column lists the seriesuids
    @param results_filename: CSV file with the CAD results (seriesuid in column 1)
    @return: tuple ``(allNodules, seriesUIDs, results)`` where ``allNodules`` is
             the value returned by ``collectNoduleAnnotations``, ``seriesUIDs``
             is the first column of the seriesuid file and ``results`` holds
             the unmodified rows of the results file (header included)
    """
    annotations = csvTools.readCSV(annotations_filename)
    annotations_excluded = csvTools.readCSV(annotations_excluded_filename)
    results = csvTools.readCSV(results_filename)
    seriesUIDs_csv = csvTools.readCSV(seriesuids_filename)

    # Upper bound on the number of result rows that feed the seriesuid filter.
    max_result_rows = 100000

    # Column 1 of every non-header result row is the seriesuid of that mark.
    result_uids = [row[1] for row in results if row[1] != 'seriesuid']
    # A set gives O(1) membership tests; the filter below runs one test per
    # annotation row, which was quadratic with the list used previously.
    # assumes the CSV cells are hashable (plain strings) -- TODO confirm
    seriesuid_results = set(result_uids[:max_result_rows])

    # Keep the header row and every annotation whose case has CAD results.
    annotations = [
        row for row in annotations
        if row[0] == 'seriesuid' or row[0] in seriesuid_results
    ]

    seriesUIDs = [row[0] for row in seriesUIDs_csv]

    allNodules = collectNoduleAnnotations(annotations, annotations_excluded, seriesUIDs)

    return (allNodules, seriesUIDs, results)

Example #2

Show file

File: noduleCADEvaluationLUNA16.py Project: tilacyn/DeepSEED-3D-ConvNets-for-Pulmonary-Nodule-Detection

def collect(annotations_filename, annotations_excluded_filename, seriesuids_filename):
    """Read the annotation CSV files and gather the nodule annotations.

    @param annotations_filename: CSV file with the nodule annotations
    @param annotations_excluded_filename: CSV file with the excluded annotations
    @param seriesuids_filename: CSV file whose first column lists the seriesuids
    @return: tuple ``(allNodules, seriesUIDs)``; ``allNodules`` is whatever
             ``collectNoduleAnnotations`` returns for the loaded rows and
             ``seriesUIDs`` is the first column of the seriesuid file
    """
    annotation_rows = csvTools.readCSV(annotations_filename)
    excluded_rows = csvTools.readCSV(annotations_excluded_filename)
    uid_rows = csvTools.readCSV(seriesuids_filename)

    # Only the first cell of each row of the seriesuid file is used.
    seriesUIDs = [uid_row[0] for uid_row in uid_rows]

    nodules = collectNoduleAnnotations(annotation_rows, excluded_rows, seriesUIDs)
    return (nodules, seriesUIDs)

Example #3

Show file

File: evaluation_main.py Project: raymon-tian/tianchiAIMedical2017

def collect(annotations_filename, seriesuids_filename):
    """Read the ground-truth CSV files and gather the nodule annotations.

    @param annotations_filename: CSV file with the ground-truth annotations
    @param seriesuids_filename: CSV file whose first column lists the CT image names
    @return: tuple ``(allNodules, seriesUIDs)``; ``allNodules`` is the value
             returned by ``collectNoduleAnnotations`` and ``seriesUIDs`` is the
             first column of the seriesuid file
    """
    # read the ground-truth annotation file
    gt_rows = csvTools.readCSV(annotations_filename)
    # read the list of ground-truth file names
    name_rows = csvTools.readCSV(seriesuids_filename)

    # turn the CSV content into a flat list, one CT image name per element
    seriesUIDs = [name_row[0] for name_row in name_rows]

    # per the original author: all nodules, indexed by CT image name
    nodules = collectNoduleAnnotations(gt_rows, seriesUIDs)

    return (nodules, seriesUIDs)

Example #4

Show file

File: noduleCADEvaluationLUNA16.py Project: ericsolo/python

def collect(annotations_filename,annotations_excluded_filename,seriesuids_filename):
    """Load annotations, excluded annotations and seriesuids from CSV files.

    @param annotations_filename: CSV file with the nodule annotations
    @param annotations_excluded_filename: CSV file with the excluded annotations
    @param seriesuids_filename: CSV file whose first column lists the seriesuids
    @return: tuple ``(allNodules, seriesUIDs)`` -- the result of
             ``collectNoduleAnnotations`` and the list of seriesuids
    """
    included = csvTools.readCSV(annotations_filename)
    excluded = csvTools.readCSV(annotations_excluded_filename)
    uid_table = csvTools.readCSV(seriesuids_filename)

    # Flatten the single-column table into a plain list of seriesuids.
    seriesUIDs = []
    seriesUIDs.extend(line[0] for line in uid_table)

    return (collectNoduleAnnotations(included, excluded, seriesUIDs), seriesUIDs)

Example #5

Show file

File: noduleCADEvaluationLUNA16.py Project: anruoxi123/3D-Res-I

def evaluateCAD(seriesUIDs,
                results_filename,
                outputDir,
                allNodules,
                CADSystemName,
                maxNumberOfCADMarks=-1,
                performBootstrapping=False,
                numberOfBootstrapSamples=1000,
                confidence=0.95):
    '''
    function to evaluate a CAD algorithm

    The CAD marks in results_filename are matched against the nodule
    annotations of every case, a text report and the FROC data are written
    to outputDir and, when at least one included nodule exists, the FROC
    curve is saved as a PNG.

    @param seriesUIDs: list of the seriesUIDs of the cases to be processed
    @param results_filename: file with results
    @param outputDir: output directory
    @param allNodules: dictionary with all nodule annotations of all cases, keys of the dictionary are the seriesuids
    @param CADSystemName: name of the CAD system, to be used in filenames and on FROC curve
    @param maxNumberOfCADMarks: when > 0, cases with more marks than this are trimmed to their most probable marks
    @param performBootstrapping: when true, computeFROC_bootstrap is run and its curves are written and plotted
    @param numberOfBootstrapSamples: forwarded to computeFROC_bootstrap
    @param confidence: forwarded to computeFROC_bootstrap
    @return: tuple (fps, sens, thresholds, fps_bs_itp, sens_bs_mean,
             sens_bs_lb, sens_bs_up); the last four entries are None when
             performBootstrapping is false
    '''

    nodOutputfile = open(os.path.join(outputDir, 'CADAnalysis.txt'), 'w')
    nodOutputfile.write("\n")
    nodOutputfile.write((60 * "*") + "\n")
    nodOutputfile.write("CAD Analysis: %s\n" % CADSystemName)
    nodOutputfile.write((60 * "*") + "\n")
    nodOutputfile.write("\n")

    results = csvTools.readCSV(results_filename)

    allCandsCAD = {}

    for seriesuid in seriesUIDs:

        # collect candidates from result file
        nodules = {}
        header = results[0]

        i = 0
        for result in results[1:]:
            nodule_seriesuid = result[header.index(seriesuid_label)]

            if seriesuid == nodule_seriesuid:
                nodule = getNodule(result, header)
                # candidate ids are numbered per case, in file order
                nodule.candidateID = i
                nodules[nodule.candidateID] = nodule
                i += 1

        if (maxNumberOfCADMarks > 0):
            # number of CAD marks, only keep most suspicious marks

            if len(nodules) > maxNumberOfCADMarks:
                # make a list of all probabilities
                probs = []
                # .items() works on Python 2 and 3 (.iteritems() is Python 2 only)
                for keytemp, noduletemp in nodules.items():
                    probs.append(float(noduletemp.CADprobability))
                probs.sort(reverse=True)  # sort from large to small
                # first probability past the cut; only marks strictly above
                # it are kept, so ties at the threshold are dropped
                probThreshold = probs[maxNumberOfCADMarks]
                nodules2 = {}
                nrNodules2 = 0
                for keytemp, noduletemp in nodules.items():
                    if nrNodules2 >= maxNumberOfCADMarks:
                        break
                    if float(noduletemp.CADprobability) > probThreshold:
                        nodules2[keytemp] = noduletemp
                        nrNodules2 += 1

                nodules = nodules2

        # print 'adding candidates: ' + seriesuid
        allCandsCAD[seriesuid] = nodules

    # open output files
    nodNoCandFile = open(
        os.path.join(outputDir,
                     "nodulesWithoutCandidate_%s.txt" % CADSystemName), 'w')

    # --- iterate over all cases (seriesUIDs) and determine how
    # often a nodule annotation is not covered by a candidate

    # initialize some variables to be used in the loop
    candTPs = 0
    candFPs = 0
    candFNs = 0
    candTNs = 0
    totalNumberOfCands = 0
    totalNumberOfNodules = 0
    doubleCandidatesIgnored = 0
    irrelevantCandidates = 0
    minProbValue = -1000000000.0  # stand-in probability for missed nodules
    FROCGTList = []
    FROCProbList = []
    FPDivisorList = []
    excludeList = []
    FROCtoNoduleMap = []
    ignoredCADMarksList = []

    # -- loop over the cases
    for seriesuid in seriesUIDs:
        # get the candidates for this case
        try:
            candidates = allCandsCAD[seriesuid]
        except KeyError:
            candidates = {}

        # add to the total number of candidates
        totalNumberOfCands += len(candidates)

        # make a copy in which items will be deleted
        candidates2 = candidates.copy()

        # get the nodule annotations on this case
        try:
            noduleAnnots = allNodules[seriesuid]
        except KeyError:
            noduleAnnots = []

        # - loop over the nodule annotations
        for noduleAnnot in noduleAnnots:
            # increment the number of nodules
            if noduleAnnot.state == "Included":
                totalNumberOfNodules += 1

            x = float(noduleAnnot.coordX)
            y = float(noduleAnnot.coordY)
            z = float(noduleAnnot.coordZ)

            # 2. Check if the nodule annotation is covered by a candidate
            # A nodule is marked as detected when the center of mass of the candidate is within a distance R of
            # the center of the nodule. In order to ensure that the CAD mark is displayed within the nodule on the
            # CT scan, we set R to be the radius of the nodule size.
            diameter = float(noduleAnnot.diameter_mm)
            if diameter < 0.0:
                # negative diameters fall back to a fixed 10.0
                diameter = 10.0
            radiusSquared = pow((diameter / 2.0), 2.0)

            found = False
            noduleMatches = []
            # iterate the untouched dict; deletions only hit the copy
            for key, candidate in candidates.items():
                x2 = float(candidate.coordX)
                y2 = float(candidate.coordY)
                z2 = float(candidate.coordZ)
                # squared distance, compared against the squared radius
                dist = math.pow(x - x2, 2.) + math.pow(y - y2, 2.) + math.pow(
                    z - z2, 2.)
                if dist < radiusSquared:
                    if (noduleAnnot.state == "Included"):
                        found = True
                        noduleMatches.append(candidate)
                        if key not in candidates2:
                            print(
                                "This is strange: CAD mark %s detected two nodules! Check for overlapping nodule annotations, SeriesUID: %s, nodule Annot ID: %s"
                                % (str(candidate.id), seriesuid,
                                   str(noduleAnnot.id)))
                        else:
                            del candidates2[key]
                    elif (noduleAnnot.state == "Excluded"
                          ):  # an excluded nodule
                        if bOtherNodulesAsIrrelevant:  #    delete marks on excluded nodules so they don't count as false positives
                            if key in candidates2:
                                irrelevantCandidates += 1
                                ignoredCADMarksList.append(
                                    "%s,%s,%s,%s,%s,%s,%.9f" %
                                    (seriesuid, -1, candidate.coordX,
                                     candidate.coordY, candidate.coordZ,
                                     str(candidate.id),
                                     float(candidate.CADprobability)))
                                del candidates2[key]
            if len(noduleMatches) > 1:  # double detection
                doubleCandidatesIgnored += (len(noduleMatches) - 1)
            if noduleAnnot.state == "Included":
                # only include it for FROC analysis if it is included
                # otherwise, the candidate will not be counted as FP, but ignored in the
                # analysis since it has been deleted from the nodules2 vector of candidates
                if found:
                    # append the sample with the highest probability for the FROC analysis
                    maxProb = None
                    for candidate in noduleMatches:
                        if (maxProb is None) or (float(
                                candidate.CADprobability) > maxProb):
                            maxProb = float(candidate.CADprobability)

                    FROCGTList.append(1.0)
                    FROCProbList.append(float(maxProb))
                    FPDivisorList.append(seriesuid)
                    excludeList.append(False)
                    # NOTE: `candidate` is the last entry of noduleMatches
                    # here, not necessarily the most probable match
                    FROCtoNoduleMap.append(
                        "%s,%s,%s,%s,%s,%.9f,%s,%.9f" %
                        (seriesuid, noduleAnnot.id, noduleAnnot.coordX,
                         noduleAnnot.coordY, noduleAnnot.coordZ,
                         float(noduleAnnot.diameter_mm), str(
                             candidate.id), float(candidate.CADprobability)))
                    candTPs += 1
                else:
                    candFNs += 1
                    # append a positive sample with the lowest probability, such that this is added in the FROC analysis
                    FROCGTList.append(1.0)
                    FROCProbList.append(minProbValue)
                    FPDivisorList.append(seriesuid)
                    excludeList.append(True)
                    FROCtoNoduleMap.append(
                        "%s,%s,%s,%s,%s,%.9f,%s,%s" %
                        (seriesuid, noduleAnnot.id, noduleAnnot.coordX,
                         noduleAnnot.coordY, noduleAnnot.coordZ,
                         float(noduleAnnot.diameter_mm), int(-1), "NA"))
                    nodNoCandFile.write(
                        "%s,%s,%s,%s,%s,%.9f,%s\n" %
                        (seriesuid, noduleAnnot.id, noduleAnnot.coordX,
                         noduleAnnot.coordY, noduleAnnot.coordZ,
                         float(noduleAnnot.diameter_mm), str(-1)))

        # add all false positives to the vectors
        for key, candidate3 in candidates2.items():
            candFPs += 1
            FROCGTList.append(0.0)
            FROCProbList.append(float(candidate3.CADprobability))
            FPDivisorList.append(seriesuid)
            excludeList.append(False)
            FROCtoNoduleMap.append(
                "%s,%s,%s,%s,%s,%s,%.9f" %
                (seriesuid, -1, candidate3.coordX, candidate3.coordY,
                 candidate3.coordZ, str(
                     candidate3.id), float(candidate3.CADprobability)))

    # all missed nodules have been written; release the handle explicitly
    nodNoCandFile.close()

    # NOTE: a length mismatch is only reported in the analysis file;
    # despite the message, execution continues
    if not (len(FROCGTList) == len(FROCProbList) and len(FROCGTList)
            == len(FPDivisorList) and len(FROCGTList) == len(FROCtoNoduleMap)
            and len(FROCGTList) == len(excludeList)):
        nodOutputfile.write(
            "Length of FROC vectors not the same, this should never happen! Aborting..\n"
        )

    nodOutputfile.write("Candidate detection results:\n")
    nodOutputfile.write("    True positives: %d\n" % candTPs)
    nodOutputfile.write("    False positives: %d\n" % candFPs)
    nodOutputfile.write("    False negatives: %d\n" % candFNs)
    nodOutputfile.write("    True negatives: %d\n" % candTNs)
    nodOutputfile.write("    Total number of candidates: %d\n" %
                        totalNumberOfCands)
    nodOutputfile.write("    Total number of nodules: %d\n" %
                        totalNumberOfNodules)

    nodOutputfile.write("    Ignored candidates on excluded nodules: %d\n" %
                        irrelevantCandidates)
    nodOutputfile.write(
        "    Ignored candidates which were double detections on a nodule: %d\n"
        % doubleCandidatesIgnored)
    if int(totalNumberOfNodules) == 0:
        nodOutputfile.write("    Sensitivity: 0.0\n")
    else:
        nodOutputfile.write("    Sensitivity: %.9f\n" %
                            (float(candTPs) / float(totalNumberOfNodules)))
    nodOutputfile.write("    Average number of candidates per scan: %.9f\n" %
                        (float(totalNumberOfCands) / float(len(seriesUIDs))))

    # the report is complete; close it so the content is flushed to disk
    nodOutputfile.close()

    # compute FROC
    fps, sens, thresholds = computeFROC(FROCGTList, FROCProbList,
                                        len(seriesUIDs), excludeList)

    if performBootstrapping:
        fps_bs_itp, sens_bs_mean, sens_bs_lb, sens_bs_up = computeFROC_bootstrap(
            FROCGTList,
            FROCProbList,
            FPDivisorList,
            seriesUIDs,
            excludeList,
            numberOfBootstrapSamples=numberOfBootstrapSamples,
            confidence=confidence)

    # Write FROC curve
    with open(os.path.join(outputDir, "froc_%s.txt" % CADSystemName),
              'w') as f:
        for i in range(len(sens)):
            f.write("%.9f,%.9f,%.9f\n" % (fps[i], sens[i], thresholds[i]))

    # Write FROC vectors to disk as well
    with open(
            os.path.join(outputDir,
                         "froc_gt_prob_vectors_%s.csv" % CADSystemName),
            'w') as f:
        for i in range(len(FROCGTList)):
            f.write("%d,%.9f\n" % (FROCGTList[i], FROCProbList[i]))

    # resample the curve on a regular grid of false-positive rates
    fps_itp = np.linspace(FROC_minX, FROC_maxX, num=10001)

    sens_itp = np.interp(fps_itp, fps, sens)
    # accumulate the sensitivity at the grid samples closest to
    # 0.125, 0.25, ..., 8 (the target doubles after every hit)
    frvvlu = 0
    nxth = 0.125
    for fp, ss in zip(fps_itp, sens_itp):
        if abs(fp - nxth) < 3e-4:
            frvvlu += ss
            nxth *= 2
        if abs(nxth - 16) < 1e-5: break
    print(frvvlu / 7, nxth)
    # NOTE(review): exact float equality against the linspace grid; masks
    # for values that are not grid samples select nothing -- confirm intent
    print(sens_itp[fps_itp==0.125]+sens_itp[fps_itp==0.25]+sens_itp[fps_itp==0.5]+sens_itp[fps_itp==1]+sens_itp[fps_itp==2]\
        +sens_itp[fps_itp==4]+sens_itp[fps_itp==8])
    if performBootstrapping:
        # Write mean, lower, and upper bound curves to disk
        with open(
                os.path.join(outputDir,
                             "froc_%s_bootstrapping.csv" % CADSystemName),
                'w') as f:
            f.write(
                "FPrate,Sensivity[Mean],Sensivity[Lower bound],Sensivity[Upper bound]\n"
            )
            for i in range(len(fps_bs_itp)):
                f.write("%.9f,%.9f,%.9f,%.9f\n" %
                        (fps_bs_itp[i], sens_bs_mean[i], sens_bs_lb[i],
                         sens_bs_up[i]))
    else:
        fps_bs_itp = None
        sens_bs_mean = None
        sens_bs_lb = None
        sens_bs_up = None

    # create FROC graphs
    if int(totalNumberOfNodules) > 0:
        plt.figure()
        ax = plt.gca()
        clr = 'b'
        plt.plot(fps_itp,
                 sens_itp,
                 color=clr,
                 label="%s" % CADSystemName,
                 lw=2)
        if performBootstrapping:
            plt.plot(fps_bs_itp, sens_bs_mean, color=clr, ls='--')
            plt.plot(fps_bs_itp, sens_bs_lb, color=clr,
                     ls=':')  # , label = "lb")
            plt.plot(fps_bs_itp, sens_bs_up, color=clr,
                     ls=':')  # , label = "ub")
            ax.fill_between(fps_bs_itp,
                            sens_bs_lb,
                            sens_bs_up,
                            facecolor=clr,
                            alpha=0.05)
        xmin = FROC_minX
        xmax = FROC_maxX
        plt.xlim(xmin, xmax)
        plt.ylim(0.5, 1)
        plt.xlabel('Average number of false positives per scan')
        plt.ylabel('Sensitivity')
        plt.legend(loc='lower right')
        plt.title('FROC performance - %s' % (CADSystemName))

        if bLogPlot:
            # NOTE(review): `basex` is the older matplotlib keyword (newer
            # releases call it `base`) -- confirm against the installed version
            plt.xscale('log', basex=2)
            ax.xaxis.set_major_formatter(
                FixedFormatter([0.125, 0.25, 0.5, 1, 2, 4, 8]))

        # set your ticks manually
        ax.xaxis.set_ticks([0.125, 0.25, 0.5, 1, 2, 4, 8])
        ax.yaxis.set_ticks(np.arange(0.5, 1, 0.1))
        # ax.yaxis.set_ticks(np.arange(0, 1.1, 0.1))
        # the on/off flag is passed positionally because its keyword name
        # differs between matplotlib releases
        plt.grid(True, which='both')
        plt.tight_layout()

        plt.savefig(os.path.join(outputDir, "froc_%s.png" % CADSystemName),
                    bbox_inches=0,
                    dpi=300)

    return (fps, sens, thresholds, fps_bs_itp, sens_bs_mean, sens_bs_lb,
            sens_bs_up)

Example #6

Show file

File: evaluation_main.py Project: raymon-tian/tianchiAIMedical2017

def evaluateCAD(seriesUIDs,
                results_filename,
                outputDir,
                allNodules,
                CADSystemName,
                maxNumberOfCADMarks=-1,
                performBootstrapping=False,
                numberOfBootstrapSamples=1000,
                confidence=0.95):
    '''
    function to evaluate a CAD algorithm

    The marks in results_filename are matched against the ground-truth
    nodules of every case, a text report and the FROC data are written to
    outputDir, the sensitivities at seven false-positive rates are printed
    together with their average and, when at least one included nodule
    exists, the FROC curve is saved as a PNG.

    @param seriesUIDs: list with the names of all test-set CT images
    @param results_filename: the submitted csv file, *.csv
    @param outputDir: path of the folder that receives the FROC results
    @param allNodules: dictionary of all ground-truth nodules, indexed by image name
    @param CADSystemName: name of the system, used in output file names and on the plot
    @param maxNumberOfCADMarks: maximum number of marks allowed per CT image (only applied when > 0)
    @param performBootstrapping: when true, computeFROC_bootstrap is run and its curves are written and plotted
    @param numberOfBootstrapSamples: forwarded to computeFROC_bootstrap
    @param confidence: forwarded to computeFROC_bootstrap
    @return: tuple (fps, sens, thresholds, fps_bs_itp, sens_bs_mean,
             sens_bs_lb, sens_bs_up); the last four entries are None when
             performBootstrapping is false
    '''

    nodOutputfile = open(os.path.join(outputDir, 'CADAnalysis.txt'), 'w')
    nodOutputfile.write("\n")
    nodOutputfile.write((60 * "*") + "\n")
    nodOutputfile.write("CAD Analysis: %s\n" % CADSystemName)
    nodOutputfile.write((60 * "*") + "\n")
    nodOutputfile.write("\n")

    results = csvTools.readCSV(results_filename)  # rows of the submitted csv file

    allCandsCAD = {}

    for seriesuid in seriesUIDs:  # for every test image id

        # collect candidates from result file
        nodules = {}
        header = results[0]  # first csv row: the column names

        i = 0
        for result in results[1:]:  # for every mark
            nodule_seriesuid = result[header.index(seriesuid_label)]  # image name of this mark

            if seriesuid == nodule_seriesuid:  # the mark belongs to the current case
                nodule = getNodule(result, header)
                # candidate ids are numbered per case, in file order
                nodule.candidateID = i
                nodules[nodule.candidateID] = nodule  # all marks of this case
                i += 1

        if (maxNumberOfCADMarks > 0):
            # if a CT image has more marks than allowed, rank them by score
            # and keep only the top maxNumberOfCADMarks
            if len(nodules) > maxNumberOfCADMarks:
                # make a list of all probabilities
                probs = []
                # .items() works on Python 2 and 3 (.iteritems() is Python 2 only)
                for keytemp, noduletemp in nodules.items():
                    probs.append(float(noduletemp.CADprobability))
                probs.sort(reverse=True)  # sort from large to small
                # first probability past the cut; only marks strictly above
                # it are kept, so ties at the threshold are dropped
                probThreshold = probs[maxNumberOfCADMarks]
                nodules2 = {}
                nrNodules2 = 0
                for keytemp, noduletemp in nodules.items():
                    if nrNodules2 >= maxNumberOfCADMarks:
                        break
                    if float(noduletemp.CADprobability) > probThreshold:
                        nodules2[keytemp] = noduletemp
                        nrNodules2 += 1

                nodules = nodules2

        # call form of print: valid on both Python 2 and Python 3
        print('adding candidates: ' + seriesuid)
        allCandsCAD[seriesuid] = nodules  # marks of each case, keyed by image name

    # open output files
    nodNoCandFile = open(
        os.path.join(outputDir,
                     "nodulesWithoutCandidate_%s.txt" % CADSystemName), 'w')

    # --- iterate over all cases (seriesUIDs) and determine how
    # often a nodule annotation is not covered by a candidate

    # initialize some variables to be used in the loop
    candTPs = 0
    candFPs = 0
    candFNs = 0
    candTNs = 0
    totalNumberOfCands = 0
    totalNumberOfNodules = 0
    doubleCandidatesIgnored = 0
    irrelevantCandidates = 0
    minProbValue = -1000000000.0  # stand-in probability for missed nodules
    FROCGTList = []
    FROCProbList = []
    FPDivisorList = []
    excludeList = []
    FROCtoNoduleMap = []
    ignoredCADMarksList = []

    # -- loop over the cases
    for seriesuid in seriesUIDs:  # for every CT image
        # get the candidates for this case
        try:
            candidates = allCandsCAD[seriesuid]  # predicted marks of this image
        except KeyError:
            candidates = {}

        totalNumberOfCands += len(candidates)  # running total of predicted marks

        # make a copy in which items will be deleted
        candidates2 = candidates.copy()

        # get the nodule annotations on this case
        try:
            noduleAnnots = allNodules[seriesuid]  # ground-truth annotations of this image
        except KeyError:
            noduleAnnots = []

        # - loop over the nodule annotations
        for noduleAnnot in noduleAnnots:  # for every ground-truth record
            # increment the number of nodules
            if noduleAnnot.state == "Included":  # the annotation counts towards the score
                totalNumberOfNodules += 1

            x = float(noduleAnnot.coordX)  # ground-truth X coordinate
            y = float(noduleAnnot.coordY)  # ground-truth Y coordinate
            z = float(noduleAnnot.coordZ)  # ground-truth Z coordinate

            # 2. Check if the nodule annotation is covered by a candidate
            # A nodule is marked as detected when the center of mass of the candidate is within a distance R of
            # the center of the nodule. In order to ensure that the CAD mark is displayed within the nodule on the
            # CT scan, we set R to be the radius of the nodule size.
            diameter = float(noduleAnnot.diameter_mm)  # ground-truth diameter
            if diameter < 0.0:
                # negative diameters fall back to a fixed 5
                diameter = 5
            radiusSquared = pow((diameter / 2.0), 2.0)  # squared ground-truth radius

            found = False
            noduleMatches = []
            # iterate the untouched dict; deletions only hit the copy
            for key, candidate in candidates.items():  # every predicted mark
                x2 = float(candidate.coordX)  # predicted X coordinate
                y2 = float(candidate.coordY)  # predicted Y coordinate
                z2 = float(candidate.coordZ)  # predicted Z coordinate
                # squared distance between prediction and ground truth
                dist = math.pow(x - x2, 2.) + math.pow(y - y2, 2.) + math.pow(
                    z - z2, 2.)
                if dist < radiusSquared:  # the mark lies within the nodule radius
                    if (noduleAnnot.state == "Included"):  # annotation used for scoring
                        found = True
                        # remember this predicted mark as a match
                        noduleMatches.append(candidate)
                        if key not in candidates2:
                            print("This is strange: CAD mark %s detected two nodules! Check for overlapping nodule annotations, SeriesUID: %s, nodule Annot ID: %s" % (
                                str(candidate.id), seriesuid,
                                str(noduleAnnot.id)))
                        else:
                            del candidates2[key]  # matched marks are not false positives
                    elif (noduleAnnot.state == "Excluded"
                          ):  # an excluded nodule
                        if bOtherNodulesAsIrrelevant:  #    delete marks on excluded nodules so they don't count as false positives
                            if key in candidates2:
                                irrelevantCandidates += 1
                                ignoredCADMarksList.append(
                                    "%s,%s,%s,%s,%s,%s,%.9f" %
                                    (seriesuid, -1, candidate.coordX,
                                     candidate.coordY, candidate.coordZ,
                                     str(candidate.id),
                                     float(candidate.CADprobability)))
                                del candidates2[key]
            if len(noduleMatches) > 1:  # at least two marks hit the same nodule
                # the surplus marks are discarded; keep count of them
                doubleCandidatesIgnored += (len(noduleMatches) - 1)
            if noduleAnnot.state == "Included":  # the annotation takes part in the FROC
                # only include it for FROC analysis if it is included
                # otherwise, the candidate will not be counted as FP, but ignored in the
                # analysis since it has been deleted from the nodules2 vector of candidates
                if found:  # at least one predicted mark matched this annotation
                    # append the sample with the highest probability for the FROC analysis
                    maxProb = None
                    for candidate in noduleMatches:  # find the most probable match
                        if (maxProb is None) or (float(
                                candidate.CADprobability) > maxProb):
                            maxProb = float(candidate.CADprobability)

                    FROCGTList.append(1.0)
                    FROCProbList.append(float(maxProb))
                    FPDivisorList.append(seriesuid)
                    excludeList.append(False)
                    # NOTE: `candidate` is the last entry of noduleMatches
                    # here, not necessarily the most probable match
                    FROCtoNoduleMap.append(
                        "%s,%s,%s,%s,%s,%.9f,%s,%.9f" %
                        (seriesuid, noduleAnnot.id, noduleAnnot.coordX,
                         noduleAnnot.coordY, noduleAnnot.coordZ,
                         float(noduleAnnot.diameter_mm), str(
                             candidate.id), float(candidate.CADprobability)))
                    candTPs += 1
                else:
                    candFNs += 1
                    # append a positive sample with the lowest probability, such that this is added in the FROC analysis
                    FROCGTList.append(1.0)
                    FROCProbList.append(minProbValue)
                    FPDivisorList.append(seriesuid)
                    excludeList.append(True)
                    FROCtoNoduleMap.append(
                        "%s,%s,%s,%s,%s,%.9f,%s,%s" %
                        (seriesuid, noduleAnnot.id, noduleAnnot.coordX,
                         noduleAnnot.coordY, noduleAnnot.coordZ,
                         float(noduleAnnot.diameter_mm), int(-1), "NA"))
                    nodNoCandFile.write(
                        "%s,%s,%s,%s,%s,%.9f,%s\n" %
                        (seriesuid, noduleAnnot.id, noduleAnnot.coordX,
                         noduleAnnot.coordY, noduleAnnot.coordZ,
                         float(noduleAnnot.diameter_mm), str(-1)))

        # add all false positives to the vectors
        for key, candidate3 in candidates2.items():
            candFPs += 1
            FROCGTList.append(0.0)
            FROCProbList.append(float(candidate3.CADprobability))
            FPDivisorList.append(seriesuid)
            excludeList.append(False)
            FROCtoNoduleMap.append(
                "%s,%s,%s,%s,%s,%s,%.9f" %
                (seriesuid, -1, candidate3.coordX, candidate3.coordY,
                 candidate3.coordZ, str(
                     candidate3.id), float(candidate3.CADprobability)))

    # all missed nodules have been written; release the handle explicitly
    nodNoCandFile.close()

    # NOTE: a length mismatch is only reported in the analysis file;
    # despite the message, execution continues
    if not (len(FROCGTList) == len(FROCProbList) and len(FROCGTList)
            == len(FPDivisorList) and len(FROCGTList) == len(FROCtoNoduleMap)
            and len(FROCGTList) == len(excludeList)):
        nodOutputfile.write(
            "Length of FROC vectors not the same, this should never happen! Aborting..\n"
        )

    nodOutputfile.write("Candidate detection results:\n")
    nodOutputfile.write("    True positives: %d\n" % candTPs)
    nodOutputfile.write("    False positives: %d\n" % candFPs)
    nodOutputfile.write("    False negatives: %d\n" % candFNs)
    nodOutputfile.write("    True negatives: %d\n" % candTNs)
    nodOutputfile.write("    Total number of candidates: %d\n" %
                        totalNumberOfCands)
    nodOutputfile.write("    Total number of nodules: %d\n" %
                        totalNumberOfNodules)

    nodOutputfile.write("    Ignored candidates on excluded nodules: %d\n" %
                        irrelevantCandidates)
    nodOutputfile.write(
        "    Ignored candidates which were double detections on a nodule: %d\n"
        % doubleCandidatesIgnored)
    if int(totalNumberOfNodules) == 0:
        nodOutputfile.write("    Sensitivity: 0.0\n")
    else:
        nodOutputfile.write("    Sensitivity: %.9f\n" %
                            (float(candTPs) / float(totalNumberOfNodules)))
    nodOutputfile.write("    Average number of candidates per scan: %.9f\n" %
                        (float(totalNumberOfCands) / float(len(seriesUIDs))))

    # the report is complete; close it so the content is flushed to disk
    nodOutputfile.close()

    # compute FROC
    fps, sens, thresholds = computeFROC(FROCGTList, FROCProbList,
                                        len(seriesUIDs), excludeList)

    if performBootstrapping:
        fps_bs_itp, sens_bs_mean, sens_bs_lb, sens_bs_up = computeFROC_bootstrap(
            FROCGTList,
            FROCProbList,
            FPDivisorList,
            seriesUIDs,
            excludeList,
            numberOfBootstrapSamples=numberOfBootstrapSamples,
            confidence=confidence)

    # Write FROC curve
    with open(os.path.join(outputDir, "froc_%s.txt" % CADSystemName),
              'w') as f:
        for i in range(len(sens)):
            f.write("%.9f,%.9f,%.9f\n" % (fps[i], sens[i], thresholds[i]))

    # Write FROC vectors to disk as well
    with open(
            os.path.join(outputDir,
                         "froc_gt_prob_vectors_%s.csv" % CADSystemName),
            'w') as f:
        for i in range(len(FROCGTList)):
            f.write("%d,%.9f\n" % (FROCGTList[i], FROCProbList[i]))

    # resample the curve on a regular grid of false-positive rates
    fps_itp = np.linspace(FROC_minX, FROC_maxX, num=10001)

    sens_itp = np.interp(fps_itp, fps, sens)

    # Sensitivity at the seven operating points.  Interpolating on the
    # resampled curve also covers operating points that coincide with a
    # grid sample (such as the grid end points); a strict "between two
    # neighbouring samples" test would silently skip those.  Operating
    # points outside the grid range contribute nothing.
    operating_points = (("0.125:", 0.125), ("0.25:", 0.25), ("0.5:", 0.5),
                        ("1:", 1), ("2:", 2), ("4:", 4), ("8:", 8))
    sum_sensitivity = 0
    for label, fp_rate in operating_points:
        if fps_itp[0] <= fp_rate <= fps_itp[-1]:
            sensitivity = float(np.interp(fp_rate, fps_itp, sens_itp))
            print("%s %.9f" % (label, sensitivity))
            sum_sensitivity += sensitivity
    ave_sensitivity = sum_sensitivity / 7.0
    # the average is a fraction, so it is printed as a float
    # ("%d" would truncate it to an integer)
    print("final score is %.9f" % ave_sensitivity)
    if performBootstrapping:
        # Write mean, lower, and upper bound curves to disk
        with open(
                os.path.join(outputDir,
                             "froc_%s_bootstrapping.csv" % CADSystemName),
                'w') as f:
            f.write(
                "FPrate,Sensivity[Mean],Sensivity[Lower bound],Sensivity[Upper bound]\n"
            )
            for i in range(len(fps_bs_itp)):
                f.write("%.9f,%.9f,%.9f,%.9f\n" %
                        (fps_bs_itp[i], sens_bs_mean[i], sens_bs_lb[i],
                         sens_bs_up[i]))
    else:
        fps_bs_itp = None
        sens_bs_mean = None
        sens_bs_lb = None
        sens_bs_up = None

    # create FROC graphs
    if int(totalNumberOfNodules) > 0:
        plt.figure()
        ax = plt.gca()
        clr = 'b'
        plt.plot(fps_itp,
                 sens_itp,
                 color=clr,
                 label="%s" % CADSystemName,
                 lw=2)
        if performBootstrapping:
            plt.plot(fps_bs_itp, sens_bs_mean, color=clr, ls='--')
            plt.plot(fps_bs_itp, sens_bs_lb, color=clr,
                     ls=':')  # , label = "lb")
            plt.plot(fps_bs_itp, sens_bs_up, color=clr,
                     ls=':')  # , label = "ub")
            ax.fill_between(fps_bs_itp,
                            sens_bs_lb,
                            sens_bs_up,
                            facecolor=clr,
                            alpha=0.05)
        xmin = FROC_minX
        xmax = FROC_maxX
        plt.xlim(xmin, xmax)
        plt.ylim(0, 1)
        plt.xlabel('Average number of false positives per scan')
        plt.ylabel('Sensitivity')
        plt.legend(loc='lower right')
        plt.title('FROC performance - %s' % (CADSystemName))

        if bLogPlot:
            # NOTE(review): `basex` is the older matplotlib keyword (newer
            # releases call it `base`) -- confirm against the installed version
            plt.xscale('log', basex=2)
            ax.xaxis.set_major_formatter(
                FixedFormatter([0.125, 0.25, 0.5, 1, 2, 4, 8]))

        # set your ticks manually
        ax.xaxis.set_ticks([0.125, 0.25, 0.5, 1, 2, 4, 8])
        ax.yaxis.set_ticks(np.arange(0, 1.1, 0.1))
        # the on/off flag is passed positionally because its keyword name
        # differs between matplotlib releases
        plt.grid(True, which='both')
        plt.tight_layout()

        plt.savefig(os.path.join(outputDir, "froc_%s.png" % CADSystemName),
                    bbox_inches=0,
                    dpi=300)

    return (fps, sens, thresholds, fps_bs_itp, sens_bs_mean, sens_bs_lb,
            sens_bs_up)

Example #7

Show file

def evaluateCAD(seriesUIDs,
                results_filename,
                outputDir,
                allNodules,
                CADSystemName,
                maxNumberOfCADMarks=-1,
                performBootstrapping=False,
                numberOfBootstrapSamples=1000,
                confidence=0.95):
    '''
    Evaluate a CAD algorithm against the nodule annotations.

    Writes to outputDir: CADAnalysis.txt (summary report),
    nodulesWithoutCandidate_<CADSystemName>.txt (missed nodules),
    froc_<CADSystemName>.txt, froc_gt_prob_vectors_<CADSystemName>.csv,
    froc_<CADSystemName>_bootstrapping.csv (only when bootstrapping) and
    froc_<CADSystemName>.png (only when at least one included nodule exists).

    @param seriesUIDs: list of the seriesUIDs of the cases to be processed
    @param results_filename: file with results
    @param outputDir: output directory
    @param allNodules: dictionary with all nodule annotations of all cases, keys of the dictionary are the seriesuids
    @param CADSystemName: name of the CAD system, to be used in filenames and on FROC curve
    @param maxNumberOfCADMarks: when > 0, cap on the CAD marks kept per case (only the most suspicious ones are kept)
    @param performBootstrapping: when True, also run computeFROC_bootstrap and write/plot its curves
    @param numberOfBootstrapSamples: forwarded to computeFROC_bootstrap
    @param confidence: forwarded to computeFROC_bootstrap
    @return: tuple (fps, sens, thresholds, fps_bs_itp, sens_bs_mean, sens_bs_lb, sens_bs_up);
             the last four entries are None when performBootstrapping is False
    '''

    # The report files are opened through context managers so they are
    # flushed and closed even if the evaluation raises half-way through.
    with open(os.path.join(outputDir, 'CADAnalysis.txt'),
              'w') as nodOutputfile:
        nodOutputfile.write("\n")
        nodOutputfile.write((60 * "*") + "\n")
        nodOutputfile.write("CAD Analysis: %s\n" % CADSystemName)
        nodOutputfile.write((60 * "*") + "\n")
        nodOutputfile.write("\n")

        results = csvTools.readCSV(results_filename)

        # Group the result rows by seriesuid in a single pass over the file,
        # instead of rescanning every row once per case.
        header = None
        rowsBySeries = {}
        if len(seriesUIDs) > 0:
            header = results[0]
            dataRows = results[1:]
            if len(dataRows) > 0:
                seriesuidColumn = header.index(seriesuid_label)
                for result in dataRows:
                    rowsBySeries.setdefault(result[seriesuidColumn],
                                            []).append(result)

        allCandsCAD = {}

        for seriesuid in seriesUIDs:

            # collect the candidates of this case from the result rows;
            # candidate IDs count up in file order within the case
            nodules = {}
            for candidateID, result in enumerate(
                    rowsBySeries.get(seriesuid, [])):
                nodule = getNodule(result, header)
                nodule.candidateID = candidateID
                nodules[candidateID] = nodule

            if maxNumberOfCADMarks > 0 and len(nodules) > maxNumberOfCADMarks:
                # limit the number of CAD marks, only keep the most
                # suspicious ones
                probs = sorted(
                    (float(noduletemp.CADprobability)
                     for noduletemp in nodules.values()),
                    reverse=True)  # sort from large to small
                probThreshold = probs[maxNumberOfCADMarks]
                nodules2 = {}
                for keytemp, noduletemp in nodules.items():
                    if len(nodules2) >= maxNumberOfCADMarks:
                        break
                    # strict comparison: marks tied with the threshold
                    # probability are dropped as well
                    if float(noduletemp.CADprobability) > probThreshold:
                        nodules2[keytemp] = noduletemp

                nodules = nodules2

            print('adding candidates: ' + seriesuid)
            allCandsCAD[seriesuid] = nodules

        # --- iterate over all cases (seriesUIDs) and determine how often a
        # nodule annotation is not covered by a candidate

        # initialize some variables to be used in the loop
        candTPs = 0
        candFPs = 0
        candFNs = 0
        candTNs = 0  # never incremented in this function; reported as 0
        totalNumberOfCands = 0
        totalNumberOfNodules = 0
        doubleCandidatesIgnored = 0
        irrelevantCandidates = 0
        minProbValue = -1000000000.0  # sentinel probability recorded for missed nodules
        FROCGTList = []
        FROCProbList = []
        FPDivisorList = []
        excludeList = []
        FROCtoNoduleMap = []  # only used in the length sanity check below
        ignoredCADMarksList = []  # collected but not written anywhere in this function

        with open(
                os.path.join(outputDir,
                             "nodulesWithoutCandidate_%s.txt" % CADSystemName),
                'w') as nodNoCandFile:

            # -- loop over the cases
            for seriesuid in seriesUIDs:
                # get the candidates for this case
                candidates = allCandsCAD[seriesuid]

                # add to the total number of candidates
                totalNumberOfCands += len(candidates)

                # make a copy from which matched candidates are deleted;
                # whatever remains at the end is a false positive
                candidates2 = candidates.copy()

                # get the nodule annotations on this case
                try:
                    noduleAnnots = allNodules[seriesuid]
                except KeyError:
                    noduleAnnots = []

                # - loop over the nodule annotations
                for noduleAnnot in noduleAnnots:
                    isIncluded = noduleAnnot.state == "Included"

                    # increment the number of nodules
                    if isIncluded:
                        totalNumberOfNodules += 1

                    x = float(noduleAnnot.coordX)
                    y = float(noduleAnnot.coordY)
                    z = float(noduleAnnot.coordZ)

                    # 2. Check if the nodule annotation is covered by a candidate
                    # A nodule is marked as detected when the center of mass of the candidate is within a distance R of
                    # the center of the nodule. In order to ensure that the CAD mark is displayed within the nodule on the
                    # CT scan, we set R to be the radius of the nodule size.
                    diameter = float(noduleAnnot.diameter_mm)
                    if diameter < 0.0:
                        # negative diameters are replaced by a 10.0 fallback
                        diameter = 10.0
                    radiusSquared = pow((diameter / 2.0), 2.0)

                    noduleMatches = []
                    for key, candidate in candidates.items():
                        x2 = float(candidate.coordX)
                        y2 = float(candidate.coordY)
                        z2 = float(candidate.coordZ)
                        # squared distance, compared against the squared radius
                        dist = math.pow(x - x2, 2.) + math.pow(
                            y - y2, 2.) + math.pow(z - z2, 2.)
                        if dist >= radiusSquared:
                            continue

                        if isIncluded:
                            noduleMatches.append(candidate)
                            if key not in candidates2:
                                print(
                                    "This is strange: CAD mark %s detected two nodules! Check for overlapping nodule annotations, SeriesUID: %s, nodule Annot ID: %s"
                                    % (str(candidate.id), seriesuid,
                                       str(noduleAnnot.id)))
                            else:
                                del candidates2[key]
                        elif noduleAnnot.state == "Excluded":  # an excluded nodule
                            # delete marks on excluded nodules so they don't
                            # count as false positives
                            if bOtherNodulesAsIrrelevant and key in candidates2:
                                irrelevantCandidates += 1
                                ignoredCADMarksList.append(
                                    "%s,%s,%s,%s,%s,%s,%.9f" %
                                    (seriesuid, -1, candidate.coordX,
                                     candidate.coordY, candidate.coordZ,
                                     str(candidate.id),
                                     float(candidate.CADprobability)))
                                del candidates2[key]

                    if len(noduleMatches) > 1:  # double detection
                        doubleCandidatesIgnored += (len(noduleMatches) - 1)

                    if not isIncluded:
                        # only included nodules enter the FROC analysis;
                        # marks on excluded ones were already removed from
                        # candidates2 above, so they are ignored, not FPs
                        continue

                    if noduleMatches:
                        # use the matching mark with the highest probability
                        # for the FROC analysis (first one wins on ties)
                        bestCandidate = None
                        maxProb = None
                        for match in noduleMatches:
                            prob = float(match.CADprobability)
                            if maxProb is None or prob > maxProb:
                                maxProb = prob
                                bestCandidate = match

                        FROCGTList.append(1.0)
                        FROCProbList.append(maxProb)
                        FPDivisorList.append(seriesuid)
                        excludeList.append(False)
                        # record the mark that produced maxProb (previously
                        # the last matched mark was recorded here instead)
                        FROCtoNoduleMap.append(
                            "%s,%s,%s,%s,%s,%.9f,%s,%.9f" %
                            (seriesuid, noduleAnnot.id, noduleAnnot.coordX,
                             noduleAnnot.coordY, noduleAnnot.coordZ,
                             float(noduleAnnot.diameter_mm),
                             str(bestCandidate.id), maxProb))
                        candTPs += 1
                    else:
                        candFNs += 1
                        # append a positive sample with the sentinel
                        # probability so the miss is part of the FROC vectors
                        FROCGTList.append(1.0)
                        FROCProbList.append(minProbValue)
                        FPDivisorList.append(seriesuid)
                        excludeList.append(True)
                        FROCtoNoduleMap.append(
                            "%s,%s,%s,%s,%s,%.9f,%s,%s" %
                            (seriesuid, noduleAnnot.id, noduleAnnot.coordX,
                             noduleAnnot.coordY, noduleAnnot.coordZ,
                             float(noduleAnnot.diameter_mm), int(-1), "NA"))
                        nodNoCandFile.write(
                            "%s,%s,%s,%s,%s,%.9f,%s\n" %
                            (seriesuid, noduleAnnot.id, noduleAnnot.coordX,
                             noduleAnnot.coordY, noduleAnnot.coordZ,
                             float(noduleAnnot.diameter_mm), str(-1)))

                # add all remaining (unmatched) candidates as false positives
                for key, candidate3 in candidates2.items():
                    candFPs += 1
                    FROCGTList.append(0.0)
                    FROCProbList.append(float(candidate3.CADprobability))
                    FPDivisorList.append(seriesuid)
                    excludeList.append(False)
                    FROCtoNoduleMap.append(
                        "%s,%s,%s,%s,%s,%s,%.9f" %
                        (seriesuid, -1, candidate3.coordX, candidate3.coordY,
                         candidate3.coordZ, str(candidate3.id),
                         float(candidate3.CADprobability)))

        # NOTE: despite the wording of the message, a mismatch is only
        # logged; the evaluation continues.
        if not (len(FROCGTList) == len(FROCProbList)
                and len(FROCGTList) == len(FPDivisorList)
                and len(FROCGTList) == len(FROCtoNoduleMap)
                and len(FROCGTList) == len(excludeList)):
            nodOutputfile.write(
                "Length of FROC vectors not the same, this should never happen! Aborting..\n"
            )

        nodOutputfile.write("Candidate detection results:\n")
        nodOutputfile.write("    True positives: %d\n" % candTPs)
        nodOutputfile.write("    False positives: %d\n" % candFPs)
        nodOutputfile.write("    False negatives: %d\n" % candFNs)
        nodOutputfile.write("    True negatives: %d\n" % candTNs)
        nodOutputfile.write("    Total number of candidates: %d\n" %
                            totalNumberOfCands)
        nodOutputfile.write("    Total number of nodules: %d\n" %
                            totalNumberOfNodules)

        nodOutputfile.write(
            "    Ignored candidates on excluded nodules: %d\n" %
            irrelevantCandidates)
        nodOutputfile.write(
            "    Ignored candidates which were double detections on a nodule: %d\n"
            % doubleCandidatesIgnored)
        if int(totalNumberOfNodules) == 0:
            nodOutputfile.write("    Sensitivity: 0.0\n")
        else:
            nodOutputfile.write(
                "    Sensitivity: %.9f\n" %
                (float(candTPs) / float(totalNumberOfNodules)))
        nodOutputfile.write(
            "    Average number of candidates per scan: %.9f\n" %
            (float(totalNumberOfCands) / float(len(seriesUIDs))))

    # compute FROC
    fps, sens, thresholds = computeFROC(FROCGTList, FROCProbList,
                                        len(seriesUIDs), excludeList)

    if performBootstrapping:
        fps_bs_itp, sens_bs_mean, sens_bs_lb, sens_bs_up = computeFROC_bootstrap(
            FROCGTList,
            FROCProbList,
            FPDivisorList,
            seriesUIDs,
            excludeList,
            numberOfBootstrapSamples=numberOfBootstrapSamples,
            confidence=confidence)

    # Write FROC curve
    with open(os.path.join(outputDir, "froc_%s.txt" % CADSystemName),
              'w') as f:
        for i in range(len(sens)):
            f.write("%.9f,%.9f,%.9f\n" % (fps[i], sens[i], thresholds[i]))

    # Write FROC vectors to disk as well (both lists are appended in lockstep)
    with open(
            os.path.join(outputDir,
                         "froc_gt_prob_vectors_%s.csv" % CADSystemName),
            'w') as f:
        for groundTruth, probability in zip(FROCGTList, FROCProbList):
            f.write("%d,%.9f\n" % (groundTruth, probability))

    # resample the curve on a regular grid over the plotted FP range
    fps_itp = np.linspace(FROC_minX, FROC_maxX, num=10001)

    sens_itp = np.interp(fps_itp, fps, sens)

    if performBootstrapping:
        # Write mean, lower, and upper bound curves to disk
        with open(
                os.path.join(outputDir,
                             "froc_%s_bootstrapping.csv" % CADSystemName),
                'w') as f:
            f.write(
                "FPrate,Sensivity[Mean],Sensivity[Lower bound],Sensivity[Upper bound]\n"
            )
            for i in range(len(fps_bs_itp)):
                f.write("%.9f,%.9f,%.9f,%.9f\n" %
                        (fps_bs_itp[i], sens_bs_mean[i], sens_bs_lb[i],
                         sens_bs_up[i]))
    else:
        fps_bs_itp = None
        sens_bs_mean = None
        sens_bs_lb = None
        sens_bs_up = None

    # create FROC graphs
    if int(totalNumberOfNodules) > 0:
        plt.figure()
        ax = plt.gca()
        clr = 'b'
        plt.plot(fps_itp,
                 sens_itp,
                 color=clr,
                 label="%s" % CADSystemName,
                 lw=2)
        if performBootstrapping:
            plt.plot(fps_bs_itp, sens_bs_mean, color=clr, ls='--')
            plt.plot(fps_bs_itp, sens_bs_lb, color=clr, ls=':')
            plt.plot(fps_bs_itp, sens_bs_up, color=clr, ls=':')
            ax.fill_between(fps_bs_itp,
                            sens_bs_lb,
                            sens_bs_up,
                            facecolor=clr,
                            alpha=0.05)
        plt.xlim(FROC_minX, FROC_maxX)
        plt.ylim(0, 1)
        plt.xlabel('Average number of false positives per scan')
        plt.ylabel('Sensitivity')
        plt.legend(loc='lower right')
        plt.title('FROC performance - %s' % (CADSystemName))

        if bLogPlot:
            # NOTE(review): `basex` (and `b` in plt.grid below) are legacy
            # matplotlib keyword names; newer releases spell them `base` and
            # `visible` - confirm against the installed matplotlib version.
            plt.xscale('log', basex=2)
            ax.xaxis.set_major_formatter(
                FixedFormatter([0.125, 0.25, 0.5, 1, 2, 4, 8]))

        # set your ticks manually
        ax.xaxis.set_ticks([0.125, 0.25, 0.5, 1, 2, 4, 8])
        ax.yaxis.set_ticks(np.arange(0, 1.1, 0.1))
        plt.grid(b=True, which='both')
        plt.tight_layout()

        plt.savefig(os.path.join(outputDir, "froc_%s.png" % CADSystemName),
                    bbox_inches=0,
                    dpi=300)

    return (fps, sens, thresholds, fps_bs_itp, sens_bs_mean, sens_bs_lb,
            sens_bs_up)

Example #8

Show file

File: noduleCADEvaluationLUNA16.py Project: ericsolo/python

def evaluateCAD(seriesUIDs, results_filename, outputDir, allNodules, CADSystemName, maxNumberOfCADMarks=-1,
                performBootstrapping=False,numberOfBootstrapSamples=1000,confidence = 0.95):
    '''
    function to evaluate a CAD algorithm

    Files written to outputDir: CADAnalysis.txt (summary report),
    nodulesWithoutCandidate_<CADSystemName>.txt (missed nodules),
    froc_<CADSystemName>.txt, froc_gt_prob_vectors_<CADSystemName>.csv,
    froc_<CADSystemName>_bootstrapping.csv (only when bootstrapping) and
    froc_<CADSystemName>.png (only when at least one included nodule exists).

    @param seriesUIDs: list of the seriesUIDs of the cases to be processed
    @param results_filename: file with results
    @param outputDir: output directory
    @param allNodules: dictionary with all nodule annotations of all cases, keys of the dictionary are the seriesuids
    @param CADSystemName: name of the CAD system, to be used in filenames and on FROC curve
    @param maxNumberOfCADMarks: when > 0, cap on the CAD marks kept per case (only the most suspicious ones are kept)
    @param performBootstrapping: when True, also run computeFROC_bootstrap and write/plot its curves
    @param numberOfBootstrapSamples: forwarded to computeFROC_bootstrap
    @param confidence: forwarded to computeFROC_bootstrap
    @return: tuple (fps, sens, thresholds, fps_bs_itp, sens_bs_mean, sens_bs_lb, sens_bs_up);
             the four bootstrap entries are None when performBootstrapping is False
    '''

    analysisPath = os.path.join(outputDir, 'CADAnalysis.txt')
    missedNodulesPath = os.path.join(outputDir, "nodulesWithoutCandidate_%s.txt" % CADSystemName)

    # Context managers guarantee both report files are flushed and closed,
    # including when the evaluation raises part-way through.
    with open(analysisPath, 'w') as nodOutputfile:
        nodOutputfile.write("\n")
        nodOutputfile.write((60 * "*") + "\n")
        nodOutputfile.write("CAD Analysis: %s\n" % CADSystemName)
        nodOutputfile.write((60 * "*") + "\n")
        nodOutputfile.write("\n")

        results = csvTools.readCSV(results_filename)

        # Bucket the result rows per seriesuid once, rather than walking the
        # whole results table again for every case.
        header = None
        rowsBySeries = {}
        if len(seriesUIDs) > 0:
            header = results[0]
            dataRows = results[1:]
            if len(dataRows) > 0:
                seriesuidColumn = header.index(seriesuid_label)
                for result in dataRows:
                    rowsBySeries.setdefault(result[seriesuidColumn], []).append(result)

        allCandsCAD = {}

        for seriesuid in seriesUIDs:

            # collect candidates from result file; IDs restart at 0 for each case
            nodules = {}
            for candidateID, result in enumerate(rowsBySeries.get(seriesuid, [])):
                nodule = getNodule(result, header)
                nodule.candidateID = candidateID
                nodules[candidateID] = nodule

            if maxNumberOfCADMarks > 0 and len(nodules) > maxNumberOfCADMarks:
                # number of CAD marks is capped, only keep the most suspicious marks
                probs = sorted((float(noduletemp.CADprobability) for noduletemp in nodules.values()),
                               reverse=True) # sort from large to small
                probThreshold = probs[maxNumberOfCADMarks]
                nodules2 = {}
                for keytemp, noduletemp in nodules.items():
                    if len(nodules2) >= maxNumberOfCADMarks:
                        break
                    # strictly greater: marks tied with the threshold are dropped too
                    if float(noduletemp.CADprobability) > probThreshold:
                        nodules2[keytemp] = noduletemp

                nodules = nodules2

            print('adding candidates: ' + seriesuid)
            allCandsCAD[seriesuid] = nodules

        # --- iterate over all cases (seriesUIDs) and determine how
        # often a nodule annotation is not covered by a candidate

        # initialize some variables to be used in the loop
        candTPs = 0
        candFPs = 0
        candFNs = 0
        candTNs = 0 # never incremented in this function; reported as 0
        totalNumberOfCands = 0
        totalNumberOfNodules = 0
        doubleCandidatesIgnored = 0
        irrelevantCandidates = 0
        minProbValue = -1000000000.0 # sentinel probability recorded for missed nodules
        FROCGTList = []
        FROCProbList = []
        FPDivisorList = []
        excludeList = []
        FROCtoNoduleMap = [] # only used in the length sanity check below
        ignoredCADMarksList = [] # collected but not written anywhere in this function

        with open(missedNodulesPath, 'w') as nodNoCandFile:

            # -- loop over the cases
            for seriesuid in seriesUIDs:
                # get the candidates for this case
                candidates = allCandsCAD[seriesuid]

                # add to the total number of candidates
                totalNumberOfCands += len(candidates)

                # make a copy in which matched items will be deleted;
                # what is left afterwards counts as false positives
                candidates2 = candidates.copy()

                # get the nodule annotations on this case
                try:
                    noduleAnnots = allNodules[seriesuid]
                except KeyError:
                    noduleAnnots = []

                # - loop over the nodule annotations
                for noduleAnnot in noduleAnnots:
                    isIncluded = noduleAnnot.state == "Included"

                    # increment the number of nodules
                    if isIncluded:
                        totalNumberOfNodules += 1

                    x = float(noduleAnnot.coordX)
                    y = float(noduleAnnot.coordY)
                    z = float(noduleAnnot.coordZ)

                    # 2. Check if the nodule annotation is covered by a candidate
                    # A nodule is marked as detected when the center of mass of the candidate is within a distance R of
                    # the center of the nodule. In order to ensure that the CAD mark is displayed within the nodule on the
                    # CT scan, we set R to be the radius of the nodule size.
                    diameter = float(noduleAnnot.diameter_mm)
                    if diameter < 0.0:
                        # negative diameters are replaced by a 10.0 fallback
                        diameter = 10.0
                    radiusSquared = pow((diameter / 2.0), 2.0)

                    noduleMatches = []
                    for key, candidate in candidates.items():
                        x2 = float(candidate.coordX)
                        y2 = float(candidate.coordY)
                        z2 = float(candidate.coordZ)
                        # squared distance, compared against the squared radius
                        dist = math.pow(x - x2, 2.) + math.pow(y - y2, 2.) + math.pow(z - z2, 2.)
                        if dist >= radiusSquared:
                            continue

                        if isIncluded:
                            noduleMatches.append(candidate)
                            if key not in candidates2:
                                print("This is strange: CAD mark %s detected two nodules! Check for overlapping nodule annotations, SeriesUID: %s, nodule Annot ID: %s" % (str(candidate.id), seriesuid, str(noduleAnnot.id)))
                            else:
                                del candidates2[key]
                        elif noduleAnnot.state == "Excluded": # an excluded nodule
                            # delete marks on excluded nodules so they don't count as false positives
                            if bOtherNodulesAsIrrelevant and key in candidates2:
                                irrelevantCandidates += 1
                                ignoredCADMarksList.append("%s,%s,%s,%s,%s,%s,%.9f" % (seriesuid, -1, candidate.coordX, candidate.coordY, candidate.coordZ, str(candidate.id), float(candidate.CADprobability)))
                                del candidates2[key]

                    if len(noduleMatches) > 1: # double detection
                        doubleCandidatesIgnored += (len(noduleMatches) - 1)

                    if not isIncluded:
                        # only include it for FROC analysis if it is included
                        # otherwise, the candidate will not be counted as FP, but ignored in the
                        # analysis since it has been deleted from the candidates2 copy above
                        continue

                    if noduleMatches:
                        # append the sample with the highest probability for the FROC analysis
                        # (the first mark wins when probabilities tie)
                        bestCandidate = None
                        maxProb = None
                        for match in noduleMatches:
                            prob = float(match.CADprobability)
                            if maxProb is None or prob > maxProb:
                                maxProb = prob
                                bestCandidate = match

                        FROCGTList.append(1.0)
                        FROCProbList.append(maxProb)
                        FPDivisorList.append(seriesuid)
                        excludeList.append(False)
                        # record the mark that produced maxProb (the last matched mark used to be recorded here)
                        FROCtoNoduleMap.append("%s,%s,%s,%s,%s,%.9f,%s,%.9f" % (seriesuid, noduleAnnot.id, noduleAnnot.coordX, noduleAnnot.coordY, noduleAnnot.coordZ, float(noduleAnnot.diameter_mm), str(bestCandidate.id), maxProb))
                        candTPs += 1
                    else:
                        candFNs += 1
                        # append a positive sample with the lowest probability, such that this is added in the FROC analysis
                        FROCGTList.append(1.0)
                        FROCProbList.append(minProbValue)
                        FPDivisorList.append(seriesuid)
                        excludeList.append(True)
                        FROCtoNoduleMap.append("%s,%s,%s,%s,%s,%.9f,%s,%s" % (seriesuid, noduleAnnot.id, noduleAnnot.coordX, noduleAnnot.coordY, noduleAnnot.coordZ, float(noduleAnnot.diameter_mm), int(-1), "NA"))
                        nodNoCandFile.write("%s,%s,%s,%s,%s,%.9f,%s\n" % (seriesuid, noduleAnnot.id, noduleAnnot.coordX, noduleAnnot.coordY, noduleAnnot.coordZ, float(noduleAnnot.diameter_mm), str(-1)))

                # add all false positives to the vectors
                for key, candidate3 in candidates2.items():
                    candFPs += 1
                    FROCGTList.append(0.0)
                    FROCProbList.append(float(candidate3.CADprobability))
                    FPDivisorList.append(seriesuid)
                    excludeList.append(False)
                    FROCtoNoduleMap.append("%s,%s,%s,%s,%s,%s,%.9f" % (seriesuid, -1, candidate3.coordX, candidate3.coordY, candidate3.coordZ, str(candidate3.id), float(candidate3.CADprobability)))

        # NOTE: despite the wording of the message, a mismatch is only logged;
        # the evaluation carries on afterwards.
        vectorLengths = [len(FROCProbList), len(FPDivisorList), len(FROCtoNoduleMap), len(excludeList)]
        if any(length != len(FROCGTList) for length in vectorLengths):
            nodOutputfile.write("Length of FROC vectors not the same, this should never happen! Aborting..\n")

        nodOutputfile.write("Candidate detection results:\n")
        nodOutputfile.write("    True positives: %d\n" % candTPs)
        nodOutputfile.write("    False positives: %d\n" % candFPs)
        nodOutputfile.write("    False negatives: %d\n" % candFNs)
        nodOutputfile.write("    True negatives: %d\n" % candTNs)
        nodOutputfile.write("    Total number of candidates: %d\n" % totalNumberOfCands)
        nodOutputfile.write("    Total number of nodules: %d\n" % totalNumberOfNodules)

        nodOutputfile.write("    Ignored candidates on excluded nodules: %d\n" % irrelevantCandidates)
        nodOutputfile.write("    Ignored candidates which were double detections on a nodule: %d\n" % doubleCandidatesIgnored)
        if int(totalNumberOfNodules) == 0:
            nodOutputfile.write("    Sensitivity: 0.0\n")
        else:
            nodOutputfile.write("    Sensitivity: %.9f\n" % (float(candTPs) / float(totalNumberOfNodules)))
        nodOutputfile.write("    Average number of candidates per scan: %.9f\n" % (float(totalNumberOfCands) / float(len(seriesUIDs))))

    # compute FROC
    fps, sens, thresholds = computeFROC(FROCGTList,FROCProbList,len(seriesUIDs),excludeList)

    if performBootstrapping:
        fps_bs_itp,sens_bs_mean,sens_bs_lb,sens_bs_up = computeFROC_bootstrap(FROCGTList,FROCProbList,FPDivisorList,seriesUIDs,excludeList,
                                                                  numberOfBootstrapSamples=numberOfBootstrapSamples, confidence = confidence)

    # Write FROC curve
    with open(os.path.join(outputDir, "froc_%s.txt" % CADSystemName), 'w') as f:
        for i in range(len(sens)):
            f.write("%.9f,%.9f,%.9f\n" % (fps[i], sens[i], thresholds[i]))

    # Write FROC vectors to disk as well (both lists are appended in lockstep)
    with open(os.path.join(outputDir, "froc_gt_prob_vectors_%s.csv" % CADSystemName), 'w') as f:
        for groundTruth, probability in zip(FROCGTList, FROCProbList):
            f.write("%d,%.9f\n" % (groundTruth, probability))

    # resample the curve on a regular grid over the plotted FP range
    fps_itp = np.linspace(FROC_minX, FROC_maxX, num=10001)

    sens_itp = np.interp(fps_itp, fps, sens)

    if performBootstrapping:
        # Write mean, lower, and upper bound curves to disk
        with open(os.path.join(outputDir, "froc_%s_bootstrapping.csv" % CADSystemName), 'w') as f:
            f.write("FPrate,Sensivity[Mean],Sensivity[Lower bound],Sensivity[Upper bound]\n")
            for i in range(len(fps_bs_itp)):
                f.write("%.9f,%.9f,%.9f,%.9f\n" % (fps_bs_itp[i], sens_bs_mean[i], sens_bs_lb[i], sens_bs_up[i]))
    else:
        fps_bs_itp = None
        sens_bs_mean = None
        sens_bs_lb = None
        sens_bs_up = None

    # create FROC graphs
    if int(totalNumberOfNodules) > 0:
        plt.figure()
        ax = plt.gca()
        clr = 'b'
        plt.plot(fps_itp, sens_itp, color=clr, label="%s" % CADSystemName, lw=2)
        if performBootstrapping:
            plt.plot(fps_bs_itp, sens_bs_mean, color=clr, ls='--')
            plt.plot(fps_bs_itp, sens_bs_lb, color=clr, ls=':')
            plt.plot(fps_bs_itp, sens_bs_up, color=clr, ls=':')
            ax.fill_between(fps_bs_itp, sens_bs_lb, sens_bs_up, facecolor=clr, alpha=0.05)
        plt.xlim(FROC_minX, FROC_maxX)
        plt.ylim(0, 1)
        plt.xlabel('Average number of false positives per scan')
        plt.ylabel('Sensitivity')
        plt.legend(loc='lower right')
        plt.title('FROC performance - %s' % (CADSystemName))

        if bLogPlot:
            # NOTE(review): `basex` (and `b` in plt.grid below) are legacy
            # matplotlib keyword names; newer releases spell them `base` and
            # `visible` - confirm against the installed matplotlib version.
            plt.xscale('log', basex=2)
            ax.xaxis.set_major_formatter(FixedFormatter([0.125,0.25,0.5,1,2,4,8]))

        # set your ticks manually
        ax.xaxis.set_ticks([0.125,0.25,0.5,1,2,4,8])
        ax.yaxis.set_ticks(np.arange(0, 1.1, 0.1))
        plt.grid(b=True, which='both')
        plt.tight_layout()

        plt.savefig(os.path.join(outputDir, "froc_%s.png" % CADSystemName), bbox_inches=0, dpi=300)

    return (fps, sens, thresholds, fps_bs_itp, sens_bs_mean, sens_bs_lb, sens_bs_up)