def getScan():
    metadatas = csvTools.readCSV('files/LIDC-IDRI_MetaData.csv')
    CTList = []
    Patient_Id = []
    Series_UID = []
    sign = 0
    for metadata in metadatas:
        # row 0 is the CSV header; skip it
        if sign != 0:
            # keep only CT series (LIDC also contains X-ray)
            if metadata[4] == 'CT':
                ctdata = []
                ctdata.append(metadata[1])   # Patient Id (column 1)
                Patient_Id.append(metadata[1])
                ctdata.append(metadata[9])   # Series UID (column 9)
                Series_UID.append(metadata[9])

                CTList.append(ctdata)
        sign += 1
    # csvTools.writeCSV('PatientId_SeriesUID.csv', CTList)

    patients = os.listdir(basedir)
    scan = []
    # DOI layout is PatientId/StudyUID/SeriesUID; keep series whose UID is in the CT list
    for patient in patients:
        idlist1 = os.listdir(basedir + patient)
        for level1 in idlist1:
            idlist2 = os.listdir(basedir + patient + '/' + level1)
            for level2 in idlist2:
                if level2 in Series_UID:
                    temp = patient + '/' + level1 + '/' + level2
                    scan.append(temp)
    return scan
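
# A minimal usage sketch, assuming basedir points at the LIDC DOI root as in
# the other snippets here; each entry returned by getScan() is a
# 'PatientId/StudyUID/SeriesUID' path relative to basedir.
scans = getScan()
for rel_path in scans[:3]:
    print(os.path.join(basedir, rel_path))  # full path to one CT series directory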
Example #2
def collect(annotations_filename,results_filename):
    annotations = csvTools.readCSV(annotations_filename)
    results = csvTools.readCSV(results_filename)
    seriesUIDs = []
    header = results[0]
    # get the seriesUIDs from the prediction results
    for result in results[1:]:
        seriesUIDs.append(result[header.index(seriesuid_label)])

    seriesUIDs = sorted(set(seriesUIDs), key=seriesUIDs.index)  # dedupe, keeping first-seen order
    for i in range(len(seriesUIDs)):
        print(seriesUIDs[i])
    # get the nodules for these UIDs from the annotations
    allNodules = collectNoduleAnnotations(annotations, seriesUIDs)

    return (allNodules, seriesUIDs)
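
# The sorted(set(...), key=list.index) idiom above keeps first-seen order but
# rescans the list for every element (O(n^2)); on Python 3.7+ an equivalent
# linear-time dedupe would be:
#     seriesUIDs = list(dict.fromkeys(seriesUIDs))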
def get_images_and_labels(data_root_dir):
    '''
    Modified by WangQL
    2020/5/27
    '''
    csv_files = os.listdir(data_root_dir)
    data = []
    for one_csv in csv_files:
        data += csvTools.readCSV(os.path.join(data_root_dir, one_csv))

    all_image_path = []   # despite the name, this holds the normalized image arrays
    all_image_label = []

    data_path = 'D:/Data/nodules/ori_hu/'
    npy_files = os.listdir(data_path)

    for one_data in data:
        id = one_data[0]
        # binary label: 'low' malignancy -> 0, otherwise -> 1
        if 'low' in one_data[1]:
            label = [0]
        else:
            label = [1]
        # print(id)
        npy = np.load(os.path.join(data_path, one_data[0] + '.npy'))
        npy = truncate_hu(npy)
        nor_npy = normalization(npy)
        # print(nor_npy)
        all_image_path.append(nor_npy)
        all_image_label.append(label)
        # duplicate 'high' samples once to balance the classes
        if 'high' in one_data[1]:
            all_image_path.append(nor_npy)
            all_image_label.append(label)

    # # get all images' paths (format: string)
    # data_root = pathlib.Path(data_root_dir)
    # all_image_path = [str(path) for path in list(data_root.glob('*/*'))]
    # # get labels' names
    # label_names = sorted(item.name for item in data_root.glob('*/'))
    # # dict: {label : index}
    # label_to_index = dict((label, index) for index, label in enumerate(label_names))
    # # get all images' labels
    # all_image_label = [label_to_index[pathlib.Path(single_image_path).parent.name] for single_image_path in all_image_path]

    # return all_image_path, all_image_label

    return all_image_path, all_image_label
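
# truncate_hu and normalization are called above but not defined in this
# snippet; a minimal sketch, assuming the usual HU windowing and min-max
# scaling (the exact bounds are an assumption):
import numpy as np

def truncate_hu(image, hu_min=-1000.0, hu_max=400.0):
    # clip Hounsfield units to a lung-style window
    return np.clip(image, hu_min, hu_max)

def normalization(image):
    # min-max scale to [0, 1], guarding against a constant image
    rng = image.max() - image.min()
    return (image - image.min()) / rng if rng > 0 else np.zeros_like(image)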
    def __init__(self):
        self.wordModel = Word2Vec.load('./txt/word2vec.model')
        self.transformedlistd = csvTools.readCSV('./txt/transformedlistd.csv')
        sentences = []

        for onecontext in self.transformedlistd:
            words = onecontext[3].split(' ')
            sentences.append(words)

        model = Word2Vec(sentences, size=50, window=3, min_count=1, workers=2)
        self.model = model
        # multi-word vector lookup, gensim < 4.0 style ('上腹' upper abdomen, '疼痛' pain, '周' week)
        vector = model['上腹', '疼痛', '周']
    def load_lidc(self):
        '''
        LIDC data size: 128, 128, 1
        '''
        data_dir = os.path.join('./huimages/')
        mask_dir = os.path.join('./masks/')
        noduleinfo = csvTools.readCSV('files/malignancy.csv')

        self.images_list = os.listdir(data_dir)
        self.masks_list = os.listdir(mask_dir)
        self.noduleinfo = noduleinfo

        print('number of images_list: ', len(self.images_list))
        print('number of mask_dir: ', len(self.masks_list))
        print('number of noduleinfo: ', len(self.noduleinfo))

        trainingdata = []
       
        for onenodule in noduleinfo:
            scanid = onenodule[1]
            scanid = caseid_to_scanid(int(scanid))
            noduleid = onenodule[3]
            scan_list_id = onenodule[2]

            nodule_image_name = str(scanid) + '_' + str(noduleid) + '_' + str(scan_list_id) + '.npy'
            if nodule_image_name in self.images_list:
                # attribute labels (column indices per files/malignancy.csv)
                lobulation = onenodule[28]
                spiculation = onenodule[27]
                malignancy = onenodule[29]

                # keep nodules with valid (non-negative) ratings
                if float(lobulation) >= 0 or float(spiculation) >= 0:
                    trainingdata.append(onenodule)
                # data augmentation: add shifted copies of strongly
                # lobulated/spiculated nodules
                if float(lobulation) >= 3 or float(spiculation) >= 3:
                    import copy
                    left = copy.deepcopy(onenodule)
                    right = copy.deepcopy(onenodule)
                    down = copy.deepcopy(onenodule)
                    left.append('left')
                    right.append('right')
                    down.append('down')
                    trainingdata.append(left)
                    trainingdata.append(right)
                    trainingdata.append(down)

        
        print('number of training data: ', len(trainingdata)) 
        trainingdata = np.array(trainingdata)
        shuffle(trainingdata)
        self.data = trainingdata
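
# caseid_to_scanid is called above but not defined in this snippet; a
# plausible sketch, assuming the LIDC-IDRI folder naming seen elsewhere in
# these examples:
#     def caseid_to_scanid(caseid):
#         return 'LIDC-IDRI-%04d' % caseid  # e.g. 1 -> 'LIDC-IDRI-0001'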
Example #6

def getAve(numlist):
    # average a list of (string) ratings; LIDC nodules normally carry four
    # reader ratings, so warn when the count differs
    ave = 0
    if len(numlist) != 4:
        print(numlist)
    for num in numlist:
        ave += int(num)
    if len(numlist) == 0:
        ave = 0
    else:
        ave = ave / len(numlist)
    return ave
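
# For example, averaging one nodule's four reader ratings:
#     getAve(['3', '4', '2', '5'])  ->  3.5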


nodule_chara = csvTools.readCSV(csvdir + 'nodule_chara_list.csv')

list3 = csvTools.readCSV(csvdir + 'list3.2.csv')
list3 = list3[1:len(list3)]
count = 0
sign = 0

list3_2 = []
for item in list3:
    id = item[0]
    case = item[1]
    case = caseid_to_scanid(int(case))

    ldlist = []
    # collect the four reader-ID columns for this nodule
    for ids in item[10:14]:
        ldlist.append(ids)
Example #7
import os
import pandas as pd
import pydicom
import scipy.misc
import cv2
import numpy as np
import glob

import csvTools
import xmlopt

basedir = '/home/wangqiuli/Data/LIDC/DOI/'
resdir = 'noduleimage/'
imagedir = 'ori_images/'
maskdir = 'ori_masks/'

noduleinfo = csvTools.readCSV('files/malignancy.csv')
idscaninfo = csvTools.readCSV('files/id_scan.txt')
maskinfo = glob.glob(maskdir + '*')  # a bare directory name would match only the directory itself


def get_pixels_hu(ds):
    # convert raw pixel values to Hounsfield units using the DICOM rescale slope/intercept
    image = ds.pixel_array
    image = np.array(image, dtype=np.float32)
    intercept = ds.RescaleIntercept
    slope = ds.RescaleSlope
    image = image * slope
    image += intercept
    return image
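
# A minimal usage sketch; 'slice.dcm' is a placeholder path for any CT slice:
#     ds = pydicom.dcmread('slice.dcm')
#     hu_image = get_pixels_hu(ds)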


caselist = os.listdir(imagedir)
Example #8
basedir = 'D:/Data/LIDC-IDRI/DOI/'
csvdir= 'files/list3.2.csv'

import Func_get_PatientId_SeriesUID

listscan = Func_get_PatientId_SeriesUID.getScan()
# print(listscan[0])
# print(len(listscan))
'''
LIDC-IDRI-0001/1.3.6.1.4.1.14519.5.2.1.6279.6001.298806137288633453246975630178/1.3.6.1.4.1.14519.5.2.1.6279.6001.179049373636438705059720603192
1018

'''

metadatas = csvTools.readCSV(csvdir)
# print(len(metadatas))
'''
['id', 'case', 'scan', 'roi', 'volume', 'eq. diam.', 'x loc.', 'y loc.', 'slice no.', '', 'nodIDs', '', '', '', '', '', '']
'''
metadatas = metadatas[1:len(metadatas)]
# print(metadatas[0])
# print(metadatas[len(metadatas) - 1])
'''
['1', '1', '3000566', '1', '6459.75', '23.107', '317', '367', '43', '', 'IL057_127364', 'Nodule 001', 'MI014_12127', '0', '', '', '']
['2635', '1012', '32231', '1', '122.2', '6.157', '145', '153', '105', '', '0', '201255', '157143', 'Nodule 001', '', '', '']
'''

# store nodule characteristics
nodule_chara_list = []
from config import Config
import tensorflow as tf
import os
import numpy as np
from tqdm import tqdm
from newmodel import RCNNMODEL
import csvTools
import random
import tensorflow.contrib.slim as slim
os.environ["CUDA_VISIBLE_DEVICES"] = "3"

gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.4)
data = Data()

checkpoint_file = 'inception_v3_2016_08_28/inception_v3.ckpt'
traintxt = csvTools.readCSV('./labels/cross2.csv') + csvTools.readCSV(
    './labels/cross3.csv') + csvTools.readCSV(
        './labels/cross4.csv') + csvTools.readCSV('./labels/cross5.csv')
testtxt = csvTools.readCSV('./labels/cross1.csv')
validtxt = csvTools.readCSV('./labels/cross1.csv')

traindata = []
for one in traintxt:
    if len(one) != 0:
        traindata.append(one)

testdata = []
for one in testtxt:
    if len(one) != 0:
        testdata.append(one)
Example #10
# -*- coding:utf-8 -*-

from pyltp import SentenceSplitter
from pyltp import Segmentor
import csvTools
import re
from string import digits
from zhon.hanzi import punctuation
from tqdm import tqdm
import os

segmentor = Segmentor()
segmentor.load_with_lexicon('./ltp/cws.model','lexicon.txt')

contexts2018 = csvTools.readCSV('2018all.csv')
contexts2017 = csvTools.readCSV('2017.csv')
contexts2016 = csvTools.readCSV('2016.csv')

contexts = contexts2016+contexts2017+contexts2018
print(len(contexts))

# birthlist = []
descriptionlist = []
diagnosislist = []
for context in contexts:
    # birth = context[3]
    description = context[6]
    diagnosis = context[7]
    # birthlist.append(birth)
    descriptionlist.append(description)
    diagnosislist.append(diagnosis)
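
# A sketch of the likely next step, tokenizing each collected description with
# the pyltp segmenter loaded above (what is done with the tokens is left open):
segmented = []
for description in descriptionlist:
    words = list(segmentor.segment(description))  # pyltp returns an iterable of tokens
    segmented.append(words)
segmentor.release()  # free the model when done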
Example #11
'''
Created by Wang Qiu Li
6/27/2018
Get the relationship between patient IDs and CT scans (LIDC also contains X-ray series).

PatientId_SeriesUID.csv is for the dataloader. You can get the series related to each patient id.
'''

import csvTools
import os

basedir = 'D:/Data/LIDC-IDRI/DOI/'

metadatas = csvTools.readCSV('csv_xls/LIDC-IDRI_MetaData.csv')
print(len(metadatas))
# print(metadatas[0])
'''
10 columns
['Collection', 'Patient Id', 'Study Date', 
 'Study Description', 'Modality', 'Series Description',
 'Manufacturer', 'Manufacturer Model', 'Software Version', 'Series UID']
'''
'''
LIDC-IDRI-0132
LIDC-IDRI-0151
LIDC-IDRI-0315
LIDC-IDRI-0332
LIDC-IDRI-0355
LIDC-IDRI-0365
LIDC-IDRI-0442
LIDC-IDRI-0484
Example #12
def evaluateCAD(label, seriesUIDs, results_filename, outputDir, allNodules, CADSystemName, maxNumberOfCADMarks=-1,
                performBootstrapping=False, numberOfBootstrapSamples=1000, confidence=0.95):
    '''
    function to evaluate a CAD algorithm
    @param label: class label of the nodules to evaluate
    @param seriesUIDs: list of the seriesUIDs of the cases to be processed
    @param results_filename: file with prediction results
    @param outputDir: output directory
    @param allNodules: dictionary with all nodule annotations of all cases, keyed by seriesuid
    @param CADSystemName: name of the CAD system, to be used in filenames and on the FROC curve
    '''

    nodOutputfile = open(os.path.join(outputDir, 'CADAnalysis_%s.txt' % label), 'w')
    nodOutputfile.write("\n")
    nodOutputfile.write((60 * "*") + "\n")
    nodOutputfile.write("CAD Analysis: %s\n" % CADSystemName)
    nodOutputfile.write((60 * "*") + "\n")
    nodOutputfile.write("\n")

    results = csvTools.readCSV(results_filename)

    allCandsCAD = {}

    for seriesuid in seriesUIDs:

        # collect candidates from result file
        nodules = {}
        header = results[0]

        i = 0
        # keep predictions whose class matches the target label
        for result in results[1:]:
            # predict result
            nodule_seriesuid = result[header.index(seriesuid_label)]
            # for a matching series and class, collect the prediction info
            if seriesuid == nodule_seriesuid and float(result[header.index(cls_class)]) == float(label):
                nodule = getNodule(result, header)
                nodule.candidateID = i
                nodules[nodule.candidateID] = nodule
                i += 1

        if (maxNumberOfCADMarks > 0):
            # cap the number of CAD marks, keeping only the most suspicious ones

            if len(nodules.keys()) > maxNumberOfCADMarks:
                # make a list of all probabilities
                probs = []
                for keytemp, noduletemp in nodules.items():
                    probs.append(float(noduletemp.CADprobability))
                probs.sort(reverse=True)  # sort from large to small
                probThreshold = probs[maxNumberOfCADMarks]
                nodules2 = {}
                nrNodules2 = 0
                for keytemp, noduletemp in nodules.items():
                    if nrNodules2 >= maxNumberOfCADMarks:
                        break
                    if float(noduletemp.CADprobability) > probThreshold:
                        nodules2[keytemp] = noduletemp
                        nrNodules2 += 1

                nodules = nodules2

        print('adding candidates: ' + seriesuid)
        allCandsCAD[seriesuid] = nodules

    # open output files
    nodNoCandFile = open(os.path.join(outputDir, "nodulesWithoutCandidate_%s_%s.txt" % (CADSystemName,label)), 'w')

    # --- iterate over all cases (seriesUIDs) and determine how
    # often a nodule annotation is not covered by a candidate

    # initialize some variables to be used in the loop
    candTPs = 0
    candFPs = 0
    candFNs = 0
    candTNs = 0
    totalNumberOfCands = 0
    totalNumberOfNodules = 0
    doubleCandidatesIgnored = 0
    irrelevantCandidates = 0
    minProbValue = -1000000000.0  # sentinel probability assigned to missed nodules
    FROCGTList = []
    FROCProbList = []
    FPDivisorList = []
    excludeList = []
    FROCtoNoduleMap = []

    # -- loop over the cases
    for seriesuid in seriesUIDs:
        # get the candidates from the predictions
        try:
            candidates = allCandsCAD[seriesuid]
        except KeyError:
            candidates = {}

        # add to the total number of candidates
        totalNumberOfCands += len(candidates.keys())

        # make a copy in which items will be deleted
        candidates2 = candidates.copy()

        # get the nodule annotations on this case
        try:
            noduleAnnots = allNodules[seriesuid]
            print("label %s seriesuid %s has %d nodules"%(label,seriesuid,len(allNodules[seriesuid])))
        except KeyError:
            noduleAnnots = []

        # - loop over the nodule annotations
        for noduleAnnot in noduleAnnots:
            # increment the number of nodules
            # if noduleAnnot.state == "Included":
            totalNumberOfNodules += 1

            x = float(noduleAnnot.coordX)
            y = float(noduleAnnot.coordY)
            z = float(noduleAnnot.coordZ)

            # 2. Check if the nodule annotation is covered by a candidate
            # A nodule is marked as detected when the center of mass of the candidate is within a distance R of
            # the center of the nodule. In order to ensure that the CAD mark is displayed within the nodule on the
            # CT scan, we set R to be the radius of the nodule size.
            # per-axis match tolerance: these `diameter*` variables actually hold
            # half the annotated diameter (the radius), floored at 2.0, and the
            # `radiusSquared*` names below hold half of that again (the names are
            # misleading; no squaring is involved)
            diameterX = float(noduleAnnot.diameterX)/2.0
            diameterY = float(noduleAnnot.diameterY)/2.0
            diameterZ = float(noduleAnnot.diameterZ)/2.0
            if diameterX < 2.0:
                diameterX = 2.0
            if diameterY < 2.0:
                diameterY = 2.0
            if diameterZ < 2.0:
                diameterZ = 2.0
            radiusSquaredX = diameterX / 2.0
            radiusSquaredY = diameterY / 2.0
            radiusSquaredZ = diameterZ / 2.0


            found = False
            noduleMatches = []
            # check whether a prediction falls within the ground-truth tolerance box
            for key, candidate in candidates.items():
                x2 = float(candidate.coordX)
                y2 = float(candidate.coordY)
                z2 = float(candidate.coordZ)

                distX = math.fabs(x-x2)
                distY = math.fabs(y-y2)
                distZ = math.fabs(z-z2)

                if distX <= radiusSquaredX and distY <= radiusSquaredY and distZ <= radiusSquaredZ:
                    found = True
                    noduleMatches.append(candidate)
                    if key in candidates2.keys():
                        del candidates2[key]
            # if several predictions hit one ground-truth nodule, keep only one
            if len(noduleMatches) > 1:  # double detection
                doubleCandidatesIgnored += (len(noduleMatches) - 1)

            if found:
                # append the sample with the highest probability for the FROC analysis
                maxProb = None
                bestCandidate = noduleMatches[0]
                for candidate in noduleMatches:
                    if (maxProb is None) or (float(candidate.CADprobability) > maxProb):
                        maxProb = float(candidate.CADprobability)
                        bestCandidate = candidate
                print("seriesuid %s get prediction prob %.9f" % (seriesuid, maxProb))
                FROCGTList.append(1.0)
                FROCProbList.append(float(maxProb))
                FPDivisorList.append(seriesuid)
                excludeList.append(False)
                FROCtoNoduleMap.append("%s,%s,%s,%s,%s,%.9f,%.9f,%.9f,%s,%.9f" % (
                    seriesuid, noduleAnnot.id, noduleAnnot.coordX, noduleAnnot.coordY, noduleAnnot.coordZ,
                    float(noduleAnnot.diameterX), float(noduleAnnot.diameterY), float(noduleAnnot.diameterZ),
                    str(bestCandidate.id), float(bestCandidate.CADprobability)))
                candTPs += 1
            else:
                candFNs += 1
                # append a positive sample with the lowest probability, such that this is added in the FROC analysis
                FROCGTList.append(1.0)
                FROCProbList.append(minProbValue)
                FPDivisorList.append(seriesuid)
                excludeList.append(True)
                FROCtoNoduleMap.append("%s,%s,%s,%s,%s,%.9f,%.9f,%.9f,%s,%s" % (
                seriesuid, noduleAnnot.id, noduleAnnot.coordX, noduleAnnot.coordY, noduleAnnot.coordZ,
                float(noduleAnnot.diameterX),float(noduleAnnot.diameterY),float(noduleAnnot.diameterZ), int(-1), "NA"))
                nodNoCandFile.write("%s,%s,%s,%s,%s,%.9f,%.9f,%.9f,%s\n" % (
                seriesuid, noduleAnnot.id, noduleAnnot.coordX, noduleAnnot.coordY, noduleAnnot.coordZ,
                float(noduleAnnot.diameterX),float(noduleAnnot.diameterY),float(noduleAnnot.diameterZ),
                str(-1)))

        # add all false positives to the vectors
        for key, candidate3 in candidates2.items():
            candFPs += 1
            FROCGTList.append(0.0)
            FROCProbList.append(float(candidate3.CADprobability))
            FPDivisorList.append(seriesuid)
            excludeList.append(False)
            FROCtoNoduleMap.append("%s,%s,%s,%s,%s,%s,%.9f" % (
            seriesuid, -1, candidate3.coordX, candidate3.coordY, candidate3.coordZ, str(candidate3.id),
            float(candidate3.CADprobability)))

    if not (len(FROCGTList) == len(FROCProbList) and len(FROCGTList) == len(FPDivisorList) and len(FROCGTList) == len(
            FROCtoNoduleMap) and len(FROCGTList) == len(excludeList)):
        nodOutputfile.write("Length of FROC vectors not the same, this should never happen! Aborting..\n")

    nodOutputfile.write("Candidate detection results:\n")
    nodOutputfile.write("    True positives: %d\n" % candTPs)
    nodOutputfile.write("    False positives: %d\n" % candFPs)
    nodOutputfile.write("    False negatives: %d\n" % candFNs)
    nodOutputfile.write("    True negatives: %d\n" % candTNs)
    nodOutputfile.write("    Total number of candidates: %d\n" % totalNumberOfCands)
    nodOutputfile.write("    Total number of nodules: %d\n" % totalNumberOfNodules)

    nodOutputfile.write("    Ignored candidates on excluded nodules: %d\n" % irrelevantCandidates)
    nodOutputfile.write(
        "    Ignored candidates which were double detections on a nodule: %d\n" % doubleCandidatesIgnored)
    if int(totalNumberOfNodules) == 0:
        nodOutputfile.write("    Sensitivity: 0.0\n")
    else:
        nodOutputfile.write("    Sensitivity: %.9f\n" % (float(candTPs) / float(totalNumberOfNodules)))
    nodOutputfile.write(
        "    Average number of candidates per scan: %.9f\n" % (float(totalNumberOfCands) / float(len(seriesUIDs))))
    print(FROCGTList)
    print(FROCProbList)
    # compute FROC
    fps, sens, thresholds = computeFROC(FROCGTList, FROCProbList, len(seriesUIDs), excludeList)
    print(fps)
    print(sens)
    print(thresholds)

    if performBootstrapping:
        fps_bs_itp, sens_bs_mean, sens_bs_lb, sens_bs_up = computeFROC_bootstrap(FROCGTList, FROCProbList,
                                                                                 FPDivisorList, seriesUIDs, excludeList,
                                                                                 numberOfBootstrapSamples=numberOfBootstrapSamples,
                                                                                 confidence=confidence)

    # Write FROC curve
    with open(os.path.join(outputDir, "froc_%s_%s.txt" % (CADSystemName,label)), 'w') as f:
        for i in range(len(sens)):
            f.write("%.9f,%.9f,%.9f\n" % (fps[i], sens[i], thresholds[i]))

    # Write FROC vectors to disk as well
    with open(os.path.join(outputDir, "froc_gt_prob_vectors_%s_%s.csv" % (CADSystemName,label)), 'w') as f:
        for i in range(len(FROCGTList)):
            f.write("%d,%.9f\n" % (FROCGTList[i], FROCProbList[i]))

    fps_itp = np.linspace(FROC_minX, FROC_maxX, num=10001)

    sens_itp = np.interp(fps_itp, fps, sens)
    score = 0
    for i in range(len(fps_itp)):
        # accumulate sensitivity at the seven standard FROC operating points
        if Decimal(fps_itp[i]).quantize(Decimal("0.000")) in [0.125, 0.250, 0.500, 1.000, 2.000, 4.000, 8.000]:
            score += sens_itp[i]
            nodOutputfile.write("fps_itp %.2f ,sens_itp %.2f\n" % (fps_itp[i], sens_itp[i]))
    score = score/7.0
    print("    Average sensivity over seven fps for label %s: %.9f\n" % (label,score))
    nodOutputfile.write(
        "    Average sensivity over seven fps for label %s: %.9f\n" % (label,score))

    if performBootstrapping:
        # Write mean, lower, and upper bound curves to disk
        with open(os.path.join(outputDir, "froc_%s_bootstrapping_%s.csv" % (CADSystemName,label)), 'w') as f:
            f.write("FPrate,Sensivity[Mean],Sensivity[Lower bound],Sensivity[Upper bound]\n")
            for i in range(len(fps_bs_itp)):
                f.write("%.9f,%.9f,%.9f,%.9f\n" % (fps_bs_itp[i], sens_bs_mean[i], sens_bs_lb[i], sens_bs_up[i]))
    else:
        fps_bs_itp = None
        sens_bs_mean = None
        sens_bs_lb = None
        sens_bs_up = None

    # create FROC graphs
    if int(totalNumberOfNodules) > 0:
        graphTitle = str("")
        fig1 = plt.figure()
        ax = plt.gca()
        clr = 'b'
        plt.plot(fps_itp, sens_itp, color=clr, label="%s" % CADSystemName, lw=2)
        if performBootstrapping:
            plt.plot(fps_bs_itp, sens_bs_mean, color=clr, ls='--')
            plt.plot(fps_bs_itp, sens_bs_lb, color=clr, ls=':')  # , label = "lb")
            plt.plot(fps_bs_itp, sens_bs_up, color=clr, ls=':')  # , label = "ub")
            ax.fill_between(fps_bs_itp, sens_bs_lb, sens_bs_up, facecolor=clr, alpha=0.05)
        xmin = FROC_minX
        xmax = FROC_maxX
        plt.xlim(xmin, xmax)
        plt.ylim(0, 1)
        plt.xlabel('Average number of false positives per scan')
        plt.ylabel('Sensitivity')
        plt.legend(loc='lower right')
        plt.title('FROC performance - %s' % (CADSystemName))

        if bLogPlot:
            plt.xscale('log', basex=2)
            ax.xaxis.set_major_formatter(FixedFormatter([0.125, 0.25, 0.5, 1, 2, 4, 8]))

        # set your ticks manually
        ax.xaxis.set_ticks([0.125, 0.25, 0.5, 1, 2, 4, 8])
        ax.yaxis.set_ticks(np.arange(0, 1.1, 0.1))
        plt.grid(b=True, which='both')
        plt.tight_layout()

        plt.savefig(os.path.join(outputDir, "froc_%s_%s.png" % (CADSystemName,label)), bbox_inches=0, dpi=300)

    return (score,fps, sens, thresholds, fps_bs_itp, sens_bs_mean, sens_bs_lb, sens_bs_up)
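
# A minimal driver sketch tying collect() and evaluateCAD() together; the file
# names, output directory, label value, and CAD name are placeholders:
#     allNodules, seriesUIDs = collect('annotations.csv', 'results.csv')
#     evaluateCAD('1', seriesUIDs, 'results.csv', './output', allNodules,
#                 'MyCAD', performBootstrapping=True)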
Example #13
# -*- coding:utf-8 -*-

from gensim.test.utils import common_texts, get_tmpfile
from gensim.models import Word2Vec
import csvTools

path = get_tmpfile("word2vec.model")  # create a temporary file

contexts = csvTools.readCSV('./transformedlistd.csv')

sentences = []

for onecontext in contexts:
    words = onecontext[3].split(' ')
    sentences.append(words)

# to load a previously saved model instead:
# model = Word2Vec.load("word2vec.model")
model = Word2Vec(sentences, size=50, window=3, min_count=1, workers=2)
# the constructor already trains; any further training should use the full
# corpus, not just the last sentence's word list
model.train(sentences, total_examples=len(sentences), epochs=1)

# model.save("word2vec.model")
vector = model['咳嗽', '发热', '畏寒', '活动', '后', '气促', '伴', '心悸', '余天']
print(vector.shape)
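
# Note: `size=` and direct model[...] indexing are gensim < 4.0 APIs; under
# gensim >= 4.0 the equivalent sketch would be:
#     model = Word2Vec(sentences, vector_size=50, window=3, min_count=1, workers=2)
#     vector = model.wv['咳嗽']  # per-word lookup goes through model.wv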
Example #14
    if n > ls_len:
        return []
    elif n == ls_len:
        return [[i] for i in ls]
    else:
        j = ls_len // n
        k = ls_len % n
        ls_return = []
        for i in range(0, (n - 1) * j, j):
            ls_return.append(ls[i:i + j])
        ls_return.append(ls[(n - 1) * j:])
        return ls_return


data = Data()
alldata = csvTools.readCSV(
    '/raid/data/wangqiuli/Documents/pneumonia/label/all.csv')
dividedall = div_list(alldata, 5)
# print(len(dividedall[0]))
# for onep in dividedall[0]:
#     print(onep)

# traindata = dividedall[0] + dividedall[1] +dividedall[2] +dividedall[3]
# testdata = dividedall[4]
# print(len(traindata))
# print(len(testdata))

import multiprocessing
import time
import threading  # `thread` is the Python 2 name; Python 3 renamed it `_thread`

npypath = '/home/wangqiuli/raid/pneumonia/numpyfiles/'
Example #15
from config import Config
import tensorflow as tf
import os
import numpy as np 
from tqdm import tqdm
from newmodel import RCNNMODEL
import csvTools
import random
import tensorflow.contrib.slim as slim
os.environ["CUDA_VISIBLE_DEVICES"] = "2"

gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.45)
data = Data()

checkpoint_file = 'inception_v3_2016_08_28/inception_v3.ckpt'  
traintxt = csvTools.readCSV('./labels/train.csv')
testtxt = csvTools.readCSV('./labels/test.csv')
validtxt = csvTools.readCSV('./labels/valid.csv')
traindata = []

for one in traintxt:
    if len(one) != 0:
        traindata.append(one)

testdata = []
for one in testtxt:
    if len(one) != 0:
        testdata.append(one)
'''
Created by WangQL
2020/5/27

'''

import csvTools
import os

labelCSV = csvTools.readCSV('/home/wangqiuli/raid/senet_data/files/malignancy.csv')
print('number of samples: ', len(labelCSV))

data_path = '/home/wangqiuli/raid/senet_data/ori_hu/'
npy_data = os.listdir(os.path.join(data_path))
print('number of npy: ', len(npy_data))

data_dic_low = {}
data_dic_high = {}
data_dic_mid = {}

for one_npy in npy_data:
    # filenames look like 'LIDC-IDRI-0001_<noduleid>_<scanlistid>.npy';
    # peel the fields off one '_' at a time
    temp_name = one_npy
    patient_id = temp_name[:temp_name.find('_')]
    patient_id = int(patient_id[len(patient_id) - 4:])  # numeric case id (last 4 digits)

    temp_name = temp_name[temp_name.find('_') + 1:]
    nodule_id = temp_name[:temp_name.find('_')]

    temp_name = temp_name[temp_name.find('_') + 1:]
Example #17
'''
Created by WangQL
4.10.2020

count nodules according to their different attributes
'''

import csvTools

labels = csvTools.readCSV('files/malignancy.csv')

index = 26
countlob1 = []
countlob2 = []
countlob3 = []
countlob4 = []
countlob5 = []

for one in labels:
    sign = round(float(one[27]))
    if sign == 1:
        countlob1.append(one)
    elif sign == 2:
        countlob2.append(one)
    elif sign == 3:
        countlob3.append(one)
    elif sign == 4:
        countlob4.append(one)
    elif sign == 5:
        countlob5.append(one)
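
# The same tally, sketched with collections.Counter:
from collections import Counter
counts = Counter(round(float(one[27])) for one in labels)
print(counts)  # rating -> number of nodules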
Example #18
'''
Created by Wang Qiu Li
7/3/2018

Divide the nodules into two groups:
levels 1 and 2 are classified as LMNs,
levels 4 and 5 are classified as HMNs,
level 3 is excluded.

'''

import csvTools

nodules = csvTools.readCSV('files/malignancy.csv')

# print(nodules[2][29])
# index 29 is malignancy level

lmns = []
hmns = []
mid = []

for nodule in nodules:
    level = float(nodule[29])  # index 29 is the malignancy level
    if level >= 3.5:
        hmns.append(nodule)
    elif level < 2.5:
        lmns.append(nodule)
    else:
        mid.append(nodule)  # borderline nodules around level 3, excluded
'''