def getScan():
    metadatas = csvTools.readCSV('files/LIDC-IDRI_MetaData.csv')
    CTList = []
    Patient_Id = []
    Series_UID = []
    sign = 0
    for metadata in metadatas:
        # skip the header row, then keep CT series only (LIDC also contains X-ray)
        if sign != 0:
            if metadata[4] == 'CT':
                ctdata = []
                ctdata.append(metadata[1])
                Patient_Id.append(metadata[1])
                ctdata.append(metadata[9])
                Series_UID.append(metadata[9])
                CTList.append(ctdata)
        sign += 1
    # csvTools.writeCSV('PatientId_SeriesUID.csv', CTList)

    # walk DOI/<patient>/<study>/<series> and keep the directories whose
    # name is one of the CT series UIDs collected above
    patients = os.listdir(basedir)
    scan = []
    for patient in patients:
        idlist1 = os.listdir(basedir + patient)
        for level1 in idlist1:
            idlist2 = os.listdir(basedir + patient + '/' + level1)
            for level2 in idlist2:
                if level2 in Series_UID:
                    temp = patient + '/' + level1 + '/' + level2
                    scan.append(temp)
    return scan
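# Usage sketch (hedged): each returned entry has the form
# '<PatientId>/<StudyInstanceUID>/<SeriesInstanceUID>' relative to basedir;
# the driver script in this repo uses it as:
# listscan = getScan()
# print(listscan[0], len(listscan))   # 1018 CT series in LIDC-IDRI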
def collect(annotations_filename, results_filename):
    annotations = csvTools.readCSV(annotations_filename)
    results = csvTools.readCSV(results_filename)

    seriesUIDs = []
    header = results[0]
    # collect the series UIDs that appear in the prediction file
    for result in results[1:]:
        seriesUIDs.append(result[header.index(seriesuid_label)])
    # deduplicate while preserving first-occurrence order
    seriesUIDs = sorted(set(seriesUIDs), key=seriesUIDs.index)
    for i in range(len(seriesUIDs)):
        print(seriesUIDs[i])

    # get the annotated nodules for those UIDs
    allNodules = collectNoduleAnnotations(annotations, seriesUIDs)
    return (allNodules, seriesUIDs)
def get_images_and_labels(data_root_dir):
    '''
    Modified by WangQL 2020/5/27
    '''
    csv_files = os.listdir(data_root_dir)
    data = []
    for one_csv in csv_files:
        data += csvTools.readCSV(os.path.join(data_root_dir, one_csv))

    # despite the names, these hold normalized image arrays and their labels
    all_image_path = []
    all_image_label = []

    data_path = 'D:/Data/nodules/ori_hu/'
    npy_files = os.listdir(data_path)

    for one_data in data:
        # label: [0] for low-malignancy nodules, [1] otherwise
        if 'low' in one_data[1]:
            label = [0]
        else:
            label = [1]
        npy = np.load(os.path.join(data_path, one_data[0] + '.npy'))
        npy = truncate_hu(npy)
        nor_npy = normalization(npy)
        all_image_path.append(nor_npy)
        all_image_label.append(label)
        # naive oversampling: add high-malignancy samples twice
        if 'high' in one_data[1]:
            all_image_path.append(nor_npy)
            all_image_label.append(label)

    # # earlier path-based variant, kept for reference:
    # # get all images' paths (format: string)
    # data_root = pathlib.Path(data_root_dir)
    # all_image_path = [str(path) for path in list(data_root.glob('*/*'))]
    # # get labels' names
    # label_names = sorted(item.name for item in data_root.glob('*/'))
    # # dict: {label : index}
    # label_to_index = dict((label, index) for index, label in enumerate(label_names))
    # # get all images' labels
    # all_image_label = [label_to_index[pathlib.Path(single_image_path).parent.name]
    #                    for single_image_path in all_image_path]

    return all_image_path, all_image_label
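# truncate_hu and normalization are called above but not defined in this file.
# A minimal sketch of what they plausibly do, assuming the usual HU windowing
# for lung CT; the window bounds and the [0, 1] scaling are assumptions for
# illustration, not values taken from this repo.
import numpy as np

def truncate_hu(image, hu_min=-1000.0, hu_max=400.0):
    # clip Hounsfield units to a fixed window (assumed bounds)
    image[image > hu_max] = hu_max
    image[image < hu_min] = hu_min
    return image

def normalization(image):
    # scale the clipped HU values linearly to [0, 1]
    return (image - image.min()) / (image.max() - image.min())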
def __init__(self):
    self.wordModel = Word2Vec.load('./txt/word2vec.model')
    self.transformedlistd = csvTools.readCSV('./txt/transformedlistd.csv')
    sentences = []
    for onecontext in self.transformedlistd:
        words = onecontext[3].split(' ')
        sentences.append(words)
    model = Word2Vec(sentences, size=50, window=3, min_count=1, workers=2)
    self.model = model
    # look up vectors through the trained KeyedVectors
    # tokens: upper abdomen, pain, week
    vector = model.wv[['上腹', '疼痛', '周']]
def load_lidc(self):
    '''
    LIDC data
    size: 128, 128, 1
    '''
    data_dir = os.path.join('./huimages/')
    mask_dir = os.path.join('./masks/')
    noduleinfo = csvTools.readCSV('files/malignancy.csv')

    self.images_list = os.listdir(data_dir)
    self.masks_list = os.listdir(mask_dir)
    self.noduleinfo = noduleinfo
    print('number of images_list: ', len(self.images_list))
    print('number of mask_dir: ', len(self.masks_list))
    print('number of noduleinfo: ', len(self.noduleinfo))

    trainingdata = []
    for onenodule in noduleinfo:
        scanid = onenodule[1]
        scanid = caseid_to_scanid(int(scanid))
        noduleid = onenodule[3]
        scan_list_id = onenodule[2]
        nodule_image_name = str(scanid) + '_' + str(noduleid) + '_' + str(scan_list_id) + '.npy'
        if nodule_image_name in self.images_list:
            # attribute labels
            lobulation = onenodule[28]
            spiculation = onenodule[27]
            malignancy = onenodule[29]
            if float(lobulation) >= 0 or float(spiculation) >= 0:
                trainingdata.append(onenodule)
            # data augmentation: duplicate strongly lobulated/spiculated nodules
            # with direction tags so the loader can transform them later
            if float(lobulation) >= 3 or float(spiculation) >= 3:
                import copy
                left = copy.deepcopy(onenodule)
                right = copy.deepcopy(onenodule)
                down = copy.deepcopy(onenodule)
                left.append('left')
                right.append('right')
                down.append('down')
                trainingdata.append(left)
                trainingdata.append(right)
                trainingdata.append(down)

    print('number of training data: ', len(trainingdata))
    trainingdata = np.array(trainingdata)
    shuffle(trainingdata)
    self.data = trainingdata
def getAve(numlist):
    # average the (normally four) radiologists' ratings; flag unexpected lengths
    ave = 0
    if len(numlist) != 4:
        print(numlist)
    for num in numlist:
        ave += int(num)
    if len(numlist) == 0:
        ave = 0
    else:
        ave = ave / len(numlist)
    return ave

nodule_chara = csvTools.readCSV(csvdir + 'nodule_chara_list.csv')
list3 = csvTools.readCSV(csvdir + 'list3.2.csv')
list3 = list3[1:len(list3)]   # drop the header row

count = 0
sign = 0
list3_2 = []
for item in list3:
    id = item[0]
    case = item[1]
    case = caseid_to_scanid(int(case))
    ldlist = []
    # columns 10-13 hold the nodule IDs assigned by the readers
    for ids in item[10:14]:
        ldlist.append(ids)
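# Worked example (values illustrative): getAve averages the four radiologists'
# integer ratings for one characteristic.
# getAve(['3', '4', '5', '4'])  # -> 4.0
# getAve([])                    # -> 0 (guarded against empty lists)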
import os
import pandas as pd
import pydicom
import scipy.misc
import cv2
import numpy as np
import glob
import xmlopt
import csvTools

basedir = '/home/wangqiuli/Data/LIDC/DOI/'
resdir = 'noduleimage/'
imagedir = 'ori_images/'
maskdir = 'ori_masks/'

noduleinfo = csvTools.readCSV('files/malignancy.csv')
idscaninfo = csvTools.readCSV('files/id_scan.txt')
maskinfo = glob.glob(maskdir)

def get_pixels_hu(ds):
    # convert stored pixel values to Hounsfield units via the DICOM
    # RescaleSlope / RescaleIntercept tags
    image = ds.pixel_array
    image = np.array(image, dtype=np.float32)
    intercept = ds.RescaleIntercept
    slope = ds.RescaleSlope
    image = image * slope
    image += intercept
    return image

caselist = os.listdir(imagedir)
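# Usage sketch for get_pixels_hu (the slice path below is illustrative, not a
# file shipped with this repo):
# ds = pydicom.dcmread(basedir + 'LIDC-IDRI-0001/study_uid/series_uid/000001.dcm')
# hu = get_pixels_hu(ds)
# print(hu.min(), hu.max())   # lung CT typically spans about -1000 to a few hundred HU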
basedir = 'D:/Data/LIDC-IDRI/DOI/'
csvdir = 'files/list3.2.csv'

import csvTools
import Func_get_PatientId_SeriesUID

listscan = Func_get_PatientId_SeriesUID.getScan()
# print(listscan[0])
# print(len(listscan))
'''
LIDC-IDRI-0001/1.3.6.1.4.1.14519.5.2.1.6279.6001.298806137288633453246975630178/1.3.6.1.4.1.14519.5.2.1.6279.6001.179049373636438705059720603192
1018
'''

metadatas = csvTools.readCSV(csvdir)
# print(len(metadatas))
'''
['id', 'case', 'scan', 'roi', 'volume', 'eq. diam.', 'x loc.', 'y loc.', 'slice no.', '', 'nodIDs', '', '', '', '', '', '']
'''
metadatas = metadatas[1:len(metadatas)]   # drop the header row
# print(metadatas[0])
# print(metadatas[len(metadatas) - 1])
'''
['1', '1', '3000566', '1', '6459.75', '23.107', '317', '367', '43', '', 'IL057_127364', 'Nodule 001', 'MI014_12127', '0', '', '', '']
['2635', '1012', '32231', '1', '122.2', '6.157', '145', '153', '105', '', '0', '201255', '157143', 'Nodule 001', '', '', '']
'''

# collect nodule characteristics
noudle_chara_list = []
from config import Config
import tensorflow as tf
import os
import numpy as np
from tqdm import tqdm
from newmodel import RCNNMODEL
import csvTools
import random
import tensorflow.contrib.slim as slim

os.environ["CUDA_VISIBLE_DEVICES"] = "3"
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.4)

data = Data()
checkpoint_file = 'inception_v3_2016_08_28/inception_v3.ckpt'

# five-fold split: folds 2-5 for training, fold 1 for test and validation
traintxt = (csvTools.readCSV('./labels/cross2.csv') + csvTools.readCSV('./labels/cross3.csv')
            + csvTools.readCSV('./labels/cross4.csv') + csvTools.readCSV('./labels/cross5.csv'))
testtxt = csvTools.readCSV('./labels/cross1.csv')
validtxt = csvTools.readCSV('./labels/cross1.csv')

# drop empty rows
traindata = []
for one in traintxt:
    if len(one) != 0:
        traindata.append(one)

testdata = []
for one in testtxt:
    if len(one) != 0:
        testdata.append(one)
# -*- coding:utf-8 -*-
from pyltp import SentenceSplitter
from pyltp import Segmentor
import csvTools
import re
from string import digits
from zhon.hanzi import punctuation
from tqdm import tqdm
import os

segmentor = Segmentor()
segmentor.load_with_lexicon('./ltp/cws.model', 'lexicon.txt')

contexts2018 = csvTools.readCSV('2018all.csv')
contexts2017 = csvTools.readCSV('2017.csv')
contexts2016 = csvTools.readCSV('2016.csv')
contexts = contexts2016 + contexts2017 + contexts2018
print(len(contexts))

# birthlist = []
descriptionlist = []
dignosislist = []
for context in contexts:
    # birth = context[3]
    description = context[6]
    dignosis = context[7]
    # birthlist.append(birth)
    descriptionlist.append(description)
    dignosislist.append(dignosis)
'''
Created by Wang Qiu Li
6/27/2018

get the relationship between patient IDs and CT scans
(LIDC also contains X-ray series, so modality must be filtered)
PatientId_SeriesUID.csv is for the dataloader: it maps each patient id to its CT series.
'''
import csvTools
import os

basedir = 'D:/Data/LIDC-IDRI/DOI/'
metadatas = csvTools.readCSV('csv_xls/LIDC-IDRI_MetaData.csv')
print(len(metadatas))
# print(metadatas[0])
'''
10 columns:
['Collection', 'Patient Id', 'Study Date', 'Study Description', 'Modality', 'Series Description', 'Manufacturer', 'Manufacturer Model', 'Software Version', 'Series UID']
'''
'''
LIDC-IDRI-0132
LIDC-IDRI-0151
LIDC-IDRI-0315
LIDC-IDRI-0332
LIDC-IDRI-0355
LIDC-IDRI-0365
LIDC-IDRI-0442
LIDC-IDRI-0484
def evaluateCAD(label, seriesUIDs, results_filename, outputDir, allNodules, CADSystemName,
                maxNumberOfCADMarks=-1, performBootstrapping=False,
                numberOfBootstrapSamples=1000, confidence=0.95):
    '''
    function to evaluate a CAD algorithm
    @param seriesUIDs: list of the seriesUIDs of the cases to be processed
    @param results_filename: file with prediction results
    @param outputDir: output directory
    @param allNodules: dictionary with all nodule annotations of all cases, keyed by seriesuid
    @param CADSystemName: name of the CAD system, to be used in filenames and on the FROC curve
    '''
    nodOutputfile = open(os.path.join(outputDir, 'CADAnalysis_%s.txt' % label), 'w')
    nodOutputfile.write("\n")
    nodOutputfile.write((60 * "*") + "\n")
    nodOutputfile.write("CAD Analysis: %s\n" % CADSystemName)
    nodOutputfile.write((60 * "*") + "\n")
    nodOutputfile.write("\n")

    results = csvTools.readCSV(results_filename)

    allCandsCAD = {}
    for seriesuid in seriesUIDs:
        # collect candidates from the result file
        nodules = {}
        header = results[0]
        i = 0
        # keep only predictions whose class matches this label
        for result in results[1:]:
            nodule_seriesuid = result[header.index(seriesuid_label)]
            if seriesuid == nodule_seriesuid and float(result[header.index(cls_class)]) == float(label):
                nodule = getNodule(result, header)
                nodule.candidateID = i
                nodules[nodule.candidateID] = nodule
                i += 1

        if (maxNumberOfCADMarks > 0):
            # cap the number of CAD marks; keep only the most suspicious ones
            if len(nodules.keys()) > maxNumberOfCADMarks:
                # make a list of all probabilities, sorted from large to small
                probs = []
                for keytemp, noduletemp in nodules.items():
                    probs.append(float(noduletemp.CADprobability))
                probs.sort(reverse=True)
                probThreshold = probs[maxNumberOfCADMarks]
                nodules2 = {}
                nrNodules2 = 0
                for keytemp, noduletemp in nodules.items():
                    if nrNodules2 >= maxNumberOfCADMarks:
                        break
                    if float(noduletemp.CADprobability) > probThreshold:
                        nodules2[keytemp] = noduletemp
                        nrNodules2 += 1
                nodules = nodules2

        print('adding candidates: ' + seriesuid)
        allCandsCAD[seriesuid] = nodules

    # open output files
    nodNoCandFile = open(os.path.join(outputDir, "nodulesWithoutCandidate_%s_%s.txt" % (CADSystemName, label)), 'w')

    # --- iterate over all cases (seriesUIDs) and determine how
    # often a nodule annotation is not covered by a candidate

    # initialize some variables to be used in the loop
    candTPs = 0
    candFPs = 0
    candFNs = 0
    candTNs = 0
    totalNumberOfCands = 0
    totalNumberOfNodules = 0
    doubleCandidatesIgnored = 0
    irrelevantCandidates = 0
    minProbValue = -1000000000.0  # minimum value of a float
    FROCGTList = []
    FROCProbList = []
    FPDivisorList = []
    excludeList = []
    FROCtoNoduleMap = []

    # -- loop over the cases
    for seriesuid in seriesUIDs:
        # get the candidates from the predictions
        try:
            candidates = allCandsCAD[seriesuid]
        except KeyError:
            candidates = {}

        # add to the total number of candidates
        totalNumberOfCands += len(candidates.keys())

        # make a copy in which items will be deleted
        candidates2 = candidates.copy()

        # get the nodule annotations for this case
        try:
            noduleAnnots = allNodules[seriesuid]
            print("label %s seriesuid %s has %d nodules" % (label, seriesuid, len(allNodules[seriesuid])))
        except KeyError:
            noduleAnnots = []

        # - loop over the nodule annotations
        for noduleAnnot in noduleAnnots:
            # increment the number of nodules
            # if noduleAnnot.state == "Included":
            totalNumberOfNodules += 1

            x = float(noduleAnnot.coordX)
            y = float(noduleAnnot.coordY)
            z = float(noduleAnnot.coordZ)

            # 2. Check if the nodule annotation is covered by a candidate.
            # A nodule is marked as detected when the center of mass of the candidate is within a distance R of
            # the center of the nodule. In order to ensure that the CAD mark is displayed within the nodule on
            # the CT scan, we set R to be the radius of the nodule size.
            diameterX = float(noduleAnnot.diameterX) / 2.0
            diameterY = float(noduleAnnot.diameterY) / 2.0
            diameterZ = float(noduleAnnot.diameterZ) / 2.0
            if diameterX < 2.0:
                diameterX = 2.0
            if diameterY < 2.0:
                diameterY = 2.0
            if diameterZ < 2.0:
                diameterZ = 2.0
            radiusSquaredX = diameterX / 2.0
            radiusSquaredY = diameterY / 2.0
            radiusSquaredZ = diameterZ / 2.0

            found = False
            noduleMatches = []
            # check whether a predicted candidate falls inside the ground-truth region
            for key, candidate in candidates.items():
                x2 = float(candidate.coordX)
                y2 = float(candidate.coordY)
                z2 = float(candidate.coordZ)
                distX = math.fabs(x - x2)
                distY = math.fabs(y - y2)
                distZ = math.fabs(z - z2)
                if distX <= radiusSquaredX and distY <= radiusSquaredY and distZ <= radiusSquaredZ:
                    found = True
                    noduleMatches.append(candidate)
                    if key in candidates2.keys():
                        del candidates2[key]
            # if several candidates hit the same ground-truth nodule, count the extras as double detections
            if len(noduleMatches) > 1:
                doubleCandidatesIgnored += (len(noduleMatches) - 1)

            if found == True:
                # append the sample with the highest probability for the FROC analysis
                maxProb = None
                for idx in range(len(noduleMatches)):
                    candidate = noduleMatches[idx]
                    if (maxProb is None) or (float(candidate.CADprobability) > maxProb):
                        maxProb = float(candidate.CADprobability)
                        print("seriesuid %s get prediction prob %.9f" % (seriesuid, float(candidate.CADprobability)))
                FROCGTList.append(1.0)
                FROCProbList.append(float(maxProb))
                FPDivisorList.append(seriesuid)
                excludeList.append(False)
                FROCtoNoduleMap.append("%s,%s,%s,%s,%s,%.9f,%.9f,%.9f,%s,%.9f" % (
                    seriesuid, noduleAnnot.id, noduleAnnot.coordX, noduleAnnot.coordY, noduleAnnot.coordZ,
                    float(noduleAnnot.diameterX), float(noduleAnnot.diameterY), float(noduleAnnot.diameterZ),
                    str(candidate.id), float(candidate.CADprobability)))
                candTPs += 1
            else:
                candFNs += 1
                # append a positive sample with the lowest probability, such that this is added in the FROC analysis
                FROCGTList.append(1.0)
                FROCProbList.append(minProbValue)
                FPDivisorList.append(seriesuid)
                excludeList.append(True)
                FROCtoNoduleMap.append("%s,%s,%s,%s,%s,%.9f,%.9f,%.9f,%s,%s" % (
                    seriesuid, noduleAnnot.id, noduleAnnot.coordX, noduleAnnot.coordY, noduleAnnot.coordZ,
                    float(noduleAnnot.diameterX), float(noduleAnnot.diameterY), float(noduleAnnot.diameterZ),
                    int(-1), "NA"))
                nodNoCandFile.write("%s,%s,%s,%s,%s,%.9f,%.9f,%.9f,%s\n" % (
                    seriesuid, noduleAnnot.id, noduleAnnot.coordX, noduleAnnot.coordY, noduleAnnot.coordZ,
                    float(noduleAnnot.diameterX), float(noduleAnnot.diameterY), float(noduleAnnot.diameterZ),
                    str(-1)))

        # add all false positives to the vectors
        for key, candidate3 in candidates2.items():
            candFPs += 1
            FROCGTList.append(0.0)
            FROCProbList.append(float(candidate3.CADprobability))
            FPDivisorList.append(seriesuid)
            excludeList.append(False)
            FROCtoNoduleMap.append("%s,%s,%s,%s,%s,%s,%.9f" % (
                seriesuid, -1, candidate3.coordX, candidate3.coordY, candidate3.coordZ,
                str(candidate3.id), float(candidate3.CADprobability)))

    if not (len(FROCGTList) == len(FROCProbList) and len(FROCGTList) == len(FPDivisorList) and
            len(FROCGTList) == len(FROCtoNoduleMap) and len(FROCGTList) == len(excludeList)):
        nodOutputfile.write("Length of FROC vectors not the same, this should never happen! Aborting..\n")

    nodOutputfile.write("Candidate detection results:\n")
    nodOutputfile.write("    True positives: %d\n" % candTPs)
    nodOutputfile.write("    False positives: %d\n" % candFPs)
    nodOutputfile.write("    False negatives: %d\n" % candFNs)
    nodOutputfile.write("    True negatives: %d\n" % candTNs)
    nodOutputfile.write("    Total number of candidates: %d\n" % totalNumberOfCands)
    nodOutputfile.write("    Total number of nodules: %d\n" % totalNumberOfNodules)
    nodOutputfile.write("    Ignored candidates on excluded nodules: %d\n" % irrelevantCandidates)
    nodOutputfile.write("    Ignored candidates which were double detections on a nodule: %d\n" % doubleCandidatesIgnored)
    if int(totalNumberOfNodules) == 0:
        nodOutputfile.write("    Sensitivity: 0.0\n")
    else:
        nodOutputfile.write("    Sensitivity: %.9f\n" % (float(candTPs) / float(totalNumberOfNodules)))
    nodOutputfile.write("    Average number of candidates per scan: %.9f\n" % (float(totalNumberOfCands) / float(len(seriesUIDs))))
    print(FROCGTList)
    print(FROCProbList)

    # compute FROC
    fps, sens, thresholds = computeFROC(FROCGTList, FROCProbList, len(seriesUIDs), excludeList)
    print(fps)
    print(sens)
    print(thresholds)
    if performBootstrapping:
        fps_bs_itp, sens_bs_mean, sens_bs_lb, sens_bs_up = computeFROC_bootstrap(
            FROCGTList, FROCProbList, FPDivisorList, seriesUIDs, excludeList,
            numberOfBootstrapSamples=numberOfBootstrapSamples, confidence=confidence)

    # Write FROC curve
    with open(os.path.join(outputDir, "froc_%s_%s.txt" % (CADSystemName, label)), 'w') as f:
        for i in range(len(sens)):
            f.write("%.9f,%.9f,%.9f\n" % (fps[i], sens[i], thresholds[i]))

    # Write FROC vectors to disk as well
    with open(os.path.join(outputDir, "froc_gt_prob_vectors_%s_%s.csv" % (CADSystemName, label)), 'w') as f:
        for i in range(len(FROCGTList)):
            f.write("%d,%.9f\n" % (FROCGTList[i], FROCProbList[i]))

    fps_itp = np.linspace(FROC_minX, FROC_maxX, num=10001)
    sens_itp = np.interp(fps_itp, fps, sens)

    # average sensitivity at the seven reference FP rates
    score = 0
    for i in range(len(fps_itp)):
        if Decimal(fps_itp[i]).quantize(Decimal("0.000")) in [0.125, 0.250, 0.500, 1.000, 2.000, 4.000, 8.000]:
            score += sens_itp[i]
            nodOutputfile.write("fps_itp %.2f ,sens_itp %.2f\n" % (fps_itp[i], sens_itp[i]))
    score = score / 7.0
    print("    Average sensitivity over seven fps for label %s: %.9f\n" % (label, score))
    nodOutputfile.write("    Average sensitivity over seven fps for label %s: %.9f\n" % (label, score))

    if performBootstrapping:
        # Write mean, lower, and upper bound curves to disk
        with open(os.path.join(outputDir, "froc_%s_bootstrapping_%s.csv" % (CADSystemName, label)), 'w') as f:
            f.write("FPrate,Sensitivity[Mean],Sensitivity[Lower bound],Sensitivity[Upper bound]\n")
            for i in range(len(fps_bs_itp)):
                f.write("%.9f,%.9f,%.9f,%.9f\n" % (fps_bs_itp[i], sens_bs_mean[i], sens_bs_lb[i], sens_bs_up[i]))
    else:
        fps_bs_itp = None
        sens_bs_mean = None
        sens_bs_lb = None
        sens_bs_up = None

    # create FROC graphs
    if int(totalNumberOfNodules) > 0:
        graphTitle = str("")
        fig1 = plt.figure()
        ax = plt.gca()
        clr = 'b'
        plt.plot(fps_itp, sens_itp, color=clr, label="%s" % CADSystemName, lw=2)
        if performBootstrapping:
            plt.plot(fps_bs_itp, sens_bs_mean, color=clr, ls='--')
            plt.plot(fps_bs_itp, sens_bs_lb, color=clr, ls=':')  # , label = "lb")
            plt.plot(fps_bs_itp, sens_bs_up, color=clr, ls=':')  # , label = "ub")
            ax.fill_between(fps_bs_itp, sens_bs_lb, sens_bs_up, facecolor=clr, alpha=0.05)
        xmin = FROC_minX
        xmax = FROC_maxX
        plt.xlim(xmin, xmax)
        plt.ylim(0, 1)
        plt.xlabel('Average number of false positives per scan')
        plt.ylabel('Sensitivity')
        plt.legend(loc='lower right')
        plt.title('FROC performance - %s' % (CADSystemName))

        if bLogPlot:
            plt.xscale('log', basex=2)
            ax.xaxis.set_major_formatter(FixedFormatter([0.125, 0.25, 0.5, 1, 2, 4, 8]))

        # set your ticks manually
        ax.xaxis.set_ticks([0.125, 0.25, 0.5, 1, 2, 4, 8])
        ax.yaxis.set_ticks(np.arange(0, 1.1, 0.1))
        plt.grid(b=True, which='both')
        plt.tight_layout()

        plt.savefig(os.path.join(outputDir, "froc_%s_%s.png" % (CADSystemName, label)), bbox_inches=0, dpi=300)

    return (score, fps, sens, thresholds, fps_bs_itp, sens_bs_mean, sens_bs_lb, sens_bs_up)
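# computeFROC is called above but is not defined in this file. Below is a
# minimal sketch of a LUNA16-style FROC computation, assuming candidates are
# ranked by probability with sklearn's roc_curve; this is an illustrative
# reconstruction, not this repo's actual implementation (which also rescales
# sensitivity to account for excluded false negatives).
from sklearn.metrics import roc_curve

def computeFROC_sketch(FROCGTList, FROCProbList, totalNumberOfImages, excludeList):
    # drop samples flagged as excluded (e.g. unmatched ground-truth placeholders)
    gt = [g for g, e in zip(FROCGTList, excludeList) if not e]
    prob = [p for p, e in zip(FROCProbList, excludeList) if not e]
    fpr, sens, thresholds = roc_curve(gt, prob)
    # convert the false-positive *rate* into false positives *per scan*
    numNegatives = sum(1 for g in gt if g == 0)
    fps = fpr * numNegatives / float(totalNumberOfImages)
    return fps, sens, thresholds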
# -*- coding:utf-8 -*-
from gensim.test.utils import common_texts, get_tmpfile
from gensim.models import Word2Vec
import csvTools

path = get_tmpfile("word2vec.model")  # create a temporary file

contexts = csvTools.readCSV('./transformedlistd.csv')

sentences = []
for onecontext in contexts:
    words = onecontext[3].split(' ')
    sentences.append(words)

# load a previously trained model
# model = Word2Vec.load("word2vec.model")

# the constructor already trains on all sentences; the earlier extra call
# model.train(words, ...) only re-fed the last sentence and was dropped
model = Word2Vec(sentences, size=50, window=3, min_count=1, workers=2)
# model.save("word2vec.model")

# tokens: cough, fever, chills, shortness of breath after activity,
# with palpitations, for several days
vector = model.wv[['咳嗽', '发热', '畏寒', '活动', '后', '气促', '伴', '心悸', '余天']]
print(vector.shape)
def div_list(ls, n):
    # split ls into n chunks; the last chunk absorbs the remainder
    # (the def line was missing here; reconstructed from the call below)
    ls_len = len(ls)
    if n > ls_len:
        return []
    elif n == ls_len:
        return [[i] for i in ls]
    else:
        j = ls_len // n
        k = ls_len % n
        ls_return = []
        for i in range(0, (n - 1) * j, j):
            ls_return.append(ls[i:i + j])
        ls_return.append(ls[(n - 1) * j:])
        return ls_return

data = Data()
alldata = csvTools.readCSV('/raid/data/wangqiuli/Documents/pneumonia/label/all.csv')
dividedall = div_list(alldata, 5)
# print(len(dividedall[0]))
# for onep in dividedall[0]:
#     print(onep)

# traindata = dividedall[0] + dividedall[1] + dividedall[2] + dividedall[3]
# testdata = dividedall[4]
# print(len(traindata))
# print(len(testdata))

import multiprocessing
import time
import thread

npypath = '/home/wangqiuli/raid/pneumonia/numpyfiles/'
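# Worked example (illustrative) for div_list above: it yields n chunks of
# floor(len/n) items each, with the last chunk taking the remainder:
# div_list(list(range(10)), 3)  # -> [[0, 1, 2], [3, 4, 5], [6, 7, 8, 9]]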
from config import Config
import tensorflow as tf
import os
import numpy as np
from tqdm import tqdm
from newmodel import RCNNMODEL
import csvTools
import random
import tensorflow.contrib.slim as slim

os.environ["CUDA_VISIBLE_DEVICES"] = "2"
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.45)

data = Data()
checkpoint_file = 'inception_v3_2016_08_28/inception_v3.ckpt'

traintxt = csvTools.readCSV('./labels/train.csv')
testtxt = csvTools.readCSV('./labels/test.csv')
validtxt = csvTools.readCSV('./labels/valid.csv')

traindata = []
for one in traintxt:
    if len(one) != 0:
        traindata.append(one)

testdata = []
for one in testtxt:
    if len(one) != 0:
        testdata.append(one)
'''
Created by WangQL
2020/5/27
'''
import csvTools
import os

labelCSV = csvTools.readCSV('/home/wangqiuli/raid/senet_data/files/malignancy.csv')
print('number of samples: ', len(labelCSV))

data_path = '/home/wangqiuli/raid/senet_data/ori_hu/'
npy_data = os.listdir(os.path.join(data_path))
print('number of npy: ', len(npy_data))

data_dic_low = {}
data_dic_high = {}
data_dic_mid = {}

for one_npy in npy_data:
    # file names look like '<PatientId>_<noduleId>_<scanListId>.npy';
    # peel the fields off one underscore at a time
    temp_name = one_npy
    patient_id = temp_name[:temp_name.find('_')]
    patient_id = int(patient_id[len(patient_id) - 4:])
    # print(patient_id)
    temp_name = temp_name[temp_name.find('_') + 1:]
    nodule_id = temp_name[:temp_name.find('_')]
    # print(nodule_id)
    temp_name = temp_name[temp_name.find('_') + 1:]
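    # Illustrative trace, assuming names are built as in load_lidc,
    # e.g. 'LIDC-IDRI-0001_3_3000566.npy': patient_id -> int('0001') == 1,
    # nodule_id -> '3', remaining temp_name -> '3000566.npy'.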
'''
Created by WangQL
4.10.2020

count nodules according to their different attributes
'''
import csvTools

labels = csvTools.readCSV('files/malignancy.csv')

index = 26

countlob1 = []
countlob2 = []
countlob3 = []
countlob4 = []
countlob5 = []

for one in labels:
    # round the averaged attribute score in column 27 to the nearest level
    sign = round(float(one[27]))
    if sign == 1:
        countlob1.append(one)
    elif sign == 2:
        countlob2.append(one)
    elif sign == 3:
        countlob3.append(one)
    elif sign == 4:
        countlob4.append(one)
    elif sign == 5:
        countlob5.append(one)
'''
Created by Wang Qiu Li
7/3/2018

divide the nodules into two groups:
levels 1 and 2 are classified as LMNs (low-malignancy nodules),
levels 4 and 5 as HMNs (high-malignancy nodules),
and level 3 is excluded
'''
import csvTools

nodules = csvTools.readCSV('files/malignancy.csv')
# print(nodules[2][29])
# index 29 is the malignancy level

lmns = []
hmns = []
mid = []

for nodule in nodules:
    level = float(nodule[29])
    # averaged ratings are continuous, so split at 2.5 and 3.5
    if level >= 3.5:
        hmns.append(nodule)
    elif level < 2.5:
        lmns.append(nodule)
    else:
        mid.append(nodule)
'''