def data_preprocess(self, input_image, normalization=None, num_channels=0):
    if type(input_image) != np.ndarray:
        input_image = np.stack(input_image, axis=0)
    if normalization == 'CT':
        image = mt.medical_normalization(input_image, input_copy=False)
    elif normalization == 'CT_chest':
        image = mt.medical_normalization(input_image, max_bound=1000, min_bound=-1000,
                                          pixel_mean=0.25, crop=False, input_copy=False)
    elif normalization == 'CT_lung':
        image = mt.medical_normalization(input_image, max_bound=400, min_bound=-1000,
                                          pixel_mean=0.25, crop=True, input_copy=False)
    elif normalization == 'image':
        #for unsigned integer dtypes this underflow trick yields the maximum representable value
        image_max = (np.zeros(1, dtype=input_image.dtype) - 1)[0]
        #image_max = 65535
        image = input_image / float(image_max)
    else:
        image = input_image
    if num_channels > 0:
        image = np.expand_dims(image, axis=0)
        if num_channels > 1:
            image = image.repeat(num_channels, axis=0)
    image = torch.from_numpy(image).float()
    return image
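# The sketch below (not part of the original code) illustrates the HU normalization that
# data_preprocess delegates to mt.medical_normalization. It assumes the same min/max window
# plus pixel-mean shift that is written out explicitly elsewhere in this repository,
# (value - MIN_BOUND) / (MAX_BOUND - MIN_BOUND) - PIXEL_MEAN; the function name
# hu_normalize_sketch and its default arguments are illustrative only.
import numpy as np

def hu_normalize_sketch(volume, max_bound=400, min_bound=-1000, pixel_mean=0.25, crop=True):
    normalized = (volume.astype(np.float32) - min_bound) / float(max_bound - min_bound)
    if crop:
        normalized = np.clip(normalized, 0.0, 1.0)  #clamp voxels outside the HU window
    return normalized - pixel_mean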
def fileopen():
    filename = filedialog.askopenfilename(
        filetypes=[('mhd files', '*.mhd'), ('dicom files', '*.dcm'),
                   ('numpy files', '*.npy'), ('all', '*')])
    #filename = "E:/tianchi_project/TIANCHI_examples/train_1/LKDS-00001.mhd"
    if filename == '':
        return
    print(repr(filename))
    prename, extname = os.path.splitext(filename)
    if extname == '.mhd':
        full_image_info = sitk.ReadImage(filename)
        full_scan = sitk.GetArrayFromImage(full_image_info)
        old_spacing = np.array(full_image_info.GetSpacing())[::-1]
        volimg, new_spacing = mt.resample(full_scan, old_spacing)
    elif extname == '.dcm':
        pathname = os.path.split(filename)[0]
        full_scan, full_image_info, patientid = mt.read_dicom_scan(pathname)
        cvm.view_CT(full_scan)
        old_spacing = np.array(full_image_info.GetSpacing())[::-1]
        volimg, new_spacing = mt.resample(full_scan, old_spacing)
    elif extname == '.npy':
        volimg = np.load(filename)
    else:
        print('unknown data type')
        return
    label = tk.Label(tool_view, image=cvm.view_CT(volimg))
    label.pack()
    tool_view.quit()
def save_nonnodule(self, nodule_crop, store_name, mhd_store=False):
    np.save(os.path.join(self.nonnodule_npy_path, store_name + "_nonannotation.npy"),
            nodule_crop)
    if mhd_store:
        mt.write_mhd_file(self.no_annotation_mhd_path + store_name + "_nonannotation.mhd",
                          nodule_crop, nodule_crop.shape)
def lung_slic_tool(filename):
    prename, extname = os.path.splitext(filename)
    pathname = os.path.split(filename)[0]
    if extname == '.npy':
        segresult = np.load(filename)
        labels = np.load(prename + '.label.npy')
    elif extname == '.mhd':
        npyname = filename + '.npy'
        filelist = [pathname + '/' + i for i in os.listdir(pathname)]
        if npyname in filelist:
            segresult = np.load(npyname)
            labels = np.load(filename + '.label.npy')
        else:
            full_image_info = sitk.ReadImage(filename)
            full_scan = sitk.GetArrayFromImage(full_image_info)
            old_spacing = np.array(full_image_info.GetSpacing())[::-1]
            image, new_spacing = mt.resample(full_scan, old_spacing)
            #####
            #image = image[195:200]
            #####
            print("slic segmenting")
            labels = lc.slic_segment(image, view_result=True)
            print("segmentation complete")
def fileopen():
    filename = tk.filedialog.askopenfilename(filetypes=[('mhd', '*.mhd')])
    #filename = "E:/tianchi_project/TIANCHI_examples/train_1/LKDS-00001.mhd"
    print(repr(filename))
    full_image_info = sitk.ReadImage(filename)
    full_scan = sitk.GetArrayFromImage(full_image_info)
    old_spacing = np.array(full_image_info.GetSpacing())[::-1]
    img2, new_spacing = mt.resample(full_scan, old_spacing)
    label = tk.Label(tool_view, image=cv.view_CT(img2))
    label.pack()
    canvas.pack()
def __init__(self, all_patients_path="./SLH_data/",
             annotations_file="./csv_files/annotations.csv",
             output_path="./nodule_cubes/", vision_path="./detection_vision"):
    """param all_patients_path: the parent directory holding all patients' data for this competition"""
    self.all_patients_path = all_patients_path
    self.annotations_file = annotations_file
    self.output_path = output_path
    self.vision_path = vision_path
    self.nodules_npy_path = output_path + "npy/"
    self.nonnodule_npy_path = output_path + "npy_non/"
    self.df_annotations = pd.read_excel(self.annotations_file)
    self.annotation_columns = ["anno1", "anno2", "anno3", "anno4", "anno5",
                               "anno6", "anno7", "anno8", "anno9"]
    self.ls_all_patients = []
    time_packages = mt.get_dirs(self.all_patients_path)
    for package in time_packages:
        self.ls_all_patients.extend(mt.get_dirs(package))
def prediction_combine(prediction_volume, maxclsize=-1, minclsize=10):
    print('prediction combination')
    volume4combine = prediction_volume.copy()
    nodule_detections = []
    while 1:
        # cluster_vision = np.zeros(volume4combine.shape)
        maxindex = volume4combine.argmax()
        maxz = int(maxindex / (volume4combine.shape[1] * volume4combine.shape[2]))
        maxy = int((maxindex % (volume4combine.shape[1] * volume4combine.shape[2])) / volume4combine.shape[2])
        maxx = int(maxindex % volume4combine.shape[2])
        if volume4combine[maxz, maxy, maxx] <= 0:
            break
        nodule_center = np.array([maxz, maxy, maxx], dtype=int)
        # nodule_detections.append([maxz, maxy, maxx, volume4combine[maxz][maxy][maxx]])
        volume4combine[maxz, maxy, maxx] = 0
        # steps = [[0, 0, 1], [0, 0, -1], [0, 1, 0], [0, 1, 1], [0, 1, -1], [0, -1, 0], [0, -1, 1], [0, -1, -1],
        #          [1, 0, 0], [1, 0, 1], [1, 0, -1], [1, 1, 0], [1, 1, 1], [1, 1, -1], [1, -1, 0], [1, -1, 1],
        #          [1, -1, -1],
        #          [-1, 0, 0], [-1, 0, 1], [-1, 0, -1], [-1, 1, 0], [-1, 1, 1], [-1, 1, -1], [-1, -1, 0],
        #          [-1, -1, 1], [-1, -1, -1]]
        steps = [[0, 0, 1], [0, 0, -1], [0, 1, 0], [0, 1, 1], [0, 1, -1], [0, -1, 0],
                 [0, -1, 1], [0, -1, -1], [1, 0, 0], [1, 0, 1], [1, 0, -1], [1, 1, 0],
                 [1, -1, 0], [-1, 0, 0], [-1, 0, 1], [-1, 0, -1], [-1, 1, 0], [-1, -1, 0]]
        cluster_stack = [[maxz, maxy, maxx]]
        size = 0
        # print("cluster {} rest voxel num:{}" .format(len(nodule_detections)+1, np.count_nonzero(volume4combine)))
        while len(cluster_stack) > 0:
            z, y, x = cluster_stack.pop(0)
            # cluster_vision[z][y][x] = 1
            size += 1
            for step in steps:
                neighbor = np.array([z + step[0], y + step[1], x + step[2]], dtype=int)
                if not mt.coord_overflow(neighbor, volume4combine.shape) and volume4combine[neighbor[0], neighbor[1], neighbor[2]] > 0:
                    nodule_center += neighbor
                    volume4combine[neighbor[0], neighbor[1], neighbor[2]] = 0
                    cluster_stack.append([neighbor[0], neighbor[1], neighbor[2]])
        # print("cluster_size:{}" .format(size))
        if (maxclsize < 0 or size <= maxclsize) and size >= minclsize:
            # if "cluster_visions" not in dir():
            #     cluster_visions = cluster_vision.reshape((1, cluster_vision.shape[0], cluster_vision.shape[1], cluster_vision.shape[2]))
            # else:
            #     cluster_visions = np.concatenate((cluster_visions, cluster_vision.reshape((1, cluster_vision.shape[0], cluster_vision.shape[1], cluster_vision.shape[2]))), axis=0)
            nodule_center = np.int_(nodule_center / float(size) + np.array([0.5, 0.5, 0.5]))
            nodule_detections.append([nodule_center[0], nodule_center[1], nodule_center[2],
                                      prediction_volume[maxz, maxy, maxx]])
    # the format of output is [z,y,x,prediction]
    return nodule_detections
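# A self-contained sketch (an assumption, not the repository's implementation) of what
# prediction_combine computes: each connected region of positive predictions is reduced to a
# single [z, y, x, prediction] entry. scipy.ndimage.label is used here only to illustrate the
# idea with an off-the-shelf connected-component pass; the original code grows clusters with
# its own breadth-first search over an 18-neighbourhood step list.
import numpy as np
from scipy import ndimage

def cluster_centers_sketch(prediction_volume, minclsize=10):
    labels, num_clusters = ndimage.label(prediction_volume > 0)
    detections = []
    for region in range(1, num_clusters + 1):
        coords = np.argwhere(labels == region)
        if len(coords) < minclsize:
            continue
        center = np.rint(coords.mean(axis=0)).astype(int)
        peak = prediction_volume[labels == region].max()
        detections.append([center[0], center[1], center[2], peak])
    return detections  #output format is [z,y,x,prediction], as in prediction_combine

if __name__ == '__main__':
    volume = np.zeros((8, 8, 8))
    volume[2:5, 2:5, 2:5] = 0.9  #one synthetic positive blob
    print(cluster_centers_sketch(volume, minclsize=5))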
def __getitem__(self, idx):
    datamap = self.datamap_list[idx]
    if datamap[0]:
        label, data_idx, aug_idx = datamap
        data_path = self.positive_list[data_idx]
        volume_overbound = np.load(data_path)
        volume_fitted = da.extract_augment_random(aug_idx, volume_overbound, self.data_size,
                                                  self.translation_num, self.translation_range,
                                                  self.rotation_num, self.flip_num)
    else:
        label, data_idx = datamap
        data_path = self.negative_list[data_idx]
        volume_fitted = np.load(data_path)
        if np.linalg.norm(np.int_(volume_fitted.shape) - self.data_size) != 0:
            volume_fitted = mt.local_crop(volume_fitted,
                                          np.rint(np.array(volume_fitted.shape) / 2).astype(int),
                                          self.data_size)  #the sample should be cropped to fit the data size
    #volume = volume_fitted + np.random.random_sample(volume_fitted.shape) * random.choice(self.noise_range)
    volume = da.add_noise(volume_fitted, self.noise_range)
    if self.mode not in self.norm_mode_dict.keys():
        norm_mode = "CT_lung"
    else:
        norm_mode = self.norm_mode_dict[self.mode]
    volume = self.data_preprocess(volume, norm_mode, num_channels=1)
    '''
    volume_normalized = mt.medical_normalization(volume_fitted, input_copy=False)
    volume_noisy = volume_normalized + np.random.random_sample(volume_normalized.shape) * random.choice(self.noise_range)
    volume_reshaped = volume_noisy.reshape(1, volume_noisy.shape[0], volume_noisy.shape[1], volume_noisy.shape[2])
    volume = torch.from_numpy(volume_reshaped).float()
    '''
    output = {'data': volume, 'label': label}
    if self.file_retrieval:
        output['path'] = data_path
        #return {'data': volume, 'label': label, 'path': data_path}
    #else:
    #    return {'data': volume, 'label': label}
    return output
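# A minimal usage sketch (the names here are assumptions, not the original file's): it shows
# how the {'data', 'label', 'path'} dictionaries produced by __getitem__ are typically batched
# with the standard PyTorch DataLoader.
from torch.utils.data import DataLoader

#dataset = LumpDataset(...)  #hypothetical constructor for the Dataset class defined above
#loader = DataLoader(dataset, batch_size=16, shuffle=True, num_workers=4)
#for batch in loader:
#    volumes = batch['data']  #shape [batch, 1, D, H, W] after data_preprocess(..., num_channels=1)
#    labels = batch['label']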
def get_ct_constants(self):
    maxvalue = -2000
    minvalue = 2000
    maxfile = None
    for patient in enumerate(tqdm(self.ls_all_patients)):
        patient = patient[1]
        #print(patient)
        patient_uid = mt.get_serie_uid(patient)
        patient_nodules = self.df_annotations[self.df_annotations.file == patient]
        full_image_info = sitk.ReadImage(patient)
        full_scan = sitk.GetArrayFromImage(full_image_info)
        full_scan[full_scan < -1024] = -1024
        segimage, segmask, flag = cd.segment_lung_mask(full_scan)
        vmax = full_scan[segmask == 1].max()
        vmin = full_scan[segmask == 1].min()
        if maxvalue < vmax:
            maxvalue = vmax
            maxfile = patient
        if minvalue > vmin:
            minvalue = vmin
        print("maxvalue:%d minvalue:%d" % (maxvalue, minvalue))
        print("%s" % (maxfile))
    return maxvalue, minvalue, maxfile
negative_train_accuracy = 0.0
import_batch_size = 900
for pbi in range(0, train_num, import_batch_size):
    print("training process:%d/%d" % (pbi, train_num))
    for pti in range(pbi, min(pbi + import_batch_size, train_num)):
        data_index = train_indices[pti]
        if data_index < num_positive:
            pfile = pfiles[data_index]
            isnodule = True
        else:
            pfile = nfiles[data_index - num_positive]
            isnodule = False
        data_volume = np.load(pfile)
        if isnodule:
            if pfile.split('/')[0].find("luna") >= 0:
                patient_uid, nodule_diameter = mt.get_luna_annotation_informations(pfile, annotation)
            elif pfile.split('/')[0].find("tianchi") >= 0:
                patient_uid, nodule_diameter = mt.get_luna_annotation_informations(pfile, annotation2)
            else:
                patient_uid = mt.get_volume_informations(pfile)[0]
                nodule_diameter = 0
            data_volume = da.extract_volumes(data_volume,
                                             np.int_([REGION_SIZE, REGION_SIZE, REGION_SIZE]),
                                             nodule_diameter=nodule_diameter,
                                             scale_augment=AUGMENTATION,
                                             translation_augment=AUGMENTATION,
                                             rotation_augment=AUGMENTATION)
        else:
            data_volume = mt.local_crop(
start_time = time.time()
#patient_evaluations = open(evaluation_path + "/patient_evaluations.log", "w")
results = []
CPMs = []
CPMs2 = []
hard_negatives = []
test_patients = all_patients
#random.shuffle(test_patients)
bt.filelist_store(test_patients, evaluation_path + "/patientfilelist.log")
for p in range(len(test_patients)):
    patient = test_patients[p]
    #patient = "./LUNA16/subset9/1.3.6.1.4.1.14519.5.2.1.6279.6001.212608679077007918190529579976.mhd"
    #patient = "./LUNA16/subset9/1.3.6.1.4.1.14519.5.2.1.6279.6001.102681962408431413578140925249.mhd"
    #patient = "./TIANCHI_examples/LKDS-00005.mhd"
    uid = mt.get_mhd_uid(patient)
    if 'test_uids' in dir() and uid not in test_uids:
        print("%d/%d patient %s not belong to test set" % (p + 1, len(test_patients), uid))
        continue
    print('%d/%d processing patient:%s' % (p + 1, len(test_patients), uid))
    full_image_info = sitk.ReadImage(patient)
    full_scan = sitk.GetArrayFromImage(full_image_info)
    origin = np.array(full_image_info.GetOrigin())[::-1]  #the order of origin and old_spacing is initially [z,y,x]
    old_spacing = np.array(full_image_info.GetSpacing())[::-1]
    image, new_spacing = mt.resample(full_scan, old_spacing, np.array([1, 1, 1]))
    #image = np.load(patient)
    #new_spacing = np.array([1, 1, 1])
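# A standalone sketch (an assumption, not the repository's mt.resample) of the resampling step
# used above: the scan is interpolated from its original voxel spacing onto an approximately
# isotropic 1x1x1 mm grid, and the spacing actually achieved is returned with the volume.
import numpy as np
from scipy import ndimage

def resample_sketch(scan, old_spacing, new_spacing=np.array([1.0, 1.0, 1.0])):
    resize_factor = old_spacing / new_spacing
    new_shape = np.round(np.array(scan.shape) * resize_factor)
    real_resize_factor = new_shape / np.array(scan.shape)  #factors that land on an integer grid
    achieved_spacing = old_spacing / real_resize_factor
    resampled = ndimage.zoom(scan, real_resize_factor, mode='nearest')
    return resampled, achieved_spacing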
#ktb.set_session(mt.get_session(0.5))
start_time = time.time()
#patient_evaluations = open(evaluation_path + "/patient_evaluations.log", "w")
results = []
test_patients = all_patients
#test_patients = ["./LUNA16/subset9/1.3.6.1.4.1.14519.5.2.1.6279.6001.195557219224169985110295082004.mhd"]
for p in range(len(test_patients)):
    result = []
    patient = test_patients[p]
    #patient = "./LUNA16/subset9/1.3.6.1.4.1.14519.5.2.1.6279.6001.300270516469599170290456821227.mhd"
    #patient = "./LUNA16/subset9/1.3.6.1.4.1.14519.5.2.1.6279.6001.212608679077007918190529579976.mhd"
    #patient = "./LUNA16/subset9/1.3.6.1.4.1.14519.5.2.1.6279.6001.102681962408431413578140925249.mhd"
    #patient = "./TIANCHI_examples/LKDS-00005.mhd"
    #uid = mt.get_sample_uid(patient)
    #annotations = mt.get_luna_annotations(uid, annotation_file)
    full_scan, full_image_info, uid = mt.read_sph_scan(patient)
    origin = np.array(full_image_info.GetOrigin())[::-1]
    old_spacing = np.array(full_image_info.GetSpacing())[::-1]
    annotations = mt.get_sph_annotations(uid, annotation_file)
    if len(annotations) == 0:
        print('%d/%d patient %s has no annotations, ignore it.' % (p + 1, len(test_patients), uid))
        #patient_evaluations.write('%d/%d patient %s has no annotations, ignore it\n' %(p+1, len(test_patients), uid))
        continue
    print('%d/%d processing patient:%s' % (p + 1, len(test_patients), uid))
    #full_image_info = sitk.ReadImage(patient)
    #full_scan = sitk.GetArrayFromImage(full_image_info)
    #origin = np.array(full_image_info.GetOrigin())[::-1]  #the order of origin and old_spacing is initially [z,y,x]
    #old_spacing = np.array(full_image_info.GetSpacing())[::-1]
    image, new_spacing = mt.resample(full_scan, old_spacing)  #resample
start_time = time.time()
#patient_evaluations = open(evaluation_path + "/patient_evaluations.log", "w")
results = []
CPMs = []
CPMs2 = []
test_patients = all_patients
#test_count = 0
#random.shuffle(test_patients)
bt.filelist_store(test_patients, evaluation_path + "/patientfilelist.log")
for p in range(len(test_patients)):
    patient = test_patients[p]
    #patient = "./LUNA16/subset9/1.3.6.1.4.1.14519.5.2.1.6279.6001.212608679077007918190529579976.mhd"
    #patient = "./LUNA16/subset9/1.3.6.1.4.1.14519.5.2.1.6279.6001.102681962408431413578140925249.mhd"
    #patient = "./TIANCHI_examples/LKDS-00005.mhd"
    uid = mt.get_mhd_uid(patient)
    if 'test_uids' not in dir() or uid not in test_uids:
        print("%d/%d patient %s not belong to test set" % (p + 1, len(test_patients), uid))
        continue
    annotations = mt.get_luna_annotations(uid, annotation_file)
    exclusions = mt.get_luna_annotations(uid, exclude_file)
    if len(annotations) == 0:
        print("%d/%d patient %s has no annotations, ignore it." % (p + 1, len(test_patients), uid))
        #patient_evaluations.write('%d/%d patient %s has no annotations, ignore it\n' %(p+1, len(test_patients), uid))
        continue
    #test_count += 1
    #if test_count < START_NUM:  #the START_NUM begin from 1
    #    print("%d/%d patient %s count %d/%d." %(p+1, len(test_patients), uid, test_count, START_NUM))
    #    continue
def annotations_crop(self, overbound=True, candsample=False): if os.access(self.output_path, os.F_OK): shutil.rmtree(self.output_path) os.makedirs(self.output_path) os.mkdir(self.nodules_npy_path) os.mkdir(self.nonnodule_npy_path) if not os.access(self.vision_path, os.F_OK): os.makedirs(self.vision_path) for patient in enumerate(tqdm(self.ls_all_patients[814:])): patient = patient[1] #patient = "./SLH_data/0721/285 0800418645" print(patient) full_scan, full_image_info, patient_uid = mt.read_dicom_scan( patient) if full_scan.min() < -1024: errorlog = open("results/error.log", "w") errorlog.write("Hu unit incorrect:%s\n" % (patient)) errorlog.close() origin = np.array( full_image_info.GetOrigin())[::-1] #---获取“体素空间”中结节中心的坐标 old_spacing = np.array(full_image_info.GetSpacing() )[::-1] #---该CT在“世界空间”中各个方向上相邻单位的体素的间距 min_space = old_spacing.min() image, new_spacing = mt.resample(full_scan, old_spacing) #---重采样 print('resample done') silist = self.df_annotations.serie_id.tolist() if silist.count(patient_uid) == 0: print('no annotation for this patient found') continue serie_index = silist.index(patient_uid) patient_nodules = [] for annocol in self.annotation_columns: annostr = self.df_annotations.get(annocol)[serie_index] if type(annostr) == unicode: #annotation = np.array(annostr.split(u'\uff08')[0].split(' '), dtype=int) #patient_nodules.append([serie_index, annotation]) #the index order is [x,y,z] if annostr.find(u'*') >= 0: continue coordbegin = -1 coordend = -1 for ci in range(len(annostr)): if coordbegin < 0: if annostr[ci] >= u'0' and annostr[ci] <= u'9': coordbegin = ci elif (annostr[ci] < u'0' or annostr[ci] > u'9') and annostr[ci] != u' ': coordend = ci break if coordbegin >= 0: if coordend < 0: coordend = len(annostr) coordstr = annostr[coordbegin:coordend] annotation = np.array(coordstr.split(u' '), dtype=int) patient_nodules.append( [annocol, annotation]) # the index order is [x,y,z] if type(annostr) == str: if annostr.find('*') >= 0: continue coordbegin = -1 coordend = -1 for ci in range(len(annostr)): if coordbegin < 0: if annostr[ci] >= '0' and annostr[ci] <= '9': coordbegin = ci elif (annostr[ci] < '0' or annostr[ci] > '9') and annostr[ci] != ' ': coordend = ci break if coordbegin >= 0: # annotation = np.array(annostr.split('(')[0].split(' '), dtype=int) if coordend < 0: coordend = len(annostr) coordstr = annostr[coordbegin:coordend] annotation = np.array(coordstr.split(' '), dtype=int) patient_nodules.append( [annocol, annotation]) # the index order is [x,y,z] v_centers = [] center_coords = [] for annocol, nodule in patient_nodules: nodule_center = np.array(np.flip(nodule, axis=0) * old_spacing / new_spacing, dtype=int) #---获取“世界空间”中结节中心的坐标 #v_center = np.rint((nodule_center - origin) / new_spacing) #映射到“体素空间”中的坐标 #v_center = np.array(v_center, dtype=int) v_centers.append([annocol, nodule_center]) center_coords.append(nodule_center) #volume_regioned = cv.view_coordinations(image, center_coords, window_size=10, reverse=False, slicewise=False, show=False) #cv.view_CT(volume_regioned) #np.save(self.vision_path+"/"+patient_uid+"_annotated.mhd", volume_regioned) #---这一系列的if语句是根据“判断一个结节的癌性与否需要结合该结节周边位置的阴影和位置信息”而来,故每个结节都获取了比该结节尺寸略大的3D体素 #get annotations nodule window_half = int(BOX_SIZE / 2) if overbound: box_size = 2 * BOX_SIZE box_half = BOX_SIZE else: box_size = BOX_SIZE box_half = window_half for annocol, v_center in v_centers: zyx_1 = v_center - box_half # 注意是: Z, Y, X zyx_2 = v_center + box_half if mt.coord_overflow(zyx_1, image.shape) or mt.coord_overflow( zyx_2, image.shape): 
zyx_1_fix = zyx_1.copy() zyx_2_fix = zyx_2.copy() for ci in range(3): if zyx_1[ci] < 0: zyx_1_fix[ci] = 0 elif zyx_1[ci] >= image.shape[ci]: zyx_1_fix[ci] = image.shape[ci] if zyx_2[ci] < 0: zyx_2_fix[ci] = 0 elif zyx_2[ci] >= image.shape[ci]: zyx_2_fix[ci] = image.shape[ci] img_crop = image[zyx_1_fix[0]:zyx_2_fix[0], zyx_1_fix[1]:zyx_2_fix[1], zyx_1_fix[2]:zyx_2_fix[2]] img_crop[img_crop < -1024] = -1024 #if img_crop.max() >= 600: # padding_value = 600 #elif img_crop.max() >= 0: # padding_value = img_crop.max() #else: # padding_value = -1024 padding_value = -1024 nodule_box = padding_value * np.ones( [box_size, box_size, box_size], int) nodule_box[zyx_1_fix[0] - zyx_1[0]:zyx_2_fix[0] - zyx_1[0], zyx_1_fix[1] - zyx_1[1]:zyx_2_fix[1] - zyx_1[1], zyx_1_fix[2] - zyx_1[2]:zyx_2_fix[2] - zyx_1[2]] = img_crop else: #nodule_box = np.zeros([box_size, box_size, box_size], np.int16) img_crop = image[zyx_1[0]:zyx_2[0], zyx_1[1]:zyx_2[1], zyx_1[2]:zyx_2[2]] # ---截取立方体 img_crop[img_crop < -1024] = -1024 # ---设置窗宽,小于-1024的体素值设置为-1024 nodule_box = img_crop[0:box_size, 0:box_size, 0:box_size] # ---将截取的立方体置于nodule_box self.save_annotations_nodule( nodule_box, patient_uid + "_" + annocol + "_ob") print("annotation sampling done") #get candidate annotation nodule candidate_coords = [] if candsample: segimage, segmask, flag = cd.segment_lung_mask(image) if segimage is not None: nodule_matrix, index = cd.candidate_detection( segimage, flag) cluster_labels = lc.seed_mask_cluster(nodule_matrix, cluster_size=1000) #cluster_labels = lc.seed_volume_cluster(image, segmask, eliminate_lower_size=-1) #segresult = lc.segment_color_vision(image, cluster_labels) #cv.view_CT(segresult) #lc.cluster_size_vision(cluster_labels) candidate_coords, _ = lc.cluster_centers(cluster_labels) #candidate_coords = lc.cluster_center_filter(image, candidate_coords) #the coordination order is [z,y,x] print("candidate number:%d" % (len(candidate_coords))) #volume_regioned = cv.view_coordinations(image, candidate_coords, window_size=10, reverse=False, slicewise=True, show=False) #mt.write_mhd_file(self.vision_path+"/"+patient_uid+"_candidate.mhd", volume_regioned, volume_regioned.shape[::-1]) for cc in range(len(candidate_coords)): candidate_center = candidate_coords[cc] invalid_loc = False if mt.coord_overflow(candidate_center - window_half, image.shape) or mt.coord_overflow( candidate_center + BOX_SIZE - window_half, image.shape): invalid_loc = True continue for index_search, v_center_search in v_centers: rpos = v_center_search - candidate_center if abs(rpos[0]) < window_half and abs( rpos[1] ) < window_half and abs( rpos[2] ) < window_half: #the negative sample is located in the positive location invalid_loc = True break if not invalid_loc: zyx_1 = candidate_center - window_half zyx_2 = candidate_center + BOX_SIZE - window_half nodule_box = np.zeros( [BOX_SIZE, BOX_SIZE, BOX_SIZE], np.int16) #---nodule_box_size = 45 img_crop = image[zyx_1[0]:zyx_2[0], zyx_1[1]:zyx_2[1], zyx_1[2]:zyx_2[2]] #---截取立方体 img_crop[img_crop < -1024] = -1024 #---设置窗宽,小于-1000的体素值设置为-1000 if img_crop.shape[0] != BOX_SIZE | img_crop.shape[ 1] != BOX_SIZE | img_crop.shape[2] != BOX_SIZE: print("error in resmapleing shape") try: nodule_box[ 0:BOX_SIZE, 0:BOX_SIZE, 0: BOX_SIZE] = img_crop # ---将截取的立方体置于nodule_box except: print("random error") continue #nodule_box[nodule_box == 0] = -1024#---将填充的0设置为-1000,可能有极少数的体素由0=>-1000,不过可以忽略不计 self.save_nonnodule(nodule_box, patient_uid + "_cc_" + str(cc)) print("candidate sampling done") print('Done for this patient!\n\n') 
print('Done for all!')
import keras.backend.tensorflow_backend as ktb
from keras.models import load_model
from glob import glob
from toolbox import MITools as mt
from toolbox import CTViewer as cv
from toolbox import CandidateDetection as cd
from toolbox import Lung_Pattern_Segmentation as lps
from toolbox import Lung_Cluster as lc
try:
    from tqdm import tqdm  # long waits are not fun
except:
    print('tqdm is a lightweight progress-bar package...')
    tqdm = lambda x: x

ENVIRONMENT_FILE = "./constants.txt"
IMG_WIDTH, IMG_HEIGHT, NUM_VIEW, MAX_BOUND, MIN_BOUND, PIXEL_MEAN = mt.read_environment(ENVIRONMENT_FILE)
WINDOW_SIZE = min(IMG_WIDTH, IMG_HEIGHT)
NUM_CHANNELS = 3
CANDIDATE_BATCH = 5000

test_paths = ["./TIANCHI_data/train"]
net_file = "./models_keras/tianchi-vgg-2D-v3_solid/tianchi-vgg-2D-v3_solid.h5"
vision_path = "./detection_vision/train"
result_file = "./result5.csv"

all_patients = []
for path in test_paths:
    all_patients += glob(path + "/*.mhd")
if len(all_patients) <= 0:
    print("No patient found")
    exit()
results = []
CPMs = []
CPMs2 = []
test_patients = all_patients
bt.filelist_store(all_patients, evaluation_path + "/patientfilelist.log")
#random.shuffle(test_patients)
for p in range(len(test_patients)):
    result = []
    patient = test_patients[p]
    #uid = mt.get_mhd_uid(patient)
    #annotations = mt.get_luna_annotations(uid, annotation_file)
    #if len(annotations)==0:
    #    print('%d/%d patient %s has no annotations, ignore it.' %(p+1, len(test_patients), uid))
    #    continue
    full_scan, image_info = mt.read_dicom_scan(patient)
    uid = image_info['uid']
    print('%d/%d processing patient:%s' % (p + 1, len(test_patients), uid))
    origin = image_info['origin']
    old_spacing = image_info['spacing']
    image, new_spacing = mt.resample(full_scan, old_spacing)  #resample
    print('Resample Done. time:{}s'.format(time.time() - start_time))

    #make a real nodule visualization
    real_nodules = []
    #for annotation in annotations:
    #    real_nodule = np.int_([abs(annotation[2]-origin[0])/new_spacing[0], abs(annotation[1]-origin[1])/new_spacing[1], abs(annotation[0]-origin[2])/new_spacing[2]])
    #    real_nodules.append(real_nodule)
    if 'vision_path' in dir() and vision_path is not None:
        annotation_vision = cvm.view_coordinates(image,
posbatchend = min(pbi + BATCH_SIZE, positive_train_num)
for pti in range(pbi, posbatchend):
    data_index = positive_train_indices[pti]
    pfile = tpfiles[data_index]
    positive_data = np.load(pfile)
    nodule_diameter = 0
    if "positive_batch" not in dir():
        positive_batch = extract_volumes(positive_data, nodule_diameter=nodule_diameter)
    else:
        positive_batch = np.concatenate((positive_batch,
                                         extract_volumes(positive_data, nodule_diameter=nodule_diameter)),
                                        axis=0)
positive_batch = mt.medical_normalization(positive_batch, MAX_BOUND, MIN_BOUND, PIXEL_MEAN, True)
positive_label = np.zeros(shape=(positive_batch.shape[0], 2), dtype=float)
positive_label[:, 0] = 1
predictions, accuracies = sess.run([out_fc2, correct_prediction],
                                   {volume_input: positive_batch, real_label: positive_label})
if not AUGMENTATION:
    for a in range(len(accuracies)):
        data_index = positive_train_indices[pbi + a]
        pfile = tpfiles[data_index]
        if accuracies[a]:
            correct_output.write(pfile + " {}\n".format(predictions[a]))
        else:
def detection(**kwargs): opt.parse(kwargs) test_paths = ["/data/fyl/datasets/Tianchi_Lung_Disease/train"] #test_sample_filelist = "/data/fyl/models_pytorch/DensecropNet_detection_test_rfold1/filelist_val_fold0.log" #net_file = "/data/fyl/models_pytorch/DensecropNet_stripe_detection_rfold1/DensecropNet_stripe_detection_rfold1_epoch27" annotation_file = "/data/fyl/datasets/Tianchi_Lung_Disease/chestCT_round1_annotation.csv" #candidate_file = "/data/fyl/datasets/Tianchi_Lung_Disease/candidate.csv" evaluation_path = "./experiments_dt/evaluations_tianchild_densecropnet_nodule_rfold1" #evaluation_path = "experiments_dt/evaluations_test" #vision_path = evaluation_path result_file = evaluation_path + "/result.csv" hard_negatives_file = evaluation_path + "/hard_negatives.csv" region_size = opt.input_size batch_size = opt.batch_size label_dict = { 'noduleclass': 1, 'stripeclass': 5, 'arterioclass': 31, 'lymphnodecalclass': 32 } label = label_dict[opt.label_mode] use_gpu = opt.use_gpu net_file = opt.load_model_path if 'vision_path' in dir() and vision_path is not None and not os.access( vision_path, os.F_OK): os.makedirs(vision_path) #if os.access(evaluation_path, os.F_OK): shutil.rmtree(evaluation_path) if not os.access(evaluation_path, os.F_OK): os.makedirs(evaluation_path) if "test_paths" in dir(): all_patients = [] for path in test_paths: all_patients += glob(path + "/*.mhd") if len(all_patients) <= 0: print("No patient found") exit() else: print("No test data") exit() if hasattr(opt, 'filelists') and 'test' in opt.filelists.keys(): test_samples = bt.filelist_load(opt.filelists['test']) test_uids = [] for test_sample in test_samples: sample_uid = os.path.basename(test_sample).split('_')[0] if sample_uid not in test_uids: test_uids.append(sample_uid) pd.DataFrame(data=test_uids, columns=['series_uid' ]).to_csv(result_path + '/patients_uid.csv', index=False) #else: # for path in opt.filelists['test']: # test_samples = glob(path + '/*.mhd') #model = models.DensecropNet(input_size=region_size, drop_rate=0, growth_rate=64, num_blocks=4, num_fin_growth=3).eval() model = getattr(models, opt.model)(input_size=region_size, **opt.model_setup).eval() if net_file is not None: model.load(net_file) print('model loaded from %s' % (net_file)) shutil.copyfile(net_file, evaluation_path + '/' + net_file.split('/')[-1]) #model.eval() if use_gpu: model.cuda() start_time = time.time() #patient_evaluations = open(evaluation_path + "/patient_evaluations.log", "w") results = [] CPMs = [] CPMs2 = [] hard_negatives = [] test_patients = all_patients #random.shuffle(test_patients) bt.filelist_store(test_patients, evaluation_path + "/patientfilelist.log") for p in range(len(test_patients)): patient = test_patients[p] #patient = "./LUNA16/subset9/1.3.6.1.4.1.14519.5.2.1.6279.6001.212608679077007918190529579976.mhd" #patient = "./LUNA16/subset9/1.3.6.1.4.1.14519.5.2.1.6279.6001.102681962408431413578140925249.mhd" #patient = "./TIANCHI_examples/LKDS-00005.mhd" uid = mt.get_mhd_uid(patient) if 'test_uids' in dir() and uid not in test_uids: print("%d/%d patient %s not belong to test set" % (p + 1, len(test_patients), uid)) continue #if uid!='656867': # continue print('%d/%d processing patient:%s' % (p + 1, len(test_patients), uid)) full_image_info = sitk.ReadImage(patient) full_scan = sitk.GetArrayFromImage(full_image_info) origin = np.array(full_image_info.GetOrigin( ))[::-1] #the order of origin and old_spacing is initially [z,y,x] old_spacing = np.array(full_image_info.GetSpacing())[::-1] image, new_spacing = 
mt.resample(full_scan, old_spacing, np.array([1, 1, 1])) #image = np.load(patient) #new_spacing = np.array([1, 1, 1]) #origin = np.array([0, 0, 0]) print('Resample Done. time:{}s'.format(time.time() - start_time)) if 'annotation_file' in dir(): annotations = mt.get_challenge_annotations(uid, annotation_file, label) if len(annotations) == 0: print("%d/%d patient %s has no annotations, ignore it." % (p + 1, len(test_patients), uid)) #patient_evaluations.write('%d/%d patient %s has no annotations, ignore it\n' %(p+1, len(test_patients), uid)) continue #make a real lesion visualization if 'vision_path' in dir() and vision_path is not None: real_lesions = [] for annotation in annotations: #real_lesion = np.int_([abs(annotation[2]-origin[0])/new_spacing[0], abs(annotation[1]-origin[1])/new_spacing[1], abs(annotation[0]-origin[2])/new_spacing[2]]) real_lesion = mt.coord_conversion(annotation[:3][::-1], origin, old_spacing, full_scan.shape, image.shape, dir_array=True) real_lesions.append(real_lesion) annotation_vision = cvm.view_coordinates(image, real_lesions, window_size=10, reverse=False, slicewise=False, show=False) np.save(vision_path + "/" + uid + "_annotations.npy", annotation_vision) if 'candidate_file' in dir(): print('Detection with given candidates:{}'.format(candidate_file)) candidate_coords = nd.luna_candidate(image, uid, origin, new_spacing, candidate_file, lung_segment=True, vision_path=vision_path) if 'vision_path' in dir() and vision_path is not None: volume_candidate = cvm.view_coordinates(image, candidate_coords, window_size=10, reverse=False, slicewise=False, show=False) np.save(vision_path + "/" + uid + "_candidate.npy", volume_candidate) print('Candidate Done. time:{}s'.format(time.time() - start_time)) print('candidate number:%d' % (len(candidate_coords))) candidate_predictions = nd.precise_detection_pt( image, region_size, candidate_coords, model, None, batch_size, use_gpu=use_gpu, prediction_threshold=0.4) positive_predictions = candidate_predictions > 0 predicted_coords = np.delete( candidate_coords, np.logical_not(positive_predictions).nonzero()[0], axis=0) predictions = candidate_predictions[positive_predictions] lesion_center_predictions = nd.prediction_combine( predicted_coords, predictions) if 'vision_path' in dir() and vision_path is not None: volume_predicted = cvm.view_coordinates(image, predicted_coords, window_size=10, reverse=False, slicewise=False, show=False) np.save(vision_path + "/" + uid + "_predicted.npy", volume_predicted) lesions = [] for nc in range(len(lesion_center_predictions)): lesions.append(np.int_(lesion_center_predictions[nc][0:3])) volume_prediction = cvm.view_coordinates(image, lesions, window_size=10, reverse=False, slicewise=False, show=False) np.save(vision_path + "/" + uid + "_prediction.npy", volume_prediction) else: print('Detection with slic candidates') candidate_results = nd.slic_candidate(image, 30, focus_area='lung') if candidate_results is None: continue candidate_coords, candidate_labels, cluster_labels = candidate_results if 'vision_path' in dir() and vision_path is not None: np.save(vision_path + "/" + uid + "_segmask.npy", cluster_labels) #segresult = lc.segment_vision(image, cluster_labels) #np.save(vision_path + "/" + uid + "_segresult.npy", segresult) print('Candidate Done. 
time:{}s'.format(time.time() - start_time)) print('candidate number:%d' % (len(candidate_coords))) candidate_predictions = nd.precise_detection_pt( image, region_size, candidate_coords, model, None, batch_size, use_gpu=use_gpu, prediction_threshold=0.4) positive_predictions = candidate_predictions > 0 result_predictions, result_labels = nd.predictions_map_fast( cluster_labels, candidate_predictions[positive_predictions], candidate_labels[positive_predictions]) if 'vision_path' in dir() and vision_path is not None: np.save(vision_path + "/" + uid + "_detlabels.npy", result_labels) np.save(vision_path + "/" + uid + "_detpredictions.npy", result_predictions) #detresult = lc.segment_vision(image, result_labels) #np.save(vision_path+"/"+uid+"_detresult.npy", detresult) lesion_center_predictions = nd.prediction_centering_fast( result_predictions) #lesion_center_predictions, prediction_labels = nd.prediction_cluster(result_predictions) if 'vision_path' in dir() and vision_path is not None: lesions = [] for nc in range(len(lesion_center_predictions)): lesions.append(np.int_(lesion_center_predictions[nc][0:3])) volume_predicted = cvm.view_coordinates(result_predictions * 1000, lesions, window_size=10, reverse=False, slicewise=False, show=False) np.save(vision_path + "/" + uid + "_prediction.npy", volume_predicted) if 'prediction_labels' in dir(): prediction_cluster_vision = lc.segment_color_vision( prediction_labels) np.save( vision_path + "/" + uid + "_prediction_clusters.npy", prediction_cluster_vision) print('Detection Done. time:{}s'.format(time.time() - start_time)) ''' #randomly create a result for testing lesion_center_predictions = [] for nc in range(10): lesion_center_predictions.append([random.randint(0,image.shape[0]-1), random.randint(0,image.shape[1]-1), random.randint(0,image.shape[2]-1), random.random()]) ''' if len(lesion_center_predictions) < 1000: print('Nodule coordinations:') if len(lesion_center_predictions) <= 0: print('none') for nc in range(len(lesion_center_predictions)): print('{} {} {} {}'.format(lesion_center_predictions[nc][0], lesion_center_predictions[nc][1], lesion_center_predictions[nc][2], lesion_center_predictions[nc][3])) for nc in range(len(lesion_center_predictions)): #the output coordination order is [x,y,z], while the order for volume image should be [z,y,x] result = [uid] result.extend( mt.coord_conversion(lesion_center_predictions[nc][:3], origin, old_spacing, full_scan.shape, image.shape, dir_array=False)[::-1]) if label is not None: result.append(label) result.append(lesion_center_predictions[nc][3]) results.append(result) #results.append([uid, (lesion_center_predictions[nc][2]*new_spacing[2])+origin[2], (lesion_center_predictions[nc][1]*new_spacing[1])+origin[1], (lesion_center_predictions[nc][0]*new_spacing[0])+origin[0], lesion_center_predictions[nc][3]]) #if len(lesion_center_predictions)<1000: #print('{} {} {} {}' .format(lesion_center_predictions[nc][0], lesion_center_predictions[nc][1], lesion_center_predictions[nc][2], lesion_center_predictions[nc][3])) columns = ['seriesuid', 'coordX', 'coordY', 'coordZ', 'probability'] if label is not None: columns.insert(4, 'class') result_frame = pd.DataFrame(data=results, columns=columns) result_frame.to_csv(result_file, index=False, float_format='%.4f') np.save(evaluation_path + '/result.npy', np.array(results)) if 'annotation_file' in dir(): assessment = eva.detection_assessment(results, annotation_file, label=label) if assessment is None: print('assessment failed') #patient_evaluations.write('%d/%d patient 
%s assessment failed\n' %(p+1, len(test_patients), uid)) continue #num_scans, FPsperscan, sensitivities, CPMscore, FPsperscan2, sensitivities2, CPMscore2, lesions_detected = assessment num_scans = assessment['num_scans'] FPsperscan, sensitivities = assessment['FROC'] CPMscore = assessment['CPM'] prediction_order = assessment['prediction_order'] lesions_detected = assessment['detection_cites'] if len(FPsperscan) <= 0 or len(sensitivities) <= 0: print("No results to evaluate, continue") else: eva.evaluation_vision(CPMs, num_scans, FPsperscan, sensitivities, CPMscore, lesions_detected, output_path=evaluation_path) #patient_evaluations.write('%d/%d patient %s CPM score:%f\n' %(p+1, len(test_patients), uid, single_assessment[6])) print('Evaluation Done. time:{}s'.format(time.time() - start_time)) num_positive = (lesions_detected >= 0).nonzero()[0].size for ndi in range(len(lesions_detected)): if results[prediction_order[ndi]][-1] <= 0.5 or ( lesions_detected[:ndi] >= 0).nonzero()[0].size == num_positive: break if lesions_detected[ndi] == -1: hard_negatives.append(results[prediction_order[ndi]]) hard_negatives_frame = pd.DataFrame(data=hard_negatives, columns=columns) hard_negatives_frame.to_csv(hard_negatives_file, index=False, float_format='%.4f') print('Hard Negatives Extracted. time:{}s'.format(time.time() - start_time)) print('Overall Detection Done')
def detection_fusion( test_path=None, result_path="./experiments_dt/evaluations_tianchild_densecropnet_31,32", **kwargs): opt.parse(kwargs) if test_path is None: test_paths = ["/data/fyl/datasets/Tianchi_Lung_Disease/train"] else: test_paths = [test_path] #test_sample_filelist = "/data/fyl/models_pytorch/DensecropNet_detection_test_rfold1/filelist_val_fold0.log" net_files = [ "/data/fyl/models_pytorch/DensecropNet_arterio_detection_rfold1/DensecropNet_arterio_detection_rfold1_epoch2", "/data/fyl/models_pytorch/DensecropNet_lymphnodecal_detection_rfold1/DensecropNet_lymphnodecal_detection_rfold1_epoch2" ] annotation_file = "/data/fyl/datasets/Tianchi_Lung_Disease/chestCT_round1_annotation.csv" #candidate_file = "/data/fyl/datasets/Tianchi_Lung_Disease/candidate.csv" labels = [31, 32] #result_path = "./experiments_dt/evaluations_tianchild_densecropnet_fusion" #vision_path = result_path #result_file = result_path + "/result.csv" hard_negatives_file = result_path + "/hard_negatives.csv" region_size = opt.input_size batch_size = opt.batch_size use_gpu = opt.use_gpu if 'vision_path' in dir() and vision_path is not None and not os.access( vision_path, os.F_OK): os.makedirs(vision_path) #if os.access(result_path, os.F_OK): shutil.rmtree(result_path) if not os.access(result_path, os.F_OK): os.makedirs(result_path) if "test_paths" in dir(): all_patients = [] for path in test_paths: all_patients += glob(path + "/*.mhd") if len(all_patients) <= 0: print("No patient found") exit() else: print("No test data") exit() if hasattr(opt, 'filelists') and 'test' in opt.filelists.keys(): test_samples = bt.filelist_load(opt.filelists['test']) test_uids = [] for test_sample in test_samples: sample_uid = os.path.basename(test_sample).split('_')[0] if sample_uid not in test_uids: test_uids.append(sample_uid) pd.DataFrame(data=test_uids, columns=['series_uid' ]).to_csv(result_path + '/patients_uid.csv', index=False) #else: # for path in opt.filelists['test']: # test_samples = glob(path + '/*.mhd') #model = models.DensecropNet(input_size=region_size, drop_rate=0, growth_rate=64, num_blocks=4, num_fin_growth=3).eval() networks = [ getattr(models, opt.model)(input_size=region_size, **opt.model_setup).eval() for m in range(len(net_files)) ] for n in range(len(net_files)): networks[n].load(net_files[n]) print('model loaded from %s' % (net_files[n])) shutil.copyfile(net_files[n], result_path + '/' + net_files[n].split('/')[-1]) if use_gpu: networks[n].cuda() start_time = time.time() #patient_evaluations = open(result_path + "/patient_evaluations.log", "w") results = [] labeled_results = [[] for l in range(len(labels))] CPMs = [[] for l in range(len(labels))] #hard_negatives = [] test_patients = all_patients #random.shuffle(test_patients) bt.filelist_store(test_patients, result_path + "/patientfilelist.log") for p in range(len(test_patients)): patient = test_patients[p] uid = mt.get_mhd_uid(patient) if 'test_uids' in dir() and uid not in test_uids: print("%d/%d patient %s not belong to test set" % (p + 1, len(test_patients), uid)) continue print('%d/%d processing patient:%s' % (p + 1, len(test_patients), uid)) full_image_info = sitk.ReadImage(patient) full_scan = sitk.GetArrayFromImage(full_image_info) origin = np.array(full_image_info.GetOrigin( ))[::-1] #the order of origin and old_spacing is initially [z,y,x] old_spacing = np.array(full_image_info.GetSpacing())[::-1] image, new_spacing = mt.resample(full_scan, old_spacing, np.array([1, 1, 1])) #image = np.load(patient) #new_spacing = np.array([1, 1, 1]) #origin = 
np.array([0, 0, 0]) print('Resample Done. time:{}s'.format(time.time() - start_time)) candidate_results = nd.slic_candidate(image, 20, focus_area='body') if candidate_results is None: continue candidate_coords, candidate_labels, cluster_labels = candidate_results if 'vision_path' in dir() and vision_path is not None: np.save(vision_path + "/" + uid + "_segmask.npy", cluster_labels) #segresult = lc.segment_vision(image, cluster_labels) #np.save(vision_path + "/" + uid + "_segresult.npy", segresult) print('Candidate Done. time:{}s'.format(time.time() - start_time)) print('candidate number:%d' % (len(candidate_coords))) candidate_predictions = nd.precise_detection_pt( image, region_size, candidate_coords, networks, None, batch_size, use_gpu=use_gpu, prediction_threshold=0.4) labeled_predictions = [] for l in range(len(labels)): label = labels[l] print('label: %d' % (label)) evaluation_path = result_path + '/' + str(label) if not os.access(evaluation_path, os.F_OK): os.makedirs(evaluation_path) if 'annotation_file' in dir(): annotations = mt.get_challenge_annotations(uid, annotation_file, label=label) if len(annotations) == 0: print("%d/%d patient %s has no annotations, ignore it." % (p + 1, len(test_patients), uid)) #patient_evaluations.write('%d/%d patient %s has no annotations, ignore it\n' %(p+1, len(test_patients), uid)) continue #make a real lesion visualization if 'vision_path' in dir() and vision_path is not None: real_lesions = [] for annotation in annotations: #real_lesion = np.int_([abs(annotation[2]-origin[0])/new_spacing[0], abs(annotation[1]-origin[1])/new_spacing[1], abs(annotation[0]-origin[2])/new_spacing[2]]) real_lesion = mt.coord_conversion(annotation[:3][::-1], origin, old_spacing, full_scan.shape, image.shape, dir_array=True) real_lesions.append(real_lesion) annotation_vision = cvm.view_coordinates(image, real_lesions, window_size=10, reverse=False, slicewise=False, show=False) np.save(evaluation_path + "/" + uid + "_annotations.npy", annotation_vision) positive_predictions = candidate_predictions[l] > 0 result_predictions, result_labels = nd.predictions_map_fast( cluster_labels, candidate_predictions[l][positive_predictions], candidate_labels[positive_predictions]) labeled_predictions.append(result_predictions) if 'vision_path' in dir() and vision_path is not None: np.save(evaluation_path + "/" + uid + "_detlabels.npy", result_labels) np.save(evaluation_path + "/" + uid + "_detpredictions.npy", result_predictions) #detresult = lc.segment_vision(image, result_labels) #np.save(evaluation_path+"/"+uid+"_detresult.npy", detresult) lesion_center_predictions = nd.prediction_centering_fast( result_predictions) #lesion_center_predictions, prediction_labels = nd.prediction_cluster(result_predictions) if 'vision_path' in dir() and vision_path is not None: lesions = [] for nc in range(len(lesion_center_predictions)): lesions.append(np.int_(lesion_center_predictions[nc][0:3])) volume_predicted = cvm.view_coordinates(result_predictions * 1000, lesions, window_size=10, reverse=False, slicewise=False, show=False) np.save(evaluation_path + "/" + uid + "_prediction.npy", volume_predicted) if 'prediction_labels' in dir(): prediction_cluster_vision = lc.segment_color_vision( prediction_labels) np.save( evaluation_path + "/" + uid + "_prediction_clusters.npy", prediction_cluster_vision) print('Detection Done. 
time:{}s'.format(time.time() - start_time)) ''' #randomly create a result for testing lesion_center_predictions = [] for nc in range(10): lesion_center_predictions.append([random.randint(0,image.shape[0]-1), random.randint(0,image.shape[1]-1), random.randint(0,image.shape[2]-1), random.random()]) ''' for nc in range(len(lesion_center_predictions)): #the output coordination order is [x,y,z], while the order for volume image should be [z,y,x] result = [uid] result.extend( mt.coord_conversion(lesion_center_predictions[nc][:3], origin, old_spacing, full_scan.shape, image.shape, dir_array=False)[::-1]) if label is not None: result.append(label) result.append(lesion_center_predictions[nc][3]) #results.append(result) labeled_results[l].append(result) columns = [ 'seriesuid', 'coordX', 'coordY', 'coordZ', 'probability' ] if label is not None: columns.insert(4, 'class') result_frame = pd.DataFrame(data=labeled_results[l], columns=columns) result_frame.to_csv("{}/result_{}.csv".format( evaluation_path, label), index=False, float_format='%f') #np.save("{}/result_{}.npy"%(evaluation_path, label), np.array(results)) if 'annotation_file' in dir(): assessment = eva.detection_assessment(labeled_results[l], annotation_file, label=label) if assessment is None: print('assessment failed') #patient_evaluations.write('%d/%d patient %s assessment failed\n' %(p+1, len(test_patients), uid)) continue #num_scans, FPsperscan, sensitivities, CPMscore, FPsperscan2, sensitivities2, CPMscore2, lesions_detected = assessment num_scans = assessment['num_scans'] FPsperscan, sensitivities = assessment['FROC'] CPMscore = assessment['CPM'] prediction_order = assessment['prediction_order'] lesions_detected = assessment['detection_cites'] if len(FPsperscan) <= 0 or len(sensitivities) <= 0: print("No results to evaluate, continue") else: eva.evaluation_vision(CPMs[l], num_scans, FPsperscan, sensitivities, CPMscore, lesions_detected, output_path=evaluation_path) #patient_evaluations.write('%d/%d patient %s CPM score:%f\n' %(p+1, len(test_patients), uid, single_assessment[6])) print('Evaluation Done. time:{}s'.format(time.time() - start_time)) labeled_predictions = np.array(labeled_predictions) prediction_labels = np.argmax(labeled_predictions, axis=0) predictions_fusion = labeled_predictions.sum(axis=0) / 4.0 fused_center_predictions = nd.prediction_centering_fast( predictions_fusion) if 'vision_path' in dir() and vision_path is not None: np.save(vision_path + "/" + uid + "_classlabels.npy", prediction_labels) for lcp in range(len(fused_center_predictions)): #the output coordination order is [x,y,z], while the order for volume image should be [z,y,x] center = fused_center_predictions[lcp] result = [uid] result.extend( mt.coord_conversion(center[:3], origin, old_spacing, full_scan.shape, image.shape, dir_array=False)[::-1]) result.append(labels[prediction_labels[center[0], center[1], center[2]]]) result.append(center[3]) results.append(result) columns = ['seriesuid', 'coordX', 'coordY', 'coordZ', 'probability'] if label is not None: columns.insert(4, 'class') result_frame = pd.DataFrame(data=results, columns=columns) result_frame.to_csv(result_path + '/result.csv', index=False, float_format='%f') np.save(result_path + '/result.npy', np.array(results)) print('Overall Detection Done')
train_label = np.zeros(shape=(train_num * NUM_VIEW, 2), dtype=bool)
train_indices = range(train_num)
random.shuffle(train_indices)

val_num = val_num_positive + val_num_negative
val_data = np.zeros(shape=(val_num * NUM_VIEW, IMG_WIDTH, IMG_HEIGHT), dtype=float)
val_label = np.zeros(shape=(val_num * NUM_VIEW, 2), dtype=bool)
val_indices = range(val_num)
random.shuffle(val_indices)

#patchs extraction
patchs = np.zeros(shape=(NUM_VIEW, IMG_WIDTH, IMG_HEIGHT), dtype=float)
for i in range(train_num):
    label = int(train_indices[i] < train_num_positive)
    data = np.load(tfiles[train_indices[i]])
    patchs = mt.make_patchs(data)
    for j in range(NUM_VIEW):
        train_label[i * NUM_VIEW + j][1 - label] = 1
        train_data[i * NUM_VIEW + j] = (patchs[j] - MIN_BOUND) / (MAX_BOUND - MIN_BOUND) - PIXEL_MEAN
for i in range(val_num):
    label = int(val_indices[i] < val_num_positive)
    data = np.load(tfiles[val_indices[i]])
    patchs = mt.make_patchs(data)
    for j in range(NUM_VIEW):
        val_label[i * NUM_VIEW + j][1 - label] = 1
        val_data[i * NUM_VIEW + j] = (patchs[j] - MIN_BOUND) / (MAX_BOUND - MIN_BOUND) - PIXEL_MEAN
for bi in tqdm(range(0, test_num, BATCH_SIZE)):
    batchend = min(bi + BATCH_SIZE, test_num)
    batch_size = batchend - bi
    train_data = np.zeros((batch_size, REGION_SIZE, REGION_SIZE, REGION_SIZE), dtype=float)
    train_label = np.zeros((batch_size, 2), dtype=float)
    for ti in range(bi, batchend):
        file = test_files[ti]
        malignancy = lt.sample_malignancy(file)
        if malignancy >= 0:
            train_label[ti - bi][1 - malignancy] = 1
        else:
            print('unknown malignancy, skip')
            continue
        data = np.load(file)
        data_cropped = mt.crop(data, (REGION_SIZE, REGION_SIZE, REGION_SIZE))
        train_data[ti - bi] = mt.medical_normalization(data_cropped, MAX_BOUND, MIN_BOUND,
                                                       PIXEL_MEAN, NORM_CROP, input_copy=False)
    accuracy, prediction = sess.run([batch_accuracy, prediction_fusion],
                                    {volume_input: train_data, real_label: train_label})
    total_accuracy += accuracy * batch_size
    predictions[bi:batchend] = prediction
total_accuracy /= float(test_num)
print("accuracy:{}".format(total_accuracy))
#print("prediction:\n{}" .format(predictions))
import tensorflow as tf
import numpy as np
import os
import shutil
import glob
import math
import random
from toolbox import MITools as mt
from toolbox import TensorflowTools as tft

constants = mt.read_constants("./constants2.txt")
REGION_SIZE = constants["REGION_SIZE"]
MAX_BOUND = constants["MAX_BOUND"]
MIN_BOUND = constants["MIN_BOUND"]
PIXEL_MEAN = constants["PIXEL_MEAN"]

NUM_EPOCH = 200
SNAPSHOT_EPOCH = 10
DECAY_EPOCH = 200
INITIAL_LEARNING_RATE = 0.001
DECAY_LEARNING_RATE = 1.0
BATCH_SIZE = 30
VALIDATION_RATE = 0.2

store_path = "models_tensorflow"
net_store_name = "luna_3D_l3454-512-2_bn_test"
#net_init_file = "models_tensorflow/luna_3D_l3454-512-2_bn/luna_3D_l3454-512-2_bn_epoch50"
data_dir1 = "luna_cubes_56_overbound"
data_dir2 = "SLH_cubes_56"
net_store_path = store_path + "/" + net_store_name
import tensorflow as tf
import numpy as np
import os
import shutil
import glob
import math
import random
from toolbox import MITools as mt
from toolbox import TensorflowTools as tft

constants = mt.read_constants("./constants2.txt")
REGION_SIZE = constants["REGION_SIZE"]
MAX_BOUND = float(constants["MAX_BOUND"])
MIN_BOUND = float(constants["MIN_BOUND"])
PIXEL_MEAN = float(constants["PIXEL_MEAN"])

#NUM_EPOCH = 200
SNAPSHOT_EPOCH = 5
#DECAY_EPOCH = 0
#INITIAL_LEARNING_RATE = 0.001
#DECAY_LEARNING_RATE = 1.0
BATCH_SIZE = 30
VALIDATION_RATE = 0.2
AUGMENTATION = False
ALLNEGATIVE = True

store_path = "models_tensorflow"
net_store_name = "luna_tianchi_slh_3D_l3454-512-2_bn2_test"
net_init_file = "models_tensorflow/luna_tianchi_slh_3D_l3454-512-2_bn2_stage3/luna_tianchi_slh_3D_l3454-512-2_bn2_stage3_epoch10/luna_tianchi_slh_3D_l3454-512-2_bn2_stage3_epoch10"
#if augmentation is implemented then set the batch size to 1, otherwise to half of BATCH_SIZE
positive_batch_size = (1 - int(ALL_NEGATIVE)) * (int(BATCH_SIZE / 2 / aug_proportion) - 1) + 1
#negative_batch_size = int(positive_batch_size * np_proportion)
for pbi in tqdm(range(0, positive_train_num, positive_batch_size)):
    #if AUGMENTATION:
    #    print("training process:%d/%d" %(pbi, positive_train_num))
    posbatchend = min(pbi + positive_batch_size, positive_train_num)
    for pti in range(pbi, posbatchend):
        data_index = positive_train_indices[pti]
        pfile = tpfiles[data_index]
        #pfile = 'luna_cubes_56_overbound/subset5/npy/1.3.6.1.4.1.14519.5.2.1.6279.6001.112740418331256326754121315800_34_ob_annotations.npy'
        if pfile.split('/')[0].find("luna") >= 0:
            patient_uid, nodule_diameter = mt.get_annotation_informations(
                pfile, "luna_cubes_56_overbound/luna_annotations.csv")
        elif pfile.split('/')[0].find("tianchi") >= 0:
            patient_uid, nodule_diameter = mt.get_annotation_informations(
                pfile, "tianchi_cubes_56_overbound/tianchi_annotations.csv")
        else:
            patient_uid = mt.get_volume_informations(pfile)[0]
            nodule_diameter = 0
        positive_data = np.load(pfile)
        if "positive_batch" not in dir():
            #positive_batch = mt.extract_volumes(positive_data, centering=CENTERING, nodule_diameter=nodule_diameter, scale_augment=SCALE_AUGMENTATION, translation_augment=TRANSLATION_AUGMENTATION, rotation_augment=ROTATION_AUGMENTATION, flip_augment=FLIP_AUGMENTATION)
            positive_batch = extract_volumes(positive_data, nodule_diameter=nodule_diameter)
        else:
def annotations_crop(self, randsample=True, candsample=False, overbound=False, augment=False):
    # the 'augment' option has no effect when 'overbound' is True
    if os.access(self.output_path, os.F_OK):
        shutil.rmtree(self.output_path)
    os.makedirs(self.output_path)
    os.mkdir(self.nodules_npy_path)          # positive samples for training
    os.mkdir(self.all_annotations_mhd_path)  # positive samples for inspection
    os.mkdir(self.nonnodule_npy_path)        # negative samples for training
    os.mkdir(self.no_annotation_mhd_path)    # negative samples for inspection
    if not os.access(self.vision_path, os.F_OK):
        os.makedirs(self.vision_path)

    for patient in enumerate(tqdm(self.ls_all_patients)):
        patient = patient[1]
        #patient = './LUNA16/subset9\\1.3.6.1.4.1.14519.5.2.1.6279.6001.114914167428485563471327801935.mhd'
        print(patient)
        # check whether this patient has any nodule annotation larger than 3mm
        if patient not in self.df_annotations.file.values:
            print('Patient ' + patient + ' does not exist!')
            continue
        patient_uid = mt.get_serie_uid(patient)
        patient_nodules = self.df_annotations[self.df_annotations.file == patient]
        full_image_info = sitk.ReadImage(patient)
        full_scan = sitk.GetArrayFromImage(full_image_info)
        origin = np.array(full_image_info.GetOrigin())[::-1]        # world-space origin of the volume
        old_spacing = np.array(full_image_info.GetSpacing())[::-1]  # voxel spacing of this CT in world space, per axis
        image, new_spacing = mt.resample(full_scan, old_spacing)    # resample
        print('resample done')

        v_centers = []
        center_coords = []
        for index, nodule in patient_nodules.iterrows():
            nodule_diameter = nodule.diameter_mm
            nodule_center = np.array([nodule.coordZ, nodule.coordY, nodule.coordX])  # nodule center in world-space coordinates
            v_center = np.rint((nodule_center - origin) / new_spacing)               # map to voxel-space coordinates
            v_center = np.array(v_center, dtype=int)
            v_centers.append([index, nodule_diameter, v_center])
            center_coords.append(v_center)
        #volume_regioned = cvm.view_coordinations(image, center_coords, window_size=int(math.ceil(1.5*nodule_diameter)), reverse=False, slicewise=False, show=False)
        #np.save(self.vision_path+"/"+patient_uid+"_annotated.npy", volume_regioned)

        # The branches below follow from the idea that judging whether a nodule is malignant
        # requires the shadow and position context around it, so every nodule is cropped as a
        # 3D cube slightly larger than the nodule itself.
        # get annotated nodules
        window_half = int(BOX_SIZE / 2)
        if overbound:
            num_translations = 1
            for index, nodule_diameter, v_center in v_centers:
                zyx_1 = v_center - BOX_SIZE  # note the order: Z, Y, X
                zyx_2 = v_center + BOX_SIZE
                if mt.coord_overflow(zyx_1, image.shape) or mt.coord_overflow(zyx_2, image.shape):
                    continue
                nodule_box = np.zeros([2 * BOX_SIZE, 2 * BOX_SIZE, 2 * BOX_SIZE], np.int16)  # nodule_box_size = 45
                img_crop = image[zyx_1[0]:zyx_2[0], zyx_1[1]:zyx_2[1], zyx_1[2]:zyx_2[2]]    # crop the cube
                img_crop[img_crop < -1024] = -1024  # clip the window: voxels below -1024 are set to -1024
                try:
                    nodule_box = img_crop[0:2 * BOX_SIZE, 0:2 * BOX_SIZE, 0:2 * BOX_SIZE]    # place the crop into nodule_box
                except:
                    print("annotation error")
                    continue
                #nodule_box[nodule_box == 0] = -1024  # set padded zeros to -1024; a few genuine zero voxels may flip as well, which is negligible
                self.save_annotations_nodule(nodule_box, patient_uid + "_" + str(index) + "_ob")
        else:
            if not augment:
                scales = [1.0]
                translations = np.array([0, 0, 0])
            else:
                scales = [0.8, 1.0, 1.25]
                #translations = np.array([[0,0,0],[0,0,1],[0,0,-1],[0,1,0],[0,-1,0],[1,0,0],[-1,0,0]], dtype=float)
                # the zero vector plus the 26 unit direction vectors of a 3x3x3 neighborhood
                translations = np.array(
                    [[0, 0, 0], [0, 0, 1], [0, 0, -1], [0, 1, 0],
                     [0, math.sqrt(0.5), math.sqrt(0.5)], [0, math.sqrt(0.5), -math.sqrt(0.5)],
                     [0, -1, 0], [0, -math.sqrt(0.5), math.sqrt(0.5)], [0, -math.sqrt(0.5), -math.sqrt(0.5)],
                     [1, 0, 0], [math.sqrt(0.5), 0, math.sqrt(0.5)], [math.sqrt(0.5), 0, -math.sqrt(0.5)],
                     [math.sqrt(0.5), math.sqrt(0.5), 0],
                     [math.sqrt(0.3333), math.sqrt(0.3333), math.sqrt(0.3333)],
                     [math.sqrt(0.3333), math.sqrt(0.3333), -math.sqrt(0.3333)],
                     [math.sqrt(0.5), -math.sqrt(0.5), 0],
                     [math.sqrt(0.3333), -math.sqrt(0.3333), math.sqrt(0.3333)],
                     [math.sqrt(0.3333), -math.sqrt(0.3333), -math.sqrt(0.3333)],
                     [-1, 0, 0], [-math.sqrt(0.5), 0, math.sqrt(0.5)], [-math.sqrt(0.5), 0, -math.sqrt(0.5)],
                     [-math.sqrt(0.5), math.sqrt(0.5), 0],
                     [-math.sqrt(0.3333), math.sqrt(0.3333), math.sqrt(0.3333)],
                     [-math.sqrt(0.3333), math.sqrt(0.3333), -math.sqrt(0.3333)],
                     [-math.sqrt(0.5), -math.sqrt(0.5), 0],
                     [-math.sqrt(0.3333), -math.sqrt(0.3333), math.sqrt(0.3333)],
                     [-math.sqrt(0.3333), -math.sqrt(0.3333), -math.sqrt(0.3333)]])
            num_translations = 3
            for index, nodule_diameter, v_center in v_centers:
                for s in range(len(scales)):
                    rt = np.zeros(num_translations, dtype=int)
                    rt[1:num_translations] = np.random.choice(range(1, len(translations)), num_translations - 1, False)
                    rt = np.sort(rt)
                    for t in range(rt.size):
                        scale = scales[s]
                        box_size = int(np.ceil(BOX_SIZE * scale))
                        window_size = int(box_size / 2)
                        translation = np.array(nodule_diameter / 2 * translations[rt[t]] / new_spacing, dtype=int)
                        tnz = translation.nonzero()
                        if tnz[0].size == 0 and t != 0:
                            continue
                        zyx_1 = v_center + translation - window_size  # note the order: Z, Y, X
                        zyx_2 = v_center + translation + box_size - window_size
                        if mt.coord_overflow(zyx_1, image.shape) or mt.coord_overflow(zyx_2, image.shape):
                            continue
                        nodule_box = np.zeros([BOX_SIZE, BOX_SIZE, BOX_SIZE], np.int16)  # nodule_box_size = 45
                        img_crop = image[zyx_1[0]:zyx_2[0], zyx_1[1]:zyx_2[1], zyx_1[2]:zyx_2[2]]  # crop the cube
                        img_crop[img_crop < -1024] = -1024  # clip the window: voxels below -1024 are set to -1024
                        if not augment or scale == 1.0:
                            img_crop_rescaled = img_crop
                        else:
                            img_crop_rescaled, rescaled_spacing = mt.resample(img_crop, new_spacing, new_spacing * scale)
                        try:
                            # integer division so the padding offsets can be used as indices
                            padding_shape = (img_crop_rescaled.shape - np.array([BOX_SIZE, BOX_SIZE, BOX_SIZE])) // 2
                            nodule_box = img_crop_rescaled[padding_shape[0]:padding_shape[0] + BOX_SIZE,
                                                           padding_shape[1]:padding_shape[1] + BOX_SIZE,
                                                           padding_shape[2]:padding_shape[2] + BOX_SIZE]  # place the crop into nodule_box
                        except:
                            #f = open("log.txt", 'a')
                            #traceback.print_exc(file=f)
                            #f.flush()
                            #f.close()
                            print("annotation error")
                            continue
                        #nodule_box[nodule_box == 0] = -1024  # set padded zeros to -1024; a few genuine zero voxels may flip as well, which is negligible
                        self.save_annotations_nodule(nodule_box, patient_uid + "_" + str(index) + "_" + str(s * rt.size + t))
        print("annotation sampling done")

        # get candidate (non-nodule) samples
        candidate_coords = []
        if candsample:
            segimage, segmask, flag = cd.segment_lung_mask(image)
            if segimage is not None:
                #nodule_matrix, index = cd.candidate_detection(segimage, flag)
                #cluster_labels = lc.seed_mask_cluster(nodule_matrix, cluster_size=1000)
                cluster_labels = lc.seed_volume_cluster(image, segmask, eliminate_lower_size=-1)
                segresult = lc.segment_color_vision(image, cluster_labels)
                cvm.view_CT(segresult)
                #lc.cluster_size_vision(cluster_labels)
                exit()  # debugging stop: the statements below are unreachable while this exit() is present
                candidate_coords, _ = lc.cluster_centers(cluster_labels)
                #candidate_coords = lc.cluster_center_filter(image, candidate_coords)
            # the coordinate order is [z, y, x]
            print("candidate number:%d" % (len(candidate_coords)))
            #volume_regioned = cv.view_coordinations(image, candidate_coords, window_size=10, reverse=False, slicewise=True, show=False)
            #mt.write_mhd_file(self.vision_path+"/"+patient_uid+"_candidate.mhd", volume_regioned, volume_regioned.shape[::-1])
            for cc in range(len(candidate_coords)):
                candidate_center = candidate_coords[cc]
                invalid_loc = False
                if mt.coord_overflow(candidate_center - window_half, image.shape) or mt.coord_overflow(candidate_center + BOX_SIZE - window_half, image.shape):
                    invalid_loc = True
                    continue
                for index_search, nodule_diameter_search, v_center_search in v_centers:
                    rpos = v_center_search - candidate_center
                    if abs(rpos[0]) < window_half and abs(rpos[1]) < window_half and abs(rpos[2]) < window_half:
                        # the negative sample falls inside a positive location
                        invalid_loc = True
                        break
                if not invalid_loc:
                    zyx_1 = candidate_center - window_half
                    zyx_2 = candidate_center + BOX_SIZE - window_half
                    nodule_box = np.zeros([BOX_SIZE, BOX_SIZE, BOX_SIZE], np.int16)  # nodule_box_size = 45
                    img_crop = image[zyx_1[0]:zyx_2[0], zyx_1[1]:zyx_2[1], zyx_1[2]:zyx_2[2]]  # crop the cube
                    img_crop[img_crop < -1024] = -1024  # clip the window: voxels below -1024 are set to -1024
                    # use logical 'or' here; the original bitwise '|' binds tighter than '!=' and gives the wrong result
                    if img_crop.shape[0] != BOX_SIZE or img_crop.shape[1] != BOX_SIZE or img_crop.shape[2] != BOX_SIZE:
                        print("error in resampling shape")
                    try:
                        nodule_box[0:BOX_SIZE, 0:BOX_SIZE, 0:BOX_SIZE] = img_crop  # place the crop into nodule_box
                    except:
                        print("random error")
                        continue
                    #nodule_box[nodule_box == 0] = -1024  # set padded zeros to -1024; a few genuine zero voxels may flip as well, which is negligible
                    self.save_nonnodule(nodule_box, patient_uid + "_cc_" + str(cc))
            print("candidate sampling done")

        # get random (non-nodule) samples
        if randsample:
            if overbound:
                augnum = 100
            elif augment:
                augnum = len(scales) * num_translations
            else:
                augnum = 1
            if augnum * len(v_centers) > len(candidate_coords):
                randnum = augnum * len(v_centers) - len(candidate_coords)
            else:
                randnum = len(candidate_coords)
            for rc in range(randnum):  # the random samples match the number of nodules one to one
                #index, nodule_diameter, v_center = v_centers[rc]
                rand_center = np.array([0, 0, 0])  # note the order: Z, Y, X
                invalid_loc = True
                candidate_overlap = True
                while invalid_loc:
                    invalid_loc = False
                    candidate_overlap = False
                    for axis in range(rand_center.size):
                        rand_center[axis] = np.random.randint(0, image.shape[axis])
                    if mt.coord_overflow(rand_center - window_half, image.shape) or mt.coord_overflow(rand_center + BOX_SIZE - window_half, image.shape):
                        invalid_loc = True
                        continue
                    if 'segmask' in dir() and segmask is not None and not segmask[rand_center[0], rand_center[1], rand_center[2]]:
                        invalid_loc = True
                        continue
                    for index_search, nodule_diameter_search, v_center_search in v_centers:
                        rpos = v_center_search - rand_center
                        if abs(rpos[0]) < window_half and abs(rpos[1]) < window_half and abs(rpos[2]) < window_half:
                            # the negative sample falls inside a positive location
                            invalid_loc = True
                            break
                    for candidate_coord in candidate_coords:
                        rpos = candidate_coord - rand_center
                        if abs(rpos[0]) < window_half and abs(rpos[1]) < window_half and abs(rpos[2]) < window_half:
                            # the negative sample overlaps a pre-extracted candidate location
                            candidate_overlap = True
                            break
                if candidate_overlap:
                    continue
                zyx_1 = rand_center - window_half
                zyx_2 = rand_center + BOX_SIZE - window_half
                nodule_box = np.zeros([BOX_SIZE, BOX_SIZE, BOX_SIZE], np.int16)  # nodule_box_size = 45
                img_crop = image[zyx_1[0]:zyx_2[0], zyx_1[1]:zyx_2[1], zyx_1[2]:zyx_2[2]]  # crop the cube
                img_crop[img_crop < -1024] = -1024  # clip the window: voxels below -1024 are set to -1024
                if img_crop.shape[0] != BOX_SIZE or img_crop.shape[1] != BOX_SIZE or img_crop.shape[2] != BOX_SIZE:
                    print("error in resampling shape")
                try:
                    nodule_box[0:BOX_SIZE, 0:BOX_SIZE, 0:BOX_SIZE] = img_crop  # place the crop into nodule_box
                except:
                    #f = open("log.txt", 'a')
                    #traceback.print_exc(file=f)
                    #f.flush()
                    #f.close()
                    print("candidate error")
                    continue
                #nodule_box[nodule_box == 0] = -1024  # set padded zeros to -1024; a few genuine zero voxels may flip as well, which is negligible
                self.save_nonnodule(nodule_box, patient_uid + "_rc_" + str(rc))
            print("random sampling done")
        print('Done for this patient!\n\n')
    print('Done for all!')
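# The hand-written 'translations' table above enumerates the zero vector plus the 26 normalized
# neighbor directions of a 3x3x3 cube. As a minimal sketch (not part of this project's code, and
# only equivalent up to ordering), the same set can be generated programmatically; the function
# name unit_translation_set is hypothetical. The zero vector is kept first because index 0 is
# always included in the random selection above.
def unit_translation_set():
    import itertools
    directions = [d for d in itertools.product((-1, 0, 1), repeat=3)]
    directions.remove((0, 0, 0))
    vectors = [np.zeros(3)] + [np.array(d, dtype=float) / np.linalg.norm(d) for d in directions]
    return np.array(vectors)  # shape (27, 3): zero vector plus 26 unit directions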
saver.restore(sess, net_file)

start_time = time.time()
#patient_evaluations = open(evaluation_path + "/patient_evaluations.log", "w")
results = []
CPMs = []
CPMs2 = []
test_patients = all_patients[4:5]
bt.filelist_store(all_patients, evaluation_path + "/patientfilelist.log")
#random.shuffle(test_patients)
for p in range(len(test_patients)):
    patient = test_patients[p]
    # the hard-coded path below overrides the loop variable for debugging a single scan
    #patient = "./LUNA16/subset9/1.3.6.1.4.1.14519.5.2.1.6279.6001.212608679077007918190529579976.mhd"
    patient = "./LUNA16/subset9/1.3.6.1.4.1.14519.5.2.1.6279.6001.102681962408431413578140925249.mhd"
    #patient = "./TIANCHI_examples/LKDS-00005.mhd"
    uid = mt.get_serie_uid(patient)
    annotations = mt.get_annotations(uid, annotation_file)
    if len(annotations) == 0:
        print('%d/%d patient %s has no annotations, ignore it.' % (p + 1, len(test_patients), uid))
        #patient_evaluations.write('%d/%d patient %s has no annotations, ignore it\n' % (p + 1, len(test_patients), uid))
        continue
    print('%d/%d processing patient:%s' % (p + 1, len(test_patients), uid))
    full_image_info = sitk.ReadImage(patient)
    full_scan = sitk.GetArrayFromImage(full_image_info)
    origin = np.array(full_image_info.GetOrigin())[::-1]        # the order of origin and old_spacing is initially [z, y, x]
    old_spacing = np.array(full_image_info.GetSpacing())[::-1]
    image, new_spacing = mt.resample(full_scan, old_spacing)     # resample
    print('Resample Done. time:{}s'.format(time.time() - start_time))
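# mt.resample is this project's own helper and its implementation is not shown here. For readers
# without that module, the sketch below shows a common way to resample a [z, y, x] CT volume to
# (approximately) 1 mm isotropic spacing with scipy.ndimage.zoom. It is an illustrative assumption,
# not the project's implementation; the function name, default spacing, and interpolation order are
# all assumed.
def resample_isotropic(scan, old_spacing, new_spacing=np.array([1.0, 1.0, 1.0])):
    import scipy.ndimage
    resize_factor = old_spacing / new_spacing
    new_shape = np.round(np.array(scan.shape) * resize_factor)
    real_resize_factor = new_shape / np.array(scan.shape)   # factor actually achievable with integer shapes
    real_spacing = old_spacing / real_resize_factor         # spacing that corresponds to the achieved shape
    resampled = scipy.ndimage.zoom(scan, real_resize_factor, order=1)
    return resampled, real_spacing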
def candidates_crop(self):
    if os.access(self.output_path, os.F_OK):
        shutil.rmtree(self.output_path)
    os.makedirs(self.output_path)
    os.mkdir(self.nodules_npy_path)    # positive samples for training
    os.mkdir(self.nonnodule_npy_path)  # negative samples for training
    if not os.access(self.vision_path, os.F_OK):
        os.makedirs(self.vision_path)

    for patient in enumerate(tqdm(self.ls_all_patients)):
        patient = patient[1]
        #patient = './TIANCHI_data/val/LKDS-00002.mhd'
        print(patient)
        # check whether this patient has any nodule larger than 3mm
        if patient not in self.df_annotations.file.values:
            print('Patient ' + patient + ' does not exist!')
            continue
        patient_uid = mt.get_serie_uid(patient)
        patient_nodules = self.df_annotations[self.df_annotations.file == patient]
        full_image_info = sitk.ReadImage(patient)
        full_scan = sitk.GetArrayFromImage(full_image_info)
        origin = np.array(full_image_info.GetOrigin())[::-1]        # world-space origin of the volume
        old_spacing = np.array(full_image_info.GetSpacing())[::-1]  # voxel spacing of this CT in world space, per axis
        image, new_spacing = mt.resample(full_scan, old_spacing)    # resample
        print('resample done')

        v_centers = []
        nonnodule_coords = []
        nodule_coords = []
        for index, nodule in patient_nodules.iterrows():
            nodule_class = nodule.get("class")
            nodule_center = np.array([nodule.coordZ, nodule.coordY, nodule.coordX])  # nodule center in world-space coordinates
            v_center = np.rint((nodule_center - origin) / new_spacing)               # map to voxel-space coordinates
            v_center = np.array(v_center, dtype=int)
            v_centers.append([index, nodule_class, v_center])
            if nodule_class == 1:
                nodule_coords.append(v_center)
            else:
                nonnodule_coords.append(v_center)
        #volume_regioned = cvm.view_coordinations(image, nonnodule_coords, window_size=56, reverse=False, slicewise=True, show=False)
        #np.save(self.vision_path+"/"+patient_uid+"_candidatenonnodule.npy", volume_regioned)
        #volume_regioned = cvm.view_coordinations(image, nodule_coords, window_size=10, reverse=False, slicewise=False, show=False)
        #np.save(self.vision_path+"/"+patient_uid+"_candidatenodule.npy", volume_regioned)

        # Each candidate is cropped as a 3D cube slightly larger than the nodule, since judging
        # malignancy requires the shadow and position context around it.
        # get annotated candidates
        window_half = int(BOX_SIZE / 2)
        num_translations = 1
        for index, nodule_class, v_center in v_centers:
            invalid_loc = False
            if nodule_class == 0:
                for nodule_coord in nodule_coords:
                    rpos = nodule_coord - v_center
                    if abs(rpos[0]) <= window_half and abs(rpos[1]) <= window_half and abs(rpos[2]) <= window_half:
                        # the negative sample falls inside a positive location
                        invalid_loc = True
                        break
            if not invalid_loc:
                zyx_1 = v_center - window_half  # note the order: Z, Y, X
                zyx_2 = v_center + BOX_SIZE - window_half
                if mt.coord_overflow(zyx_1, image.shape) or mt.coord_overflow(zyx_2, image.shape):
                    continue
                nodule_box = np.zeros([BOX_SIZE, BOX_SIZE, BOX_SIZE], np.int16)  # nodule_box_size = 45
                img_crop = image[zyx_1[0]:zyx_2[0], zyx_1[1]:zyx_2[1], zyx_1[2]:zyx_2[2]]  # crop the cube
                img_crop[img_crop < -1024] = -1024  # clip the window: voxels below -1024 are set to -1024
                try:
                    nodule_box = img_crop[0:BOX_SIZE, 0:BOX_SIZE, 0:BOX_SIZE]  # place the crop into nodule_box
                except:
                    print("annotation error")
                    continue
                if nodule_class == 0:
                    self.save_nonnodule(nodule_box, patient_uid + "_" + str(index) + "_cc")
                #else:
                #    self.save_annotations_nodule(nodule_box, patient_uid + "_" + str(index) + "_ob")
        print('Done for this patient!\n\n')
    print('Done for all!')
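# mt.coord_overflow is used throughout the cropping code but defined elsewhere. Inferred from its
# call sites (it takes a [z, y, x] coordinate and a volume shape and gates the crop), it presumably
# behaves roughly like the sketch below. This is an assumption for illustration only; the boundary
# convention of the real helper may differ.
def coord_overflow(coord, shape):
    coord = np.asarray(coord)
    shape = np.asarray(shape)
    # True when any component lies outside the volume, e.g.
    # coord_overflow(v_center - window_half, image.shape) rejects crops that start before the volume
    return bool(np.any(coord < 0) or np.any(coord >= shape))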