def patch_generation_normal_from_tumor(tif_dir, mask_dir, save_fdir, save_pdir,
                                       save_cdir, slide_level, patch_size,
                                       patch_num, sel_dir, tumor_win=False):
    """
    Sample patches from each slide and save the patches, their extraction
    points (csv) and an overview image with the points drawn on it (jpg).

    tif_dir : the tif directory
    mask_dir : the mask directory
    save_fdir : path prefix the patch jpg files are saved under
    save_pdir : path prefix the "<slide>_selection_point.jpg" overview is
                saved under
    save_cdir : path prefix the "<slide>.csv" file (header Y,X) is saved under
    slide_level : slide level that the mask image was produced at
    patch_size : patch size (square)
    patch_num : the number of patches requested per whole slide
    sel_dir : path prefix of the images the sampled points are drawn on;
              only every second entry of its listing (offset 1) is used
    tumor_win : when true, sample with
                tumor_patch_sampling_using_centerwin instead of
                patch_sampling_using_integral

    save_fdir, save_pdir, save_cdir and sel_dir are combined with file names
    by plain string concatenation, so they must already end with a path
    separator.

    The tif, mask and selection listings are indexed with the same slide
    index, so they should be one-to-one matched and in the same order.

    Always returns 0.
    """
    # 1-based slide numbers to skip, converted once to 0-based indices
    # (the original rebuilt this as a numpy array on every iteration).
    skipped_indices = frozenset(
        number - 1
        for number in (15, 18, 20, 29, 33, 44, 46, 51, 54, 55, 79, 92, 95))

    list_tif_name = filelist_in_directory(tif_dir)
    list_mask_name = filelist_in_directory(mask_dir)
    list_sel_name = filelist_in_directory(sel_dir)[1::2]

    for slide_idx, tif_name in enumerate(list_tif_name):
        if slide_idx in skipped_indices:
            continue

        pwd_tif = join(tif_dir, tif_name)
        pwd_msk = join(mask_dir, list_mask_name[slide_idx])
        filename = splitext(tif_name)[0]

        slide = OpenSlide(pwd_tif)
        try:
            # The with-block guarantees the csv is flushed and closed even
            # when sampling or image writing raises.
            with open(save_cdir + filename + ".csv", 'wt') as csv_file:
                c_writer = csv.writer(csv_file)
                c_writer.writerow(('Y', 'X'))

                # The overview image comes from sel_dir rather than being
                # rendered from the slide itself.
                sel_path = sel_dir + list_sel_name[slide_idx]
                bgr_im = cv2.imread(sel_path)
                print(sel_path)

                mask = cv2.imread(pwd_msk, cv2.IMREAD_GRAYSCALE)
                # A mask stored as 0/255 is converted to 0/1.
                if np.max(mask) == 255:
                    mask = (mask == 255).astype(int)

                if tumor_win:
                    sampler = tumor_patch_sampling_using_centerwin
                else:
                    sampler = patch_sampling_using_integral
                patch_list, patch_point = sampler(slide, slide_level, mask,
                                                  patch_size, patch_num)

                # Patch edge length expressed in pixels of slide_level.
                p_l_size = int(
                    patch_size / slide.level_downsamples[slide_level])

                for f_th, patch in enumerate(patch_list):
                    point = patch_point[f_th]
                    # NOTE(review): element [1] is written under the 'Y'
                    # header and element [0] under 'X', while cv2.rectangle
                    # takes element [1] as its x coordinate -- confirm the
                    # intended axis naming before relying on the csv.
                    cv2.imwrite(
                        "{}{}_patch_{}_{}_{}_{}.jpg".format(
                            save_fdir, filename, f_th, point[1], point[0],
                            patch_size), patch)
                    c_writer.writerow((point[1], point[0]))
                    cv2.rectangle(
                        bgr_im, (point[1], point[0]),
                        (point[1] + p_l_size, point[0] + p_l_size),
                        (0, 0, 255), 1)

                cv2.imwrite(save_pdir + filename + "_selection_point.jpg",
                            bgr_im)
        finally:
            # Release the slide handle; the original never closed it.
            slide.close()

        print("complete patch extraction about " + tif_name)

    return 0
def extract_patches(self, h5db, new_folder):
    """
    Print a notice that OpenSlide is needed, then return None.

    NOTE(review): the `return None` right after the print is unconditional,
    so every statement below it is unreachable. The unreachable remainder is
    kept unchanged; it samples tumor and normal patches per annotation file,
    hands them to self.store and saves matplotlib figures.

    h5db : only used by the unreachable code, where it is passed to
           self.store
    new_folder : only used by the unreachable code, as the directory the
                 png figures are saved into
    """
    print 'OpenSlide needed to extract patches.'
    return None
    # ------------------------------------------------------------------
    # Everything from here to the end of the method is unreachable.
    # ------------------------------------------------------------------
    # Bare string statement holding an earlier draft of the loop; it is not
    # a docstring and has no effect.
    ''' for centre in self.centres: print('[cnn][patch_extraction] Selected Centre: ', centre) # each centre may have more than one annotation XML file, so here we retrieve # a list of all the XMLs related to the current centre annotation_list = np.sort(self.get_annotation_list(centre, self.xml_source_fld)) # for each XML file in the annotation list # we want to extract tumor and normal patches for xml_file in annotation_list: files_counter +=1 # variable to shape the final data vector '''
    print('[debug] ', self.name)
    print('[debug] ', self.settings)
    self.set_files_counter(self.count_annotation_files())
    print('[dataset] {0} [extract_patches] {1} total annotation files.'.
          format(self.name, self.files_counter))
    for centre in self.centres:
        annotation_list = self.get_annotation_list(centre)
        for xml_file in annotation_list:
            slide_path = self.get_wsi_path(centre, xml_file)
            xml_path = os.path.join(self.xml_source_fld, xml_file)
            # retrieving the information about the file analysed.
            # info is a dictionary with the following keys:
            # info['centre'], current centre number
            # info['patient'], current patient number
            # info['node'], current WSI node
            info = self.get_info(xml_path, centre)
            #functions.setDBHierarchy(h5db, self.settings,info)
            # This one patient value is skipped explicitly.
            if info['patient'] == '008_Mask.tif':
                continue
            # NOTE(review): xml_path is the result of os.path.join above, so
            # this comparison looks like it is always true -- confirm.
            if xml_path != None:  ## add check slide is open and ok
                # preprocess takes the WSI path, and the slide_level and returns the
                # the WSI openslide obj, the tumor annotation mask, the WSI image
                # and the tumor contours
                if self.name == 'camelyon16':
                    print('import openslides')
                    #slide = openslide.OpenSlide(slide_path)
                    #rgb_im = np.array(slide.read_region((0,0),7,slide.level_dimensions[7]))
                    #mask_file = xml_path+'Tumor_{}_Mask.tif'.format(info['patient'])
                    #import pdb; pdb.set_trace()
                    # NOTE(review): `slide` and `rgb_im` are read in this
                    # branch, but their assignments above are commented out;
                    # they are only assigned in the else branch below.
                    # The file at xml_path is opened with OpenSlide and read
                    # at the hard-coded level 7.
                    annotations = np.asarray(
                        openslide.OpenSlide(xml_path).read_region(
                            (0, 0), 7, slide.level_dimensions[7]))
                    # Only the first channel is kept as the mask.
                    annotations_mask = annotations[:, :, 0]
                    #import pdb; pdb.set_trace()
                    im_contour = rgb_im
                else:
                    # NOTE(review): this drops into the debugger before every
                    # preprocess call -- looks like a debugging leftover.
                    import pdb
                    pdb.set_trace()
                    slide, annotations_mask, rgb_im, im_contour = functions.preprocess(
                        slide_path,
                        xml_path,
                        slide_level=self.settings['slide_level'])
                tum_patch_list, tum_patch_point = integral.patch_sampling_using_integral(
                    slide, self.settings['slide_level'], annotations_mask,
                    self.settings['patch_size'], self.settings['n_samples'])
                # conversion of the lists to np arrays
                tum_patch_array = np.asarray(tum_patch_list)
                #import pdb; pdb.set_trace()
                tum_locations = np.array(tum_patch_point)
                # storage in the HDF5 db
                self.store(h5db, info, tum_patch_array, tum_locations,
                           'tumor')
                # reverting the tumor mask to find normal tissue and extract patches
                # Note :
                # normal_mask = tissue mask(morp_im) - tumor mask(annotations_mask)
                ##### restart from here ##
                morp_im = functions.get_morp_im(rgb_im)
                normal_im = morp_im - annotations_mask  ## np.min(normal_im) := -1.0
                # Keep only the pixels where the difference is exactly 1.
                normal_im = normal_im == 1.0
                normal_im = (normal_im).astype(int)
                # sampling normal patches with uniform distribution
                nor_patch_list, nor_patch_point = integral.patch_sampling_using_integral(
                    slide, self.settings['slide_level'], normal_im,
                    self.settings['patch_size'], self.settings['n_samples'])
                nor_patch_array = np.asarray(nor_patch_list)
                normal_patches_locations = np.array(nor_patch_point)
                # storing the normal patches and their locations
                # NOTE(review): the tumor call above passes the np.array of
                # locations, this one passes the raw nor_patch_point instead
                # of normal_patches_locations -- confirm which is intended.
                self.store(h5db, info, nor_patch_array, nor_patch_point,
                           'normal')
                # Bare string statement used as a section heading; no effect.
                ''' Visualisation '''
                # plotting the tumor locations in the XML file
                # Drawing the normal patches sampling points
                # tumor_locations.png shows the tumor patches locations in red
                # and the normal patches locations in green
                tumor_locations_im = rgb_im
                plt.figure()
                plt.imshow(tumor_locations_im)
                # Each location is unpacked as (p_x, p_y) but plotted as
                # scatter(p_y, p_x), i.e. with the two elements swapped.
                for p_x, p_y in normal_patches_locations:
                    plt.scatter(p_y, p_x, c='g')
                    #cv2.circle(tumor_locations_im,(p_y,p_x),30,(0,255,0),10)
                for p_x, p_y in tum_locations:
                    plt.scatter(p_y, p_x, c='r')
                    #cv2.circle(tumor_locations_im,(p_y,p_x),30,(255,0,0), 10)
                print(
                    '[cnn][patch_extraction] Saving tumor locations image')
                plt.savefig(
                    os.path.join(
                        new_folder,
                        'level{}_centre{}_patient{}_node{}_tumor_locations.png'
                        .format(self.settings['slide_level'], info['centre'],
                                info['patient'], info['node'])))
                plt.close()
                #print('Saving tumor locations image')
                #plt.savefig('tumor_locations_patient0{}_node{}'.format(info['patient'], info['node']))
                print(
                    '[cnn][patch_extraction] Saving annotation mask and normal tissue mask'
                )
                plt.figure()
                plt.imshow(annotations_mask)
                plt.savefig(
                    os.path.join(
                        new_folder,
                        'level{}_centre{}_patient{}_node{}_annotation_mask.png'
                        .format(self.settings['slide_level'], info['centre'],
                                info['patient'], info['node'])))
                plt.close()
                plt.figure()
                plt.imshow(normal_im)
                plt.savefig(
                    os.path.join(
                        new_folder,
                        'level{}_centre{}_patient{}_node{}_normal_tissue_mask.png'
                        .format(self.settings['slide_level'], info['centre'],
                                info['patient'], info['node'])))
                plt.close()
                plt.close('all')
                # Running totals of the patches extracted so far.
                self.tum_counter += len(tum_patch_array)
                self.nor_counter += len(nor_patch_array)
                #self.nor_counter = 0
    return
tummor_contours = get_opencv_contours_from_xml( xml_path, slide.level_downsamples[slide_level]) tum_im = rgb_im.copy() cv2.drawContours(tum_im, tummor_contours, -1, (0, 255, 0), -1) ## Get Mask _, tum, _ = cv2.split(tum_im) tum = tum == 255 tum = tum.astype(int) tx_l, ty_l = tum.nonzero() sampling_num = 10 tum_patch_list, tum_patch_point = patch_sampling_using_integral( slide, slide_level, tum, patch_size, sampling_num) subplot_show(tum_patch_list, 2, 5, "tummor") """ get Normal mask tissu mask(morp_im) - tummor mask(tum) = normal mask """ normal_im = morp_im - tum ## np.min(normal_im) := -1.0 normal_im = normal_im == 1.0 normal_im = (normal_im).astype(int) nx_l, ny_l = normal_im.nonzero() nor_patch_list, nor_patch_point = patch_sampling_using_integral(