def get_sum_of_rectangle(im_int, x, y, w, h):
    """Return the integral-image sum over a rectangle.

    The rectangle corners are derived from ``(x, y)`` and ``(x + w, y + h)``
    via ``round_i``; the far corner is made inclusive by subtracting 1.

    :param im_int: integral image, passed unchanged to ``integrate``
    :param x, y: rectangle origin
    :param w, h: rectangle extent along x and y
    :return: the value returned by ``integrate``; when that is a sequence or
        a NumPy array, its first element
    """
    # `collections.Sequence` was removed in Python 3.10. Resolve the ABC from
    # `collections.abc` and fall back to the old location on Python 2.
    try:
        from collections.abc import Sequence
    except ImportError:
        from collections import Sequence

    x_f = round_i(x)
    y_f = round_i(y)
    x_t = round_i(x + w) - 1
    y_t = round_i(y + h) - 1

    # NOTE(review): y is passed before x, which looks like the legacy
    # scikit-image `integrate(ii, r0, c0, r1, c1)` signature -- confirm.
    area = integrate(im_int, y_f, x_f, y_t, x_t)
    if isinstance(area, (Sequence, np.ndarray)):
        return area[0]
    return area
def extract_patch_on_slide(
        file_path_tif,
        file_path_tis_mask,
        file_path_jpg,
        save_location_path_patch_position_visualize,
        save_location_path_patch_position_csv,
        size_patch):
    """Sample random tissue patches from a slide, save them and their positions.

    For every tissue contour that is at least one patch in area, up to 50
    random positions are drawn inside the contour's bounding box (level 4).
    A position is kept when the patch fits inside the slide and at least 90%
    of it lies on the tissue mask. For each kept position the function

    * appends the level-0 coordinate to the CSV file (columns ``X``, ``Y``),
    * reads the ``size_patch`` x ``size_patch`` region at level 0 and writes
      it as a JPG whose name is built from the module-level
      ``save_cut_path_positive_patch_17`` prefix and the global counter ``ID``,
    * draws a green rectangle on the visualisation image.

    :param file_path_tif: full path of the slide, opened with ``OpenSlide``
    :param file_path_tis_mask: full path of the tissue mask image (read as
        grayscale and divided by 255)
    :param file_path_jpg: full path of the slide thumbnail used for drawing
    :param save_location_path_patch_position_visualize: full path of the
        visualisation image to write
    :param save_location_path_patch_position_csv: full path of the CSV file
        to write
    :param size_patch: patch edge length at level 0
    :return: None
    """
    global ID

    patch_level = 0
    mask_level = 4

    slide = OpenSlide(file_path_tif)
    try:
        slide_w_lv_4, slide_h_lv_4 = slide.level_dimensions[mask_level]
        downsample = slide.level_downsamples[mask_level]
        size_patch_lv_4 = int(size_patch / downsample)
        area_patch_lv_4 = size_patch_lv_4 ** 2

        # Integral image of the transposed mask, so it is indexed as (x, y).
        tissue_mask = cv2.imread(file_path_tis_mask, 0)
        integral_image_tissue = integral_image(tissue_mask.T / 255)

        # Copy of the thumbnail on which patch positions are drawn.
        wsi_bgr_jpg = cv2.imread(file_path_jpg)
        wsi_jpg_visualizing_patch_position = wsi_bgr_jpg.copy()

        print('==> making contours of tissue region from jpg ..')
        # findContours returns (image, contours, hierarchy) on OpenCV 3.x but
        # (contours, hierarchy) on 2.x/4.x; the contours are always at [-2].
        contours_tissue = cv2.findContours(
            tissue_mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]
        # Tissue contours are drawn in blue (BGR).
        cv2.drawContours(wsi_jpg_visualizing_patch_position,
                         contours_tissue, -1, (255, 0, 0), 2)

        patch_cnt = 0
        # `with` guarantees the CSV is flushed and closed even if sampling fails.
        with open(save_location_path_patch_position_csv, 'w') as csv_file:
            csv_writer = csv.DictWriter(csv_file, fieldnames=['X', 'Y'])
            csv_writer.writeheader()

            print('==> Extracting patches randomly on tissue region...')
            for contour in contours_tissue:
                # Skip contours smaller than a single patch.
                area = cv2.contourArea(contour)
                if area < area_patch_lv_4:
                    continue

                # Number of patches grows with the contour area, capped at 50.
                number_patches = int(round(area / area_patch_lv_4 * 1.5))
                number_patches = min(50, number_patches)
                print('contour area : ', area, ' num_patch : ', number_patches)

                # Bounding box of the contour (level 4).
                points = contour.reshape(-1, 2)
                coords_x = points[:, 0]
                coords_y = points[:, 1]
                candidate_x = np.arange(
                    round(np.min(coords_x)), round(np.max(coords_x))).astype(int)
                candidate_y = np.arange(
                    round(np.min(coords_y)), round(np.max(coords_y))).astype(int)

                len_x = candidate_x.shape[0]
                len_y = candidate_y.shape[0]
                number_patches = min(number_patches, len_x, len_y)
                # x is drawn without replacement, y with replacement.
                random_index_x = np.random.choice(
                    len_x, number_patches, replace=False)
                random_index_y = np.random.choice(
                    len_y, number_patches, replace=True)

                for index_x, index_y in zip(random_index_x, random_index_y):
                    # Plain ints: OpenCV drawing calls can reject NumPy scalars.
                    patch_x = int(candidate_x[index_x])
                    patch_y = int(candidate_y[index_y])

                    # Patch must fit inside the slide at level 4.
                    if (patch_x + size_patch_lv_4 > slide_w_lv_4) or \
                            (patch_y + size_patch_lv_4 > slide_h_lv_4):
                        continue

                    # Fraction of the patch covered by the tissue mask.
                    tissue_integral = integrate(
                        integral_image_tissue,
                        (patch_x, patch_y),
                        (patch_x + size_patch_lv_4 - 1,
                         patch_y + size_patch_lv_4 - 1))
                    tissue_ratio = tissue_integral / (size_patch_lv_4 ** 2)
                    if tissue_ratio < 0.9:
                        continue

                    # Record the level-0 position.
                    patch_x_lv_0 = int(round(patch_x * downsample))
                    patch_y_lv_0 = int(round(patch_y * downsample))
                    csv_writer.writerow({'X': patch_x_lv_0, 'Y': patch_y_lv_0})
                    patch_cnt += 1

                    # Cut the patch out of the slide and save it.
                    im = slide.read_region((patch_x_lv_0, patch_y_lv_0),
                                           patch_level,
                                           (size_patch, size_patch))
                    im_rgba = np.array(im)
                    # NOTE(review): cv2.imwrite expects BGR, but the array is
                    # converted to RGB here, so red/blue end up swapped in the
                    # saved file -- confirm this is what downstream readers expect.
                    im_rgb = cv2.cvtColor(im_rgba, cv2.COLOR_RGBA2RGB)

                    # NOTE(review): assumes the slide file name is the 5th
                    # '/'-separated component of `file_path_tif`.
                    cur_patient_name = file_path_tif.split('/')[4]
                    cur_patient_name = cur_patient_name.split('.')[0]
                    cur_cut_name = save_cut_path_positive_patch_17 + \
                        cur_patient_name + '_' + str(ID) + '.jpg'
                    ID += 1
                    print('cur_cut_name', cur_cut_name)
                    cv2.imwrite(cur_cut_name, im_rgb)

                    # Mark the patch position in green.
                    cv2.rectangle(
                        wsi_jpg_visualizing_patch_position,
                        (patch_x, patch_y),
                        (patch_x + size_patch_lv_4, patch_y + size_patch_lv_4),
                        (0, 255, 0),
                        thickness=1)

        print('slide :\t', file_path_tif)
        print('patch_cnt:\t', patch_cnt)

        # Save the visualisation image.
        cv2.imwrite(save_location_path_patch_position_visualize,
                    wsi_jpg_visualizing_patch_position)
    finally:
        slide.close()
def create_other_txt_sequence(file_path_tif, file_path_tis_mask, file_path_mask, list,
                              file_path_txt, kind, patch_size, mask_downsample, stride):
    """Append grid-sampled patch centres of selected slides to a text file.

    Every file in ``file_path_tif`` is considered, except names containing
    ``"TCGA-2Y-A9H1"``. A slide is processed only when its name (file name
    minus the last four characters) is in ``list`` and its label mask
    ``<file_path_mask><name>_mask_<mask_downsample>.png`` exists. The joint
    bounding box of all tissue contours is scanned on a regular grid; a
    position is written when more than 80% of the patch is tissue and less
    than 20% of it is covered by the label mask.

    Each output line is ``<name>,<x>,<y>,<kind>`` where ``x``/``y`` are the
    patch centre scaled by ``mask_downsample``.

    :param file_path_tif: directory prefix of the slides (concatenated with
        the file name, so it must end with a path separator)
    :param file_path_tis_mask: path prefix of the tissue masks
    :param file_path_mask: path prefix of the label masks
    :param list: collection of slide names to process
    :param file_path_txt: text file opened in ``'a+'`` mode
    :param kind: string written as the last field of every line
    :param patch_size: patch edge length at level 0
    :param mask_downsample: downsample factor of the masks
    :param stride: grid step at level 0
    :return: None
    """
    # `with` closes the output file even when a slide fails part-way through.
    with open(file_path_txt, 'a+') as txt:
        for file in os.listdir(file_path_tif):
            if "TCGA-2Y-A9H1" in file:
                continue

            file_name = file[:-4]
            lab_mask_name = file_path_mask + file_name + '_mask_' + str(
                mask_downsample) + '.png'
            if file_name not in list or not os.path.exists(lab_mask_name):
                continue

            tissue_mask_name = file_path_tis_mask + file_name + '_tissue_mask_' + str(
                mask_downsample) + '.png'
            # Integral images of the transposed masks, indexed as (x, y).
            tissue_mask = cv2.imread(tissue_mask_name, 0)
            integral_image_tissue = integral_image(tissue_mask.T / 255)
            lab_mask = cv2.imread(lab_mask_name, 0)
            integral_image_lab = integral_image(lab_mask.T / 255)

            # Size of the patch when mapped onto the mask level.
            size_patch_lv_k = int(patch_size / mask_downsample)
            print(size_patch_lv_k)

            # The slide is only needed for its level-0 dimensions.
            slide = OpenSlide(file_path_tif + file)
            try:
                slide_w_lv_0, slide_h_lv_0 = slide.dimensions
            finally:
                slide.close()
            slide_w = int(slide_w_lv_0 / mask_downsample)
            slide_h = int(slide_h_lv_0 / mask_downsample)

            # findContours returns 3 values on OpenCV 3.x and 2 on 2.x/4.x;
            # the contours are always the second-to-last item.
            contours_tissue = cv2.findContours(
                tissue_mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]
            if len(contours_tissue) == 0:
                # No tissue found: there is no bounding box to scan.
                continue

            # Joint bounding box of every tissue contour.
            points = np.vstack([c.reshape(-1, 2) for c in contours_tissue])
            p_x_left, p_y_top = points.min(axis=0)
            p_x_right, p_y_bottom = points.max(axis=0)

            stride_lv = int(stride / mask_downsample)
            print(p_x_left, p_x_right, p_y_top, p_y_bottom)

            patch_area = size_patch_lv_k ** 2
            for x in range(p_x_left, p_x_right, stride_lv):
                for y in range(p_y_top, p_y_bottom, stride_lv):
                    x_end = x + size_patch_lv_k - 1
                    y_end = y + size_patch_lv_k - 1
                    # Patch must fit inside the slide at mask resolution.
                    if (x_end >= slide_w) or (y_end >= slide_h):
                        continue

                    tissue_integral = integrate(
                        integral_image_tissue, (x, y), (x_end, y_end))
                    tissue_ratio = tissue_integral / patch_area
                    lab_integral = integrate(
                        integral_image_lab, (x, y), (x_end, y_end))
                    lab_ratio = lab_integral / patch_area

                    if tissue_ratio > 0.8 and lab_ratio < 0.2:
                        # Patch centre, scaled back by the mask downsample.
                        x_lv = int(x + size_patch_lv_k / 2)
                        y_lv = int(y + size_patch_lv_k / 2)
                        patch_x_lv_0 = str(round(x_lv * mask_downsample))
                        patch_y_lv_0 = str(round(y_lv * mask_downsample))
                        txt.write(','.join(
                            [file_name, patch_x_lv_0, patch_y_lv_0, kind]) + '\n')
def patch_sampling(slide, mask, nonzerox, nonzeroy, **opts):
    """Sample patches from a whole slide image at nonzero mask points.

    Candidate points are taken from ``nonzerox``/``nonzeroy`` in the order
    produced by the ``get_index__<method>`` helper. A point is rejected when
    it is on the mask border, when the patch would leave the mask, when the
    mask covers less than ``area_overlap`` of the patch, or when the patch is
    black, too white or too dark.

    Arguments
    +++++++++

    :param obj slide: OpenSlide Object

    :param obj mask: mask image as (0, 1) int Numpy array

    :param list nonzerox, nonzeroy: Numpy arrays of nonzero points over `mask`

    Keyword arguments
    +++++++++++++++++

    Any option left out keeps the default shown; unknown options raise
    ``RuntimeError``.

    :param float area_overlap: minimum mask coverage of a patch (.6)

    :param int bad_batch_size: passed to the ``get_white_threshold`` helper (500)

    :param int gray_threshold: minimum mean value of an accepted patch (90)

    :param int margin_width_x, margin_width_y: border margins passed to
        ``is_point_on_border`` (250, 50)

    :param str method: suffix selecting the ``get_index__*``,
        ``get_white_threshold__*`` and ``is_batch_over__*`` helpers ('random')

    :param int n_samples: the number of patches to extract (100)

    :param int patch_size: patch edge length at level 0 (224)

    :param int slide_level: level of mask (5)

    :param int start_idx: start index passed to the ``get_index`` helper (0)

    :param int white_level: channel value above which a pixel counts as
        white (200)

    :param float white_threshold: maximum accepted white fraction (.3)

    :param float white_threshold_incr, white_threshold_max: passed to the
        ``get_white_threshold`` helper (.05, .7)

    :return list: patches (BGR images), list of patch points (starting from
        left top), last used nonzero mask point's index or None (when
        `method` is 'random', or when there are too few mask points)

    :raises RuntimeError: on unknown options or a non-callable helper
    """
    global logger

    # default values, overridden by **opts
    dopts = {
        'area_overlap': .6,
        'bad_batch_size': 500,
        'gray_threshold': 90,
        'margin_width_x': 250,  # as per original code, watch out!
        'margin_width_y': 50,   # ditto
        'method': 'random',
        'n_samples': 100,
        'patch_size': 224,
        'slide_level': 5,
        'start_idx': 0,
        'white_level': 200,
        'white_threshold': .3,
        'white_threshold_incr': .05,
        'white_threshold_max': .7,
    }
    for dk in dopts:
        # Keep the default when the caller omits the option; popping with a
        # `None` fallback would silently wipe every default.
        dopts[dk] = opts.pop(dk, dopts[dk])

    if opts:
        # leftovers...
        raise RuntimeError('Unexpected options {}'.format(opts))

    logger.debug("kw opts:\n{}.".format(dopts))

    # Explicit locals instead of `exec`, which cannot create function locals
    # on Python 3.
    area_overlap = dopts['area_overlap']
    bad_batch_size = dopts['bad_batch_size']
    gray_threshold = dopts['gray_threshold']
    margin_width_x = dopts['margin_width_x']
    margin_width_y = dopts['margin_width_y']
    method = dopts['method']
    n_samples = dopts['n_samples']
    patch_size = dopts['patch_size']
    slide_level = dopts['slide_level']
    start_idx = dopts['start_idx']
    white_level = dopts['white_level']
    white_threshold = dopts['white_threshold']
    white_threshold_incr = dopts['white_threshold_incr']
    white_threshold_max = dopts['white_threshold_max']

    # bind to aux functions named `<helper>__<method>` at module level
    bfn = {}
    for n in ('get_index', 'get_white_threshold', 'is_batch_over'):
        bfn[n] = globals()['{}__{}'.format(n, method)]
        if not callable(bfn[n]):
            raise RuntimeError(
                '[BUG] {} => {}: invalid aux function binding'.format(n, bfn[n])
            )

    report = {
        'out_of_boundary': 0,
        'on_border': 0,
        'black_patches': 0,
        'white_patches': 0,
        'gray_patches': 0,
    }

    patch_list = []
    patch_point = []

    # patch size at given level or resolution
    level_patch_size = int(patch_size / slide.level_downsamples[slide_level])

    x_l, y_l = nonzerox, nonzeroy
    x_ln = len(x_l)

    logger.info('Working mask has {} x {} nonzero points'.format(x_ln, len(y_l)))

    if x_ln < level_patch_size * 2:
        logger.info(
            "Not enough nonzero mask points for at least 2 patches ({} < {})".format(
                x_ln, level_patch_size
            )
        )
        return [], [], None

    # mask points scaled to level-0 coordinates
    x_ws = (np.round(x_l * slide.level_downsamples[slide_level])).astype(int)
    y_ws = (np.round(y_l * slide.level_downsamples[slide_level])).astype(int)

    cnt = 0     # good patch counter
    nt_cnt = 0  # not taken patch counter
    p_iterator = bfn['get_index'](start_idx, x_ln - 1)
    p_idx = start_idx  # just for init purposes

    while cnt < n_samples:
        # pick an index...
        try:
            p_idx = next(p_iterator)
        except StopIteration:
            break

        # ...corresponding point in the mask
        level_point_x, level_point_y = x_l[p_idx], y_l[p_idx]

        # [BUG] otsu threshold takes also border, so discard?? mmh, needs
        # double check (risk missing stuff...)
        if is_point_on_border(level_point_x, level_point_y,
                              margin_width_x, margin_width_y):
            report['on_border'] += 1
            continue

        if not is_point_within_boundaries(
                level_point_x, level_point_y, level_patch_size, mask.shape):
            report['out_of_boundary'] += 1
            continue

        # make patch from mask image
        level_patch_mask = mask[
            int(level_point_x): int(level_point_x + level_patch_size),
            int(level_point_y): int(level_point_y + level_patch_size)
        ]

        # the mask must cover at least `area_overlap` of the patch
        ii_map = integral_image(level_patch_mask)
        ii_sum = integrate(
            ii_map, (0, 0), (level_patch_size - 1, level_patch_size - 1)
        )
        overlap = float(ii_sum) / (level_patch_size ** 2)
        if overlap < area_overlap:
            continue

        # square patch (RGBA point array in [0, 255]) read at level 0
        patch = slide.read_region(
            (y_ws[p_idx], x_ws[p_idx]), 0, (patch_size, patch_size)
        )
        patch = np.array(patch)

        if np.sum(patch) == 0:
            report['black_patches'] += 1
            continue

        # fraction of almost-white values over the 3 RGB channels
        white_mask = patch[:, :, 0:3] > white_level
        if float(np.sum(white_mask)) / (patch_size ** 2 * 3) <= white_threshold:
            patch = cv2.cvtColor(patch, cv2.COLOR_RGBA2BGR)
            if np.mean(patch) > gray_threshold:
                # got a good one, with its location
                patch_list.append(patch)
                patch_point.append((x_l[p_idx], y_l[p_idx]))
                cnt += 1
            else:
                report['gray_patches'] += 1
        else:
            # bad one: too white
            report['white_patches'] += 1
            nt_cnt += 1

        # possibly get an update
        # NOTE(review): the source formatting was collapsed; this update is
        # placed at loop level (not inside the "too white" branch) -- confirm.
        nt_cnt, white_threshold = bfn['get_white_threshold'](
            nt_cnt, bad_batch_size,
            white_threshold, white_threshold_max, white_threshold_incr
        )
        if white_threshold is None:
            logger.warning('Max white threshold reached! Bailing out')
            break
    # {end while}

    logger.info(
        'Skipped points: {} on mask boder, {} out of mask boundary'.format(
            report['on_border'], report['out_of_boundary']
        )
    )
    logger.info(
        'Skipped patches: {} black, {} white, {} gray'.format(
            report['black_patches'], report['white_patches'], report['gray_patches']
        )
    )
    logger.info('Extracted {} patches'.format(len(patch_point)))

    # in 'random' method, only one batch is done, so it doesn't make sense to
    # return the last index. Instead signal that we're over with sampling.
    p_idx = None if method == 'random' else p_idx

    return patch_list, patch_point, p_idx
def _integral_candidate_patch(slide, mask, row, col, level_patch_size,
                              location, patch_size):
    """Return the RGBA patch for a mask point, or None when it is rejected."""
    # Points too close to the top/left mask margins are discarded.
    if (col < 50) or (row < 250):
        return None

    # The patch must fit inside the mask.
    if row + level_patch_size > mask.shape[0] or \
            col + level_patch_size > mask.shape[1]:
        return None

    level_patch_mask = mask[int(row):int(row + level_patch_size),
                            int(col):int(col + level_patch_size)]

    # The mask must cover at least 60% of the patch.
    ii_map = integral_image(level_patch_mask)
    ii_sum = integrate(ii_map, (0, 0),
                       (level_patch_size - 1, level_patch_size - 1))
    if float(ii_sum) / (level_patch_size ** 2) < 0.6:
        return None

    patch = np.array(slide.read_region(location, 0, (patch_size, patch_size)))
    if np.sum(patch) == 0:
        print('[integral] AaAaAH its zeroo!!')
        return None
    return patch


def patch_sampling_using_integral(slide, slide_level, mask, patch_size, patch_num):
    """
    patch sampling on whole slide image

    Random nonzero mask points are tried until `patch_num` patches are
    collected. Sampling also stops when the white threshold cannot be raised
    any further, or when too many candidates in a row are rejected before the
    whiteness test (otherwise a mask without any usable point would make the
    loop spin forever).

    input:

    slide = OpenSlide Object
    slide_level = level of mask
    mask = mask image ( 0-1 int type nd-array)
    patch_size = size of patch scala integer n
    patch_num = the number of output patches

    output:

    list of patches (BGR images), list of patch point (starting from left
    top); only patches with nonzero sum and mean above 90 are returned
    """
    patch_list = []   # patches
    patch_point = []  # patch locations

    # taking the nonzero points in the mask
    x_l, y_l = mask.nonzero()

    if len(x_l) > patch_size / slide.level_downsamples[slide_level] * 2:
        downsample = slide.level_downsamples[slide_level]
        level_patch_size = int(patch_size / downsample)
        print('DEBUGGG:  {}'.format(slide_level))

        # mask points scaled to level-0 coordinates
        x_ws = (np.round(x_l * downsample)).astype(int)
        y_ws = (np.round(y_l * downsample)).astype(int)

        cnt = 0      # patch counter
        nt_cnt = 1   # not taken (too white) counter
        white_threshold = .3

        # The old guard compared the *patch* counter against this limit and
        # could never fire inside `while cnt < patch_num`. Count consecutive
        # rejected candidates instead so the loop always terminates.
        max_misses = patch_num * 10 + 1000
        misses = 0

        while cnt < patch_num:
            if misses > max_misses:
                print("There is no more patches to extract in this slide")
                print("mask region is too small")
                print("final number of patches :  {}".format(len(patch_list)))
                break

            # sampling a random point in the mask
            p_idx = randint(0, len(x_l) - 1)
            patch = _integral_candidate_patch(
                slide, mask, x_l[p_idx], y_l[p_idx], level_patch_size,
                (y_ws[p_idx], x_ws[p_idx]), patch_size)
            if patch is None:
                misses += 1
                continue
            misses = 0

            white_mask = patch[:, :, 0:3] > 200
            if float(np.sum(white_mask)) / (patch_size ** 2 * 3) <= white_threshold:
                # The patch is known to be non-black at this point.
                patch_list.append(cv2.cvtColor(patch, cv2.COLOR_RGBA2BGR))
                patch_point.append((x_l[p_idx], y_l[p_idx]))
                cnt += 1
            else:
                nt_cnt += 1

            # Every 1000th "too white" rejection relaxes the threshold by
            # 0.05, up to 0.7; after that sampling gives up.
            if nt_cnt % 1000 == 0:
                if white_threshold < .7:
                    white_threshold += .05
                    nt_cnt = 1
                    print('Increasing white_threshold of 0.05:  {}'.format(
                        white_threshold))
                else:
                    print('No more patches to extract that have more than 30 percent of not white content')
                    break

    # Final filter: drop black or too-dark patches.
    def_pl = []
    def_pp = []
    for patch, point in zip(patch_list, patch_point):
        if (np.sum(patch) > 0) and (np.mean(patch) > 90):
            def_pl.append(patch)
            def_pp.append(point)

    return def_pl, def_pp
def tumor_patch_sampling_using_centerwin(slide, slide_level, mask, patch_size, patch_num):
    """
    tumor patch sampling using center window

    Random nonzero mask points are tried until `patch_num` patches are
    collected or `patch_num*10 + 1000` candidates have been tried. A
    candidate is kept only when the window at the centre of its patch is
    completely covered by the mask.

    Pass only a tumor mask; per the original note it will malfunction with a
    normal mask or a tissue mask.

    input parameters are same as patch_sampling_using_integral

    return list of patches (BGR images), list of patch point (left top)
    """
    patch_list = []
    patch_point = []
    # centre window: 32 level-0 pixels scaled to the mask level
    window_size = int(32 / slide.level_downsamples[slide_level])
    x_l, y_l = mask.nonzero()

    if len(x_l) > patch_size * 2:
        downsample = slide.level_downsamples[slide_level]
        level_patch_size = int(patch_size / downsample)
        x_ws = (np.round(x_l * downsample)).astype(int)
        y_ws = (np.round(y_l * downsample)).astype(int)

        # Top-left corner of the centre window inside the patch. Floor
        # division keeps the slice indices integral on Python 3 as well.
        win_x = (level_patch_size // 2) - 1 - window_size // 2
        win_y = win_x

        max_iterations = patch_num * 10 + 1000
        cnt = 0
        while len(patch_list) < patch_num:
            # loop cnt
            cnt += 1
            # Checked before any `continue`, so the loop also stops when
            # every candidate is rejected.
            if cnt > max_iterations:
                print("There is no moare patches to extract in this slide")
                print("mask region is too small")
                print("final number of patches :  {}".format(len(patch_list)))
                break

            # random pick point in mask
            p_idx = randint(0, len(x_l) - 1)
            level_point_x, level_point_y = x_l[p_idx], y_l[p_idx]

            # the patch must fit inside the mask
            if level_point_x + level_patch_size > mask.shape[0] or \
                    level_point_y + level_patch_size > mask.shape[1]:
                continue

            # make patch from mask image
            level_patch_mask = mask[
                int(level_point_x):int(level_point_x + level_patch_size),
                int(level_point_y):int(level_point_y + level_patch_size)]

            # Biggest difference to patch_sampling_using_integral: only the
            # centre window is tested, and it must be fully inside the mask.
            t_window = level_patch_mask[win_x:(win_x + window_size),
                                        win_y:(win_y + window_size)]
            ii_map = integral_image(t_window)
            ii_sum = integrate(ii_map, (0, 0), (window_size - 1, window_size - 1))
            area_percent = float(ii_sum) / (window_size ** 2)
            if area_percent < 1.0:
                continue

            # patch, point is appended the list
            patch_point.append((x_l[p_idx], y_l[p_idx]))
            patch = slide.read_region((y_ws[p_idx], x_ws[p_idx]), 0,
                                      (patch_size, patch_size))
            patch = np.array(patch)
            patch_list.append(cv2.cvtColor(patch, cv2.COLOR_RGBA2BGR))

    return patch_list, patch_point
def patch_sampling_using_integral(slide, slide_level, mask, patch_size, patch_num):
    """
    patch sampling on whole slide image

    Random nonzero mask points are tried until `patch_num` patches are
    collected or `patch_num*10 + 1000` candidates have been tried. A
    candidate is kept when its patch fits inside the mask and the mask covers
    at least 80% of it.

    slide = OpenSlide Object
    slide_level = level of mask
    mask = mask image ( 0-1 int type nd-array)
    patch_size = size of patch scala integer n
    patch_num = the number of output patches

    return list of patches (BGR images), list of patch point (left top)
    """
    patch_list = []
    patch_point = []
    x_l, y_l = mask.nonzero()

    if len(x_l) > patch_size * 2:
        downsample = slide.level_downsamples[slide_level]
        level_patch_size = int(patch_size / downsample)
        # mask points scaled to level-0 coordinates
        x_ws = (np.round(x_l * downsample)).astype(int)
        y_ws = (np.round(y_l * downsample)).astype(int)

        max_iterations = patch_num * 10 + 1000
        cnt = 0
        while len(patch_list) < patch_num:
            # loop cnt
            cnt += 1
            # Checked before any `continue`, so the loop also stops when
            # every candidate is rejected.
            if cnt > max_iterations:
                print("There is no moare patches to extract in this slide")
                print("mask region is too small")
                print("final number of patches :  {}".format(len(patch_list)))
                break

            # random pick point in mask
            p_idx = randint(0, len(x_l) - 1)
            level_point_x, level_point_y = x_l[p_idx], y_l[p_idx]

            # the patch must fit inside the mask
            if level_point_x + level_patch_size > mask.shape[0] or \
                    level_point_y + level_patch_size > mask.shape[1]:
                continue

            # make patch from mask image
            level_patch_mask = mask[
                int(level_point_x):int(level_point_x + level_patch_size),
                int(level_point_y):int(level_point_y + level_patch_size)]

            # the mask must cover at least 80% of the patch
            ii_map = integral_image(level_patch_mask)
            ii_sum = integrate(ii_map, (0, 0),
                               (level_patch_size - 1, level_patch_size - 1))
            area_percent = float(ii_sum) / (level_patch_size ** 2)
            if area_percent < 0.8:
                continue

            # patch, point is appended the list
            patch_point.append((x_l[p_idx], y_l[p_idx]))
            patch = slide.read_region((y_ws[p_idx], x_ws[p_idx]), 0,
                                      (patch_size, patch_size))
            patch = np.array(patch)
            patch_list.append(cv2.cvtColor(patch, cv2.COLOR_RGBA2BGR))

    return patch_list, patch_point