# Make sure the output directory exists before anything is written to it.
if not os.path.isdir(args.assemblyDir):
    os.makedirs(args.assemblyDir)

# open omero connection
print('establishing BlitzGateway with OMERO')
conn = BlitzGateway(args.un, args.pw, host=args.host, port=4064)
# connect() reports failure through its return value; fail here with a clear
# message instead of on the first service call below.
if not conn.connect():
    raise RuntimeError('could not connect to OMERO host %s' % (args.host,))
roi_service = conn.getRoiService()

splitDF = pd.read_csv(args.splitFile)

# load slide
print('loading slide %s' % (args.slideFile,))
slideFile = os.path.basename(args.slideFile)
slideName = slideFile.split('.')[0]
slide = openslide.OpenSlide(args.slideFile)

# get imageID from splitDF
# Select the rows of this slide once instead of re-evaluating the mask for
# every column.
slideRows = splitDF[splitDF['slideFile'] == slideFile]
if slideRows.empty:
    raise ValueError('slide %s is not listed in %s'
                     % (slideFile, args.splitFile))
slideId = slideRows['Slide.ID']
# Take the first matching row, like `split` and `group` below; int() on a
# whole Series only works when exactly one row matches.
omeroId = int(slideRows['omeroId'].tolist()[0])
split = slideRows['split'].tolist()[0]
group = slideRows['Group'].tolist()[0]
print('omeroID: %s split to %s in group %s' % (omeroId, split, group))

img = conn.getObject('Image', omeroId)
name = img.getName()
print('loading image ROIS %s' % (name,))
result = roi_service.findByImage(img.getId(), None)

# generate empty mask
#cell# fnjson = extract_rois_svs_xml(fnxml) #cell# with open(fnjson, 'r') as fh: roilist = json.load(fh) #cell# pd.Series([roi["name"] for roi in roilist]).value_counts().index #cell# slide = openslide.OpenSlide(fnsvs) img = np.asarray(slide.associated_images["thumbnail"]) median_color = get_median_color(slide) ratio = get_thumbnail_magnification(slide) #cell# colordict = { 'open glom': 'b', 'scler glom': 'm', 'infl': 'r', 'tissue': 'w', 'art': 'olive', 'fold': 'y' }
def main():
    """Train a CRAM model on the slide dataset and report test metrics.

    Slide paths and labels are read from a fixed ``.npz`` file. The train and
    validation slides are opened with OpenSlide and passed to
    ``train_with_sgd``. Afterwards the weights in ``trained_model.npz`` are
    loaded and precision, recall, accuracy and total loss are printed for the
    test split.
    """
    # Load dataset
    npzfile = np.load('/home/nnauata/utils/dataset_slides_path.npz')
    X_train_path = npzfile['X_train']
    y_train = npzfile['y_train']
    X_val_path = npzfile['X_valid']
    y_val = npzfile['y_valid']

    # Process train and valid data
    y_train = y_train.astype('int32')
    y_val = y_val.astype('int32')

    # Define model
    n_batch = 1
    n_channels = 1
    img_height = SIZE
    img_width = SIZE
    input_shape = (n_batch, n_channels, img_height, img_width)
    print("input shape: " + str(input_shape))
    specs = {
        'input_shape': input_shape,
        'patch': 64,
        'n_steps': 8,
        'n_h_g': 64,
        'n_h_l': 64,
        'n_f_g': 64,
        'n_f_h': 64,
        'n_h_fc_1': 256,
        'learning_rate': 0.0001,
        'n_classes': 2,
        'sigma': 0.1,
        'patience': 50,
        'filter_shape_1': (128, 1, 3, 3),
        'filter_shape_2': (128, 128, 3, 3),
        'stride': (1, 1),
        'pooling_shape': (2, 2),
        'n_trials': 2
    }

    # Define CRAM
    start = time.time()
    cram = CRAM(specs)
    print("Compilation time: " + str(time.time() - start))

    print("Start Preprocessing ...")
    X_train_obj = [openslide.OpenSlide(path) for path in X_train_path]
    X_val_obj = [openslide.OpenSlide(path) for path in X_val_path]

    print("Start Training ...")
    train_with_sgd(cram, input_shape, X_train_obj, y_train, X_val_obj, y_val,
                   callback=sgd_callback)

    print("Start Preprocessing ...")
    X_test = npzfile['X_test']
    y_test = npzfile['y_test']
    # Open the *test* slides; the previous code re-opened the training paths.
    X_test_obj = [openslide.OpenSlide(path) for path in X_test]

    print("Start Testing ...")
    # Predict a sample image
    cram.load('trained_model.npz')
    outs = []
    # NOTE(review): prediction and the loss below still consume the raw
    # X_test entries, not X_test_obj - confirm which one CRAM expects.
    for x, _ in zip(X_test, y_test):
        X_in = np.reshape(x, input_shape)
        outs.append(cram.predict(X_in))

    # Show some results
    outs = np.array(outs).ravel()
    print(outs.shape)
    print(outs)
    print(y_test)
    print(Counter(outs))
    print(Counter(y_test))
    print("Precision:")
    print(precision_score(y_test, outs))
    print("Recall")
    print(recall_score(y_test, outs))
    print("Accuracy:")
    print(accuracy_score(y_test, outs))
    print("Loss:")
    print(cram.calculate_total_loss(X_test, y_test))
def calc_patches_cord(list_binary_img, patch_level, svs_file, patch_dir, samp,
                      patch_size, threshold_area_percent, toplevel):
    """Pick the grid patches of a slide that sufficiently overlap a mask.

    The slide level ``patch_level`` is tiled with a non-overlapping grid of
    ``patch_size`` squares. Each square is projected into mask coordinates
    (level coordinate * level downsample / ``toplevel[2]``). It is kept when
    the fraction of mask positions inside the projected rectangle exceeds
    ``threshold_area_percent``.

    Args:
        list_binary_img: collection of ``"x y"`` strings, one per mask
            position that counts as foreground.
        patch_level: OpenSlide level the patch grid is laid out on.
        svs_file: path of the slide to open.
        patch_dir: unused here.
        samp: unused here.
        patch_size: edge length of a patch at ``patch_level``.
        threshold_area_percent: minimum covered fraction for a patch to be
            selected.
        toplevel: sequence whose third element is the divisor between
            level-0 and mask coordinates.

    Returns:
        Four parallel lists ``(start_x, stop_x, start_y, stop_y)`` holding
        the selected patches in ``patch_level`` coordinates.

    Exits the process (status 0) when ``patch_level`` does not exist in the
    slide.
    """
    patch_start_x_list = []
    patch_stop_x_list = []
    patch_start_y_list = []
    patch_stop_y_list = []

    # Hash lookups instead of scanning the whole list once per mask position.
    foreground = set(list_binary_img)

    OSobj = openslide.OpenSlide(svs_file)
    try:
        if patch_level > len(OSobj.level_dimensions) - 1:
            print("not enough levels " + str(patch_level) + " " +
                  str(len(OSobj.level_dimensions) - 1))
            sys.exit(0)
        maxx, maxy = OSobj.level_dimensions[patch_level]
        level_downsample = OSobj.level_downsamples[patch_level]
    finally:
        # Only metadata is needed, so release the handle right away.
        OSobj.close()

    mask_divisor = toplevel[2]
    step = int(patch_size)
    total_num_patches = 0
    selected_num_patches = 0

    # Iterate through the x coordinate.
    start_x = 0
    while start_x + patch_size < maxx:
        tmp_x = start_x + step
        current_x = int((start_x * level_downsample) / mask_divisor)
        current_x_stop = int((tmp_x * level_downsample) / mask_divisor)

        # Iterate through the y coordinate.
        start_y = 0
        while start_y + patch_size < maxy:
            tmp_y = start_y + step
            current_y = int((start_y * level_downsample) / mask_divisor)
            current_y_stop = int((tmp_y * level_downsample) / mask_divisor)
            total_num_patches += 1

            # Number of foreground mask positions under this patch
            # (both rectangle edges inclusive).
            covered = sum(
                1
                for i in range(current_x, current_x_stop + 1)
                for j in range(current_y, current_y_stop + 1)
                if str(i) + ' ' + str(j) in foreground
            )
            footprint = ((current_y_stop + 1 - current_y) *
                         (current_x_stop + 1 - current_x))

            if (tmp_x <= maxx and tmp_y <= maxy and
                    covered / footprint > threshold_area_percent):
                patch_start_x_list.append(start_x)
                patch_start_y_list.append(start_y)
                patch_stop_x_list.append(tmp_x)
                patch_stop_y_list.append(tmp_y)
                selected_num_patches += 1

            start_y = tmp_y
        start_x = tmp_x

    print(selected_num_patches, total_num_patches)
    return (patch_start_x_list, patch_stop_x_list, patch_start_y_list,
            patch_stop_y_list)
def create_patch_deprecated(svs, patch_sub_size, patch_dir, samp, tf_output,
                            patch_level, threshold, threshold_area_percent,
                            threshold_mean, threshold_std):
    """Tile a slide level into square patches and write them to a TFRecord.

    The level ``patch_level`` of ``svs`` is walked on a non-overlapping grid
    of ``patch_sub_size`` squares. Each patch is read, grey-scale statistics
    are computed, and a record built by ``image_to_tfexample_braf`` is
    written to ``<tf_output>/<samp>.tfrecords``. Patches that pass the
    area/mean/std thresholds get ``"_included"`` appended to their name.

    NOTE(review): the indentation of this function was reconstructed from
    whitespace-collapsed text. The record-writing code is placed *outside*
    the threshold ``if`` (every patch is written, the name marks inclusion) -
    confirm against the original file.

    Returns:
        List of the patch names that were written.
    """
    tf_writer = tf.python_io.TFRecordWriter(tf_output + '/' + samp + '.tfrecords')
    level = patch_level
    OSobj = openslide.OpenSlide(svs)
    minx = 0
    miny = 0
    tmp = OSobj.level_dimensions[level]
    maxx = tmp[0]
    maxy = tmp[1]
    # read_region() takes the location in level-0 coordinates, so grid
    # positions at `level` are multiplied by this factor before reading.
    multi_factor = OSobj.level_downsamples[level]
    start_x = minx
    '''creating sub patches'''
    '''Iterating through x coordinate'''
    current_x = 0
    filenames = []
    while start_x + patch_sub_size < maxx:
        '''Iterating through y coordinate'''
        current_y = 0
        start_y = miny
        while start_y + patch_sub_size < maxy:
            tmp_start_x = int(round(start_x * multi_factor, 0))
            tmp_start_y = int(round(start_y * multi_factor, 0))
            # NOTE(review): any read failure terminates the process with
            # status 0 and without closing tf_writer.
            try:
                img_patch = OSobj.read_region((tmp_start_x, tmp_start_y), level, (patch_sub_size, patch_sub_size))
            except:
                sys.exit(0)
            np_img = np.array(img_patch)
            im_sub = Image.fromarray(np_img)
            width, height = im_sub.size
            '''Change to grey scale'''
            grey_img = im_sub.convert('L')
            '''Convert the image into numpy array'''
            np_grey = np.array(grey_img)
            patch_mean = round(np.mean(np_grey), 2)
            patch_std = round(np.std(np_grey), 2)
            '''Identify patched where there are tissues'''
            '''tuple where first element is rows, second element is columns'''
            idx = np.where(np_grey < threshold)
            # Fraction of pixels darker than `threshold`.
            # NOTE(review): under Python 2 this `/` is integer division.
            patch_area = len(idx[0]) / (np_grey.shape[0] * np_grey.shape[1])
            '''proceed further only if patch has non empty values'''
            num_patch = samp + "_X_" + str(start_x) + "_" + str(
                start_x + patch_sub_size) + "_Y_" + str(start_y) + "_" + str(
                    start_y + patch_sub_size)
            if patch_area > threshold_area_percent and patch_mean < threshold_mean and patch_std > threshold_std and width == patch_sub_size and height == patch_sub_size:
                '''creating patch name'''
                num_patch = num_patch + "_included"
            image_format = "png"
            height = patch_sub_size
            width = patch_sub_size
            image_name = num_patch
            # Placeholder; overwritten by the BRAF check just below.
            sub_type = 2
            if 'BRAF' in samp:
                sub_type = 1
            else:
                sub_type = 0
            # Mutation label taken from the first matching tag in the sample
            # name; anything unmatched is labelled 'Normal'.
            mut_type = ""
            if 'WT' in samp:
                mut_type = 'WT'
            elif 'BRAF_V600E' in samp:
                mut_type = 'BRAF_V600E'
            elif 'BRAF_V600K' in samp:
                mut_type = 'BRAF_V600K'
            elif 'BRAF_V600NENK' in samp:
                mut_type = 'BRAF_V600NENK'
            elif 'BRAF_V600X' in samp:
                mut_type = 'BRAF_V600X'
            else:
                mut_type = 'Normal'
            # Encode the patch as PNG bytes in memory for the record.
            imgByteArr = io.BytesIO()
            im_sub.save(imgByteArr, format='PNG')
            imgByteArr = imgByteArr.getvalue()
            record = image_to_tfexample_braf(imgByteArr, image_format, int(height), int(width), image_name, sub_type, mut_type)
            tf_writer.write(record.SerializeToString())
            filenames.append(num_patch)
            start_y = start_y + patch_sub_size
            current_y = current_y + patch_sub_size
        start_x = start_x + patch_sub_size
        current_x = current_x + patch_sub_size
    tf_writer.close()
    return filenames
def get_slide_size(filename):
    """Return the level-0 dimensions of the slide at ``filename``.

    The value is ``level_dimensions[0]`` of the opened slide. The slide
    handle is closed before returning so repeated calls do not leak open
    files.
    """
    wsi = openslide.OpenSlide(filename)
    try:
        return wsi.level_dimensions[0]
    finally:
        wsi.close()
patch_size_20X = 1400 level = 0 start = time.time() fdone = '{}/extraction_done.txt'.format(output_folder) if os.path.isfile(fdone): print('fdone {} exist, skipping'.format(fdone)) exit(0) print('extracting {}'.format(output_folder)) if not os.path.exists(output_folder): os.mkdir(output_folder) try: oslide = openslide.OpenSlide(slide_name) if openslide.PROPERTY_NAME_MPP_X in oslide.properties: # 'openslide.mpp-x' mag = 10.0 / float(oslide.properties[openslide.PROPERTY_NAME_MPP_X]) elif "XResolution" in oslide.properties: mag = 10.0 / float(oslide.properties["XResolution"]) elif 'tiff.XResolution' in oslide.properties: # for Multiplex IHC WSIs, .tiff images Xres = float(oslide.properties["tiff.XResolution"]) if Xres < 10: mag = 10.0 / Xres else: mag = 10.0 / (10000 / Xres) # SEER PRAD else: print( '[WARNING] mpp value not found. Assuming it is 40X with mpp=0.254!', slide_name) mag = 10.0 / float(0.254)
with open(args.input_pattern[0], 'r') as f: for line in f: fnames.append(line.strip()) else: # user sent us a wildcard, need to use glob to find files fnames = glob.glob(args.input_pattern[0]) # + print(f"Identified {len(fnames)} image(s)") for ii, fname in tqdm(enumerate(fnames), leave=False): fnamebase = Path(os.path.basename(fname)).stem print(f"{fname}") fnamebase = os.path.splitext(os.path.basename(fname))[0] osh = openslide.OpenSlide(fname) nrow, ncol = osh.level_dimensions[0] for y in tqdm(range(0, osh.level_dimensions[0][1], round(patch_size * osh.level_downsamples[level])), desc="outer", leave=False): for x in tqdm(range(0, osh.level_dimensions[0][0], round(patch_size * osh.level_downsamples[level])), desc=f"innter {y}", leave=False): patch = np.asarray( osh.read_region((x, y), level, (patch_size, patch_size)))[:, :, 0:3] #trim alpha
def get_slices(self):
    """
    Cut a grid of patches around the centre of a pathology image, using a
    pool of worker processes.

    NOTE(review): the method returns from inside the loop, so only the first
    entry of ``self.images`` that ends in ".tif" or ".kfb" is processed.
    This matches the previous behaviour - confirm whether all images were
    meant to be handled.

    :return: list with the items produced by the workers for that image
             (originally documented as the storage paths of the cut patches);
             ``None`` when no image could be opened.
    """
    for image in self.images:
        t0 = datetime.datetime.now()
        # File name of the image without extension; spaces become "_".
        img_name = os.path.basename(image).split(".")[0].replace(" ", "_")
        print("Image Process %s ..." % image)

        slide = None
        if image.endswith(".tif"):
            slide = openslide.OpenSlide(image)
        if image.endswith(".kfb"):
            slide = TSlide(image)
        if not slide:
            continue

        _width, _height = slide.dimensions

        # Process pool; the context manager shuts the workers down again,
        # which the previous code never did.
        with ProcessPoolExecutor(
                max_workers=cfg.slice.SLICE_PROCESS_NUM) as executor:
            t1 = datetime.datetime.now()
            print("Adding Job to Pool...")

            # Centre of the image.
            center_x, center_y = _width / 2, _height / 2

            # Extent of the patch grid and its top-left corner.
            width = (cfg.center.PATCH_NUM - 1) * cfg.center.DELTA + cfg.center.PATCH_WIDTH
            height = (cfg.center.PATCH_NUM - 1) * cfg.center.DELTA + cfg.center.PATCH_HEIGHT
            print(width, height)
            x = center_x - width / 2
            y = center_y - height / 2

            # Clamp the corner to the image.
            x = x if x >= 0 else 0
            y = y if y >= 0 else 0

            # From here on `width`/`height` hold the end coordinates.
            width = x + width
            height = y + height
            x, y, width, height = int(x), int(y), int(width), int(height)

            # One job per column position, stepping by DELTA.
            tasks = []
            while x < width:
                tasks.append(
                    executor.submit(worker_in_memory, image, x, y, height,
                                    cfg.center.PATCH_WIDTH,
                                    cfg.center.PATCH_HEIGHT,
                                    cfg.center.DELTA))
                x += cfg.center.DELTA

            t2 = datetime.datetime.now()
            job_count = len(tasks)
            print(
                "Done, cost: %s, Total Job Count: %s, Worker Count: %s" %
                ((t2 - t1), job_count, cfg.slice.SLICE_PROCESS_NUM))

            # Collect the results as the jobs finish.
            results = []
            patch_count = 0
            for future in as_completed(tasks):
                queue = future.result()
                results.extend(queue)
                count = len(queue)
                patch_count += count
                job_count -= 1
                print(
                    "One Job Done, Got %s patches, last Job Count: %s" %
                    (count, job_count))

        t3 = datetime.datetime.now()
        print(
            "File - %s, Size: (%s, %s), Got Patch Num %s, Total cost time: %s"
            % (img_name, _width, _height, patch_count, t3 - t0))
        return results
for pts in cntr_pts: pts = [np.array(pts, dtype=np.int32)] # Curved line if label == 2: mask = cv2.polylines(mask, pts, isClosed=False, color=label, thickness=1) # Contour else: mask = cv2.drawContours(mask, pts, -1, label, -1) return mask if __name__ == "__main__": svs_load_dir = "./svs_folder/" xml_load_dir = "./xml_folder/" xml_fns = sorted(glob.glob(xml_load_dir + "*.xml") + glob.glob(xml_load_dir + "*.XML")) level = 2 mask_save_dir = f"./mask_img_l{level}/" os.makedirs(mask_save_dir, exist_ok=True) wsi_uid_pattern = "[a-zA-Z]*_PNI2021chall_train_[0-9]{4}" wsi_regex = re.compile(wsi_uid_pattern) for xml_fn in tqdm(xml_fns): wsi_uid = wsi_regex.findall(xml_fn)[0] slide = openslide.OpenSlide(svs_load_dir + wsi_uid + ".svs") mask = xml2mask(xml_fn, slide, level) save_name = f"{wsi_uid}_l{level}_mask.tif" io.imsave(mask_save_dir + save_name, mask.astype(np.uint8), check_contrast=False)
def getWsi(path):
    """Open the whole-slide image at ``path`` and return the OpenSlide handle.

    ``openslide`` is imported inside the function, at call time. The caller
    owns the returned handle.
    """
    from openslide import OpenSlide
    return OpenSlide(path)
def load_svs_shape(fn, level=0):
    """Return ``[height, width]`` of slide ``fn`` at the given pyramid level.

    OpenSlide reports level dimensions as (width, height); the order is
    swapped here. The slide handle is closed before returning so repeated
    calls do not leak open files.
    """
    imgh = openslide.OpenSlide(fn)
    try:
        width, height = imgh.level_dimensions[level]
    finally:
        imgh.close()
    return [height, width]
caseID = int(caseID) im_list = glob.glob( os.path.join(folder_path, str(caseID), '*' + ext)) metrics_list = [{ 'accuracy': 0, 'metrics': (0, 0, 0, 0) }] * len(im_list) index = 0 filename = 'test_result.pkl' out_p = os.path.join(folder_path, str(caseID), filename) if os.path.exists(out_p): continue for im_p in im_list: base, file_extend = os.path.splitext(im_p) if 'openslide' in sys.modules: im = openslide.OpenSlide(im_p) im_size = (im.dimensions[1], im.dimensions[0]) bags = Bag(h=im_size[0], w=im_size[1], size=bag_size, overlap_pixel=overlap, padded=True) result = np.zeros(len(bags)) for i in range(len(bags)): bbox = bags.bound_box(i) size_r = bbox[1] - bbox[0] size_c = bbox[3] - bbox[2] top_left = (bbox[2], bbox[0]) bag = im.read_region( top_left, image_level, (size_c, size_r)).convert('RGB')
def apply_to_slide(args):
    """Run the network over a whole slide and save the result as an image.

    The level-0 image of ``args.slide`` is split into square windows of
    ``args.window`` pixels. A worker thread reads the windows and the main
    thread runs the model on each one and places the result in a
    20-channel output array at 1/256 of the input resolution. The output is
    written to ``args.output`` with a pixel spacing derived from the slide
    header.

    Raises:
        Exception: when no spacing information is found in the slide.
    """
    # Read the trained model.
    # NOTE(review): the model comes from load_model(args.network); an
    # unused hard-coded checkpoint path that was computed here was dropped.
    model = load_model(args.network)
    model.cuda()
    model.eval()

    # Read the slide
    osl = openslide.OpenSlide(args.slide)

    # Transform
    tran_norm = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])

    # Break the image into patches, for each patch, run the model on it
    (w, h) = osl.level_dimensions[0]

    # Size of the input window and number of windows
    window_size = int(args.window)
    nw_x, nw_y = math.ceil(w / window_size), math.ceil(h / window_size)

    # Output window size and output dimensions
    output_window_size = int(window_size / 256)
    (ow, oh) = nw_x * output_window_size, nw_y * output_window_size
    output = np.zeros((ow, oh, 20))

    # Set up a threaded worker to read openslide patches
    worker = threading.Thread(target=osl_worker,
                              args=(osl, (0, nw_x), (0, nw_y), window_size, 0))
    worker.start()

    while True:
        # Read a chunk of data
        q_data = osl_read_chunk_from_queue()

        # Check for sentinel value
        if q_data is None:
            break

        # Get the values
        ((i_x, i_y), (c_x, c_y, wd), _, window) = q_data

        # Run the model on the window and store the result at its grid cell
        W = tran_norm(window).cuda()
        R = run_model_on_window(model, W).cpu().numpy().transpose((1, 0, 2))
        co_x, co_y = output_window_size * i_x, output_window_size * i_y
        output[co_x:co_x + output_window_size,
               co_y:co_y + output_window_size, :] = R
        print('Finished (%d,%d) of (%d,%d)' % (i_x, i_y, nw_x, nw_y))

    # Clip the padding added by the window grid and swap the first two axes
    output = output[0:math.ceil(w / 256),
                    0:math.ceil(h / 256), :].transpose(1, 0, 2)

    # Set the spacing based on openslide
    # Header values are in microns per pixel; one output pixel covers 256
    # input pixels, and /1000 converts to mm.
    (sx, sy) = (0.0, 0.0)
    if 'openslide.mpp-x' in osl.properties:
        sx = float(osl.properties['openslide.mpp-x']) * 256 / 1000.0
        sy = float(osl.properties['openslide.mpp-y']) * 256 / 1000.0
    elif 'openslide.comment' in osl.properties:
        for z in osl.properties['openslide.comment'].split('\n'):
            r = parse.parse('Resolution = {} um', z)
            if r is not None:
                # A single resolution value is used for both axes.
                sx = sy = float(r[0]) * 256 / 1000.0

    # If there is no spacing, throw exception
    if sx == 0.0 or sy == 0.0:
        raise Exception('No spacing information in image')

    # Report spacing information
    print("Spacing of the mri-like image: %gx%gmm\n" % (sx, sy))

    # Write the result as a NIFTI file
    nii = sitk.GetImageFromArray(output, True)
    nii.SetSpacing((sx, sy))
    sitk.WriteImage(nii, args.output)
import openslide
import cv2
import numpy as np
import matplotlib.pyplot as plt
import glob
from skimage.external import tifffile
import os

whole_ambiguity_path = 'D:/PAIP_viable_tumor_patch/Ambiguity/'
original_WSI_path = 'D:/PAIP_original_patch/'
tumor_abs_path = 'D:/4_PAIP_cancer_label_patch/'

# Walk the training cases Training_phase_1_001 .. Training_phase_1_020.
for i in range(1, 21):
    WSI_path = original_WSI_path + 'Training_phase_1_{0:03d}'.format(i)
    a = glob.glob(WSI_path + '/*.svs')
    WSI_img = openslide.OpenSlide(a[0])
    WSI_w, WSI_h = WSI_img.level_dimensions[0]

    file_list = glob.glob(WSI_path + '/*.tif')
    # os.path.basename handles the path separator of the running platform;
    # splitting on a backslash only worked on Windows.
    file_name = os.path.basename(file_list[0]).split('.')
    file = file_name[0].split('_')
    # The 4th "_"-separated field of the first .tif name tells whether it is
    # the "viable" mask; the other .tif is then taken as the tumor mask.
    if file[3] == 'viable':
        viable_path = file_list[0]
        tumor_path = file_list[1]
    else:
        tumor_path = file_list[0]
        viable_path = file_list[1]

    tumor_img = tifffile.imread(tumor_path)
indir = args.data_dir
imagedir = indir + 'images/'
maskdir = indir + 'masks/'
outdir = args.output_dir

for root, dirnames, filenames in os.walk(imagedir):
    for filename in fnmatch.filter(filenames, '*.tif'):
        # NOTE(review): paths are built from imagedir, not from `root`, so
        # .tif files found in sub-directories will not resolve.
        imagefile = imagedir + filename[:-4] + '.tif'
        maskfile = maskdir + filename[:-4] + '_Mask.tif'
        print(imagefile)
        infile = indir + imagefile
        image = openslide.OpenSlide(imagefile)
        mask = openslide.OpenSlide(maskfile)
        (w, h) = image.level_dimensions[LEVEL]

        # One output directory per image.
        if not os.path.exists(outdir + filename[:-4]):
            os.makedirs(outdir + filename[:-4])

        # Floor division keeps the tile counts integral on Python 3 as well
        # (plain `/` yields a float there and range() rejects it).
        for i in range(w // width):
            for j in range(h // height):
                x = i * width
                y = j * height
                # read_region() takes level-0 coordinates.
                # NOTE(review): 2**LEVEL assumes each level halves the
                # resolution - confirm for these slides.
                im = np.array(image.read_region(
                    (x * (2**LEVEL), y * (2**LEVEL)), LEVEL,
                    (width, height)))[:, :, 0:3]
                if filter_image(im, width, height):
                    # Re-read the accepted tile at level 0 and shrink it to
                    # the tile size.
                    # NOTE(review): Image.ANTIALIAS no longer exists in
                    # Pillow >= 10 (Image.LANCZOS is the same filter).
                    im = np.array(image.read_region(
                        (x * (2**LEVEL), y * (2**LEVEL)), 0,
                        (width * (2**LEVEL), height * (2**LEVEL))).resize(
                            (width, height), Image.ANTIALIAS))[:, :, 0:3]
def get_wsi_patch(filename, patch_size=256, downsample=[1], augment=0):
    '''
    takes a wsi and returns a random patch of patch_size

    filename = path of the slide (str or bytes); an annotation file with the
        same base name and a ".xml" extension is used when it exists
    patch_size = edge length of the returned patch
    downsample = >1 downsample of patch (passed as a list); one entry is
        picked at random per call
    augment = [0,1] the percent of data that will be augmented

    Arguments may also be tensor-like objects exposing ``.numpy()``; they
    are converted first.

    returns [region, mask, imageID] where imageID is
        "<slide base name>-<x_start>-<y_start>-<downsample>"
    '''

    def _unwrap(value):
        # Tensor-like values are converted through .numpy(); anything else
        # is passed through. Deliberately best-effort, but no longer a bare
        # `except:` that would also swallow KeyboardInterrupt/SystemExit.
        try:
            return value.numpy()
        except Exception:
            return value

    augment = _unwrap(augment)
    patch_size = _unwrap(patch_size)
    filename = _unwrap(filename)
    downsample = _unwrap(downsample)

    # choose random downsample
    downsample = random.choice(downsample)

    wsi = openslide.OpenSlide(filename)
    l_dims = wsi.level_dimensions
    level = wsi.get_best_level_for_downsample(downsample + 0.1)

    # bytes paths are decoded; str has no .decode and is used directly
    try:
        base_name = filename.decode().split('.')[0]
    except AttributeError:
        base_name = filename.split('.')[0]
    xml_path = '{}.xml'.format(base_name)

    slide_mask = get_slide_mask(filename)

    # test for xml and choose random annotated class for patch
    if os.path.isfile(xml_path):
        class_num = get_num_classes(xml_path)
        # NOTE(review): rounding a uniform draw gives the first and last
        # class half the probability of the others - confirm this is wanted.
        class_num = int(round(np.random.uniform(low=0, high=class_num - 1)))
    else:
        class_num = 0

    region, mask, x_start, y_start = get_patch(wsi, xml_path, class_num,
                                               l_dims, level, slide_mask,
                                               patch_size, filename,
                                               downsample, augment)

    imageID = '{}-{}-{}-{}'.format(
        base_name.split('/')[-1], x_start, y_start, downsample)
    return [region, mask, imageID]
subprocess.call(['scp', remote_dataset_folder_detail+'/*features.csv',local_dataset_folder_detail]); #copy imagwe svs file from remote folder to local folder image_file_name=case_id+".svs"; image_file = os.path.join(local_image_folder, image_file_name); if not os.path.isfile(image_file): print "image svs file is not available, then download it to local folder."; img_path=findImagePath(case_id); full_image_file = os.path.join(remote_image_folder, img_path); subprocess.call(['scp', full_image_file,local_image_folder]); else: print "image svs file is available in folder!" #open image file with open slide to get meta data try: img = openslide.OpenSlide(image_file); except Exception as e: print(e); continue; image_width =img.dimensions[0]; image_height =img.dimensions[1]; #get human markup data humanMarkupList_tumor,humanMarkupList_non_tumor=findTumor_NonTumorRegions(case_id,user); #exit(); if(len(humanMarkupList_tumor) ==0 and humanMarkupList_non_tumor==0): print "No tumor or non tumor regions has been marked in this image by user %s." % user; continue;
def get_patch_from_points(filename, point, patch_size, patch_width, level,
                          downsample=1):
    '''
    takes a wsi filename and tuple (x,y) location and returns a patch of
    patch_size

    filename = path of the slide (str or bytes)
    point = (x, y) location handed to read_region
    patch_size = edge length of the returned patch
    patch_width, level = accepted for interface compatibility only: both are
        recomputed from ``downsample`` and the slide, the passed values are
        not used
    downsample = >1 downsample of patch

    Arguments may also be tensor-like objects exposing ``.numpy()``; they
    are converted first.

    returns [region, mask, imageID]; region is the RGB patch as an array,
        mask is an all-zero uint8 array of shape (patch_size, patch_size),
        imageID is "<slide base name>-<x>-<y>-<downsample>"
    '''

    def _unwrap(value):
        # Tensor-like values are converted through .numpy(); anything else
        # is passed through. Deliberately best-effort, but no longer a bare
        # `except:` that would also swallow KeyboardInterrupt/SystemExit.
        try:
            return value.numpy()
        except Exception:
            return value

    patch_size = _unwrap(patch_size)
    filename = _unwrap(filename)
    downsample = _unwrap(downsample)

    # bytes paths are decoded; str has no .decode and is used directly
    try:
        base_name = filename.decode().split('.')[0]
    except AttributeError:
        base_name = filename.split('.')[0]

    wsi = openslide.OpenSlide(filename)
    try:
        # Read from the closest stored level and make up the remaining
        # factor by reading a larger region and shrinking it afterwards.
        level = wsi.get_best_level_for_downsample(downsample + 0.1)
        level_downsample = wsi.level_downsamples[level]
        scale_factor = int(round(downsample / level_downsample))
        patch_width = patch_size * scale_factor
        region = wsi.read_region(point, level, (patch_width, patch_width))
    finally:
        # The region is already in memory; release the slide handle.
        wsi.close()

    if scale_factor > 1:
        # resample=1 is PIL's Lanczos filter
        region = region.resize((patch_size, patch_size), resample=1)

    # drop the alpha channel
    region = np.array(region)[:, :, :3]

    imageID = '{}-{}-{}-{}'.format(
        base_name.split('/')[-1], point[0], point[1], downsample)

    # create zeros mask to pass - NOT USED LATER
    mask = np.zeros([patch_size, patch_size], dtype=np.uint8)
    return [region, mask, imageID]
def startAnalyze(self):
    """Open the selected slide and build the export-options panel.

    Reads the file name from ``self.openFileNameStr_``. For ``.tiff`` and
    ``.svs`` files the slide is opened, its thumbnail is shown on a canvas
    and the widgets for resolution, output type, piece size, format and
    channel are created. For any other file an info box is shown.

    Returns:
        ``False`` when the file type is not supported, otherwise ``None``.
    """
    self.openFileBtn_['state'] = tk.DISABLED
    self.startAnalyzeBtn_['state'] = tk.DISABLED
    fileName = self.openFileNameStr_.get()

    # The extension is whatever follows the LAST dot. Taking the second
    # dot-separated field picked the wrong part for names like "a.b.svs"
    # and raised IndexError for names without a dot.
    filePrex = fileName.rsplit('.', 1)[-1].lower()
    if filePrex not in ('tiff', 'svs'):
        # showinfo(title, message): the text used to be passed as the title
        # only, leaving the box body empty.
        messagebox.showinfo('Unsupported file',
                            'SUPPORT .svs FILE AND .tiff FILE ONLY')
        self.openFileBtn_['state'] = tk.NORMAL
        return False

    from PIL import ImageTk

    slide = openslide.OpenSlide(fileName)
    bestResolution = slide.level_dimensions[0]

    # Thumbnail: fixed width, height follows the slide's aspect ratio.
    self.canvasWidth_ = 300.0
    self.canvasHeight_ = math.floor(
        (self.canvasWidth_ / bestResolution[0]) * bestResolution[1])
    size = self.thumbnailSize_ = (self.canvasWidth_, self.canvasHeight_)
    slide_thumbnail = slide.get_thumbnail(size)
    self.render_ = ImageTk.PhotoImage(slide_thumbnail)

    # Replace widgets left over from a previously analysed file.
    if hasattr(self, 'thumbnail_'):
        self.thumbnail_.destroy()
        delattr(self, 'thumbnail_')
    self.thumbnail_ = tk.Label(self.root_, image=self.render_)
    self.thumbnail_.image = self.render_
    self.thumbnail_.place(x=470, y=230)

    if hasattr(self, 'rootFrame_'):
        self.rootFrame_.destroy()
        delattr(self, 'rootFrame_')
    self.rootFrame_ = tk.Frame(self.root_, width=500, height=400)
    self.rootFrame_.place(x=20, y=200, anchor=tk.NW)

    # Canvas showing the thumbnail; mouse events select regions on it.
    self.canvas_ = Canvas(self.root_,
                          width=self.canvasWidth_,
                          height=self.canvasHeight_)
    self.canvas_.place(x=470, y=230)
    self.canvas_.create_image(self.canvasWidth_ / 2,
                              self.canvasHeight_ / 2,
                              anchor=tk.CENTER,
                              image=self.render_)
    self.canvas_.bind("<Button-1>", self.onClickInThumbnil)
    self.canvas_.bind("<B1-Motion>", self.onChangeRegion)
    self.canvas_.bind("<ButtonRelease-1>", self.onClickFinished)

    # --- output resolution -------------------------------------------------
    choseResolutionLabel = tk.Label(self.rootFrame_,
                                    text="Choose the output Resolution",
                                    font=("Arial", 12),
                                    width=25,
                                    height=1)
    choseResolutionLabel.place(x=120, y=20, anchor=tk.CENTER)
    # StringVar.set() returns None, so the variable has to be created first
    # and set afterwards; chaining the calls stored None.
    self.resolutions_ = tk.StringVar()
    self.resolutions_.set(slide.level_dimensions[0])
    self.resolutionChosen_ = ttk.Combobox(self.rootFrame_,
                                          width=20,
                                          textvariable=self.resolutions_,
                                          state="readonly")
    self.resolutionChosen_["values"] = slide.level_dimensions
    # DEFAULT OUTPUT RESOLUTION IS THE BEST RESOLUTION
    self.resolutionChosen_.current(0)
    self.resolutionChosen_.place(x=120, y=50, anchor=tk.CENTER)
    self.resolutionChosen_.bind("<<ComboboxSelected>>", self.onSizeChange)

    # --- output type -------------------------------------------------------
    outputTypeLabel = tk.Label(self.rootFrame_,
                               text="Choose output type",
                               font=("Arial", 12),
                               width=25,
                               height=1)
    outputTypeLabel.place(x=350, y=20, anchor=tk.CENTER)
    # self.outputType_ is the combobox itself; its variable is kept under a
    # separate name so it is not discarded when the combobox is assigned.
    self.outputTypeVar_ = tk.StringVar()
    self.outputType_ = ttk.Combobox(self.rootFrame_,
                                    width=20,
                                    textvariable=self.outputTypeVar_,
                                    state="readonly")
    self.outputType_["values"] = ("By Piece", "By Range")

    if outputModeClassifier(bestResolution) == "RangeMode":
        self.selectRegionMode_ = True
        self.outputType_.current(1)
        self.addNewRegionBtn_ = tk.Button(
            self.rootFrame_,
            text="Add Region",
            width=10,
            command=lambda: self.onAddNewRegion())
        self.addNewRegionBtn_.place(x=310, y=140, anchor=tk.CENTER)
        self.redoBtn_ = tk.Button(self.rootFrame_,
                                  text="Delete",
                                  command=lambda: self.onDeleteRegion())
        self.redoBtn_.place(x=400, y=140, anchor=tk.CENTER)
        self.redoBtn_['state'] = tk.DISABLED  # active, disabled, or normal
        self.newRegion_ = tk.StringVar()
        self.rangeChosen_ = ttk.Combobox(self.rootFrame_,
                                         width=20,
                                         textvariable=self.newRegion_,
                                         state="readonly")
        self.rangeChosen_["values"] = self.selectedRegions_
        self.rangeChosen_.place(x=350, y=170, anchor=tk.CENTER)
        self.rangeChosen_.bind("<<ComboboxSelected>>",
                               self.onRangeItemSelected)
    else:
        self.outputType_.current(0)
        self.selectRegionMode_ = False
    self.outputType_.place(x=350, y=50, anchor=tk.CENTER)
    self.outputType_.bind("<<ComboboxSelected>>", self.onChangeOutputType)

    # --- piece size --------------------------------------------------------
    pieceSizeLabel = tk.Label(self.rootFrame_,
                              text="Choose piece size",
                              font=("Arial", 12),
                              width=25,
                              height=1)
    pieceSizeLabel.place(x=350, y=80, anchor=tk.CENTER)
    self.pieceSize_ = tk.StringVar()
    self.pieceSizeChosen_ = ttk.Combobox(self.rootFrame_,
                                         width=20,
                                         textvariable=self.pieceSize_,
                                         state="readonly")
    self.pieceSizeChosen_["values"] = ("1000", "3000", "5000", "10000")
    self.pieceSizeChosen_.current(3)
    self.pieceSizeChosen_.place(x=350, y=110, anchor=tk.CENTER)
    self.pieceSizeChosen_.bind("<<ComboboxSelected>>", self.onSizeChange)

    # --- output format -----------------------------------------------------
    choseFormatLabel = tk.Label(self.rootFrame_,
                                text="Choose the output Fromat",
                                font=("Arial", 12),
                                width=25,
                                height=1)
    choseFormatLabel.place(x=120, y=80, anchor=tk.CENTER)
    self.outputFormat_ = tk.StringVar()
    self.outputFormat_.set(".png")
    self.outputFormatChosen_ = ttk.Combobox(self.rootFrame_,
                                            width=20,
                                            textvariable=self.outputFormat_,
                                            state="readonly")
    self.outputFormatChosen_["values"] = (".png", ".jpeg", ".bmp")
    self.outputFormatChosen_.current(0)
    self.outputFormatChosen_.place(x=120, y=110, anchor=tk.CENTER)

    # --- output channel ----------------------------------------------------
    choseChannelLabel = tk.Label(self.rootFrame_,
                                 text="Choose the output Channel",
                                 font=("Arial", 12),
                                 width=25,
                                 height=1)
    choseChannelLabel.place(x=120, y=140, anchor=tk.CENTER)
    self.outputChannel_ = tk.StringVar()
    self.outputChannel_.set("3:RGB")
    self.outputChannleChosen_ = ttk.Combobox(self.rootFrame_,
                                             width=20,
                                             textvariable=self.outputChannel_,
                                             state="readonly")
    self.outputChannleChosen_["values"] = ("1:gray", "3:RGB", "4:RGBA")
    self.outputChannleChosen_.current(1)
    self.outputChannleChosen_.place(x=120, y=170, anchor=tk.CENTER)

    # --- start button ------------------------------------------------------
    self.startOutputBtn_ = tk.Button(
        self.rootFrame_,
        text="Output",
        command=lambda: self.startOutput(slide))
    self.startOutputBtn_.place(x=235, y=220, anchor=tk.CENTER)

    self.openFileBtn_['state'] = tk.NORMAL
    self.resetBtn_['state'] = tk.NORMAL
    # In range mode the button is created disabled.
    if self.selectRegionMode_:
        self.startOutputBtn_['state'] = tk.DISABLED
def create_binary_mask_new(rgb2lab_thresh, svs_file, patch_dir, samp):
    """Build a coarse binary mask of a slide from its ``rgb2hed`` thumbnail.

    A thumbnail roughly 500 px wide is requested from ``svs_file`` and saved
    as ``<samp>_original.png``. The thumbnail is passed through ``rgb2hed``
    and channel 1 of the result is thresholded; the black/white mask is saved
    as ``<samp>_mask.png``. The fraction of mask pixels is printed.

    :param rgb2lab_thresh: threshold (anything ``float()`` accepts); pixels
        whose channel-1 value is strictly greater belong to the mask.
    :param svs_file: path of the slide to open with OpenSlide.
    :param patch_dir: directory the two PNG files are written to.
    :param samp: sample name used as file-name prefix.
    :return: ``(list_binary_img, toplevel)``. ``list_binary_img`` holds one
        ``"x y"`` string per mask pixel (thumbnail coordinates, row-major
        order). ``toplevel`` is ``[requested_width, requested_height,
        divisor]`` where ``divisor`` is the level-0 to thumbnail scale.
    """
    OSobj = openslide.OpenSlide(svs_file)
    try:
        full_width, full_height = OSobj.level_dimensions[0]
        # A slide narrower than 500 px used to yield divisor == 0 and a
        # ZeroDivisionError on the next line; clamp to "no downscaling".
        divisor = max(1, int(full_width / 500))
        patch_sub_size_x = int(full_width / divisor)
        patch_sub_size_y = int(full_height / divisor)
        img = OSobj.get_thumbnail((patch_sub_size_x, patch_sub_size_y))
    finally:
        # The thumbnail is an independent PIL image, so the slide handle can
        # be released as soon as it has been read.
        OSobj.close()

    toplevel = [patch_sub_size_x, patch_sub_size_y, divisor]
    img = img.convert('RGB')
    np_img = np.array(img)
    img.save(patch_dir + '/' + samp + "_original.png", "png")

    hed_img = rgb2hed(np_img)
    binary_img = (hed_img[:, :, 1] > float(rgb2lab_thresh)).astype(int)

    # Paint the mask into the RGB array so it can be saved as an image.
    np_img[binary_img == 0] = [0, 0, 0]
    np_img[binary_img == 1] = [255, 255, 255]
    Image.fromarray(np_img).save(patch_dir + '/' + samp + "_mask.png", "png")

    mask_area = np.sum(binary_img) / (binary_img.size)
    print(mask_area)

    # np.where returns (rows, cols); callers get "x y", i.e. "col row".
    rows, cols = np.where(binary_img == 1)
    list_binary_img = [str(x) + ' ' + str(y) for x, y in zip(cols, rows)]
    return list_binary_img, toplevel
label_path = None print(patient_name) file_path = os.path.join(heatmaps_path, patient_name + '.npy') hmap = np.load(file_path) coords = np.where(hmap >= THRESHOLD) for j in range(len(coords[0])): x_coord = pow(2, LEVEL) * coords[0][j] y_coord = pow(2, LEVEL) * coords[1][j] # print (x_coord, y_coord) # img_obj = openslide.OpenSlide(image_path) # img = img_obj.read_region((x_coord - PATCH_SIZE//2, y_coord - PATCH_SIZE//2), # 0, # (PATCH_SIZE, PATCH_SIZE)).convert('RGB') if label_path is not None: label_obj = openslide.OpenSlide(label_path) label_img = label_obj.read_region( (x_coord - PATCH_SIZE // 2, y_coord - PATCH_SIZE // 2), 0, (PATCH_SIZE, PATCH_SIZE)).convert('L') else: label_img = np.zeros((PATCH_SIZE, PATCH_SIZE)) tumor_fraction = np.count_nonzero(label_img) / np.prod( np.array(label_img).shape) if tumor_fraction < 0.05: # imshow(img, label_img, title = ['Image', str(tumor_fraction)+ str()]) probs_map.append((patient_name, str(x_coord), str(y_coord), str(tumor_fraction))) with open(csv_path, 'w') as out: csv_out = csv.writer(out)
def create_tfrecord(patch_start_x_list, patch_stop_x_list, patch_start_y_list,
                    patch_stop_y_list, samp, patch_dir, patch_level, svs_file,
                    toplevel, tf_output, patch_size, mut_type, threshold_mean,
                    threshold_std, patch_byte_cutoff, xml_ann):
    """Write annotated patches of a slide to a TFRecord and save an overlay.

    Regions are read from the ``Region``/``Vertex`` elements of ``xml_ann``
    and scaled to thumbnail coordinates with ``toplevel[2]``. Every patch is
    read from the slide, PNG-encoded, and written to
    ``<tf_output>/<samp>.tfrecords`` when its rectangle intersects at least
    one region. The selected rectangles are drawn over the slide thumbnail
    and saved as ``<patch_dir>/<samp>_mask_patchoverlay_final.png``.

    :param patch_start_x_list: patch start x coordinates; they are multiplied
        by ``level_downsamples[patch_level]`` before reading.
    :param patch_stop_x_list: patch stop x coordinates (same frame).
    :param patch_start_y_list: patch start y coordinates (same frame).
    :param patch_stop_y_list: patch stop y coordinates (same frame).
    :param samp: sample name, used as prefix of all output names.
    :param patch_dir: directory that receives the overlay PNG.
    :param patch_level: slide level the patches are read from.
    :param svs_file: path of the slide.
    :param toplevel: ``[thumb_width, thumb_height, divisor]``.
    :param tf_output: directory that receives the ``.tfrecords`` file.
    :param patch_size: width and height of each patch in pixels.
    :param mut_type: label forwarded to ``image_to_tfexample_chek2``.
    :param threshold_mean: accepted for compatibility; not used.
    :param threshold_std: accepted for compatibility; not used.
    :param patch_byte_cutoff: accepted for compatibility; not used.
    :param xml_ann: path of the XML annotation file.
    """
    divisor = toplevel[2]

    # Parse the annotated regions into polygons in thumbnail coordinates.
    regions = []
    for region in minidom.parse(xml_ann).getElementsByTagName("Region"):
        coords = []
        for i, vertex in enumerate(region.getElementsByTagName("Vertex")):
            x = int(float(vertex.attributes['X'].value) / divisor)
            y = int(float(vertex.attributes['Y'].value) / divisor)
            coords.append((x, y))
            print(i, x, y)
        if len(coords) < 3:
            # Fewer than three vertices cannot enclose an area; an empty
            # region used to raise IndexError on coords[0] below.
            continue
        coords.append(coords[0])
        # buffer(0) repairs self-intersecting outlines.
        p1 = Polygon(coords).buffer(0)
        if not p1.is_empty:
            regions.append(p1)

    tf_writer = tf.python_io.TFRecordWriter(
        os.path.join(tf_output, samp + '.tfrecords'))
    OSobj = openslide.OpenSlide(svs_file)
    poly_included = []
    try:
        downsample = OSobj.level_downsamples[patch_level]
        for i in range(len(patch_start_x_list)):
            # Coordinates scaled by the level downsample (used for reading
            # and for the image name).
            x1 = int(patch_start_x_list[i] * downsample)
            x2 = int(patch_stop_x_list[i] * downsample)
            y1 = int(patch_start_y_list[i] * downsample)
            y2 = int(patch_stop_y_list[i] * downsample)
            img = OSobj.read_region((x1, y1), patch_level,
                                    (patch_size, patch_size))
            print(x1, y1, patch_level, patch_size)

            # Grey-level statistics and encoded size go into the image name.
            np_grey = np.array(img.convert('L'))
            patch_mean = round(np.mean(np_grey), 2)
            patch_std = round(np.std(np_grey), 2)
            imgByteArr = io.BytesIO()
            img.save(imgByteArr, format='PNG')
            size_bytes = imgByteArr.tell()
            image_name = samp + "_x_" + str(x1) + "_" + str(x2) + "_y_" + str(
                y1) + "_" + str(y2) + '_' + str(patch_mean) + '_' + str(
                    patch_std) + '_' + str(size_bytes) + ".png"

            # Same rectangle in thumbnail coordinates, to compare with the
            # annotation polygons.
            tx1 = int((patch_start_x_list[i] * downsample) / divisor)
            tx2 = int((patch_stop_x_list[i] * downsample) / divisor)
            ty1 = int((patch_start_y_list[i] * downsample) / divisor)
            ty2 = int((patch_stop_y_list[i] * downsample) / divisor)
            poly = Polygon([(tx1, ty1), (tx2, ty1), (tx2, ty2), (tx1, ty2),
                            (tx1, ty1)])

            # NOTE(review): the record is assumed to be written only for
            # patches that touch an annotation (same condition as the
            # overlay) - confirm against the intended behaviour.
            if any(poly.intersects(region) for region in regions):
                poly_included.append(poly)
                record = image_to_tfexample_chek2(imgByteArr.getvalue(),
                                                  "png", int(patch_size),
                                                  int(patch_size), image_name,
                                                  mut_type)
                tf_writer.write(record.SerializeToString())

        img_patch = OSobj.get_thumbnail((toplevel[0], toplevel[1]))
    finally:
        # Release the writer and the slide even when a patch fails.
        tf_writer.close()
        OSobj.close()

    np_img = np.array(img_patch)
    patch_sub_size_y = np_img.shape[0]
    patch_sub_size_x = np_img.shape[1]

    f, ax = plt.subplots(frameon=False)
    try:
        f.tight_layout(pad=0, h_pad=0, w_pad=0)
        ax.set_xlim(0, patch_sub_size_x)
        ax.set_ylim(patch_sub_size_y, 0)
        ax.imshow(img_patch)
        for included in poly_included:
            ax.add_patch(
                PolygonPatch(included,
                             facecolor=[0, 0, 0],
                             edgecolor="green",
                             alpha=0.3,
                             zorder=2))
        ax.set_axis_off()
        DPI = f.get_dpi()
        plt.subplots_adjust(left=0,
                            bottom=0,
                            right=1,
                            top=1,
                            wspace=0,
                            hspace=0)
        # One figure pixel per thumbnail pixel.
        f.set_size_inches(patch_sub_size_x / DPI, patch_sub_size_y / DPI)
        # NOTE(review): 'tight' looks like it was meant for bbox_inches;
        # kept unchanged so the output size stays the same - confirm.
        f.savefig(os.path.join(patch_dir,
                               samp + "_mask_patchoverlay_final.png"),
                  pad_inches='tight')
    finally:
        # Figures stay registered with pyplot until closed explicitly.
        plt.close(f)
def __init__(self, path, **kwargs):
    """
    Initialize the tile class.

    Opens the file with OpenSlide and sets ``tileWidth``, ``tileHeight``,
    ``sizeX``, ``sizeY``, ``levels`` and ``_svslevels``.

    :param path: the associated file path.
    :raises TileSourceException: if OpenSlide reports the format as
        unsupported, or the tile size, image size or level count is invalid.
    """
    super(SVSFileTileSource, self).__init__(path, **kwargs)

    largeImagePath = self._getLargeImagePath()

    try:
        self._openslide = openslide.OpenSlide(largeImagePath)
    except openslide.lowlevel.OpenSlideUnsupportedFormatError:
        raise TileSourceException('File cannot be opened via OpenSlide.')
    # The tile size isn't in the official openslide interface
    # documentation, but every example has the tile size in the properties.
    # Try to read it, but fall back to 256 if it isn't set.
    self.tileWidth = self.tileHeight = 256
    # A missing property raises KeyError and a non-numeric one ValueError;
    # both must leave the 256 default in place.
    try:
        self.tileWidth = int(
            self._openslide.properties['openslide.level[0].tile-width'])
    except (KeyError, ValueError):
        pass
    try:
        self.tileHeight = int(
            self._openslide.properties['openslide.level[0].tile-height'])
    except (KeyError, ValueError):
        pass
    if self.tileWidth <= 0 or self.tileHeight <= 0:
        raise TileSourceException('OpenSlide tile size is invalid.')
    self.sizeX = self._openslide.dimensions[0]
    self.sizeY = self._openslide.dimensions[1]
    if self.sizeX <= 0 or self.sizeY <= 0:
        raise TileSourceException('OpenSlide image size is invalid.')
    # levels = ceil(log2(larger of the per-axis size/tile ratios)) + 1
    self.levels = int(
        math.ceil(
            max(math.log(float(self.sizeX) / self.tileWidth),
                math.log(float(self.sizeY) / self.tileHeight)) /
            math.log(2))) + 1
    if self.levels < 1:
        raise TileSourceException(
            'OpenSlide image must have at least one level.')
    self._svslevels = []
    svsLevelDimensions = self._openslide.level_dimensions
    # Precompute which SVS level should be used for our tile levels.  SVS
    # level 0 is the maximum resolution.  We assume that the SVS levels are
    # in descending resolution and are powers of two in scale.  For each of
    # our levels (where 0 is the minimum resolution), find the lowest
    # resolution SVS level that contains at least as many pixels.  If this
    # is not the same scale as we expect, note the scale factor so we can
    # load an appropriate area and scale it to the tile size later.
    for level in range(self.levels):
        levelW = max(1, self.sizeX / 2**(self.levels - 1 - level))
        levelH = max(1, self.sizeY / 2**(self.levels - 1 - level))
        # bestlevel and scale will be the picked svs level and the scale
        # between that level and what we really wanted.  We expect scale to
        # always be a positive integer power of two.
        bestlevel = 0
        scale = 1
        for svslevel in range(len(svsLevelDimensions)):
            # Stop at the first SVS level that is too small (1 px slack).
            if (svsLevelDimensions[svslevel][0] < levelW - 1
                    or svsLevelDimensions[svslevel][1] < levelH - 1):
                break
            bestlevel = svslevel
            scale = int(round(svsLevelDimensions[svslevel][0] / levelW))
        self._svslevels.append({'svslevel': bestlevel, 'scale': scale})
# !Created by babiking@sensetime on May 13th, 2018 to test openslide APIs and
# code encapsulation/wrapper pipeline
import openslide
import numpy as np
from matplotlib import pyplot as plt

# !API-1: open a slide (.tiff/.svs/.mrxs/.bif/.ndpi/.vms/.vmu/.svslide file)
slide = openslide.OpenSlide("data/test.tiff")

# !API-2: level count, i.e. the number of resolution levels in the slide
levelCount = slide.level_count

# !API-3: dimensions (width, height) of the slide ...
nImageWidth, nImageHeight = slide.dimensions

# ! ... or the dimensions and downsample factor at level k
k = 0
nImageWidthK, nImageHeightK = slide.level_dimensions[k]
slideDownsampleK = slide.level_downsamples[k]

# !API-4: read a ROI area
#   Function: read_region(location, level, size)
#   Input:
#       [1] location, i.e. [pX, pY], the top-left corner of the region
#       [2] level, i.e. the downsampling scale level
#       [3] size, i.e. the size of the ROI area
#   Output:
#       [1] roi_image
def process_annotations(annotation_files_list,
                        overwrite_aggregated_annotation_file=False,
                        create_symlink=False):
    """
    Helper function to process a list of JSON files with annotations.

    For each annotation file, the pixel resolution is read from the matching
    .ndpi file, the 'White adipocyte.*' contours are aggregated into a
    single ``*_aggregated.json`` AIDA file, and optionally a symlink with
    the name expected by AIDA is pointed at that aggregated file.

    :param annotation_files_list: list of JSON filenames containing
        annotations.
    :param overwrite_aggregated_annotation_file: (def False) rewrite the
        aggregated file even if it already exists.
    :param create_symlink: (def False) create/replace the symlink in
        ``annotations_dir``.
    :raises FileExistsError: if a non-symlink already uses the symlink name.
    :return: None.
    """
    for annotation_file in annotation_files_list:

        print('File: ' + os.path.basename(annotation_file))

        # name of the file that we are going to save the aggregated
        # annotations to
        aggregated_annotation_file = annotation_file.replace(
            '.json', '_aggregated.json')

        # name of the original .ndpi file
        ndpi_file = os.path.basename(annotation_file).replace(
            auto_filename_suffix, '.ndpi')
        ndpi_file = os.path.join(histology_dir, ndpi_file)

        # only the resolution metadata is needed, so release the slide
        # handle right away instead of leaking one per annotation file
        im = openslide.OpenSlide(ndpi_file)
        try:
            # presumably converts pixels/cm to m/pixel - TODO confirm units
            xres = 1e-2 / float(im.properties['tiff.XResolution'])
            yres = 1e-2 / float(im.properties['tiff.YResolution'])
        finally:
            im.close()

        # aggregate cells from all blocks and write/overwrite a file with them
        if (not os.path.isfile(aggregated_annotation_file)
                or overwrite_aggregated_annotation_file):

            # load contours from annotation file
            cells = cytometer.data.aida_get_contours(
                annotation_file, layer_name='White adipocyte.*')

            # create AIDA items to contain contours
            items = cytometer.data.aida_contour_items(cells,
                                                      f_area2quantile_m,
                                                      cm='quantiles_aida',
                                                      xres=xres * 1e6,
                                                      yres=yres * 1e6)

            # write contours to single layer AIDA file (one to visualise,
            # one to correct manually)
            cytometer.data.aida_write_new_items(aggregated_annotation_file,
                                                items,
                                                mode='w',
                                                indent=0)

        if create_symlink:

            # name expected by AIDA for annotations
            symlink_name = os.path.basename(ndpi_file).replace(
                '.ndpi', '.json')
            symlink_name = os.path.join(annotations_dir, symlink_name)

            # islink() is checked first so that dangling symlinks (for which
            # isfile()/exists() are False) are replaced as well
            if os.path.islink(symlink_name):
                # delete existing symlink
                os.remove(symlink_name)
            elif os.path.exists(symlink_name):
                raise FileExistsError(
                    'File found with the name of the symlink we are trying to create'
                )

            # create symlink to the aggregated annotation file from the name
            # expected by AIDA; previously this was skipped whenever an old
            # symlink had just been deleted
            os.symlink(os.path.basename(aggregated_annotation_file),
                       symlink_name)
def open_im(im_path):
    """Open the slide at ``im_path`` and return the ``ops.OpenSlide`` handle.

    ``im_path`` must provide ``as_posix()``; its POSIX-style string form is
    what gets passed to ``ops.OpenSlide``.
    """
    return ops.OpenSlide(im_path.as_posix())
def annotate_tiles(self):
    """Rasterise XML region annotations into per-tile masks and .mat files.

    For every file in ``self.input_slide_dir`` ending with ``self.ext``:

    1. create ``<output_dir>/cws/<slide>/img_mask`` and ``.../Mat_files``;
    2. read the ``Region``/``Vertex`` polygons from the XML file that shares
       the slide's base name;
    3. for each 2000x2000 grid position, read the existing tile
       ``Da<k>.jpg`` (k counts positions row by row), draw the overlapping
       polygons filled with 255 on a blank image of the tile's size and save
       it as ``img_mask/Da<k>.jpg``;
    4. for each mask containing at least one 255 pixel, save a
       ``{'GT': {'im': ..., 'Mask': ...}}`` .mat file in ``Mat_files``.

    The ``Da<k>.jpg`` tiles must already exist in ``<output_dir>/cws/<slide>``;
    this method does not create them.
    """
    file_names_list = [
        fname for fname in os.listdir(self.input_slide_dir)
        if fname.endswith(self.ext)
    ]
    cws_dir = "cws"
    cws_root = os.path.join(self.output_dir, cws_dir)
    if not os.path.isdir(cws_root):
        os.makedirs(cws_root)

    for slide in file_names_list:
        slide_dir = os.path.join(cws_root, slide)
        mask_path = os.path.join(slide_dir, "img_mask")
        mat_path = os.path.join(slide_dir, "Mat_files")
        for directory in (slide_dir, mask_path, mat_path):
            if not os.path.isdir(directory):
                os.makedirs(directory)

        # Only the geometry of level 0 is needed; close the handle right
        # away instead of leaking one per slide.
        osr = openslide.OpenSlide(os.path.join(self.input_slide_dir, slide))
        try:
            level = 0
            ds = osr.level_downsamples[level]
            w, h = osr.level_dimensions[0]
        finally:
            osr.close()

        doc = xml.dom.minidom.parse(
            os.path.join(self.input_slide_dir,
                         os.path.splitext(slide)[0] + '.xml'))
        X = []
        Y = []
        for Reg in doc.getElementsByTagName("Region"):
            Vertex = Reg.getElementsByTagName("Vertex")
            if not Vertex:
                # A region without vertices used to crash max()/min() below.
                continue
            X.append([int(round(float(v.getAttribute("X")))) for v in Vertex])
            Y.append([int(round(float(v.getAttribute("Y")))) for v in Vertex])
        # Bounding boxes are tile-independent, so compute them once.
        bounds = [(min(xs), max(xs), min(ys), max(ys))
                  for xs, ys in zip(X, Y)]

        i1 = 0
        for j in range(0, h, 2000):
            for i in range(0, w, 2000):
                img = io.imread(
                    os.path.join(slide_dir, 'Da' + str(i1) + ".jpg"))
                # Only height/width are needed; shape[:2] also accepts
                # single-channel tiles.
                hh, ww = img.shape[:2]
                blank_image = np.zeros((hh, ww), np.uint8)
                for xs, ys, (x_min, x_max, y_min, y_max) in zip(X, Y, bounds):
                    # Draw only regions whose bounding box overlaps the tile.
                    if (i < x_max and i + 2000 > x_min and j < y_max
                            and j + 2000 > y_min):
                        points = [[int((x - i) / ds),
                                   int((y - j) / ds)]
                                  for x, y in zip(xs, ys)]
                        pts = np.array(points, np.int32)
                        pts = pts.reshape((-1, 1, 2))
                        cv2.drawContours(blank_image, [pts], 0, (255), -1)
                cv2.imwrite(os.path.join(mask_path, 'Da' + str(i1) + ".jpg"),
                            blank_image)
                i1 += 1

        for list11 in sorted(os.listdir(mask_path), key=natural_key):
            gray = io.imread(os.path.join(mask_path, list11))
            if not np.any(gray == 255):
                continue
            # NOTE(review): 'im' has always been loaded from the mask file,
            # so it equals 'Mask'; presumably the image tile was intended -
            # confirm before changing what is written.
            GT = {'GT': {'im': gray, 'Mask': gray}}
            iosci.savemat(
                os.path.join(
                    mat_path,
                    list11[:-4] + "_" + os.path.splitext(slide)[0] + ".mat"),
                GT)
def get_slices(self):
    """
    Cut every image in ``self.images`` into patches using a process pool.

    A failure on one image is recorded in the ``fail`` list and the
    remaining images are still processed.

    :return: ``{'done': [...], 'fail': [...]}`` where ``done`` lists the
        output directories of the processed images and ``fail`` lists
        ``{'name': ..., 'err': ...}`` dicts for the images that failed.
    """
    # output paths of successfully processed images
    done = []
    # descriptions of failed images
    fail = []

    for image in self.images:
        t0 = datetime.datetime.now()

        # image file name without extension; spaces are replaced by "_"
        img_name = os.path.basename(image).split(".")[0].replace(" ", "_")
        print("Image Process %s ..." % image)

        try:
            slide = None
            if image.endswith(".tif"):
                slide = openslide.OpenSlide(image)

            if image.endswith(".kfb"):
                slide = TSlide(image)

            if not slide:
                fail.append({
                    'name': img_name,
                    'err': 'unsupported file format'
                })
                continue

            _width, _height = slide.dimensions

            output_path = os.path.join(self.output_path, img_name)
            # if the target directory exists, delete it and start afresh
            if os.path.exists(output_path):
                shutil.rmtree(output_path)

            os.makedirs(output_path, exist_ok=True)

            # the with-block shuts the pool down even when a job raises, so
            # worker processes do not pile up from image to image
            with ProcessPoolExecutor(
                    max_workers=cfg.slice.SLICE_PROCESS_NUM) as executor:
                t1 = datetime.datetime.now()
                print("Adding Job to Pool...")

                # only the central part of the image (configured start/end
                # ratios) is read
                x, y, width, height = int(_width * cfg.slice.AVAILABLE_PATCH_START_RATIO), \
                                      int(_height * cfg.slice.AVAILABLE_PATCH_START_RATIO), \
                                      int(_width * cfg.slice.AVAILABLE_PATCH_END_RATIO), \
                                      int(_height * cfg.slice.AVAILABLE_PATCH_END_RATIO)

                # one job per x offset
                tasks = []
                while x < width:
                    tasks.append(
                        executor.submit(worker, image, x, y, height,
                                        cfg.slice.WIDTH, cfg.slice.HEIGHT,
                                        cfg.slice.DELTA, output_path))
                    x += cfg.slice.DELTA

                t2 = datetime.datetime.now()
                job_count = len(tasks)
                print(
                    "Done, cost: %s, Total Job Count: %s, Worker Count: %s" %
                    ((t2 - t1), job_count, cfg.slice.SLICE_PROCESS_NUM))

                # patch counter
                patch_count = 0
                for future in as_completed(tasks):
                    queue = future.result()
                    count = len(queue)

                    patch_count += count
                    job_count -= 1
                    print(
                        "One Job Done, Got %s patches, last Job Count: %s" %
                        (count, job_count))

            t3 = datetime.datetime.now()
            print(
                "File - %s, Size: (%s, %s), Got Patch Num %s, Total cost time: %s"
                % (img_name, _width, _height, patch_count, t3 - t0))
            print(".jpg files saved path: %s" % output_path)

            done.append(output_path)
        except Exception as e:
            # A stray ``raise`` here used to abort the whole batch and made
            # this bookkeeping unreachable.
            fail.append({'name': image, 'err': str(e)})

    return {'done': done, 'fail': fail}
def generate_patch_of_annotated_tiles(self):
    """Tile slides, rasterise their XML annotations and save patch .mat files.

    For every file in ``self.input_slide_dir`` ending with ``self.ext``:

    1. cut level 0 into tiles on a 2000x2000 grid and save them as
       ``<output_dir>/cws/<slide>/Da<k>.jpg`` (k counts row by row);
    2. read the ``Region``/``Vertex`` polygons from the XML file that shares
       the slide's base name and save, per tile, a mask with the
       overlapping polygons filled with 255 as ``img_mask/Da<k>.jpg``;
    3. for every mask containing at least one 255 pixel, extract 600x600
       patches with stride 400 from the mask (``Patches``) and the tile
       (``Slidingpatches``) and save each pair as a
       ``{'GT': {'im': ..., 'Mask': ...}}`` .mat file.

    Patches whose mask has a non-zero pixel go to ``Mat_files/pos``, all
    other patches to ``Mat_files``. The .mat files are used to generate the
    tf records file.
    """
    file_names_list = [
        fname for fname in os.listdir(self.input_slide_dir)
        if fname.endswith(self.ext)
    ]
    cws_dir = "cws"
    cws_root = os.path.join(self.output_dir, cws_dir)
    if not os.path.isdir(cws_root):
        os.makedirs(cws_root)

    for slide in file_names_list:
        slide_dir = os.path.join(cws_root, slide)
        mask_path = os.path.join(slide_dir, "img_mask")
        mat_path = os.path.join(slide_dir, "Mat_files")
        positive_patch_path = os.path.join(mat_path, "pos")
        for directory in (slide_dir, mask_path, mat_path):
            if not os.path.isdir(directory):
                os.makedirs(directory)

        osr = openslide.OpenSlide(os.path.join(self.input_slide_dir, slide))
        try:
            level = 0
            ds = osr.level_downsamples[level]
            w, h = osr.level_dimensions[0]

            # Cut the slide into tiles; edge tiles are clipped to the slide.
            k = 0
            for j in range(0, h, 2000):
                for i in range(0, w, 2000):
                    width = int(min(2000, w - i) / ds)
                    height = int(min(2000, h - j) / ds)
                    out = osr.read_region((i, j), level, (width, height))
                    # read_region returns RGBA; drop alpha before saving JPEG
                    temp = np.array(out)[:, :, 0:3]
                    Image.fromarray(temp).save(
                        os.path.join(slide_dir, 'Da' + str(k) + ".jpg"))
                    k += 1
        finally:
            # Previously one slide handle was leaked per processed slide.
            osr.close()

        doc = xml.dom.minidom.parse(
            os.path.join(self.input_slide_dir,
                         os.path.splitext(slide)[0] + '.xml'))
        X = []
        Y = []
        for Reg in doc.getElementsByTagName("Region"):
            Vertex = Reg.getElementsByTagName("Vertex")
            if not Vertex:
                # A region without vertices used to crash max()/min() below.
                continue
            X.append([int(round(float(v.getAttribute("X")))) for v in Vertex])
            Y.append([int(round(float(v.getAttribute("Y")))) for v in Vertex])
        # Bounding boxes are tile-independent, so compute them once.
        bounds = [(min(xs), max(xs), min(ys), max(ys))
                  for xs, ys in zip(X, Y)]

        i1 = 0
        for j in range(0, h, 2000):
            for i in range(0, w, 2000):
                img = io.imread(
                    os.path.join(slide_dir, 'Da' + str(i1) + ".jpg"))
                hh, ww = img.shape[:2]
                blank_image = np.zeros((hh, ww), np.uint8)
                for xs, ys, (x_min, x_max, y_min, y_max) in zip(X, Y, bounds):
                    # Draw only regions whose bounding box overlaps the tile.
                    if (i < x_max and i + 2000 > x_min and j < y_max
                            and j + 2000 > y_min):
                        points = [[int((x - i) / ds),
                                   int((y - j) / ds)]
                                  for x, y in zip(xs, ys)]
                        pts = np.array(points, np.int32)
                        pts = pts.reshape((-1, 1, 2))
                        cv2.drawContours(blank_image, [pts], 0, (255), -1)
                cv2.imwrite(os.path.join(mask_path, 'Da' + str(i1) + ".jpg"),
                            blank_image)
                i1 += 1

        if not os.path.isdir(positive_patch_path):
            os.makedirs(positive_patch_path)

        for list11 in sorted(os.listdir(mask_path), key=natural_key):
            # Context managers release the file handles PIL keeps open.
            with Image.open(os.path.join(mask_path, list11)) as gray:
                temp = np.array(gray)
            if not np.any(temp == 255):
                continue
            y = np.expand_dims(temp, axis=-1)

            with Image.open(os.path.join(slide_dir, list11)) as img:
                tempimg = np.array(img)

            # create sliding patches using Patches and Slidingpatches
            mask_patch_obj = Patches(img_patch_h=600,
                                     img_patch_w=600,
                                     stride_h=400,
                                     stride_w=400,
                                     label_patch_h=600,
                                     label_patch_w=600)
            img_patch_obj = Slidingpatches(img_patch_h=600,
                                           img_patch_w=600,
                                           stride_h=400,
                                           stride_w=400,
                                           label_patch_h=600,
                                           label_patch_w=600)
            train_patch = mask_patch_obj.extract_patches(y)
            train_img_patch = img_patch_obj.extract_patches(tempimg)
            # Sanity check: prints the number of patches extracted from
            # this tile.
            print(train_patch.shape[0])

            for i in range(train_patch.shape[0]):
                mask_patch = np.squeeze(train_patch[i])
                # Patches with any annotated pixel go to "pos", the rest to
                # the plain Mat_files directory; the payload is identical.
                if cv2.findNonZero(mask_patch) is not None:
                    target_dir = positive_patch_path
                else:
                    target_dir = mat_path
                GT = {'GT': {'im': train_img_patch[i], 'Mask': mask_patch}}
                iosci.savemat(
                    os.path.join(
                        target_dir, list11[:-4] + "_" + str(i) + "_" +
                        os.path.splitext(slide)[0] + ".mat"), GT)