def Apply(self, slidePath, maskPath, outputDir, tumorFolderName, maskFolderName): '''Greate Dataset by dividing slide into patches. The result will be stored into outputPath/folderName ''' slide = openslide.open_slide(slidePath) mask = openslide.open_slide(maskPath) max_level = mask.level_count - 1 if mask.level_count < slide.level_count else slide.level_count - 1 if(self._fetchingLevel>max_level or self._fetchingLevel<0): print "the level to fetch data is out of the range of TIFF image" return 0; splits = slidePath.split("/") tiffImgName = splits[-1] dataName = tiffImgName.split('.tif')[0] slidePathDir = outputDir + '/' +tumorFolderName if os.path.exists(slidePathDir) is False: os.system('mkdir '+slidePathDir) maskPathDir = outputDir + '/' +maskFolderName if os.path.exists(maskPathDir) is False: os.system('mkdir '+maskPathDir) level_size = slide.level_dimensions[self._fetchingLevel] zero_level_size = slide.level_dimensions[0] window_H = window_W = int(level_size[0]/self._win_propotion) windowShape = (window_H, window_W) h = w = 0 step = int(zero_level_size[0]/self._win_propotion) while(h<zero_level_size[0]): while(w<zero_level_size[1]): if ( h + step > zero_level_size[0] ): h = zero_level_size[0] - step if ( w + step > zero_level_size[1] ): w = zero_level_size[1] - step slideTile = self._GetPatch(slide, h, w, windowShape, self._fetchingLevel) maskTile = self._GetPatch(mask, h, w, windowShape, self._fetchingLevel) b,g,r,a = cv2.split(slideTile) slideTile = cv2.merge([b,g,r]) b,g,r,a = cv2.split(maskTile) maskTile = cv2.merge([b,g,r]) if ( maskTile.max()>100 ): slidePathFile = slidePathDir + '/' + dataName + '_' + str(self._patchID) + '.tif' maskPathFile = maskPathDir + '/' + dataName + '_Mask_' + str(self._patchID) + '.tif' cv2.imwrite( slidePathFile, slideTile ) cv2.imwrite( maskPathFile, maskTile ) self._patchID = self._patchID + 1 w = w + step w = 0 h = h + step
def openslide_test_file(full_file_path, file_type, db_cursor):
    """This will use the openslide bindings to get the width, height and filesize
    for an image or return an Error otherwise"""
    # Pre-declare all result fields so the failure path can return them as None.
    width = height = filesize = orig_resolution = slide_title = md5 = None
    try:
        im = openslide.open_slide(full_file_path)
        (width, height) = im.dimensions
        base_file_name = os.path.basename(full_file_path)
        filesize = os.path.getsize(full_file_path)
        if file_type == "svs":
            # Aperio slides expose their native scan magnification here.
            orig_resolution = im.properties["aperio.AppMag"]
        # md5 = md5Checksum(full_file_path)
        slide_name = os.path.basename(full_file_path)
        return (True, width, height, filesize, orig_resolution, slide_name, md5)
    except OpenSlideError, e:
        print "Openslide returned an error", full_file_path
        print >> sys.stderr, "Verify failed with:", repr(e.args)
        print "Openslide returned an error", full_file_path
        # NOTE(review): f_out is assumed to be a module-level open file handle -- confirm.
        f_out.write(full_file_path + ";\n")
        insert_corrupt_batch_stmt = (
            "insert into `corrupt_or_unreadable_%s_files` (full_file_name,filesize) Values ('%s',%d) "
        )
        # The statement is only printed, not executed (execute call is commented out below).
        print insert_corrupt_batch_stmt % (file_type, full_file_path, os.path.getsize(full_file_path))
        # update_cursor.execute( insert_corrupt_batch_stmt % (full_file_path,os.path.getsize(full_file_path) ))
        return (False, None, None, None, None, None, None)
def load_slide():
    """Open the configured slide and register Deep Zoom generators for it and
    each of its associated images on the Flask app object."""
    slidefile = app.config['DEEPZOOM_SLIDE']
    if slidefile is None:
        raise ValueError('No slide file specified')
    # Map Flask config keys onto DeepZoomGenerator keyword arguments.
    config_map = {
        'DEEPZOOM_TILE_SIZE': 'tile_size',
        'DEEPZOOM_OVERLAP': 'overlap',
        'DEEPZOOM_LIMIT_BOUNDS': 'limit_bounds',
    }
    opts = {opt: app.config[key] for key, opt in config_map.items()}
    slide = open_slide(slidefile)
    app.slides = {SLIDE_NAME: DeepZoomGenerator(slide, **opts)}
    app.associated_images = []
    app.slide_properties = slide.properties
    # Every associated image gets its own slug-keyed generator.
    for assoc_name, assoc_image in slide.associated_images.items():
        app.associated_images.append(assoc_name)
        slug = slugify(assoc_name)
        app.slides[slug] = DeepZoomGenerator(ImageSlide(assoc_image), **opts)
    try:
        # Average the per-axis microns-per-pixel when both are present.
        mpp_x = slide.properties[openslide.PROPERTY_NAME_MPP_X]
        mpp_y = slide.properties[openslide.PROPERTY_NAME_MPP_Y]
        app.slide_mpp = (float(mpp_x) + float(mpp_y)) / 2
    except (KeyError, ValueError):
        app.slide_mpp = 0
def AddDataset(self, slidePath, maskPath): slideFileName = slidePath.split('/')[-1] dataName = slideFileName.split('.tif')[0] mask = openslide.open_slide(maskPath) window_H = window_W = self._win_size windowShape = (window_H, window_W) if os.path.exists(self._VGGcsvDir + "/" +dataName+".csv"): #read coordinates from .txt file coorPath = self._VGGcsvDir + "/" + dataName + ".csv" file = open( coorPath,'r') coor_lines = file.readlines() for i in xrange(len(coor_lines)): line = coor_lines[i] elems = line.rstrip().split(',') #labelCoor = int(elems[0]) WCoor = int(elems[1]) HCoor = int(elems[2]) maskTile = self._GetPatch(mask, WCoor- windowShape[0]/2, HCoor- windowShape[1]/2, windowShape, self._workingLevel) r2,g2,b2,a2 = cv2.split(maskTile) maskTile = cv2.merge([r2]) if maskTile.max()<100: self._neg_coor_list.append([WCoor, HCoor, 0, slidePath, maskPath]) elif maskTile[maskTile.shape[0]/2][maskTile.shape[1]/2] > 100: self._pos_coor_list.append([WCoor, HCoor, 1, slidePath, maskPath]) print "Add %s Successfully!!"%dataName return True else: print "Failure to find the VGGcsv file: " + self._VGGcsvDir + "/" +dataName+".csv" return False
def ROI(name, ref_level=4, disk_size=4, thresh=None, black_spots=None, number_of_pixels_max=1000000, verbose=False):
    """Find tissue regions of interest in a whole-slide image.

    Thresholds a low-resolution view of the slide into a tissue mask, labels
    its connected components, and runs Best_Finder_rec on each component's
    bounding box (converted to level-0 coordinates).

    Returns a numpy array of ROI parameter rows accumulated by Best_Finder_rec.
    """
    if '/' in name:
        cut = name.split('/')[-1]
        folder = cut.split('.')[0]
    else:
        folder = name.split(".")[0]
    slide = openslide.open_slide(name)
    lowest_res = slide.level_count - 2
    # Red channel of the low-resolution image is enough for masking.
    s = np.array(slide.read_region((0, 0), lowest_res, slide.level_dimensions[lowest_res]))[:, :, 0]
    binary = Mask_ROI_cl(s, disk_size, thresh=thresh, black_spots=black_spots)
    # 8-connectivity structuring element.
    stru = [[1, 1, 1], [1, 1, 1], [1, 1, 1]]
    blobs, number_of_blobs = ndimage.label(binary, structure=stru)
    list_roi = []  ### pd.DataFrame(columns=['x_0','y_0','w','h','res'])
    # ndimage.label assigns labels 1..number_of_blobs inclusive; the original
    # range(1, number_of_blobs) silently dropped the last blob.
    for i in range(1, number_of_blobs + 1):
        y, x = np.where(blobs == i)
        x_0 = min(x)
        y_0 = min(y)
        w = max(x) - x_0
        h = max(y) - y_0
        # Convert the bounding-box origin to level-0 coordinates.
        new_x, new_y = get_X_Y(slide, x_0, y_0, lowest_res)
        list_roi = Best_Finder_rec(slide, lowest_res, new_x, new_y, w, h, -1,
                                   "./" + folder + "/" + folder, ref_level,
                                   list_roi, number_of_pixels_max, verbose)
    list_roi = np.array(list_roi)
    return (list_roi)
def Worst_Slicer(name, lamb, ref_level=0, Mask_=False):
    """Slice a whole-slide image into variability-filtered tiles.

    Derives an output folder from the slide filename, optionally builds the
    path of the companion mask (.../<stem>_Mask/<folder>_Mask.tif), and hands
    off to Best_Slicer_rec starting two levels below the top of the pyramid.
    """
    # Output folder name = filename stem (with or without a directory prefix).
    if '/' in name:
        cut = name.split('/')[-1]
        folder = cut.split('.')[0]
    else:
        folder = name.split(".")[0]
    if Mask_:
        # Rebuild the mask path from the slide path: the mask lives in a
        # sibling directory named '<stem>_Mask'.
        pieces = name.split('/')[:-2]
        folder_mask = folder + "_Mask"
        Mask_adresse = ""
        for i in range(len(pieces)):
            Mask_adresse += pieces[i] + "/"
        Mask_adresse += folder_mask.split("_")[0] + "_Mask" + "/" + folder_mask + ".tif"
    else:
        Mask_adresse = None
    if not os.path.exists(folder):
        os.makedirs(folder)
    slide = openslide.open_slide(name)
    # Start slicing two levels below the top of the pyramid.
    level = slide.level_count - 2
    size_x = int(slide.level_dimensions[level][0])
    size_y = int(slide.level_dimensions[level][1])
    Best_Slicer_rec(slide, level, 0, 0, size_x, size_y, lamb, "./" + folder + "/" + folder, ref_level, Mask_adresse)
def getInfo(i):
    """Extract a 50x50 patch centred on (Xs[i], Ys[i]) from slide i and save
    it as a JPEG under <size>/<class>/."""
    print("filename: {0}, path: {1}\n".format(Slides[i], fname_map[Slides[i]]))
    slidepath = fname_map[Slides[i]]
    _slide = open_slide(slidepath)
    #read region
    x = int(float(Xs[i]))
    y = int(float(Ys[i]))
    print("x={0},y={1}\n".format(x, y))
    size = 50
    # Window is centred on (x, y).
    img_region = _slide.read_region([x - size/2, y - size/2], 0, [size, size])
    filename = Slides[i]
    directory = '{1}/{0}'.format(Classes[i], size)
    if not os.path.exists(directory):
        os.makedirs(directory)
    img_region.save("{3}/{0}_{1}_{2}.jpg".format(filename[:-4], x, y, directory), "JPEG")
    _slide.close()
def ApplyToSlideWrite(slide, table, f, outputfilename=None): # Slide is a string of the location of the file # This function applies a function f to the whole slide, this slide is given as input with a table # which contains all the patches on which to apply the function. # Their is also a optionnal outputfilename # table is a iterable where each element has 5 attributes: # x, y, w, h, res input_slide = openslide.open_slide(slide) outputfilename = outputfilename if outputfilename is not None else "F_" + slide dim1, dim2 = input_slide.dimensions #output_slide = Vips.Image.black(dim1, dim2) red_channel = Vips.Image.black(dim1, dim2) green_channel = Vips.Image.black(dim1, dim2) blue_channel = Vips.Image.black(dim1, dim2) for i in range(len(table)): if i % 10 == 0: print "process: {} / {} ".format(i, len(table)) image = np.array(GetImage(input_slide, table[i]))[:, :, :3] image = f(image) red_part = Vips.Image.new_from_array(image[:, :, 0].tolist()) green_part = Vips.Image.new_from_array(image[:, :, 1].tolist()) blue_part = Vips.Image.new_from_array(image[:, :, 2].tolist()) red_channel = red_channel.insert(red_part, table[i][0], table[i][1]) green_channel = green_channel.insert( green_part, table[i][0], table[i][1]) blue_channel = blue_channel.insert(blue_part, table[i][0], table[i][1]) #output_slide = output_slide.insert(image, table[i][0], table[i][1]) print "lets join the slides" rgb = red_part.bandjoin([green_part, blue_part]) rgb.write_to_file(outputfilename)
def openslide_test_file_mongo(full_file_path,file_type,db_cursor): """This will use the openslide bindings to get the width, height and filesize for an image or return an Error otherwise""" width=height=filesize=orig_resolution=slide_title=md5 = None try: im = openslide.open_slide(full_file_path) (width, height) = im.dimensions base_file_name = os.path.basename(full_file_path) filesize = os.path.getsize(full_file_path) if(file_type== 'svs'): try: orig_resolution = im.properties['aperio.AppMag'] except: orig_resolution = 'UnkSVSReadError' elif(file_type == 'ndpi'): orig_resolution = 40 #md5 = md5Checksum(full_file_path) md5 = None slide_name = os.path.basename(full_file_path) sld_properties = im.properties return(True,width,height,filesize,orig_resolution,slide_name,md5,sld_properties) except OpenSlideError, e: print "Openslide returned an error",full_file_path print >>sys.stderr, "Verify failed with:", repr(e.args) print "Openslide returned an error",full_file_path # f_out.write(full_file_path+';\n') print "SHIT IT DIED!" db_cursor['CDSA_LoadErrors']['corrupt_slides'].insert( { 'full_file_name': full_file_path, 'file_type': file_type, 'filesize': os.path.getsize(full_file_path) } ) return(False,None,None,None,None,None,None,None)
def open_slide(slide_num, folder, training):
    """
    Open a whole-slide image, given an image number.

    Args:
        slide_num: Slide image number as an integer.
        folder: Directory in which the slides folder is stored, as a string.
            This should contain either a `training_image_data` folder with
            images in the format `TUPAC-TR-###.svs`, or a `testing_image_data`
            folder with images in the format `TUPAC-TE-###.svs`.
        training: Boolean for training or testing datasets.

    Returns:
        An OpenSlide object representing a whole-slide image, or None when the
        file is missing or unreadable.
    """
    if training:
        filename = os.path.join(folder, "training_image_data",
                                "TUPAC-TR-{}.svs".format(str(slide_num).zfill(3)))
    else:
        # Testing images
        filename = os.path.join(folder, "testing_image_data",
                                "TUPAC-TE-{}.svs".format(str(slide_num).zfill(3)))
    try:
        slide = openslide.open_slide(filename)
    except (OpenSlideError, FileNotFoundError):
        # Both failure modes map to the same result; merged the two identical
        # except branches of the original.
        slide = None
    return slide
def __init__(self, slidepath, basename, format, tile_size, overlap, limit_bounds, quality, workers, with_viewer, Bkg, basenameJPG, xmlfile, mask_type, ROIpc, oLabel):
    """Set up the tiler: open the slide, record the tiling options and start
    the TileWorker processes that consume jobs from the shared queue."""
    if with_viewer:
        # Check extra dependency before doing a bunch of work
        import jinja2
    print("line226 - %s " % (slidepath) )
    self._slide = open_slide(slidepath)
    self._basename = basename
    self._basenameJPG = basenameJPG
    self._xmlfile = xmlfile
    self._mask_type = mask_type
    self._format = format
    self._tile_size = tile_size
    self._overlap = overlap
    self._limit_bounds = limit_bounds
    # Bounded queue: at most two outstanding jobs per worker.
    self._queue = JoinableQueue(2 * workers)
    self._workers = workers
    self._with_viewer = with_viewer
    self._Bkg = Bkg
    self._ROIpc = ROIpc
    self._dzi_data = {}
    self._xmlLabel = oLabel
    for _i in range(workers):
        TileWorker(self._queue, slidepath, tile_size, overlap, limit_bounds, quality, self._Bkg, self._ROIpc).start()
def openslide_test_file(full_file_path,file_type='svs'): """This will use the openslide bindings to get the width, height and filesize for an \ image or return an Error otherwise""" width=height=filesize=orig_resolution=slide_title=md5 = None ##TODO: Look into adding a file type which by looking at the extension? extension = os.path.splitext(full_file_path)[1] if extension not in ['.ndpi','.svs']: #Should just return gracefully?"" print extension return( False, None, None, None, None, None, None, None) try: im = openslide.open_slide(full_file_path) (width, height) = im.dimensions base_file_name = os.path.basename(full_file_path) filesize = os.path.getsize(full_file_path) if(file_type== 'svs'): try: orig_resolution = im.properties['aperio.AppMag'] except: orig_resolution = 'UnkSVSReadError' elif(file_type == 'ndpi'): orig_resolution = 40 #md5 = md5Checksum(full_file_path) md5 = None slide_name = os.path.basename(full_file_path) sld_properties = im.properties return(True,width,height,filesize,orig_resolution,slide_name,md5,sld_properties) except OpenSlideError, e: print "Openslide returned an error",full_file_path print >>sys.stderr, "Verify failed with:", repr(e.args) print "Openslide returned an error",full_file_path
def checking_slide(slide_name):
    """Best-effort validity probe: return True when the slide and every ROI
    extracted from it can be opened and read, False on any failure."""
    try:
        slide = openslide.open_slide(slide_name)
        if 'Tumor' in slide_name:
            # Tumour slides have a companion mask at
            # .../<parent>_Mask/<stem>_Mask.tif -- rebuild that path.
            slide_name_list = slide_name.split('/')
            mask = "_Mask"
            slide_name_list[0]='/'
            slide_name_list[-2] +=mask
            slide_name_list[-1] = slide_name_list[-1].split('.')[0]+mask+'.tif'
            cm = os.path.join(*slide_name_list)
            list_ROI = ROI(slide_name, ref_level = 0, disk_size = 4, thresh = 220, black_spots = 20, number_of_pixels_max = 1000000, method = 'SP_ROI', mask_address = cm, N_squares = 4, verbose = False )
        else:
            list_ROI = ROI(slide_name, ref_level = 0, disk_size = 4, thresh = 220, black_spots = 20, number_of_pixels_max = 1000000, method = 'SP_ROI', mask_address = None, N_squares = 4, verbose = False )
        # Force-read every region; unreadable data surfaces as an exception.
        for para in list_ROI:
            sample = GetImage(slide,para)
    except:
        # Deliberately broad: any failure at all marks the slide as unusable.
        return False
    return True
def Best_Slicer_rec(slide,level,x_0,y_0,size_x,size_y,lamb,image_name,ref_level,Mask_adresse=None):
    """Recursively slice a slide region into tiles with enough variability.

    At the target pyramid level (ref_level), regions small enough
    (< 1e6 pixels) are saved as PNGs when variability_val exceeds lamb
    (together with the matching mask crop when Mask_adresse is given);
    larger regions are quartered at the same level.  Above ref_level,
    a region is quartered and recursed one level deeper only when it shows
    enough variability (or level > 1); otherwise recursion stops.
    """
    if level==ref_level:
        if size_x*size_y<1000000: ##size of level 3
            croped=slide.read_region((x_0,y_0), level, (size_x,size_y) )
            test=variability_val(np.array(croped))
            if test>lamb:
                # Enough variability: persist the tile (and its mask crop).
                croped.save(image_name+"_"+str(x_0)+"_"+str(y_0)+".png")
                if Mask_adresse is not None:
                    slide_mask = openslide.open_slide(Mask_adresse)
                    croped_mask=slide_mask.read_region((x_0,y_0), level, (size_x,size_y) )
                    croped_mask.save(image_name+"_"+str(x_0)+"_"+str(y_0)+"_Mask"+".png")
        else:
            # Region too big to save: quarter it at the same level.
            size_x_new=int(size_x*0.5)
            size_y_new=int(size_y*0.5)
            diese_str="#"*level*10
            print diese_str +"split level "+ str(level)
            x_1=x_0+size_x_new
            y_1=y_0+size_y_new
            # Random id keeps sibling tiles from overwriting one another.
            image_name=image_name+"_Split_id_"+str(random.randint(0, 1000))
            Best_Slicer_rec(slide,level,x_0,y_0,size_x_new,size_y_new,lamb,image_name,ref_level,Mask_adresse)
            Best_Slicer_rec(slide,level,x_1,y_0,size_x_new,size_y_new,lamb,image_name,ref_level,Mask_adresse)
            Best_Slicer_rec(slide,level,x_0,y_1,size_x_new,size_y_new,lamb,image_name,ref_level,Mask_adresse)
            Best_Slicer_rec(slide,level,x_1,y_1,size_x_new,size_y_new,lamb,image_name,ref_level,Mask_adresse)
    else:
        croped=slide.read_region((x_0,y_0), level, (size_x,size_y) )
        test=variability_val(np.array(croped))
        if test>lamb or level > 1:
            if size_x*size_y>1000000: ##size of level 3
                # Quarter the region while descending one pyramid level;
                # origins stay in level-0 coordinates (hence get_size(..., 0)).
                size_x_new,size_y_new=get_size(slide,size_x,size_y,level,level-1)
                size_x_new=int(size_x_new*0.5)
                size_y_new=int(size_y_new*0.5)
                diese_str="#"*level*10
                print diese_str +"split level "+ str(level)
                width_x_0,height_y_0=get_size(slide,size_x,size_y,level,0)
                x_1=x_0+int(width_x_0*0.5)
                y_1=y_0+int(height_y_0*0.5)
                Best_Slicer_rec(slide,level-1,x_0,y_0,size_x_new,size_y_new,lamb,image_name,ref_level,Mask_adresse)
                Best_Slicer_rec(slide,level-1,x_1,y_0,size_x_new,size_y_new,lamb,image_name,ref_level,Mask_adresse)
                Best_Slicer_rec(slide,level-1,x_0,y_1,size_x_new,size_y_new,lamb,image_name,ref_level,Mask_adresse)
                Best_Slicer_rec(slide,level-1,x_1,y_1,size_x_new,size_y_new,lamb,image_name,ref_level,Mask_adresse)
            else:
                # Small enough: descend without splitting.
                size_x_new,size_y_new=get_size(slide,size_x,size_y,level,level-1)
                Best_Slicer_rec(slide,level-1,x_0,y_0,size_x_new,size_y_new,lamb,image_name,ref_level,Mask_adresse)
        else:
            print "Not enough variability on second split"
def PredOneImage(slide, para, outfile, f, options):
    """Run the prediction function f on one slide region and persist the
    tiled image, its table, the binary map and the probability map.

    Note: the `slide` parameter is a path string; it is rebound to the opened
    slide object on the next line.
    """
    # pdb.set_trace()
    slide = openslide.open_slide(slide)
    # Drop the alpha channel of the RGBA crop before prediction.
    image = np.array(GetImage(slide, para))[:,:,:3]
    image, table, bin, prob = f(image, marge=options.marge, marge_cut_off=options.marge_cut_off)
    imsave(outfile, image, resolution=[1.0,1.0])
    # Sibling outputs share the tile's name with "tiled" swapped for the kind.
    np.save(outfile.replace('.tiff', ".npy").replace("tiled", "table"), table)
    imsave(outfile.replace("tiled", "bin"), bin, resolution=[1.0,1.0])
    imsave(outfile.replace("tiled", "prob"), img_as_ubyte(prob), resolution=[1.0,1.0])
def GetImage(c, para):
    ## Returns cropped image given a set of parameters
    """Return the region described by para = (x, y, width, height, level).

    c is either a slide path (str) or an already-open object exposing
    read_region.  Returns None when para does not hold exactly 5 values;
    the original fell through and raised NameError on the undefined sample.
    """
    if len(para) != 5:
        print("Not enough parameters...")
        return None
    if isinstance(c, str):
        sample = openslide.open_slide(c).read_region((para[0], para[1]), para[4], (para[2], para[3]))
    else:
        sample = c.read_region((para[0], para[1]), para[4], (para[2], para[3]))
    return (sample)
def GetWholeImage(c, level=None):
    """Return the whole image of slide c at the given pyramid level.

    c is either a slide path (str) or an open slide object.  When level is
    None the lowest-resolution level is used; a level beyond the pyramid is
    clamped to the lowest-resolution level (the original only printed the
    warning and then indexed out of range).
    """
    if isinstance(c, str):
        c = openslide.open_slide(c)
    if level is None:
        level = c.level_count - 1
    elif level > c.level_count - 1:
        print(" level ask is too low... It was setted accordingly")
        # Actually clamp the level, as the message above already claimed.
        level = c.level_count - 1
    sample = c.read_region((0, 0), level, c.level_dimensions[level])
    return sample
def Train(self, DbatchSize = 60,trainStep = 60, trainTimes = 10):
    """Train the Caffe solver on randomly sampled positive/negative patches.

    For each of trainTimes rounds: draw DbatchSize coordinates (positives
    with probability prob_P), read the patches slide-by-slide, shuffle them
    into a batch and run trainStep solver iterations on it.
    """
    #Begin Training
    window_H = window_W = self._win_size
    windowShape = (window_H, window_W)
    # Probability of drawing a positive sample for each batch element.
    prob_P = 0.4
    LEN_POS = len(self._pos_coor_list)
    LEN_NEG = len(self._neg_coor_list)
    print "Positive Number = %d, Negtive Number = %d" %(LEN_POS, LEN_NEG)
    print "Positive Sampling Probability = %f"%(prob_P)
    for train_time in xrange(trainTimes):
        datasetT = []
        sldNmT = []
        mskNmT = []
        img_list = []
        label_list = []
        # Sample the batch coordinates, tracking which slides are needed.
        for dz in xrange(DbatchSize):
            if random.uniform(0.0, 1.0) < prob_P:
                randN = random.randint(0, LEN_POS-1)
                datasetT.append(self._pos_coor_list[randN])
                if self._pos_coor_list[randN][3] not in sldNmT:
                    sldNmT.append(self._pos_coor_list[randN][3])
                    mskNmT.append(self._pos_coor_list[randN][4])
            else:
                randN = random.randint(0, LEN_NEG-1)
                datasetT.append(self._neg_coor_list[randN])
                if self._neg_coor_list[randN][3] not in sldNmT:
                    sldNmT.append(self._neg_coor_list[randN][3])
                    mskNmT.append(self._neg_coor_list[randN][4])
        # Open each slide once and read all of its sampled patches.
        for i_sld in xrange(len(sldNmT)):
            slide = openslide.open_slide(sldNmT[i_sld])
            for i_dsT in xrange(len(datasetT)):
                if datasetT[i_dsT][3] == sldNmT[i_sld]:
                    WCoor = datasetT[i_dsT][0]
                    HCoor = datasetT[i_dsT][1]
                    labelCoor = datasetT[i_dsT][2]
                    # Patch is centred on the coordinate.
                    slideTile = self._GetPatch(slide, WCoor- windowShape[0]/2, HCoor- windowShape[1]/2, windowShape, self._workingLevel)
                    slideTile = slideTile.astype('float32')
                    r,g,b,a = cv2.split(slideTile)
                    # Mean-shift the channels; constants presumably match the
                    # training-set colour statistics -- TODO confirm.
                    slideTile_sw = np.array([r-185, g-50, b-185])
                    # Insert at a random position to shuffle the batch.
                    pos = random.randint(0,len(img_list))
                    img_list.insert(pos, slideTile_sw)
                    label_list.insert(pos, labelCoor)
        data = np.array(img_list).astype('float32')
        labels = np.array(label_list).astype('float32')
        self._solver.net.set_input_arrays(data, labels)
        self._solver.step(trainStep)
        # Release the batch before sampling the next one.
        del data
        del labels
        del img_list[:]
        del label_list[:]
        del sldNmT[:]
        del mskNmT[:]
def ProcessOneImage(slide, f, output, options):
    """Apply the prediction function f over a whole slide and assemble the
    per-patch outputs into a single TIFF at `output`.

    slide is the slide path; options.size (default 224) sets the patch size.
    """
    size_images = 224 if options.size is None else options.size
    list_of_para = ROI(slide, method="grid_fixed_size", ref_level=0, seed=42,
                       fixed_size_in=(size_images, size_images))
    # Bug fix: the original referenced the undefined name `slide_name` here.
    size_x, size_y = openslide.open_slide(slide).dimensions
    #list_of_para = list_of_para[10:100]
    temp_out = ApplyToSlideWrite(slide, list_of_para, f)
    WritteTiffFromFiles(temp_out, output, size_x, size_y)
    CleanTemp(temp_out)
def run(self):
    """Worker loop: consume (associated, level, address, outfile) jobs from
    the queue and save the corresponding Deep Zoom tiles until a None
    sentinel arrives."""
    self._slide = open_slide(self._slidepath)
    prev_associated = None
    dz = self._get_dz()
    while True:
        job = self._queue.get()
        if job is None:
            # Sentinel: acknowledge it and stop this worker.
            self._queue.task_done()
            break
        associated, level, address, outfile = job
        # Rebuild the generator only when the associated image changes.
        if associated != prev_associated:
            dz = self._get_dz(associated)
            prev_associated = associated
        dz.get_tile(level, address).save(outfile, quality=self._quality)
        self._queue.task_done()
def __init__(self, slidepath, basename, format, tile_size, overlap, quality, workers, with_viewer):
    """Set up the tiler: open the slide, record the tiling options and start
    the worker processes."""
    if with_viewer:
        # Check extra dependency before doing a bunch of work
        import jinja2
    self._slide = open_slide(slidepath)
    self._basename = basename
    self._format = format
    self._tile_size = tile_size
    self._overlap = overlap
    self._workers = workers
    self._with_viewer = with_viewer
    # Bounded queue: at most two outstanding jobs per worker.
    self._queue = JoinableQueue(2 * workers)
    self._dzi_data = {}
    for _ in range(workers):
        worker = TileWorker(self._queue, slidepath, tile_size, overlap, quality)
        worker.start()
def tile(file1):
    """Tile one slide file into fixed-size TIFF tiles under tiledir/svsimage.

    Note: tiledir, svsimage and tiledim are module-level globals -- TODO
    confirm they are set before this is called.
    """
    # Check if directory exists. If it does, delete the old one and make a new one.
    if os.path.isdir(tiledir + svsimage) == True:
        shutil.rmtree(tiledir + svsimage)
    else:
        pass
    os.mkdir(tiledir + svsimage)
    time0 = time.time()
    # Here starts the actual tiling code
    # Opens slide object as 'img'. Not sure how this is different from op.OpenSlide(file1)
    img = op.open_slide(file1)
    # Open slide in "deepzoom" for tiling
    deep = op.deepzoom.DeepZoomGenerator(img, tile_size=tiledim, overlap=0, limit_bounds=False)
    # Record which "level" in deepzoom the image is in (biggest image), and calculate how many tiles to save
    level_count = deep.level_tiles.index(max(deep.level_tiles))
    tile_x, tile_y = max(deep.level_tiles)
    image_count = 1
    kept_tiles = 0
    for a in range( 0, tile_y):
        for b in range( 0, tile_x):
            # Extract tile from deepzoom image
            cropped_image = deep.get_tile( (level_count), (b, a) )
            # Now analyze the tile: convert to gray and extract data
            # gim = cropped_image.convert( 'L' )
            # pixels = list(gim.getdata())
            # # Find the average pixel intensity
            # level = sum(pixels) / len(pixels)
            # # If average intensities are too close to "black" or "white", omit the deepzoom tile
            # if level >= 230 or level <= 25:
            #     pass
            # # Otherwise save the crop
            # else:
            cropped_image.save( './' + tiledir + svsimage + '/' + str( image_count ) + '.tiff', 'TIFF' )
            kept_tiles += 1
            image_count += 1
    print "Saved %r tiles, omitted %r tiles. Process took %r seconds." % (kept_tiles, image_count - kept_tiles, round((time.time() - time0), 2))
def load_slide():
    """Open the configured slide file and register DeepZoomGenerator wrappers
    for the slide and each of its associated images on the Flask app object."""
    slidefile = app.config['DEEPZOOM_SLIDE']
    if slidefile is None:
        raise ValueError('No slide file specified')
    # Map Flask config keys onto DeepZoomGenerator keyword arguments.
    config_map = {
        'DEEPZOOM_TILE_SIZE': 'tile_size',
        'DEEPZOOM_OVERLAP': 'overlap',
    }
    opts = dict((v, app.config[k]) for k, v in config_map.iteritems())
    slide = open_slide(slidefile)
    app.slides = {
        SLIDE_NAME: DeepZoomGenerator(slide, **opts)
    }
    app.associated_images = []
    app.slide_properties = slide.properties
    # Every associated image gets its own slug-keyed generator.
    for name, image in slide.associated_images.iteritems():
        app.associated_images.append(name)
        slug = slugify(name)
        app.slides[slug] = DeepZoomGenerator(ImageSlide(image), **opts)
def GetImage(c, para):
    ## Returns cropped image given a set of parameters
    """Return a colour-deconvolved crop of slide c.

    para = (x, y, width, height, level); c is either a slide path (str) or an
    open object exposing read_region.  Returns None when para does not hold
    exactly 5 values (the original fell through and raised NameError on the
    undefined sample).
    """
    if len(para) != 5:
        print("Not enough parameters...")
        return None
    if isinstance(c, str):
        sample = openslide.open_slide(c).read_region((para[0], para[1]), para[4], (para[2], para[3]))
    else:
        sample = c.read_region((para[0], para[1]), para[4], (para[2], para[3]))
    #pdb.set_trace()
    # do color deconvolution on the sample image.
    dec = deconv.Deconvolution()
    dec.params['image_type'] = 'HEDab'
    np_img = np.array(sample)
    dec_img = dec.colorDeconv(np_img[:, :, :3])
    new_img = Image.fromarray(dec_img.astype('uint8'))
    return (new_img)
def OpenslideGetImageMetadata(full_file_path):
    """This will use the openslide bindings to get the width, height and filesize
    for an image or return an Error otherwise"""
    # Pre-declare result fields so they exist even on partial failure.
    width=height=filesize=orig_resolution=slide_title=md5 = None
    ## I am going to make the crazy assumption that if the file ends with .SVS it's an SVS
    ## and if it ends with .NDPI it's an.. NDPI
    ## This matters bcecause of the way certain image properties are mapped
    try:
        im = openslide.open_slide(full_file_path)
        (width, height) = im.dimensions
        base_file_name = os.path.basename(full_file_path)
        filesize = os.path.getsize(full_file_path)
        #print base_file_name,filesize,im
        if base_file_name.endswith('svs') :
            try:
                # Aperio slides expose their scan magnification here.
                orig_resolution = im.properties['aperio.AppMag']
            except:
                orig_resolution = 'UnkSVSReadError'
        elif base_file_name.endswith('ndpi'):
            try:
                orig_resolution = im.properties['openslide.objective-power']
            except:
                orig_resolution = 'UnkNDPIReadError'
                ###WIP: This is very likely not true in all cases-- just happens to be true @ Emory
        else:
            """NEED TO ADD CODE TO OPEN OTHER FILE TYPES?? LIKE A TIFF.. NOT SURE WHAT HAPENS"""
            # NOTE(review): sys.exit() aborts the whole process on an unknown
            # extension -- consider returning a failure tuple instead.
            print "Can't open",base_file_name
            sys.exit()
        sldScan_properties = im.properties
        sldMetaData = { 'width': width, 'height': height, 'orig_resolution': orig_resolution, 'scanProperties': sldScan_properties}
        return(True,sldMetaData)
    except OpenSlideError, e:
        #print "Openslide returned an error",full_file_path
        #print >>sys.stderr, "Verify failed with:", repr(e.args)
        #print "Openslide returned an error",full_file_path
        #eclean = clean_openslide_keys(e), 'ErrorCode': eclean}
        return(False,{'FileWErrors': full_file_path, 'ErrorType': 'OpenSlideError'})
def computeEvaluationMask(maskDIR, resolution, level): """Computes the evaluation mask. Args: maskDIR: the directory of the ground truth mask resolution: Pixel resolution of the image at level 0 level: The level at which the evaluation mask is made Returns: evaluation_mask """ slide = openslide.open_slide(maskDIR) dims = slide.level_dimensions[level] pixelarray = np.zeros(dims[0]*dims[1], dtype='uint') pixelarray = np.array(slide.read_region((0,0), level, dims)) distance = nd.distance_transform_edt(255 - pixelarray[:,:,0]) Threshold = 75/(resolution * pow(2, level) * 2) # 75µm is the equivalent size of 5 tumor cells binary = distance < Threshold filled_image = nd.morphology.binary_fill_holes(binary) evaluation_mask = measure.label(filled_image, connectivity = 2) return evaluation_mask
def run(self):
    """Worker loop: consume tile jobs and save tiles that are mostly tissue
    (background fraction <= Bkg) and sufficiently inside the annotated ROI
    (PercentMasked >= ROIpc)."""
    self._slide = open_slide(self._slidepath)
    last_associated = None
    dz = self._get_dz()
    while True:
        data = self._queue.get()
        if data is None:
            # Sentinel: acknowledge and stop this worker.
            self._queue.task_done()
            break
        #associated, level, address, outfile = data
        associated, level, address, outfile, format, outfile_bw, PercentMasked = data
        if last_associated != associated:
            dz = self._get_dz(associated)
            last_associated = associated
        #try:
        if True:
            try:
                tile = dz.get_tile(level, address)  # A single tile is being read
                #check the percentage of the image with "information". Should be above 50%
                gray = tile.convert('L')
                # Pixels brighter than 220 count as background.
                bw = gray.point(lambda x: 0 if x<220 else 1, 'F')
                arr = np.array(np.asarray(bw))
                avgBkg = np.average(bw)
                bw = gray.point(lambda x: 0 if x<220 else 1, '1')
                # check if the image is mostly background
                if avgBkg <= (self._Bkg / 100):
                    # if an Aperio selection was made, check if is within the selected region
                    if PercentMasked >= (self._ROIpc / 100.0):
                        #if PercentMasked > 0.05:
                        tile.save(outfile, quality=self._quality)
                        #print("%s good: %f" %(outfile, avgBkg))
                #elif level>5:
                #    tile.save(outfile, quality=self._quality)
                #print("%s empty: %f" %(outfile, avgBkg))
                self._queue.task_done()
            except:
                # Deliberately broad: a single bad tile must not kill the worker.
                print(level, address)
                print("image %s failed at dz.get_tile for level %f" % (self._slidepath, level))
                self._queue.task_done()
def check_mpp(self, patient_id, file_name):
    """Return the (mpp_x, mpp_y) microns-per-pixel values for one target file.

    Non-png files are opened with openslide and the values come from the
    slide properties; png files are looked up in self.target_list.

    Raises MargeOverlapedGlomusException when a png target has no recorded
    properties.
    """
    body, ext = os.path.splitext(file_name)
    if ext not in self.png:
        file_path = os.path.join(self.annotation_dir, self.staining_dir, patient_id, file_name)
        '''close the slide previously opened'''
        if not (self.slide is None):
            self.slide.close()
        self.slide = openslide.open_slide(file_path)
        '''mpp indicates the number of pixels per micrometer.'''
        mpp_x = float(self.slide.properties[openslide.PROPERTY_NAME_MPP_X])
        mpp_y = float(self.slide.properties[openslide.PROPERTY_NAME_MPP_Y])
    else:
        # png targets carry their mpp metadata in the pre-built target list.
        properties = self.target_list[body]
        if properties is not None:
            mpp_x = float(properties['mpp_x'])
            mpp_y = float(properties['mpp_y'])
        else:
            raise MargeOverlapedGlomusException(
                'unknown target file name is given.')
    return mpp_x, mpp_y
def run(self):
    """Worker loop: consume tile jobs and save tiles whose background
    fraction is below the Bkg threshold."""
    self._slide = open_slide(self._slidepath)
    last_associated = None
    dz = self._get_dz()
    while True:
        data = self._queue.get()
        if data is None:
            # Sentinel: acknowledge and stop this worker.
            self._queue.task_done()
            break
        #associated, level, address, outfile = data
        associated, level, address, outfile, format, outfile_bw = data
        if last_associated != associated:
            dz = self._get_dz(associated)
            last_associated = associated
        #try:
        if True:
            try:
                tile = dz.get_tile(level, address)  # A single tile is being read
                #nc added: check the percentage of the image with "information". Should be above 50%
                gray = tile.convert('L')
                # Pixels brighter than 220 count as background.
                bw = gray.point(lambda x: 0 if x < 220 else 1, 'F')
                arr = np.array(np.asarray(bw))
                avgBkg = np.average(bw)
                bw = gray.point(lambda x: 0 if x < 220 else 1, '1')
                #outfile = os.path.join(outfile, '%s.%s' % (str(round(avgBkg, 3)),format) )
                #outfile_bw = os.path.join(outfile_bw, '%s.%s' % (str(round(avgBkg, 3)),format) )
                # bw.save(outfile_bw, quality=self._quality)
                # Keep the tile only when it is not mostly background.
                if avgBkg < (self._Bkg / 100):
                    tile.save(outfile, quality=self._quality)
                    #print("%s good: %f" %(outfile, avgBkg))
                #else:
                #print("%s empty: %f" %(outfile, avgBkg))
                self._queue.task_done()
            except:
                # Deliberately broad: a single bad tile must not kill the worker.
                print(level, address)
                print("image %s failed at dz.get_tile for level %f" % (self._slidepath, level))
                self._queue.task_done()
def __init__(self, slidepath, basename, format, tile_size, overlap, limit_bounds, quality, workers, with_viewer, Bkg, basenameJPG):
    """Set up the tiler: open the slide, record the tiling options and start
    the TileWorker processes."""
    if with_viewer:
        # Check extra dependency before doing a bunch of work
        import jinja2
    print("line226 - %s " % (slidepath))
    self._slide = open_slide(slidepath)
    self._basename = basename
    self._basenameJPG = basenameJPG
    self._format = format
    self._tile_size = tile_size
    self._overlap = overlap
    self._limit_bounds = limit_bounds
    self._workers = workers
    self._with_viewer = with_viewer
    self._Bkg = Bkg
    # Bounded queue: at most two outstanding jobs per worker.
    self._queue = JoinableQueue(2 * workers)
    self._dzi_data = {}
    for _ in range(workers):
        worker = TileWorker(self._queue, slidepath, tile_size, overlap, limit_bounds, quality, self._Bkg)
        worker.start()
def predict_WSI(slide, training_res, pred_WSI_res, classifier_vaia):
    """Accumulate per-patch predictions into a whole-slide prediction map.

    NOTE(review): this body looks unfinished -- `image_pred` is used below
    but never assigned (the '### prediction' placeholder), so the function
    cannot run end-to-end as written.
    """
    # Bug fix: the original tested `slide is str`, which compares the object
    # against the str type itself and is always False, so a path argument was
    # never opened.  isinstance() is the intended check.
    if isinstance(slide, str):
        slide = openslide.open_slide(slide)
    # NOTE(review): `name` is not defined in this scope; it presumably should
    # be the slide path -- confirm against the caller.
    ROI_para = ROI(name, ref_level=training_res, disk_size=4, thresh=None, black_spots=None, number_of_pixels_max=1000000, verbose=False, marge=0.5, method='grid_etienne')
    # Channel 0 accumulates predictions, channel 1 counts contributions.
    WSI_pred = np.zeros(shape=(slide.level_dimensions[pred_WSI_res][0], slide.level_dimensions[pred_WSI_res][1], 2))
    for para in ROI_para:
        sub_image = slide.read_region((para[0], para[1]), para[4], (para[2], para[3]))
        ### prediction ### image_pred
        to_insert = change_res_np(image_pred)
        x0, y0 = get_X_Y_from_0(slide, para[0], para[1], pred_WSI_res)
        size_x, size_y = get_size(slide, para[2], para[3], training_res, pred_WSI_res)
        WSI_pred[x0:(x0+size_x), y0:(y0+size_y), 0] += to_insert[0:size_x, 0:size_y]  ###we maybe have to invert x and y
        # Bug fix: the contribution counter lives in channel 1; the original
        # incremented channel 0 a second time and left the counter at zero.
        WSI_pred[x0:(x0+size_x), y0:(y0+size_y), 1] += 1
    # NOTE(review): `zeros` selects pixels with a *zero* count; dividing those
    # looks inverted (pixels with nonzero counts are the ones that need
    # normalising) -- confirm intent before relying on this.
    zeros = np.where(WSI_pred[:,:,1]==0)
    WSI_pred[zeros,0] = WSI_pred[zeros,0] / WSI_pred[zeros,1]
    return (WSI_pred[:,:,0])
def run_stainsep(filename,nstains,lamb,output_direc="",background_correction=True): print print "Running stain separation on:",filename level=0 I = openslide.open_slide(filename) xdim,ydim=I.level_dimensions[level] img=np.asarray(I.read_region((0,0),level,(xdim,ydim)))[:,:,:3] print "Fast stain separation is running...." Wi,Hi,Hiv,stains=Faststainsep(I,img,nstains,lamb,level,background_correction) print "\t \t \t \t \t \t Time taken:",elapsed print "Color Basis Matrix:\n",Wi fname=os.path.splitext(os.path.basename(filename))[0] cv2.imwrite(output_direc+fname+"-0_original.png",cv2.cvtColor(img, cv2.COLOR_RGB2BGR)) cv2.imwrite(output_direc+fname+"-1_Hstain.png",cv2.cvtColor(stains[0], cv2.COLOR_RGB2BGR)) cv2.imwrite(output_direc+fname+"-2_Estain.png",cv2.cvtColor(stains[1], cv2.COLOR_RGB2BGR))
def getMask(xmlFile, svsFile, pattern):
    """
    Parses XML File to get mask vertices and returns matrix masks where 1
    indicates the pixel is inside the mask, and 0 indicates outside the mask.

    @param: {string} xmlFile: name of xml file that contains annotation vertices outlining the mask.
    @param: {string} svsFile: name of svs file that contains the slide image.
    @param: {pattern} string: name of the xml labeling

    Returns:
        slide - openslide slide Object
        mask - matrix mask of pattern
    """
    # Parse XML to get vertices of mask
    vertices = parseXML(xmlFile, pattern)
    if not len(vertices[pattern]):
        # No annotation for this pattern: signal with (0, 0).
        return 0, 0
    slide = open_slide(svsFile)
    mask = createMask(slide.level_dimensions, vertices, pattern)
    return slide, mask
def computeEvaluationMask(maskDIR, resolution, level):
    """Computes the evaluation mask.

    Args:
        maskDIR: the directory of the ground truth mask
        resolution: Pixel resolution of the image at level 0
        level: The level at which the evaluation mask is made

    Returns:
        evaluation_mask: labeled connected components of the dilated,
        hole-filled tumor regions.
    """
    slide = openslide.open_slide(maskDIR)
    dims = slide.level_dimensions[level]
    # Bug fix: the original pre-allocated a flat zeros array that was
    # immediately overwritten by read_region; read the region directly.
    pixelarray = np.array(slide.read_region((0, 0), level, dims))
    # Distance (in pixels) from every background pixel to the nearest tumor
    # pixel; channel 0 of the RGBA mask holds the ground truth.
    distance = nd.distance_transform_edt(255 - pixelarray[:, :, 0])
    # 75um is the equivalent size of 5 tumor cells.
    Threshold = 75 / (resolution * pow(2, level) * 2)
    binary = distance < Threshold
    filled_image = nd.morphology.binary_fill_holes(binary)
    evaluation_mask = measure.label(filled_image, connectivity=2)
    return evaluation_mask
def crop(img_id, start, crop_size, type):
    '''
    :param img_id: id of the image with the following format: 01_01_0083
    :param start: the top left coordinate for the patch
    :param crop_size: a (width, height) tuple for each patch
    :param type: mask type; selects the ``./data/<Type>Mask`` directory
    :return: np.array of cropped slide patch, mask patch and top left
             coordinate of the patch
    '''
    start_x, start_y = start
    slide_path = './data/OriginalImage/' + str(img_id) + '.svs'
    # Some slides use an upper-case extension.
    if not os.path.exists(slide_path):
        slide_path = './data/OriginalImage/' + str(img_id) + '.SVS'
    slide = openslide.open_slide(slide_path)
    croped_slide_img = slide.read_region((start_x, start_y), 0, crop_size)
    croped_slide_img = np.array(croped_slide_img)
    mask_path = './data/' + type.capitalize() + 'Mask/' + str(
        img_id) + '_' + type + '.tif'
    mask = io.imread(mask_path)
    # Bug fix: read_region takes crop_size as (width, height); image rows
    # advance by the height (crop_size[1]) and columns by the width
    # (crop_size[0]). The original swapped them, mis-cropping non-square
    # patches.
    croped_mask_img = mask[start_y:start_y + crop_size[1],
                           start_x:start_x + crop_size[0]]
    return (croped_slide_img, croped_mask_img, start)
def process_svs(slide_path):
    """Return the chosen deep-zoom level plus the tiles (and their grid
    coordinates) that pass the tissue-content filter for ``slide_path``."""
    t0 = time.time()
    slide = openslide.open_slide(slide_path)
    _, file_name = os.path.split(slide_path)
    generator = DeepZoomGenerator(slide, tile_size=224, overlap=0,
                                  limit_bounds=True)
    top_level = generator.level_count - 1
    try:
        # Note: ``slide`` (not the generator) carries the objective power.
        mag = int(slide.properties[openslide.PROPERTY_NAME_OBJECTIVE_POWER])
        # Objective power halves each time the level drops by one; the
        # offset normalizes slides scanned at 40x or 20x to the same scale.
        level = top_level - math.floor((mag / 20) / 2)
    except (ValueError, KeyError) as e:
        level = top_level
    # Drop three more levels to attain 2.5x.
    level = level - 3
    # 224: tile_size, 0: overlap kept
    cols, rows = generator.level_tiles[level]
    kept_tiles, kept_grids = [], []
    for col in range(cols):
        print('Finished processing '+ str(col) +'/'+str(cols)+' of slide: '+file_name)
        for row in range(rows):
            tile = np.asarray(generator.get_tile(level, (col, row)))
            # Keep tiles that are at least 75% tissue.
            if keep_tile(tile, tile.shape[0], 0.75):
                kept_tiles.append(tile)
                kept_grids.append((col, row))
    diff_time = time.time() - t0
    print('Time took for processing slide ' + file_name +': ', diff_time)
    return (level, kept_tiles, kept_grids)
def __getitem__(self, idx):
    """Load every slide image under the idx-th indexed directory and return
    quadrant patch stacks, auxiliary patches, per-quadrant attention scores
    and the binary class label.

    Returns:
        (image_list, image_auxiliary_list, image_score_list, label)
        Note: the lists only contain entries for images that pass the
        whiteness filter below and may be empty.
    """
    index_path = os.path.join(self.rootdir, self.image_index_label.iloc[idx, 0])
    self._files_Name = self._list_all_files(index_path)
    label_class = self.image_index_label.iloc[idx, 1]
    image_list = []
    image_score_list = []
    image_auxiliary_list = []
    for file_index in self._files_Name:
        image_path = os.path.join(index_path, file_index)
        slide = openslide.open_slide(image_path)
        level_count = slide.level_count
        [m, n] = slide.dimensions
        # Read the full slide at its lowest resolution level.
        region = np.array(slide.read_region((0, 0), (level_count - 1), (m, n)))
        region = transforms.ToPILImage()(region).convert('RGB')
        region_patch = self.testTransforms(region)
        # Skip mostly-white images: a 3x448x448 tensor of pure white would
        # sum to 3*448*448; require the sum below 85% of that.
        if (int(torch.sum(region_patch)) < int(0.85 * 3 * 448 * 448)):
            # Per-quadrant intensity sums; brighter quadrants (more
            # background) get the smaller attention scores below.
            region_left_top = torch.full((1,), float(torch.sum(region_patch[:, 0:224, 0:224])))
            region_left_bottom = torch.full((1,), float(torch.sum(region_patch[:, 0:224, 224:448])))
            region_right_top = torch.full((1,), float(torch.sum(region_patch[:, 224:448, 0:224])))
            region_right_bottom = torch.full((1,), float(torch.sum(region_patch[:, 224:448, 224:448])))
            region_sum = torch.cat((region_left_top, region_left_bottom,
                                    region_right_top, region_right_bottom), dim=0)
            _, index = torch.sort(region_sum, descending=True)
            image_score = torch.zeros((4,), dtype=torch.float)
            image_score[index[0]] = 0.1
            image_score[index[1]] = 0.1
            image_score[index[2]] = 0.4
            image_score[index[3]] = 0.4
            patch = torch.stack([region_patch[:, 0:224, 0:224],
                                 region_patch[:, 0:224, 224:448],
                                 region_patch[:, 224:448, 0:224],
                                 region_patch[:, 224:448, 224:448]], 0)
            discriminate_patch = self.testTransformsAuxiliary(region)
            image_list.append(patch)
            image_score_list.append(image_score)
            image_auxiliary_list.append(discriminate_patch)
    if label_class == 'A':
        label = torch.ones((1,), dtype=torch.uint8)
    elif label_class == 'B':
        label = torch.zeros((1,), dtype=torch.uint8)
    else:
        # Bug fix: the original left ``label`` unbound for any other class,
        # raising UnboundLocalError at the return; fail with a clear error.
        raise ValueError("unknown label class: %r" % (label_class,))
    return image_list, image_auxiliary_list, image_score_list, label
def GenerateMask(wsi_path, trained_model_path, patch_size, magnification,
                 min_color_threshold, max_color_threshold):
    """Tile a whole-slide image, classify each tissue tile with the trained
    model and write a binary mask (255 = invasive class) to binary_mask.png.
    Tiles are staged on disk under ./Intermediate_Tiles/."""
    slide = open_slide(wsi_path)
    tiles = DeepZoomGenerator(slide, tile_size=patch_size, overlap=0,
                              limit_bounds=True)
    # Map the requested magnification onto a deep-zoom level (40x baseline).
    level = tiles.level_count - int(math.log((40 / magnification), 2)) - 1
    x_tiles, y_tiles = tiles.level_tiles[level]
    model = LoadModel(trained_model_path)
    binary_mask = np.zeros((y_tiles, x_tiles))
    for y in range(y_tiles):
        for x in range(x_tiles):
            new_tile = np.array(tiles.get_tile(level, (x, y)), dtype=np.uint8)
            avg = np.average(new_tile)
            # Skip tiles outside the tissue color band or with a partial shape.
            if not (min_color_threshold <= avg <= max_color_threshold):
                continue
            if np.shape(new_tile) != (patch_size, patch_size, 3):
                continue
            if not os.path.exists('Intermediate_Tiles'):
                os.makedirs('Intermediate_Tiles')
            filename = "./Intermediate_Tiles/" + str(x) + "_" + str(y) + ".png"
            scipy.misc.imsave(filename, new_tile)
            test_image = image.load_img(filename, target_size=(448, 448))
            test_image = image.img_to_array(test_image)
            test_image = np.expand_dims(test_image, axis=0)
            test_image = preprocess_input(test_image)
            prob = model.predict(test_image, steps=1)
            pred = prob.argmax(axis=1)[0]
            if (pred == 0):  # If invasive class
                binary_mask[y][x] = 255
    scipy.misc.imsave('binary_mask.png', binary_mask)
def __init__(self, WSI_path, Xml_path, Dimension_path, Mask_truth_path='',
             Heatmap_path=''):
    """Load a WSI plus its optional ground-truth mask, heatmap and bounding
    box dimensions, and render a downsampled thumbnail.

    Args:
        WSI_path: path to the whole-slide image.
        Xml_path: path to the annotation XML (stored, not read here).
        Dimension_path: .npy file with the bounding-box array.
        Mask_truth_path: optional path to a ground-truth mask image.
        Heatmap_path: optional path to a .npy heatmap.
    """
    self.WSI_path = WSI_path
    self.Xml_path = Xml_path
    self.Mask_truth_path = Mask_truth_path
    self.Heatmap_path = Heatmap_path
    self.Dimension_path = Dimension_path
    # load in the files
    self.wsi_image = openslide.open_slide(self.WSI_path)
    if self.Mask_truth_path:
        self.mask_truth = cv2.imread(self.Mask_truth_path)
    if self.Heatmap_path:
        self.heat_map = np.load(self.Heatmap_path)
    self.bbox = np.load(self.Dimension_path)
    # Read the WSI thumbnail downsampled by 2**slide_level.
    # NOTE(review): self.slide_level is not set here — presumably a class
    # attribute; verify it exists before construction.
    self.dims = self.wsi_image.dimensions
    self.wsi_image_thumbnail = np.array(
        self.wsi_image.read_region(
            (0, 0), self.slide_level,
            (int(self.dims[0] / math.pow(2, self.slide_level)),
             int(self.dims[1] / math.pow(2, self.slide_level)))))
    # Drop the alpha channel.
    self.wsi_image_thumbnail = self.wsi_image_thumbnail[:, :, :3].astype(
        'uint8')
    # Bug fix: the original always executed this line, crashing with
    # AttributeError when no ground-truth mask path was supplied.
    if self.Mask_truth_path:
        self.mask_truth = self.mask_truth[:, :, 0].astype('uint8')
class AllSlide(AbstractSlide):
    """Uniform slide wrapper dispatching to openslide (.svs) or kfbslide
    (.kfb) based on the file extension."""

    def __init__(self, filename):
        """Open ``filename`` with the backend matching its extension.

        Raises:
            ValueError: if the extension is neither .svs nor .kfb.
        """
        super(AllSlide, self).__init__()
        self.filename = filename
        print(filename)  # bug fix: was a Python-2 print statement
        if filename.endswith('.svs'):
            self._osr = openslide.open_slide(filename)
        elif filename.endswith('.kfb'):
            self._osr = kfbslide.KfbSlide(filename)
        else:
            # Bug fix: the original called exit(0), terminating the whole
            # process with a *success* status on an unsupported format.
            raise ValueError(
                'not support ' + filename.split('.')[-1] + '!')

    @property
    def level_count(self):
        # Number of pyramid levels in the underlying slide.
        return self._osr.level_count

    @property
    def level_dimensions(self):
        # (width, height) per pyramid level of the underlying slide.
        return self._osr.level_dimensions

    def read_region(self, location, level, size):
        # Delegate to the backend, dropping alpha via RGB conversion.
        return self._osr.read_region(location, level, size).convert('RGB')
def get(self, path):
    """Return a DeepZoomGenerator for ``path``, serving from the LRU cache
    when possible and caching newly opened slides (with their mpp)."""
    with self._lock:
        hit = self._cache.pop(path, None)
        if hit is not None:
            # Re-insert so the entry becomes the most recently used.
            self._cache[path] = hit
            return hit
    osr = open_slide(path)
    slide = DeepZoomGenerator(osr)
    try:
        mpp_x = osr.properties[openslide.PROPERTY_NAME_MPP_X]
        mpp_y = osr.properties[openslide.PROPERTY_NAME_MPP_Y]
        slide.mpp = (float(mpp_x) + float(mpp_y)) / 2
    except (KeyError, ValueError):
        # Slide does not expose microns-per-pixel metadata.
        slide.mpp = 0
    with self._lock:
        # Another thread may have inserted the same path meanwhile.
        if path not in self._cache:
            if len(self._cache) == self.cache_size:
                # Evict the least recently used entry.
                self._cache.popitem(last=False)
            self._cache[path] = slide
    return slide
def get_list_of_random_points(self):
    """Scan candidate coordinates of the slide and collect up to
    ``self.random_extract`` tile locations that contain tissue.

    Returns the accepted points as an (N, 2) array, or the full candidate
    list when fewer than ``random_extract`` tissue tiles are found."""
    _slide = open_slide(self._slidepath)
    list_points = self.get_coordinates_as_list(_slide.dimensions)
    accepted = []
    for point in list_points:
        loc = (point[0], point[1])
        tile = _slide.read_region(loc, self._level, self._tile_size)
        if check_tissue_region(tile) == True:
            accepted.append(np.array([point[0], point[1]]))
            # Stop as soon as the requested number of tissue tiles is found.
            if len(accepted) == self.random_extract:
                return np.array(accepted)
    return list_points
def preprocessWorker(self):
    """Worker loop: pull tile jobs from ``preprocessQueue``, read the padded
    region from the slide, convert to float RGB and push the result onto
    ``preprocessorOutQueue``. Runs forever; throttles when the output queue
    holds 500+ items."""
    while (True):
        # Backpressure: only produce while the consumer queue is below 500.
        if self.preprocessorOutQueue.qsize() < 500:
            (tile_x, tile_y, filename, coordinates,
             tile_current) = self.preprocessQueue.get()
            sl = openslide.open_slide(filename)
            # coordinates is (x0, y0, x1, y1); the crop is expanded by
            # ``margin`` on every side.
            # NOTE(review): ``margin`` is not defined in this method —
            # presumably a module-level global; verify.
            tn = sl.read_region(
                location=(int(coordinates[0] - margin),
                          int(coordinates[1] - margin)),
                level=0,
                size=(int(coordinates[2] - coordinates[0] + 2 * margin),
                      int(coordinates[3] - coordinates[1] + 2 * margin)))
            # NOTE(review): BGRA2RGB followed by a [::-1] channel flip and
            # then BGR2RGB performs two reversals plus a conversion — the
            # net channel order should be confirmed against the model's
            # expected input.
            X_test = np.float32(
                cv2.cvtColor(np.array(tn), cv2.COLOR_BGRA2RGB))[:, :, ::-1]
            X_test = cv2.cvtColor(X_test, cv2.COLOR_BGR2RGB)
            #X_test = np.reshape(X_test, newshape=[1,512,512,3])
            self.preprocessorOutQueue.put(
                (X_test, tile_x, tile_y, coordinates, tile_current))
        else:
            # Output queue full — back off briefly.
            time.sleep(0.1)
def sample_tiles_from_image(tile_size, tile_number, image_path):
    """Sample up to ``tile_number`` random non-background tiles of size
    ``tile_size`` x ``tile_size`` from the slide at ``image_path``.

    Tiles with mean pixel intensity > 230 are treated as background and
    rejected. Sampling aborts after ``tile_number * 2`` seconds.

    Returns:
        list of 299x299x3 float32 arrays (resized tiles); possibly short or
        empty on timeout or error.
    """
    sampled_tiles = []
    try:
        slide = open_slide(os.path.join(GTEx_directory, image_path))
        tiles = DeepZoomGenerator(slide, tile_size=tile_size, overlap=0,
                                  limit_bounds=False)
        # NOTE(review): ``tile_level_index`` is not defined here —
        # presumably a module-level constant; verify.
        tile_level = range(len(tiles.level_tiles))[tile_level_index]
        tile_dims = tiles.level_tiles[tile_level_index]
        count = 0
        t = time.time()
        # Expect sampling rate to be at least 1 tile p/s. If time taken is
        # greater than this, move to next image.
        while (count < tile_number and (time.time() - t < tile_number * 2)):
            # Retrieve a uniformly random tile from the chosen level.
            tile = tiles.get_tile(tile_level, (np.random.randint(
                tile_dims[0]), np.random.randint(tile_dims[1])))
            image = np.array(tile.getdata(),
                             dtype=np.float32).reshape(tile.size[0],
                                                       tile.size[1], 3)
            # Mean intensity > 230 means mostly white background — skip.
            mean_pixel = np.mean(image.flatten())
            image = imresize(image, (299, 299))
            if mean_pixel > 230:
                continue
            elif mean_pixel <= 230:
                sampled_tiles.append(image)
                count += 1
        if (time.time() - t > tile_number * 2):
            print("Timeout")
    except Exception as e:
        # Bug fix: the original printed only "Error", discarding the
        # exception detail and making failures undiagnosable.
        print("Error:", repr(e))
    return sampled_tiles
def gen_imgs(samples, batch_size, shuffle=False):
    """This function returns a generator that yields tensors of
    X: float - [batch_size, 224, 224, 3]
    input: samples: samples dataframe (must have slide_path and tile_loc columns)
    input: batch_size: The number of images to return for each pull
    output: yield X_train: generator of X tensors
    option: shuffle: bool, if True shuffle samples each epoch
    """
    num_samples = len(samples)
    print(num_samples)
    # Bug fix: the original referenced an undefined ``batch_samples`` and
    # never used ``batch_size``/``shuffle``; reconstruct the standard
    # epoch/batch loop around the existing tile-reading body.
    while True:
        if shuffle:
            samples = samples.sample(frac=1)  # reshuffle each epoch
        for offset in range(0, num_samples, batch_size):
            batch_samples = samples.iloc[offset:offset + batch_size]
            images = []
            for _, batch_sample in batch_samples.iterrows():
                with openslide.open_slide(batch_sample.slide_path) as slide:
                    tiles = DeepZoomGenerator(slide, tile_size=224, overlap=0,
                                              limit_bounds=False)
                    print(batch_sample.tile_loc[::], batch_sample.tile_loc[::-1])
                    # tile_loc is stored (row, col); get_tile wants (col, row).
                    img = tiles.get_tile(tiles.level_count - 1,
                                         batch_sample.tile_loc[::-1])
                images.append(np.array(img))
            X_train = np.array(images)
            yield X_train
def __init__(self, slide_loc, set_hdf5_file, normalizer=None, background=0.2, size=255, reject_rate=0.1, ignore_repeat=False):
    """
    Args:
    - slide_loc: A .svs file of the H&E stained slides
    - normalizer: A tile normalizer object
    - background: The maximum precentage of background allowed for a saved tile
    - size: The width and hight of the tiles at each zoom level
    - reject_rate: The precentage of rejected tiles to save
    - ignore_repeat: Automatically overwrte repeated files in the dataset
    """
    self.normalizer = normalizer
    self.background = background
    self.size = size
    self.reject_rate = reject_rate
    self.slide = open_slide(slide_loc)
    self.dz = DeepZoomGenerator(self.slide, size, 0)
    # File name without its final extension.
    self.file_name = ".".join(os.path.basename(slide_loc).split(".")[:-1])
    self.tiles = {}
    self.reject_tiles = {}
    proceed = "y"
    already_present = self.file_name in set_hdf5_file
    if already_present:
        # Ask before clobbering unless the caller opted out.
        if not ignore_repeat:
            print(f"{self.file_name} is already in the dataset. Do you wish to overwrite these tiles? [y/n]")
            proceed = input()
        if proceed == "y":
            del set_hdf5_file[self.file_name]
    if proceed == "y":
        self.h5_group = set_hdf5_file.create_group(self.file_name)
        self._save_tiles()
        print()
def AddDataset(self, slidePath, maskPath):
    """Read patch coordinates from the slide's VGG csv file and sort each
    coordinate into the negative or positive coordinate list based on the
    mask patch around it.

    Returns:
        True when the csv file was found and processed, False otherwise.
    """
    slideFileName = slidePath.split('/')[-1]
    dataName = slideFileName.split('.tif')[0]
    #slide = openslide.open_slide(slidePath)
    mask = openslide.open_slide(maskPath)
    window_H = window_W = self._win_size
    windowShape = (window_H, window_W)
    coorPath = self._VGGcsvDir + "/" + dataName + ".csv"
    if os.path.exists(coorPath):
        # Bug fix: the original never closed the file (and shadowed the
        # ``file`` builtin); use a context manager.
        with open(coorPath, 'r') as coor_file:
            coor_lines = coor_file.readlines()
        # Bug fix: py2-only ``xrange`` index loop replaced by direct
        # iteration; ``/`` replaced by ``//`` so indices stay ints on py3.
        for line in coor_lines:
            elems = line.rstrip().split(',')
            #labelCoor = int(elems[0])
            WCoor = int(elems[1])
            HCoor = int(elems[2])
            maskTile = self._GetPatch(mask, WCoor - windowShape[0] // 2,
                                      HCoor - windowShape[1] // 2,
                                      windowShape, self._workingLevel)
            r2, g2, b2, a2 = cv2.split(maskTile)
            maskTile = cv2.merge([r2])
            if maskTile.max() < 100:
                # No tumor signal anywhere in the patch: negative sample.
                self._neg_coor_list.append([WCoor, HCoor, 0, slidePath, maskPath])
            elif maskTile[maskTile.shape[0] // 2][maskTile.shape[1] // 2] > 100:
                # Tumor at the patch center: positive sample.
                self._pos_coor_list.append([WCoor, HCoor, 1, slidePath, maskPath])
        return True
    else:
        print("Failure to find the VGGcsv file: " + coorPath)
        return False
def open_slide(slide_num, folder, training):
    """
    Open a whole-slide image, given an image number.

    Args:
        slide_num: Slide image number as an integer.
        folder: Directory in which the slides folder is stored, as a string.
            This should contain either a `training_image_data` folder with
            images in the format `TUPAC-TR-###.svs`, or a
            `testing_image_data` folder with images in the format
            `TUPAC-TE-###.svs`.
        training: Boolean for training or testing datasets.

    Returns:
        An OpenSlide object representing a whole-slide image.
    """
    # Training and testing slides differ only in folder and name prefix.
    subdir = "training_image_data" if training else "testing_image_data"
    prefix = "TUPAC-TR-" if training else "TUPAC-TE-"
    filename = os.path.join(
        folder, subdir, "{}{}.svs".format(prefix, str(slide_num).zfill(3)))
    return openslide.open_slide(filename)
def img_mask_check(img_label, img_path, mask_path, data_df):
    """
    Displays the actual image side-by-side with the mask in a labeled subplot
    Inputs:
        img_label: id associated with a sample from the training set ID list
        img_path: path to the directory where the images are located
        mask_path: path to the directory where the masks are located
        data_df: dataframe holding the training information with image IDs
            and associated scores
    Output:
        None
    """
    test_im_path = os.path.join(img_path, f'{img_label}.tiff')
    test_mask_path = os.path.join(mask_path, f'{img_label}_mask.tiff')
    print('Test image file: ', test_im_path)
    # Report the pyramid dimensions, then release the slide handle.
    img_check = openslide.open_slide(test_im_path)
    dims = img_check.level_dimensions
    img_check.close()
    print('\nFull-size image dimensions: \t\t', dims[0],
          '\nOne-fourth size image dimensions: \t', dims[1],
          '\nOne-sixteenth size image dimensions: \t', dims[2],
          '\n\nImage preview:')
    # Preview at the smallest level (2) for speed.
    test_img = open_slide_level(test_im_path, level=2)
    test_mask = open_slide_level(test_mask_path, level=2)
    # Discrete colormap: one color per mask label 0-5.
    cmap = matplotlib.colors.ListedColormap(
        ['black', 'gray', 'green', 'yellow', 'orange', 'red'])
    f, ax = plt.subplots(1, 2, figsize=(10, 6))
    ax[0].imshow(test_img)
    ax[1].imshow(test_mask[:, :, 0], cmap=cmap, interpolation='nearest',
                 vmin=0, vmax=5)
    data_provider, isup_grade, gleason_score = data_df.loc[img_label]
    plt.suptitle(
        f"ID: {img_label}\nSource: {data_provider} ISUP: {isup_grade} Gleason: {gleason_score}"
    )
def openslide_test_file(full_file_path, file_type='svs'):
    """This will use the openslide bindings to get the width, height and
    filesize for an image or return an Error otherwise.

    Returns:
        (ok, width, height, filesize, orig_resolution, slide_name, md5,
        properties) — the last seven are None when ok is False.
    """
    width = height = filesize = orig_resolution = md5 = None
    ##TODO: Look into adding a file type by looking at the extension?
    extension = os.path.splitext(full_file_path)[1]
    if extension not in ['.ndpi', '.svs']:
        # Should just return gracefully?
        print(extension)
        return (False, None, None, None, None, None, None, None)
    try:
        im = openslide.open_slide(full_file_path)
        (width, height) = im.dimensions
        filesize = os.path.getsize(full_file_path)
        if (file_type == 'svs'):
            # Bug fix: the original bare ``except:`` could mask real errors;
            # only a missing key is expected here.
            try:
                orig_resolution = im.properties['aperio.AppMag']
            except KeyError:
                orig_resolution = 'UnkSVSReadError'
        elif (file_type == 'ndpi'):
            orig_resolution = 40
        #md5 = md5Checksum(full_file_path)
        md5 = None
        slide_name = os.path.basename(full_file_path)
        sld_properties = im.properties
        return (True, width, height, filesize, orig_resolution, slide_name,
                md5, sld_properties)
    except OpenSlideError as e:
        # Bug fix: py2 ``except X, e`` / print-statement syntax, and the
        # failure path fell off the end returning a bare None — callers that
        # unpack the 8-tuple would crash. Return the documented tuple.
        print("Openslide returned an error", full_file_path)
        print("Verify failed with:", repr(e.args), file=sys.stderr)
        return (False, None, None, None, None, None, None, None)
def applyMask(filename, level=9):
    """Build a binary tissue mask from a slide thumbnail.

    Generalized: ``level`` (default 9, preserving the original hard-coded
    value) selects the pyramid level used for the thumbnail, so slides with
    fewer levels can be handled by callers.

    Args:
        filename: path to the whole-slide image.
        level: openslide pyramid level for the thumbnail.

    Returns:
        uint8 array with 255 for tissue pixels and 0 for background.
    """
    # Open slide using OpenSlide; save RGB and HLS views of the thumbnail.
    wsiOG = openslide.open_slide(filename)
    wsiThmbnl = wsiOG.read_region((0, 0), level, wsiOG.level_dimensions[level])
    wsiThmbnl = cv2.cvtColor(np.asarray(wsiThmbnl), cv2.COLOR_RGBA2RGB)
    wsiHLS = cv2.cvtColor(wsiThmbnl, cv2.COLOR_RGB2HLS)
    wsiThmbnl = wsiThmbnl[:, :, 1]  # keep the Green channel of RGB thumbnail
    hlsMask = wsiHLS[:, :, 0]  # keep the Hue channel of HLS thumbnail
    # Everything with Green < 220 and Hue > 130 is tissue (everything not
    # fitting these criteria is non-tissue).
    gMask = wsiThmbnl < 220
    hMask = hlsMask > 130
    hlsMask = gMask & hMask
    # Turn mask from boolean into black and white.
    hlsMask = np.uint8(hlsMask)
    hlsMask[hlsMask == 1] = 255
    # Morphological closing smooths the mask and removes inconsistencies.
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (24, 24))
    hlsMask = cv2.morphologyEx(hlsMask, cv2.MORPH_CLOSE, kernel)
    return hlsMask
def overlay_mask_total(data_path, label_path, im_name):
    """Reassemble the whole slide image from deep-zoom level-14 tiles and
    overlay the precomputed label mask on it.

    Args:
        data_path: directory containing ``<im_name>.svs``.
        label_path: directory containing ``<im_name>.npy`` label mask.
        im_name: base name shared by slide and label files.

    Returns:
        The overlaid PIL image produced by ``overlay_mask_tile``.
    """
    label = np.load(os.path.join(label_path, im_name + '.npy'))
    wsi = openslide.open_slide(os.path.join(data_path, im_name + '.svs'))
    dzi = openslide.deepzoom.DeepZoomGenerator(wsi, tile_size=512, overlap=0,
                                               limit_bounds=False)
    # extract tiles at deep-zoom level 14
    x_tile = dzi.level_tiles[14][0]  # column
    y_tile = dzi.level_tiles[14][1]  # row
    num_tiles = x_tile * y_tile
    # go through each column, stacking tiles vertically, then stitch the
    # columns together horizontally
    for i in range(x_tile):
        # go through each row
        for j in range(y_tile):
            tile = dzi.get_tile(14, (i, j))
            if j == 0:
                col_im = tile
            else:
                col_im = pil_concat_v(col_im, tile)
        if i == 0:
            total_im = col_im
        else:
            total_im = pil_concat_h(total_im, col_im)
    # Reconcile a one-row size mismatch between label and stitched image.
    # NOTE(review): this only removes a single row (index 1) and does
    # nothing when the label is *smaller* — confirm that mismatches never
    # exceed one row.
    if label.shape[0] != total_im.size[1]:
        if label.shape[0] > total_im.size[1]:
            label = np.delete(label, 1, 0)
    overlay_im = overlay_mask_tile(total_im, label)
    return overlay_im
def Cut_ROI(name, ref_level=4, disk_size=4, Mask_=False):
    """Segment tissue blobs on a low-resolution view of the slide ``name``
    and cut each blob's bounding box into patches via ``Best_Slicer_rec``,
    writing results under a folder named after the slide.

    Args:
        name: path (or bare file name) of the slide.
        ref_level: level passed through to ``Best_Slicer_rec``.
        disk_size: morphology disk size for ``Mask_ROI_cl``.
        Mask_: when True, derive the companion mask .tif path from ``name``.
    """
    # Output folder = slide file name without extension.
    if '/' in name:
        cut = name.split('/')[-1]
        folder = cut.split('.')[0]
    else:
        folder = name.split(".")[0]
    if Mask_:
        # Rebuild ``<root>/<slide>_Mask/<slide>_Mask.tif`` from the path.
        pieces = name.split('/')[:-2]
        folder_mask = folder + "_Mask"
        Mask_adresse = ""
        for i in range(len(pieces)):
            Mask_adresse += pieces[i] + "/"
        Mask_adresse += folder_mask.split("_")[0] + "_Mask" + "/" + folder_mask + ".tif"
    else:
        Mask_adresse = None
    if not os.path.exists(folder):
        os.makedirs(folder)
    slide = openslide.open_slide(name)
    lowest_res = slide.level_count - 2
    # Green channel of the low-resolution view, thresholded into tissue blobs.
    s = np.array(slide.read_region((0, 0), lowest_res,
                                   slide.level_dimensions[lowest_res]))[:, :, 1]
    binary = Mask_ROI_cl(s, disk_size, 220)
    stru = [[1, 1, 1], [1, 1, 1], [1, 1, 1]]
    blobs, number_of_blobs = ndimage.label(binary, structure=stru)
    # Bug fix: ndimage.label numbers blobs 1..number_of_blobs inclusive;
    # the original range(1, number_of_blobs) silently skipped the last blob.
    for i in range(1, number_of_blobs + 1):
        y, x = np.where(blobs == i)
        x_0 = min(x)
        y_0 = min(y)
        w = max(x) - x_0
        h = max(y) - y_0
        new_x, new_y = get_X_Y(slide, x_0, y_0, lowest_res)
        Best_Slicer_rec(slide, lowest_res, new_x, new_y, w, h, -1,
                        "./" + folder + "/" + folder, ref_level, Mask_adresse)
def display_tissue_feature_gradient(feature, tissue):
    """Return slide thumbnails ordered along the gradient of one learned
    feature for the given tissue.

    Args:
        feature: 1-based index of the feature column.
        tissue: tissue name (subdirectory under data/raw).

    Returns:
        list of (thumbnail, feature_value) tuples, 13 evenly spaced samples
        along the sorted feature values.
    """
    from openslide import open_slide
    features, expression, donorIDs, transcriptIDs, technical_factors, technical_headers, technical_idx = extract_final_layer_data(tissue, 'retrained', 'mean', '256')
    sorted_idx = np.argsort(features[:, feature - 1])
    donorIDs_ordered = donorIDs[sorted_idx]
    # 13 donors evenly spaced along the sorted feature axis.
    gradient_IDs = [donorIDs_ordered[20 * i] for i in range(13)]
    tissue_filepath = os.path.join(GTEx_directory, 'data', 'raw', tissue)
    GTExIDs = os.listdir(tissue_filepath)
    donor_of_file = [x.split('.')[0].split('-')[1] for x in GTExIDs]
    ordered_GTExIDs = np.array(GTExIDs)[[donor_of_file.index(x.decode('utf-8')) for x in donorIDs_ordered]]
    thumbnails = []
    pbar = tqdm(total=len(ordered_GTExIDs))
    for (k, ID) in enumerate(ordered_GTExIDs):
        # Bug fix: the original hard-coded the 'Lung' directory here even
        # though the file list came from ``tissue`` — use the same path.
        image_filepath = os.path.join(tissue_filepath, ID)
        slide = open_slide(image_filepath)
        thumbnail = slide.get_thumbnail(size=(400, 400))
        feature_value = features[:, feature - 1][sorted_idx[k]]
        thumbnails.append((thumbnail, feature_value))
        pbar.update(1)
    return thumbnails
def _preprocessing_tiff(tiff_dir, log):
    """Threshold the slide background away on a downsampled view, subsample
    candidate patch positions on a grid, and return them as a sparse matrix.

    Returns:
        (sparse_s_bin, num_patches, global_num_threads)
    """
    slide = openslide.open_slide(tiff_dir)
    low_dim_level = slide.get_best_level_for_downsample(FLAGS.down_sample_rate)
    assert low_dim_level == math.log(FLAGS.down_sample_rate, 2)
    low_dim_size = slide.level_dimensions[low_dim_level]
    low_dim_img = slide.read_region((0, 0), low_dim_level, low_dim_size)
    # Saturation channel of the HSV view separates tissue from background.
    low_s = low_dim_img.convert("HSV").split()[1]
    # Otsu threshold on the saturation histogram.
    low_s_thre = filters.threshold_otsu(np.array(low_s))
    low_s_bin = (np.array(low_s) > low_s_thre).transpose()
    # Sample rate: 512 — keep one candidate per 4x4 cell of the grid.
    width, height = low_s_bin.shape[0], low_s_bin.shape[1]
    sample_mat = np.zeros((width, height), dtype=np.bool)
    sample_mat[0:width:4, 0:height:4] = True
    low_s_bin = np.logical_and(low_s_bin, sample_mat)
    num_patches = np.sum(low_s_bin)
    assert num_patches > 0
    sparse_s_bin = coo_matrix(low_s_bin)
    log.writelines("PatchNum:" + str(np.sum(low_s_bin)) + '\n')
    assert num_patches == len(sparse_s_bin.data)
    # Set the number of worker threads from the flags.
    global_num_threads = FLAGS.global_num_threads
    log.writelines("ThreadNum:" + str(global_num_threads) + '\n')
    return sparse_s_bin, num_patches, global_num_threads
def patch_slides(slide_files, output_dir, patch_size, magnification,
                 white_pixel_thresh=20, sampling=1, white_max_value=220):
    """Patch every slide in ``slide_files`` via ``patch_slide`` and return a
    per-slide summary DataFrame (file, total/saved patch counts, ratio)."""
    if isinstance(slide_files, pd.Series):
        slide_files = slide_files.values
    rows = []
    for slide_file in tqdm(slide_files):
        wsi = openslide.open_slide(slide_file)
        n_patches, n_valid_patches = patch_slide(
            wsi, output_dir, patch_size, magnification,
            white_pixel_thresh, sampling, white_max_value)
        rows.append({
            'file': slide_file.rsplit('/', 1)[-1],
            'total_patches': n_patches,
            'saved_patches': n_valid_patches,
            'perc_saved_patches': round(n_valid_patches / n_patches, 2),
        })
    columns = ['file', 'total_patches', 'saved_patches', 'perc_saved_patches']
    return pd.DataFrame(rows)[columns]
def ApplyToSlideWrite(slide, table, f, outputfilename=None):
    """Apply ``f`` to every patch of the slide listed in ``table`` and write
    each result as a TIFF named ``<x>_<y>.tiff``.

    Args:
        slide: string path to the slide file.
        table: iterable of patch parameters, each with 5 attributes
            (x, y, w, h, res).
        f: function applied to each RGB patch array.
        outputfilename: optional output directory; defaults to a
            ``temp_build`` directory next to the slide.

    Returns:
        The output directory path.
    """
    input_slide = openslide.open_slide(slide)
    # Default output: <slide directory>/temp_build.
    parent_parts = slide.split('/')[0:-1]
    parent = "/" + os.path.join(*parent_parts)
    if outputfilename is None:
        outputfilename = os.path.join(parent, "temp_build")
    CheckOrCreate(outputfilename)
    dim1, dim2 = input_slide.dimensions
    #output_slide = Vips.Image.black(dim1, dim2)
    pbar = ProgressBar()
    for param in pbar(table):
        patch = np.array(GetImage(input_slide, param))[:, :, :3]
        outfile = os.path.join(outputfilename,
                               "{}_{}.tiff".format(param[0], param[1]))
        imsave(outfile, f(patch))
    return outputfilename
def load_slide():
    """Open the slide configured in DEEPZOOM_SLIDE and register DeepZoom
    generators for it and all of its associated images on the Flask app."""
    slidefile = app.config['DEEPZOOM_SLIDE']
    if slidefile is None:
        raise ValueError('No slide file specified')
    # DeepZoomGenerator options pulled from the app configuration.
    opts = {
        'tile_size': app.config['DEEPZOOM_TILE_SIZE'],
        'overlap': app.config['DEEPZOOM_OVERLAP'],
        'limit_bounds': app.config['DEEPZOOM_LIMIT_BOUNDS'],
    }
    slide = open_slide(slidefile)
    app.slides = {SLIDE_NAME: DeepZoomGenerator(slide, **opts)}
    app.associated_images = []
    app.slide_properties = slide.properties
    # Associated images (label, macro, ...) are served under slugified names.
    for name, image in slide.associated_images.items():
        app.associated_images.append(name)
        app.slides[slugify(name)] = DeepZoomGenerator(ImageSlide(image), **opts)
    try:
        mpp_x = slide.properties[openslide.PROPERTY_NAME_MPP_X]
        mpp_y = slide.properties[openslide.PROPERTY_NAME_MPP_Y]
        app.slide_mpp = (float(mpp_x) + float(mpp_y)) / 2
    except (KeyError, ValueError):
        # No microns-per-pixel metadata available.
        app.slide_mpp = 0
def processWholeSlide(self, job: SlideRunnerPlugin.pluginJob):
    """Place one (or N) High-Power Field rectangle annotation(s) centered on
    the job's current view coordinates and refresh the annotation display.

    Uses job.configuration[2] as the HPF area in mm^2 and
    job.configuration[1] as the number of HPFs.
    """
    filename = job.slideFilename
    self.slide = openslide.open_slide(filename)
    # 1 HPF = 0.237 mm^2
    A = job.configuration[2]  # mm^2
    # Split the area into a 4:3 rectangle: W*H = A with W/H = 4/3.
    W_hpf_microns = np.sqrt(A * 4 / 3) * 1000  # in microns
    H_hpf_microns = np.sqrt(A * 3 / 4) * 1000  # in microns
    micronsPerPixel = self.slide.properties[openslide.PROPERTY_NAME_MPP_X]
    # Scale linear dimensions by sqrt(N) so the area covers N HPFs.
    W_hpf = int(W_hpf_microns / float(micronsPerPixel)) * np.sqrt(
        float(int(job.configuration[1])))
    H_hpf = int(H_hpf_microns / float(micronsPerPixel)) * np.sqrt(
        float(int(job.configuration[1])))
    # Center of the current view: coordinates are (x, y, width, height).
    center = (int((job.coordinates[0] + 0.5 * job.coordinates[2])),
              int((job.coordinates[1] + 0.5 * job.coordinates[3])))
    self.annos = list()
    if (int(job.configuration[1]) == 1):
        myanno = annotations.rectangularAnnotation(0, center[0] - W_hpf / 2,
                                                   center[1] - H_hpf / 2,
                                                   center[0] + W_hpf / 2,
                                                   center[1] + H_hpf / 2,
                                                   'High-Power Field')
    else:
        myanno = annotations.rectangularAnnotation(
            0, center[0] - W_hpf / 2, center[1] - H_hpf / 2,
            center[0] + W_hpf / 2, center[1] + H_hpf / 2,
            '%d High-Power Fields' % int(job.configuration[1]))
    self.annos.append(myanno)
    self.updateAnnotations()
def main(slide_path="/home/bob/Downloads/CMU-1.ndpi",
         tile_dir="/home/bob/Documents/openSlide/CMU-1_files/16",
         dest_dir="/home/bob/Documents/data/1/",
         num_tiles=150):
    """Open a slide and copy its first ``num_tiles`` row-1 JPEG tiles into
    ``dest_dir``.

    Generalized: the original hard-coded all paths and the tile count; the
    defaults preserve the old behavior exactly.

    Args:
        slide_path: slide file to open (opened for validation; the handle
            is otherwise unused, as in the original).
        tile_dir: directory containing tiles named ``1_<i>.jpeg``.
        dest_dir: destination directory for the copies.
        num_tiles: number of tiles to copy.
    """
    slide = op.open_slide(slide_path)
    for i in range(num_tiles):
        src = tile_dir + "/1_" + str(i) + ".jpeg"
        # Bug fix: the original built a shell command string with
        # shell=True — fragile with special characters in paths and
        # injection-prone. Pass an argument list instead; like the
        # original, a failed copy is reported by cp but does not abort.
        sp.call(["cp", "-r", src, dest_dir])