def worker(img_name, desc, wnd_size, l0_model):
    """Build the level-1 bag of code-block histograms for one image.

    The image is scanned with non-overlapping "large" windows of size
    ``wnd_size x wnd_size``.  Each large window is split into
    non-overlapping level-0 windows, a local descriptor is computed for
    every level-0 window, the descriptors are assigned to code blocks by
    the level-0 codebook and the relative frequency of each code block
    is stored as the histogram of the large window.

    The result is also persisted through ``ModelPersistence`` in
    ``<image name without extension>_bag_l1.pkl`` under the key
    ``'bag_l1'`` (a dict with ``'hist_l0'`` and ``'regs'``).

    :param img_name: string
        Path of the image to process; the image is assumed to already
        contain the H-plane (no colour pre-processing is done here).
    :param desc: object
        Local descriptor; only its ``compute(patch)`` method is used.
    :param wnd_size: int
        Side of the (square) large window.
    :param l0_model: dict
        Level-0 model with the keys ``'window_size'`` (side of the
        level-0 window) and ``'codebook'`` (object exposing
        ``predict()`` and ``cluster_centers_``).
    :return: list or None
        The list of histograms (one per retained large window), or
        None if the image could not be read.
    """
    import os  # local import: only needed to derive the output file name

    try:
        im_h = imread(img_name)
    except IOError:
        return None

    print("...start on", img_name)

    l0_size = l0_model['window_size']
    codebook = l0_model['codebook']
    n_codes = codebook.cluster_centers_.shape[0]

    bag = []
    wnd = []
    itw1 = sliding_window(im_h.shape, (wnd_size, wnd_size),
                          start=(0, 0), step=(wnd_size, wnd_size))
    for w1 in itw1:
        wnd1 = im_h[w1[0]:w1[1], w1[2]:w1[3]]
        if wnd1.sum() < wnd_size**2 / 100:
            continue  # not enough non-zero pixels

        # descriptors of all level-0 windows inside the large window
        itw0 = sliding_window(wnd1.shape, (l0_size, l0_size),
                              start=(0, 0), step=(l0_size, l0_size))
        ldesc = [desc.compute(wnd1[w0[0]:w0[1], w0[2]:w0[3]]) for w0 in itw0]

        X = np.vstack(ldesc)
        y = codebook.predict(X)

        # histogram of code blocks, normalised to frequencies; the cast to
        # float keeps the division exact whatever the meaning of `/` is
        h = np.bincount(y, minlength=n_codes).astype(float)
        h /= y.size

        bag.append(h)
        wnd.append(w1)

    # splitext() only strips a real file extension, so names without an
    # extension or directories containing dots keep their full stem
    out_name = os.path.splitext(img_name)[0] + '_bag_l1.pkl'
    with ModelPersistence(out_name, 'c', format='pickle') as d:
        d['bag_l1'] = {'hist_l0': bag, 'regs': wnd}

    print('...end on', img_name)

    return bag
def worker(img_name, desc, wnd_size, l0_model):
    """Build the level-1 bag of code-block histograms for one image.

    The image is scanned with non-overlapping "large" windows of size
    ``wnd_size x wnd_size``.  Each large window is split into
    non-overlapping level-0 windows, a local descriptor is computed for
    every level-0 window, the descriptors are assigned to code blocks by
    the level-0 codebook and the relative frequency of each code block
    is stored as the histogram of the large window.

    The result is also persisted through ``ModelPersistence`` in
    ``<image name without extension>_bag_l1.pkl`` under the key
    ``'bag_l1'`` (a dict with ``'hist_l0'`` and ``'regs'``).

    :param img_name: string
        Path of the image to process; the image is assumed to already
        contain the H-plane (no colour pre-processing is done here).
    :param desc: object
        Local descriptor; only its ``compute(patch)`` method is used.
    :param wnd_size: int
        Side of the (square) large window.
    :param l0_model: dict
        Level-0 model with the keys ``'window_size'`` (side of the
        level-0 window) and ``'codebook'`` (object exposing
        ``predict()`` and ``cluster_centers_``).
    :return: list or None
        The list of histograms (one per retained large window), or
        None if the image could not be read.
    """
    import os  # local import: only needed to derive the output file name

    try:
        im_h = imread(img_name)
    except IOError:
        return None

    print("...start on", img_name)

    l0_size = l0_model['window_size']
    codebook = l0_model['codebook']
    n_codes = codebook.cluster_centers_.shape[0]

    bag = []
    wnd = []
    itw1 = sliding_window(im_h.shape, (wnd_size, wnd_size),
                          start=(0, 0), step=(wnd_size, wnd_size))
    for w1 in itw1:
        wnd1 = im_h[w1[0]:w1[1], w1[2]:w1[3]]
        if wnd1.sum() < wnd_size**2 / 100:
            continue  # not enough non-zero pixels

        # descriptors of all level-0 windows inside the large window
        itw0 = sliding_window(wnd1.shape, (l0_size, l0_size),
                              start=(0, 0), step=(l0_size, l0_size))
        ldesc = [desc.compute(wnd1[w0[0]:w0[1], w0[2]:w0[3]]) for w0 in itw0]

        X = np.vstack(ldesc)
        y = codebook.predict(X)

        # histogram of code blocks, normalised to frequencies; the cast to
        # float keeps the division exact whatever the meaning of `/` is
        h = np.bincount(y, minlength=n_codes).astype(float)
        h /= y.size

        bag.append(h)
        wnd.append(w1)

    # splitext() only strips a real file extension, so names without an
    # extension or directories containing dots keep their full stem
    out_name = os.path.splitext(img_name)[0] + '_bag_l1.pkl'
    with ModelPersistence(out_name, 'c', format='pickle') as d:
        d['bag_l1'] = {'hist_l0': bag, 'regs': wnd}

    print('...end on', img_name)

    return bag
def extract_descriptors_he(_img, w_size, _ncpus=None):
    """
    EXTRACT_DESCRIPTORS_HE: computes local descriptors on a grid of
    non-overlapping square windows of an RGB image. For every window,
    ``_worker2`` is called (in a pool of worker processes) with the Hue
    plane, the haematoxylin and eosin planes, a Gabor descriptor, an LBP
    descriptor and the window coordinates.

    :param _img: numpy.ndarray
        3-dimensional image array (last axis: colour channels).
    :param w_size: int
        Side of the square window; it is also the scanning step.
    :param _ncpus: int or None
        Passed as ``max_workers`` to ``ProcessPoolExecutor``.
    :return: list
        The values returned by ``_worker2``, in the order in which the
        jobs completed (not necessarily the scanning order).
    """
    assert (_img.ndim == 3)

    # non-overlapping windows over the first two axes of the image
    windows = sliding_window(_img.shape[:-1], (w_size, w_size),
                             step=(w_size, w_size))
    gabor = GaborDescriptor()
    lbp = LBPDescriptor()

    hue = rgb2hsv(_img)[:, :, 0]
    h, e, _ = rgb2he2(_img)

    with ProcessPoolExecutor(max_workers=_ncpus) as executor:
        jobs = [executor.submit(_worker2, hue, h, e, gabor, lbp, w_coords)
                for w_coords in windows]

    return [job.result() for job in as_completed(jobs)]
def get_gabor_desc(img, gdesc, w_size, scale=1.0, mask=None, _ncpus=None):
    """
    Extract local Gabor descriptors by scanning an image.

    :param img: numpy.ndarray
        Input intensity (grey-scale) image.
    :param gdesc: txtgrey.GaborDescriptor
        The parameters of the Gabor wavelets to be used.
    :param w_size: integer
        Window size (the sliding window is square-shaped).
    :param scale: float
        The image may be scaled prior to any descriptor extraction.
    :param mask: numpy.ndarray
        A mask (logical image) indicating the object regions in the image.
        It must have the same shape as img; it is resized to the shape of
        the rescaled image. Only windows in which more than 5% of the
        pixels belong to the mask are processed.
    :param _ncpus: int or None
        Passed as ``max_workers`` to ``ProcessPoolExecutor``.
    :return: list
        A list with the values returned by ``_gabor_worker`` for each
        retained position of the sliding window, in the order in which the
        jobs completed (not necessarily the scanning order).
    """
    assert (img.ndim == 2)

    img_ = rescale(img, scale)

    th = None
    if mask is not None:
        assert (mask.ndim == 2)
        assert (mask.shape == img.shape)
        mask = img_as_ubyte(resize(mask, img_.shape))
        # consider only those windows with more than 5% pixels from object
        th = w_size * w_size / 20.0

    # non-overlapping windows
    img_iterator = sliding_window(img_.shape, (w_size, w_size),
                                  step=(w_size, w_size))

    res = []
    with ProcessPoolExecutor(max_workers=_ncpus) as executor:
        for w_coords in img_iterator:
            time.sleep(0.01)  # pacing between submissions, kept from the original
            if mask is not None:
                window = mask[w_coords[0]:w_coords[1], w_coords[2]:w_coords[3]]
                # Count object pixels instead of summing intensities: after
                # img_as_ubyte() an object pixel is worth up to 255, so a
                # plain sum would pass the 5% test with ~255x fewer pixels.
                if (window > 0).sum() <= th:
                    continue
            res.append(executor.submit(_gabor_worker, img_, gdesc, w_coords))

    return [f.result() for f in as_completed(res)]
def extract_descriptors_he(_img, w_size, _ncpus=None):
    """
    EXTRACT_DESCRIPTORS_HE: computes local descriptors on a grid of
    non-overlapping square windows of an RGB image. For every window,
    ``_worker2`` is called (in a pool of worker processes) with the Hue
    plane, the haematoxylin and eosin planes, a Gabor descriptor, an LBP
    descriptor and the window coordinates.

    :param _img: numpy.ndarray
        3-dimensional image array (last axis: colour channels).
    :param w_size: int
        Side of the square window; it is also the scanning step.
    :param _ncpus: int or None
        Passed as ``max_workers`` to ``ProcessPoolExecutor``.
    :return: list
        The values returned by ``_worker2``, in the order in which the
        jobs completed (not necessarily the scanning order).
    """
    assert (_img.ndim == 3)

    # non-overlapping windows over the first two axes of the image
    windows = sliding_window(_img.shape[:-1], (w_size, w_size),
                             step=(w_size, w_size))
    gabor = GaborDescriptor()
    lbp = LBPDescriptor()

    hue = rgb2hsv(_img)[:, :, 0]
    h, e, _ = rgb2he2(_img)

    with ProcessPoolExecutor(max_workers=_ncpus) as executor:
        jobs = [executor.submit(_worker2, hue, h, e, gabor, lbp, w_coords)
                for w_coords in windows]

    return [job.result() for job in as_completed(jobs)]
def grow_bag_from_new_image(image, desc, w_size, n_obj, **kwargs):
    """
    Extracts local descriptors from a new image.

    :param image: numpy.array
        Image data (single channel).
    :param desc: LocalDescriptor
        Local descriptor for feature extraction.
    :param w_size: tuple
        (width, height) of the sub-windows from the image.
    :param n_obj: int
        Maximum number of objects to be added to the bag.
    :param kwargs: dict
        Other parameters:
        'roi': region of interest (default: None)
        'sampling_strategy': how the image should be sampled (default:
            'random'; the value is case-insensitive):
            'random' for random sampling
            'sliding' for systematic, sliding window scanning
                      of the image
        'it_start': where the scanning of the image starts (for
            sliding window sampling strategy) (default (0,0)); it is
            not used when a 'roi' is given
        'it_step': step from one window to the next (for
            sliding window sampling strategy) (default (1,1))
        'discard_empty': (boolean) whether an empty patch should still
            be processed or simply discarded. Default: False
    :return: dict
        A dictionary with two elements:
            <name of the descriptor>: list
            'regs': list
        The first list contains the feature descriptors.
        The second list contains the corresponding window positions.
    :raises ValueError: if the sampling strategy is not recognised.

    See also: grow_bag_with_new_features
    """
    roi = kwargs.get('roi')
    it_start = kwargs.get('it_start', (0, 0))
    it_step = kwargs.get('it_step', (1, 1))
    sampling_strategy = kwargs.get('sampling_strategy', 'random')
    discard_empty = kwargs.get('discard_empty', False)

    w_offset = (0, 0)
    if isinstance(desc, HaarLikeDescriptor):
        # this one works on integral images
        image = intg_image(image)
        # the sliding window should also be increased by 1:
        w_offset = (1, 1)
        w_size = (w_size[0] + w_offset[0], w_size[1] + w_offset[1])

    # create iterator:
    sampling_strategy = sampling_strategy.lower()
    if sampling_strategy == 'random':
        if roi is None:
            itw = random_window(image.shape, w_size, n_obj)
        else:
            itw = random_window_on_regions(image.shape, roi, w_size, n_obj)
    elif sampling_strategy == 'sliding':
        if roi is None:
            itw = sliding_window(image.shape, w_size, start=it_start, step=it_step)
        else:
            itw = sliding_window_on_regions(image.shape, roi, w_size, step=it_step)
    else:
        raise ValueError('Unknown strategy.')

    bag = []
    wnd = []
    for r in itw:
        # n_obj is a maximum: stop before adding item number n_obj + 1
        if len(bag) >= n_obj:
            break

        patch = image[r[0]:r[1], r[2]:r[3]]
        if discard_empty and patch.sum() < 1e-16:
            continue

        # report the window without the extra row/column that was added
        # for descriptors working on the integral image
        wnd.append((r[0], r[1] - w_offset[1], r[2], r[3] - w_offset[0]))
        bag.append(desc.compute(patch))

    return {desc.name: bag, 'regs': wnd}
def main():
    """Compute local texture descriptors on the tiles of an imported H&E slide.

    The slide is described by an XML meta file; the tiles of the requested
    scale are read from ``<dir of meta file>/<scale>/tile_<i>_<j>.ppm``.
    Tiles are visited row by row.  So that an integer number of
    ``wsize x wsize`` windows fits, a tile is extended (when a neighbour
    exists) with a stripe of pixels from its right, bottom and
    bottom-right neighbours; the next tile then starts scanning after
    that stripe.  The processed area is further restricted to the
    requested region of interest.

    In this function only ``--gabor`` (optionally with ``--eosine``)
    triggers a computation: for each tile the descriptors are saved,
    together with a column of zeros in place of cluster labels, to
    ``<prefix>_<tile name>_gabor_h.dat`` (and ``..._e.dat``), with the
    window coordinates expressed in the global image system.

    :raises ValueError: if the requested scale is missing or duplicated,
        if the window is too large for the region, or if no tile overlaps
        the region.
    :raises RuntimeError: if a tile required by the scan is not listed
        in the meta file.
    """
    p = opt.ArgumentParser(description="""
            Segments a number of rectangular contexts from a H&E slide. The contexts are clusters
            of similar regions of the image. The similarity is based on various textural
            descriptors.
            """)
    p.add_argument('meta_file', action='store',
                   help='XML file describing the structure of the imported file')
    p.add_argument('scale', action='store', help='which of the scales to be processed')
    p.add_argument('ctxt', action='store', help='number of contexts to extract', type=int)
    p.add_argument('wsize', action='store', help='size of the (square) regions', type=int)
    p.add_argument('--prefix', action='store',
                   help='optional prefix for the resulting files', default=None)
    p.add_argument('--gabor', action='store_true',
                   help='compute Gabor descriptors and generate the corresponding contexts')
    p.add_argument('--lbp', action='store_true',
                   help='compute LBP (local binary patterns) descriptors and generate the corresponding contexts')
    p.add_argument('--mfs', action='store_true',
                   help='compute fractal descriptors and generate the corresponding contexts')
    p.add_argument('--haralick', action='store_true',
                   help='compute Haralick descriptors and generate the corresponding contexts')
    p.add_argument('--row_min', action='store', type=int,
                   help='start row (rows start at 0)', default=0)
    p.add_argument('--col_min', action='store', type=int,
                   help='start column (columns start at 0)', default=0)
    p.add_argument('--row_max', action='store', type=int,
                   help='end row (maximum: image height-1)', default=0)
    p.add_argument('--col_max', action='store', type=int,
                   help='end column (maximum: image width-1)', default=0)
    p.add_argument('--eosine', action='store_true',
                   help='should also Eosine component be processed?')

    args = p.parse_args()
    wsize = args.wsize

    xml_file = ET.parse(args.meta_file)
    xml_root = xml_file.getroot()

    # find the name of the image:
    base_name = os.path.basename(xml_root.find('file').text).split('.')
    if len(base_name) > 1:  # at least 1 suffix .ext
        base_name.pop()  # drop the extension
    base_name = '.'.join(base_name)  # reassemble the rest of the list into file name

    pfx = args.prefix if args.prefix is not None else base_name

    path = os.path.dirname(args.meta_file)

    # Check if the required scale exists:
    vrs = [_x for _x in xml_root.findall('version')
           if _x.find('scale').text == args.scale]
    if len(vrs) == 0:
        raise ValueError('The requested scale does not exits.')
    if len(vrs) > 1:
        raise ValueError('Inconsistency detected for the requested scale.')

    all_tiles = vrs[0].findall('tile')

    # get the info about full image:
    im_width = int(xml_root.find('original/width').text)
    im_height = int(xml_root.find('original/height').text)

    row_min = min(max(args.row_min, 0), im_height - 2)
    col_min = min(max(args.col_min, 0), im_width - 2)
    row_max = max(min(args.row_max, im_height - 1), 0)
    col_max = max(min(args.col_max, im_width - 1), 0)

    # 0 (the default) means "up to the last row/column"
    if row_max == 0:
        row_max = im_height - 1
    if col_max == 0:
        col_max = im_width - 1

    if row_max - row_min < wsize or col_max - col_min < wsize:
        raise ValueError('Window size too large for requested image size.')

    # keep only the tiles that overlap with the specified region
    tiles = [tl.attrib for tl in all_tiles
             if int(tl.attrib['x1']) >= col_min and col_max >= int(tl.attrib['x0'])
             and int(tl.attrib['y1']) >= row_min and row_max >= int(tl.attrib['y0'])]
    if not tiles:
        raise ValueError('No tile overlaps the requested region.')

    # name -> tile attributes, for direct access to any tile (i, j)
    tiles_by_name = dict((t['name'], t) for t in tiles)

    def get_tile(ti, tj):
        """Return the attributes of tile (ti, tj) or raise RuntimeError."""
        name = 'tile_' + str(ti) + '_' + str(tj) + '.ppm'
        if name not in tiles_by_name:
            raise RuntimeError("Missing tile" + name)
        return tiles_by_name[name]

    def load_tile(t):
        """Read a tile; a tile is stored in <path>/<scale>/tile_i_j.ppm."""
        # os.path.join() keeps the path relative when the meta file is in
        # the current directory (path == '')
        return imread(os.path.join(path, str(args.scale), t['name']))

    # Sort the tiles from top to bottom and left to right.
    # -get all the (i,j) indices of the tiles (explicit lists, so that it
    #  does not depend on map() returning a list):
    rx = re.compile(r'[_.]')
    ij = np.array([[int(s) for s in rx.split(t['name'])[1:3]] for t in tiles])
    # -find i_min, i_max, j_min and j_max. Since the tiles are consecutive
    # (on row and column), these are enough to generate the desired order:
    tile_i_min, tile_j_min = [int(v) for v in ij.min(axis=0)]
    tile_i_max, tile_j_max = [int(v) for v in ij.max(axis=0)]

    row_offset = 0
    next_row_offset = 0
    for i in range(tile_i_min, tile_i_max + 1):
        col_offset = 0
        for j in range(tile_j_min, tile_j_max + 1):
            # double-check that tile_i_j is in the list of tiles:
            tile = get_tile(i, j)

            im = load_tile(tile)
            tile_height, tile_width, _ = im.shape

            # The scanning (sliding) windows will start at (row_offset, col_offset)
            # (in this tile's coordinate system). We want to have an integer number
            # of windows so, if needed (and possible) we will extend the current
            # tile with a block of pixels from the neighboring tiles.

            # number of windows on the horizontal
            right_pad = 0
            right_tile = None
            span_w = tile_width - col_offset
            if j < tile_j_max and span_w % wsize != 0:
                # integer ceiling: independent of the semantics of `/`
                nh = -(-span_w // wsize)
                right_pad = nh * wsize - span_w
                right_tile = get_tile(i, j + 1)

            # number of windows on the vertical
            bot_pad = 0
            bot_tile = None
            span_h = tile_height - row_offset
            if i < tile_i_max and span_h % wsize != 0:
                nv = -(-span_h // wsize)
                bot_pad = nv * wsize - span_h
                bot_tile = get_tile(i + 1, j)

            rb_tile = None
            if right_tile is not None and bot_tile is not None:
                # this MUST exist if the right and bottom tiles above exist:
                rb_tile = get_tile(i + 1, j + 1)

            # expand the image to the right and bottom only if there is a
            # neighboring tile in that direction
            next_row_offset, next_col_offset = 0, 0
            if right_tile is not None or bot_tile is not None:
                # Enlarge the image to the right and bottom; the pads are 0
                # in a direction without neighbor. The dtype of the tile is
                # kept, so padded and non-padded tiles reach the colour
                # transform with the same pixel representation.
                im_tmp = np.zeros((tile_height + bot_pad,
                                   tile_width + right_pad,
                                   im.shape[2]), dtype=im.dtype)
                im_tmp[0:tile_height, 0:tile_width, :] = im
                im = im_tmp

                if right_tile is not None:
                    im_tmp = load_tile(right_tile)
                    im[0:tile_height, tile_width:tile_width + right_pad, :] = \
                        im_tmp[0:tile_height, 0:right_pad, :]
                    next_col_offset = right_pad

                if bot_tile is not None:
                    im_tmp = load_tile(bot_tile)
                    im[tile_height:tile_height + bot_pad, 0:tile_width, :] = \
                        im_tmp[0:bot_pad, 0:tile_width, :]
                    next_row_offset = bot_pad

                if rb_tile is not None:
                    im_tmp = load_tile(rb_tile)
                    im[tile_height:tile_height + bot_pad,
                       tile_width:tile_width + right_pad, :] = \
                        im_tmp[0:bot_pad, 0:right_pad, :]

                im_tmp = None  # discard

            # From the current tile (padded), we need to process the region
            # (row_offset, col_offset) -> (im.height, im.width) (with new
            # height and width). But there might still be some restrictions
            # due to the region of interest (row_min, col_min) -> (row_max, col_max).
            # These last coordinates are in global coordinate system! So, first we
            # convert them to (rmn, cmn) -> (rmx, cmx), and lower bound them to
            # the offset:
            rmn = max(row_min - int(tile['y0']), row_offset)
            rmx = min(row_max - int(tile['y0']) + 1, im.shape[0])
            cmn = max(col_min - int(tile['x0']), col_offset)
            cmx = min(col_max - int(tile['x0']) + 1, im.shape[1])

            im = im[rmn:rmx, cmn:cmx, :]  # image to process

            # tile contains the real coordinates of the region in the image
            crt_row_min = int(tile['y0'])
            crt_col_min = int(tile['x0'])

            # must be updated before any `continue` below
            col_offset = next_col_offset

            # Finally, we have the image for analysis. Don't forget to transform
            # the coordinates from current tile system to global image system
            # when saving the results.
            if im.shape[0] < wsize or im.shape[1] < wsize:
                # (what is left of the) tile is smaller than the window size
                continue

            # get the H and E planes:
            h, e, _ = rgb2he2(im)

            if args.gabor:
                print("---------> Gabor descriptors:")
                g = GaborDescriptor()
                desc_label = 'gabor'

                planes = [(h, "------------> H plane", '_h.dat')]
                if args.eosine:
                    # repeat on E plane:
                    planes.append((e, "------------> E plane", '_e.dat'))

                for plane, banner, suffix in planes:
                    print(banner)
                    img_iterator = sliding_window(plane.shape, (wsize, wsize),
                                                  step=(wsize, wsize))
                    dsc = get_local_desc(plane, g, img_iterator, desc_label)
                    # we do not cluster here: one zero label per descriptor
                    labels = np.zeros((len(dsc), 1))

                    # save clustering/contexts - remember, the coordinates are
                    # in the current tile/image system -> should add back the
                    # shift
                    z1 = desc_to_matrix(dsc, desc_label)  # col 0: row_min, col 2: col_min
                    z1[:, 0:2] += crt_row_min + rmn
                    z1[:, 2:4] += crt_col_min + cmn
                    z2 = np.hstack((labels, z1))
                    np.savetxt(pfx + '_' + tile['name'] + '_' + desc_label + suffix,
                               z2, delimiter="\t")

                print("OK")
        # end for j...
        row_offset = next_row_offset
    # end for i....

    return
def grow_bag_from_new_image(image, desc, w_size, n_obj, **kwargs):
    """
    Extracts local descriptors from a new image.

    :param image: numpy.array
        Image data (single channel).
    :param desc: LocalDescriptor
        Local descriptor for feature extraction.
    :param w_size: tuple
        (width, height) of the sub-windows from the image.
    :param n_obj: int
        Maximum number of objects to be added to the bag.
    :param kwargs: dict
        Other parameters:
        'roi': region of interest (default: None)
        'sampling_strategy': how the image should be sampled (default:
            'random'; the value is case-insensitive):
            'random' for random sampling
            'sliding' for systematic, sliding window scanning
                      of the image
        'it_start': where the scanning of the image starts (for
            sliding window sampling strategy) (default (0,0)); it is
            not used when a 'roi' is given
        'it_step': step from one window to the next (for
            sliding window sampling strategy) (default (1,1))
        'discard_empty': (boolean) whether an empty patch should still
            be processed or simply discarded. Default: False
    :return: dict
        A dictionary with two elements:
            <name of the descriptor>: list
            'regs': list
        The first list contains the feature descriptors.
        The second list contains the corresponding window positions.
    :raises ValueError: if the sampling strategy is not recognised.

    See also: grow_bag_with_new_features
    """
    roi = kwargs.get('roi')
    it_start = kwargs.get('it_start', (0, 0))
    it_step = kwargs.get('it_step', (1, 1))
    sampling_strategy = kwargs.get('sampling_strategy', 'random')
    discard_empty = kwargs.get('discard_empty', False)

    w_offset = (0, 0)
    if isinstance(desc, HaarLikeDescriptor):
        # this one works on integral images
        image = intg_image(image)
        # the sliding window should also be increased by 1:
        w_offset = (1, 1)
        w_size = (w_size[0] + w_offset[0], w_size[1] + w_offset[1])

    # create iterator:
    sampling_strategy = sampling_strategy.lower()
    if sampling_strategy == 'random':
        if roi is None:
            itw = random_window(image.shape, w_size, n_obj)
        else:
            itw = random_window_on_regions(image.shape, roi, w_size, n_obj)
    elif sampling_strategy == 'sliding':
        if roi is None:
            itw = sliding_window(image.shape, w_size, start=it_start, step=it_step)
        else:
            itw = sliding_window_on_regions(image.shape, roi, w_size, step=it_step)
    else:
        raise ValueError('Unknown strategy.')

    bag = []
    wnd = []
    for r in itw:
        # n_obj is a maximum: stop before adding item number n_obj + 1
        if len(bag) >= n_obj:
            break

        patch = image[r[0]:r[1], r[2]:r[3]]
        if discard_empty and patch.sum() < 1e-16:
            continue

        # report the window without the extra row/column that was added
        # for descriptors working on the integral image
        wnd.append((r[0], r[1] - w_offset[1], r[2], r[3] - w_offset[0]))
        bag.append(desc.compute(patch))

    return {desc.name: bag, 'regs': wnd}
def get_gabor_desc(img, gdesc, w_size, scale=1.0, mask=None, _ncpus=None):
    """
    Extract local Gabor descriptors by scanning an image.

    :param img: numpy.ndarray
        Input intensity (grey-scale) image.
    :param gdesc: txtgrey.GaborDescriptor
        The parameters of the Gabor wavelets to be used.
    :param w_size: integer
        Window size (the sliding window is square-shaped).
    :param scale: float
        The image may be scaled prior to any descriptor extraction.
    :param mask: numpy.ndarray
        A mask (logical image) indicating the object regions in the image.
        It must have the same shape as img; it is resized to the shape of
        the rescaled image. Only windows in which more than 5% of the
        pixels belong to the mask are processed.
    :param _ncpus: int or None
        Passed as ``max_workers`` to ``ProcessPoolExecutor``.
    :return: list
        A list with the values returned by ``_gabor_worker`` for each
        retained position of the sliding window, in the order in which the
        jobs completed (not necessarily the scanning order).
    """
    assert (img.ndim == 2)

    img_ = rescale(img, scale)

    th = None
    if mask is not None:
        assert (mask.ndim == 2)
        assert (mask.shape == img.shape)
        mask = img_as_ubyte(resize(mask, img_.shape))
        # consider only those windows with more than 5% pixels from object
        th = w_size * w_size / 20.0

    # non-overlapping windows
    img_iterator = sliding_window(img_.shape, (w_size, w_size),
                                  step=(w_size, w_size))

    res = []
    with ProcessPoolExecutor(max_workers=_ncpus) as executor:
        for w_coords in img_iterator:
            time.sleep(0.01)  # pacing between submissions, kept from the original
            if mask is not None:
                window = mask[w_coords[0]:w_coords[1], w_coords[2]:w_coords[3]]
                # Count object pixels instead of summing intensities: after
                # img_as_ubyte() an object pixel is worth up to 255, so a
                # plain sum would pass the 5% test with ~255x fewer pixels.
                if (window > 0).sum() <= th:
                    continue
            res.append(executor.submit(_gabor_worker, img_, gdesc, w_coords))

    return [f.result() for f in as_completed(res)]
def main():
    """Compute local texture descriptors on the tiles of an imported H&E slide.

    The slide is described by an XML meta file; the tiles of the requested
    scale are read from ``<dir of meta file>/<scale>/tile_<i>_<j>.ppm``.
    Tiles are visited row by row.  So that an integer number of
    ``wsize x wsize`` windows fits, a tile is extended (when a neighbour
    exists) with a stripe of pixels from its right, bottom and
    bottom-right neighbours; the next tile then starts scanning after
    that stripe.  The processed area is further restricted to the
    requested region of interest.

    In this function only ``--gabor`` (optionally with ``--eosine``)
    triggers a computation: for each tile the descriptors are saved,
    together with a column of zeros in place of cluster labels, to
    ``<prefix>_<tile name>_gabor_h.dat`` (and ``..._e.dat``), with the
    window coordinates expressed in the global image system.

    :raises ValueError: if the requested scale is missing or duplicated,
        if the window is too large for the region, or if no tile overlaps
        the region.
    :raises RuntimeError: if a tile required by the scan is not listed
        in the meta file.
    """
    p = opt.ArgumentParser(description="""
            Segments a number of rectangular contexts from a H&E slide. The contexts are clusters
            of similar regions of the image. The similarity is based on various textural
            descriptors.
            """)
    p.add_argument('meta_file', action='store',
                   help='XML file describing the structure of the imported file')
    p.add_argument('scale', action='store', help='which of the scales to be processed')
    p.add_argument('ctxt', action='store', help='number of contexts to extract', type=int)
    p.add_argument('wsize', action='store', help='size of the (square) regions', type=int)
    p.add_argument('--prefix', action='store',
                   help='optional prefix for the resulting files', default=None)
    p.add_argument('--gabor', action='store_true',
                   help='compute Gabor descriptors and generate the corresponding contexts')
    p.add_argument('--lbp', action='store_true',
                   help='compute LBP (local binary patterns) descriptors and generate the corresponding contexts')
    p.add_argument('--mfs', action='store_true',
                   help='compute fractal descriptors and generate the corresponding contexts')
    p.add_argument('--haralick', action='store_true',
                   help='compute Haralick descriptors and generate the corresponding contexts')
    p.add_argument('--row_min', action='store', type=int,
                   help='start row (rows start at 0)', default=0)
    p.add_argument('--col_min', action='store', type=int,
                   help='start column (columns start at 0)', default=0)
    p.add_argument('--row_max', action='store', type=int,
                   help='end row (maximum: image height-1)', default=0)
    p.add_argument('--col_max', action='store', type=int,
                   help='end column (maximum: image width-1)', default=0)
    p.add_argument('--eosine', action='store_true',
                   help='should also Eosine component be processed?')

    args = p.parse_args()
    wsize = args.wsize

    xml_file = ET.parse(args.meta_file)
    xml_root = xml_file.getroot()

    # find the name of the image:
    base_name = os.path.basename(xml_root.find('file').text).split('.')
    if len(base_name) > 1:  # at least 1 suffix .ext
        base_name.pop()  # drop the extension
    base_name = '.'.join(base_name)  # reassemble the rest of the list into file name

    pfx = args.prefix if args.prefix is not None else base_name

    path = os.path.dirname(args.meta_file)

    # Check if the required scale exists:
    vrs = [_x for _x in xml_root.findall('version')
           if _x.find('scale').text == args.scale]
    if len(vrs) == 0:
        raise ValueError('The requested scale does not exits.')
    if len(vrs) > 1:
        raise ValueError('Inconsistency detected for the requested scale.')

    all_tiles = vrs[0].findall('tile')

    # get the info about full image:
    im_width = int(xml_root.find('original/width').text)
    im_height = int(xml_root.find('original/height').text)

    row_min = min(max(args.row_min, 0), im_height - 2)
    col_min = min(max(args.col_min, 0), im_width - 2)
    row_max = max(min(args.row_max, im_height - 1), 0)
    col_max = max(min(args.col_max, im_width - 1), 0)

    # 0 (the default) means "up to the last row/column"
    if row_max == 0:
        row_max = im_height - 1
    if col_max == 0:
        col_max = im_width - 1

    if row_max - row_min < wsize or col_max - col_min < wsize:
        raise ValueError('Window size too large for requested image size.')

    # keep only the tiles that overlap with the specified region
    tiles = [tl.attrib for tl in all_tiles
             if int(tl.attrib['x1']) >= col_min and col_max >= int(tl.attrib['x0'])
             and int(tl.attrib['y1']) >= row_min and row_max >= int(tl.attrib['y0'])]
    if not tiles:
        raise ValueError('No tile overlaps the requested region.')

    # name -> tile attributes, for direct access to any tile (i, j)
    tiles_by_name = dict((t['name'], t) for t in tiles)

    def get_tile(ti, tj):
        """Return the attributes of tile (ti, tj) or raise RuntimeError."""
        name = 'tile_' + str(ti) + '_' + str(tj) + '.ppm'
        if name not in tiles_by_name:
            raise RuntimeError("Missing tile" + name)
        return tiles_by_name[name]

    def load_tile(t):
        """Read a tile; a tile is stored in <path>/<scale>/tile_i_j.ppm."""
        # os.path.join() keeps the path relative when the meta file is in
        # the current directory (path == '')
        return imread(os.path.join(path, str(args.scale), t['name']))

    # Sort the tiles from top to bottom and left to right.
    # -get all the (i,j) indices of the tiles (explicit lists, so that it
    #  does not depend on map() returning a list):
    rx = re.compile(r'[_.]')
    ij = np.array([[int(s) for s in rx.split(t['name'])[1:3]] for t in tiles])
    # -find i_min, i_max, j_min and j_max. Since the tiles are consecutive
    # (on row and column), these are enough to generate the desired order:
    tile_i_min, tile_j_min = [int(v) for v in ij.min(axis=0)]
    tile_i_max, tile_j_max = [int(v) for v in ij.max(axis=0)]

    row_offset = 0
    next_row_offset = 0
    for i in range(tile_i_min, tile_i_max + 1):
        col_offset = 0
        for j in range(tile_j_min, tile_j_max + 1):
            # double-check that tile_i_j is in the list of tiles:
            tile = get_tile(i, j)

            im = load_tile(tile)
            tile_height, tile_width, _ = im.shape

            # The scanning (sliding) windows will start at (row_offset, col_offset)
            # (in this tile's coordinate system). We want to have an integer number
            # of windows so, if needed (and possible) we will extend the current
            # tile with a block of pixels from the neighboring tiles.

            # number of windows on the horizontal
            right_pad = 0
            right_tile = None
            span_w = tile_width - col_offset
            if j < tile_j_max and span_w % wsize != 0:
                # integer ceiling: independent of the semantics of `/`
                nh = -(-span_w // wsize)
                right_pad = nh * wsize - span_w
                right_tile = get_tile(i, j + 1)

            # number of windows on the vertical
            bot_pad = 0
            bot_tile = None
            span_h = tile_height - row_offset
            if i < tile_i_max and span_h % wsize != 0:
                nv = -(-span_h // wsize)
                bot_pad = nv * wsize - span_h
                bot_tile = get_tile(i + 1, j)

            rb_tile = None
            if right_tile is not None and bot_tile is not None:
                # this MUST exist if the right and bottom tiles above exist:
                rb_tile = get_tile(i + 1, j + 1)

            # expand the image to the right and bottom only if there is a
            # neighboring tile in that direction
            next_row_offset, next_col_offset = 0, 0
            if right_tile is not None or bot_tile is not None:
                # Enlarge the image to the right and bottom; the pads are 0
                # in a direction without neighbor. The dtype of the tile is
                # kept, so padded and non-padded tiles reach the colour
                # transform with the same pixel representation.
                im_tmp = np.zeros((tile_height + bot_pad,
                                   tile_width + right_pad,
                                   im.shape[2]), dtype=im.dtype)
                im_tmp[0:tile_height, 0:tile_width, :] = im
                im = im_tmp

                if right_tile is not None:
                    im_tmp = load_tile(right_tile)
                    im[0:tile_height, tile_width:tile_width + right_pad, :] = \
                        im_tmp[0:tile_height, 0:right_pad, :]
                    next_col_offset = right_pad

                if bot_tile is not None:
                    im_tmp = load_tile(bot_tile)
                    im[tile_height:tile_height + bot_pad, 0:tile_width, :] = \
                        im_tmp[0:bot_pad, 0:tile_width, :]
                    next_row_offset = bot_pad

                if rb_tile is not None:
                    im_tmp = load_tile(rb_tile)
                    im[tile_height:tile_height + bot_pad,
                       tile_width:tile_width + right_pad, :] = \
                        im_tmp[0:bot_pad, 0:right_pad, :]

                im_tmp = None  # discard

            # From the current tile (padded), we need to process the region
            # (row_offset, col_offset) -> (im.height, im.width) (with new
            # height and width). But there might still be some restrictions
            # due to the region of interest (row_min, col_min) -> (row_max, col_max).
            # These last coordinates are in global coordinate system! So, first we
            # convert them to (rmn, cmn) -> (rmx, cmx), and lower bound them to
            # the offset:
            rmn = max(row_min - int(tile['y0']), row_offset)
            rmx = min(row_max - int(tile['y0']) + 1, im.shape[0])
            cmn = max(col_min - int(tile['x0']), col_offset)
            cmx = min(col_max - int(tile['x0']) + 1, im.shape[1])

            im = im[rmn:rmx, cmn:cmx, :]  # image to process

            # tile contains the real coordinates of the region in the image
            crt_row_min = int(tile['y0'])
            crt_col_min = int(tile['x0'])

            # must be updated before any `continue` below
            col_offset = next_col_offset

            # Finally, we have the image for analysis. Don't forget to transform
            # the coordinates from current tile system to global image system
            # when saving the results.
            if im.shape[0] < wsize or im.shape[1] < wsize:
                # (what is left of the) tile is smaller than the window size
                continue

            # get the H and E planes:
            h, e, _ = rgb2he2(im)

            if args.gabor:
                print("---------> Gabor descriptors:")
                g = GaborDescriptor()
                desc_label = 'gabor'

                planes = [(h, "------------> H plane", '_h.dat')]
                if args.eosine:
                    # repeat on E plane:
                    planes.append((e, "------------> E plane", '_e.dat'))

                for plane, banner, suffix in planes:
                    print(banner)
                    img_iterator = sliding_window(plane.shape, (wsize, wsize),
                                                  step=(wsize, wsize))
                    dsc = get_local_desc(plane, g, img_iterator, desc_label)
                    # we do not cluster here: one zero label per descriptor
                    labels = np.zeros((len(dsc), 1))

                    # save clustering/contexts - remember, the coordinates are
                    # in the current tile/image system -> should add back the
                    # shift
                    z1 = desc_to_matrix(dsc, desc_label)  # col 0: row_min, col 2: col_min
                    z1[:, 0:2] += crt_row_min + rmn
                    z1[:, 2:4] += crt_col_min + cmn
                    z2 = np.hstack((labels, z1))
                    np.savetxt(pfx + '_' + tile['name'] + '_' + desc_label + suffix,
                               z2, delimiter="\t")

                print("OK")
        # end for j...
        row_offset = next_row_offset
    # end for i....

    return
def _cluster_and_save(descriptor, desc_label, pdist_fn, planes, im, n_ctxt,
                      wsize, pfx, row_shift, col_shift):
    """Cluster the windows of each stain plane into contexts and save the results.

    For every plane the local descriptors are computed on a non-overlapping
    grid of ``wsize`` x ``wsize`` windows, clustered with average linkage into
    at most ``n_ctxt`` groups, and written to disk:

    * ``<pfx>_<desc_label>_<tag>.dat``: tab-separated table whose first column
      is the cluster label, followed by the matrix from ``desc_to_matrix``;
    * ``<pfx>_<desc_label>_<tag>_<k>.ppm``: one visualization per context ``k``.

    :param descriptor: local descriptor object handed to ``get_local_desc``.
    :param desc_label: descriptor name, used in file names and passed to the
        project helpers.
    :param pdist_fn: function computing the pairwise distances between the
        local descriptors.
    :param planes: sequence of ``(tag, banner, plane)`` tuples; ``banner`` is
        printed before the plane is processed.
    :param im: (cropped) RGB image used for the visualizations.
    :param n_ctxt: number of contexts (clusters) requested.
    :param wsize: side of the square windows.
    :param pfx: prefix of all the output files.
    :param row_shift: offset added to the row coordinates before saving.
    :param col_shift: offset added to the column coordinates before saving.
    """
    # As in the original code, the window grid is always derived from the
    # shape of the first plane (H); all planes come from the same image.
    grid_shape = planes[0][2].shape

    for tag, banner, plane in planes:
        print(banner)
        windows = sliding_window(grid_shape, (wsize, wsize), step=(wsize, wsize))
        dsc = get_local_desc(plane, descriptor, windows, desc_label)
        linkage = average(pdist_fn(dsc))
        labels = fcluster(linkage, t=n_ctxt, criterion='maxclust')  # get the various contexts

        # The coordinates are expressed in the cropped image system: add back
        # the shift. Per the original comments, columns 0:4 hold
        # [row_min, row_max, col_min, col_max], so both bounds of each axis
        # must be shifted.
        coords = desc_to_matrix(dsc, desc_label)
        coords[:, 0:2] += row_shift
        coords[:, 2:4] += col_shift
        table = np.hstack((np.matrix(labels).transpose(), coords))
        np.savetxt(pfx + '_' + desc_label + '_' + tag + '.dat', table, delimiter="\t")

        # save visualizations, one image per context
        for k in range(1, 1 + n_ctxt):
            members = np.where(labels == k)[0]
            rois = [dsc[j]['roi'] for j in members]
            highlighted = enhance_patches(im, rois)
            imsave(pfx + '_' + desc_label + '_' + tag + '_' + str(k) + '.ppm', highlighted)

    print("OK")


def main():
    """Command-line entry point: segment rectangular contexts from an H&E image.

    Parses the command line, reads the image, restricts it to the requested
    region, crops it (centered) to a multiple of the window size and saves the
    crop as ``<prefix>_cropped.ppm``. Then, for each requested descriptor
    family (``--gabor``, ``--haralick``, ``--lbp``, ``--mfs``), the windows of
    the H plane (and of the E plane when ``--eosine`` is given) are clustered
    and the resulting contexts are saved.

    :raises ValueError: if the requested region cannot hold a single window.
    """
    p = opt.ArgumentParser(description="""
            Segments a number of rectangular contexts from a H&E slide. The contexts are clusters
            of similar regions of the image. The similarity is based on various textural
            descriptors.
            """)
    p.add_argument('img_file', action='store', help='RGB image file')
    p.add_argument('ctxt', action='store', help='Number of contexts to extract', type=int)
    p.add_argument('wsize', action='store', help='Size of the (square) regions', type=int)
    p.add_argument('--prefix', action='store',
                   help='optional prefix for the resulting files', default=None)
    p.add_argument('--gabor', action='store_true',
                   help='compute Gabor descriptors and generate the corresponding contexts')
    p.add_argument('--lbp', action='store_true',
                   help='compute LBP (local binary patterns) descriptors and generate the corresponding contexts')
    p.add_argument('--mfs', action='store_true',
                   help='compute fractal descriptors and generate the corresponding contexts')
    p.add_argument('--haralick', action='store_true',
                   help='compute Haralick descriptors and generate the corresponding contexts')
    p.add_argument('--row_min', action='store', type=int,
                   help='start row (rows start at 0)', default=0)
    p.add_argument('--col_min', action='store', type=int,
                   help='start column (columns start at 0)', default=0)
    p.add_argument('--row_max', action='store', type=int,
                   help='end row (maximum: image height-1)', default=0)
    p.add_argument('--col_max', action='store', type=int,
                   help='end column (maximum: image width-1)', default=0)
    p.add_argument('--eosine', action='store_true',
                   help='should also Eosine component be processed?')

    args = p.parse_args()

    # Default prefix: the image file name without its last extension (if any).
    if args.prefix is not None:
        pfx = args.prefix
    else:
        pfx = os.path.basename(args.img_file).rsplit('.', 1)[0]

    im = imread(args.img_file)
    print("Original image size:", im.shape)

    # Clamp the requested region to the image; a maximum of 0 (the default)
    # means "up to the last row/column".
    row_min = min(max(args.row_min, 0), im.shape[0] - 2)
    col_min = min(max(args.col_min, 0), im.shape[1] - 2)
    row_max = max(min(args.row_max, im.shape[0] - 1), 0)
    col_max = max(min(args.col_max, im.shape[1] - 1), 0)

    if row_max == 0:
        row_max = im.shape[0] - 1
    if col_max == 0:
        col_max = im.shape[1] - 1

    # The bounds are inclusive, hence the region spans (max - min + 1) pixels.
    if (row_max - row_min + 1 < args.wsize
            or col_max - col_min + 1 < args.wsize):
        raise ValueError('Window size too large for requested image size.')

    im = im[row_min:row_max + 1, col_min:col_max + 1, :]

    # crop the image to multiple of wsize (centered); integer arithmetic keeps
    # the slice bounds integral
    nh = im.shape[0] // args.wsize
    nw = im.shape[1] // args.wsize
    dh = (im.shape[0] - nh * args.wsize) // 2
    dw = (im.shape[1] - nw * args.wsize) // 2

    im = im[dh:dh + nh * args.wsize, dw:dw + nw * args.wsize, :]
    print("Image cropped to:", im.shape)
    imsave(pfx + '_cropped.ppm', im)

    # get the H and E planes:
    h, e, _ = rgb2he2(im)

    planes = [('h', "------------> H plane", h)]
    if args.eosine:
        planes.append(('e', "------------> E plane", e))

    # Shift from the cropped image system back to the original image system.
    row_shift = row_min + dh
    col_shift = col_min + dw

    if args.gabor:
        print("---------> Gabor descriptors:")
        _cluster_and_save(GaborDescriptor(), 'gabor', pdist_gabor, planes, im,
                          args.ctxt, args.wsize, pfx, row_shift, col_shift)

    if args.haralick:
        print("---------> Haralick descriptors:")
        # NOTE(review): the Haralick descriptors are compared with
        # pdist_gabor, exactly as in the original code - confirm that this is
        # intended and not a copy/paste leftover.
        _cluster_and_save(GLCMDescriptor(), 'haralick', pdist_gabor, planes, im,
                          args.ctxt, args.wsize, pfx, row_shift, col_shift)

    if args.lbp:
        print("---------> LBP descriptors:")
        _cluster_and_save(LBPDescriptor(), 'lbp', pdist_lbp, planes, im,
                          args.ctxt, args.wsize, pfx, row_shift, col_shift)

    if args.mfs:
        print("---------> MFS descriptors:")
        _cluster_and_save(MFSDescriptor(), 'mfs', pdist_mfs, planes, im,
                          args.ctxt, args.wsize, pfx, row_shift, col_shift)