def extract_descriptors_he(_img, w_size, _ncpus=None):
    """
    EXTRACT_DESCRIPTORS_HE: extracts a set of local descriptors of the image:
        - histogram of Hue values
        - histogram of haematoxylin and eosin planes
        - Gabor descriptors in haematoxylin and eosin spaces, respectively
        - local binary patterns in haematoxylin and eosin spaces, respectively

    :param _img: numpy.ndarray
    :param w_size: int
    :param _ncpus: int or None (number of worker processes; None lets the executor decide)
    :return: list
    """
    assert _img.ndim == 3

    # non-overlapping windows:
    img_iterator = sliding_window(_img.shape[:-1], (w_size, w_size),
                                  step=(w_size, w_size))

    gabor = GaborDescriptor()
    lbp = LBPDescriptor()

    hsv = rgb2hsv(_img)
    h, e, _ = rgb2he2(_img)

    res = []
    with ProcessPoolExecutor(max_workers=_ncpus) as executor:
        for w_coords in img_iterator:
            res.append(executor.submit(_worker2, hsv[:, :, 0], h, e,
                                       gabor, lbp, w_coords))

    desc = []
    for f in as_completed(res):
        desc.append(f.result())

    return desc
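A minimal usage sketch for the extractor above (the file name, window size and number of workers are made-up values; the descriptor classes and helpers it relies on are assumed to be importable from this package). The __main__ guard matters because ProcessPoolExecutor may need to re-import the module in its worker processes:

    from skimage.io import imread

    if __name__ == '__main__':
        img = imread('slide_crop.ppm')       # hypothetical RGB crop of a slide
        # 32x32 non-overlapping windows, 4 worker processes (both assumptions):
        descriptors = extract_descriptors_he(img, 32, _ncpus=4)
        print(len(descriptors), 'windows described')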
def tissue_region_from_rgb(_img, _min_area=150, _g_th=None):
    """
    TISSUE_REGION_FROM_RGB detects the region(s) of the image containing the
    tissue. The original image is supposed to represent a haematoxylin-eosin
    -stained pathology slide.

    The main purpose of this function is to detect the parts of a large image
    which most probably contain tissue material, and to discard the background.

    Usage:
        tissue_mask, g_th = tissue_region_from_rgb(img, _min_area=150, _g_th=None)

    Args:
        img (numpy.ndarray): the original image in RGB color space
        _min_area (int, default: 150): any object with an area smaller than
            the indicated value will be discarded
        _g_th (int, default: None): the processing is done on the GREEN channel
            and all pixels below _g_th are considered candidates for "tissue
            pixels". If no value is given for _g_th, one is computed by K-Means
            clustering (K=2), and is returned.

    Returns:
        numpy.ndarray: a binary image containing the mask of the regions
            considered to represent tissue fragments
        int: threshold used for the GREEN channel
    """
    if _g_th is None:
        # Apply vector quantization to remove the "white" background - work in
        # the green channel:
        vq = MiniBatchKMeans(n_clusters=2)
        _g_th = int(np.round(0.95 * np.max(
            vq.fit(_G(_img).reshape((-1, 1))).cluster_centers_.squeeze())))

    mask = _G(_img) < _g_th

    skm.binary_closing(mask, skm.disk(3), out=mask)
    mask = img_as_bool(mask)
    mask = skm.remove_small_objects(mask, min_size=_min_area, in_place=True)

    # Some hand-picked rules for a region to be considered tissue:
    # - haematoxylin or eosin value above its 5th percentile
    # - residual ("other components") value below its 50th percentile
    h, e, b = rgb2he2(_img)

    mask &= (h > np.percentile(h, 5)) | (e > np.percentile(e, 5))
    mask &= (b < np.percentile(b, 50))  # at most at 50% of "other components"

    mask = mh.close_holes(mask)

    return img_as_bool(mask), _g_th
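When no green-channel threshold is supplied, the function derives one by clustering the green intensities into two groups and placing the cut just below the brighter ("background") cluster center. A minimal, self-contained sketch of that single step, on synthetic data (the 0.95 factor is the one used above; the image values are made up):

    import numpy as np
    from sklearn.cluster import MiniBatchKMeans

    def green_channel_threshold(green, factor=0.95):
        # 2-cluster K-Means on the green intensities; threshold at `factor`
        # times the brighter cluster center, as in tissue_region_from_rgb()
        vq = MiniBatchKMeans(n_clusters=2)
        vq.fit(green.reshape((-1, 1)))
        return int(np.round(factor * np.max(vq.cluster_centers_.squeeze())))

    # dark "tissue" patch on a bright "background"
    g = np.full((100, 100), 230, dtype=np.uint8)
    g[30:70, 30:70] = 120
    print(green_channel_threshold(g))        # roughly 0.95 * 230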
def main(): p = opt.ArgumentParser(description=""" Segments a number of rectangular contexts from a H&E slide. The contexts are clusters of similar regions of the image. The similarity is based on various textural descriptors. """) p.add_argument( 'meta_file', action='store', help='XML file describing the structure of the imported file') p.add_argument('scale', action='store', help='which of the scales to be processed') p.add_argument('ctxt', action='store', help='number of contexts to extract', type=int) p.add_argument('wsize', action='store', help='size of the (square) regions', type=int) p.add_argument('--prefix', action='store', help='optional prefix for the resulting files', default=None) p.add_argument( '--gabor', action='store_true', help='compute Gabor descriptors and generate the corresponding contexts' ) p.add_argument( '--lbp', action='store_true', help= 'compute LBP (local binary patterns) descriptors and generate the corresponding contexts' ) p.add_argument( '--mfs', action='store_true', help= 'compute fractal descriptors and generate the corresponding contexts') p.add_argument( '--haralick', action='store_true', help= 'compute Haralick descriptors and generate the corresponding contexts') p.add_argument('--row_min', action='store', type=int, help='start row (rows start at 0)', default=0) p.add_argument('--col_min', action='store', type=int, help='start column (columns start at 0)', default=0) p.add_argument('--row_max', action='store', type=int, help='end row (maximum: image height-1)', default=0) p.add_argument('--col_max', action='store', type=int, help='end column (maximum: image width-1)', default=0) p.add_argument('--eosine', action='store_true', help='should also Eosine component be processed?') args = p.parse_args() xml_file = ET.parse(args.meta_file) xml_root = xml_file.getroot() # find the name of the image: base_name = os.path.basename(xml_root.find('file').text).split('.') if len(base_name) > 1: # at least 1 suffix .ext base_name.pop() # drop the extension base_name = '.'.join( base_name) # reassemble the rest of the list into file name if args.prefix is not None: pfx = args.prefix else: pfx = base_name path = os.path.dirname(args.meta_file) # Check if the required scale exists: vrs = [ _x for _x in xml_root.findall('version') if _x.find('scale').text == args.scale ] if len(vrs) == 0: raise ValueError('The requested scale does not exits.') if len(vrs) > 1: raise ValueError('Inconsistency detected for the requested scale.') all_tiles = vrs[0].findall('tile') # get the info about full image: im_width = int(xml_root.find('original/width').text) im_height = int(xml_root.find('original/height').text) row_min = min(max(args.row_min, 0), im_height - 2) col_min = min(max(args.col_min, 0), im_width - 2) row_max = max(min(args.row_max, im_height - 1), 0) col_max = max(min(args.col_max, im_width - 1), 0) if row_max == 0: row_max = im_height - 1 if col_max == 0: col_max = im_width - 1 if row_max - row_min < args.wsize or col_max - col_min < args.wsize: raise ValueError('Window size too large for requested image size.') # keep only the tiles that overlap with the specified region tiles = [ tl.attrib for tl in all_tiles if int(tl.attrib['x1']) >= col_min and col_max >= int(tl.attrib['x0']) and int(tl.attrib['y1']) >= row_min and row_max >= int(tl.attrib['y0']) ] ## print("ROI covers", len(tiles), "tiles") # Sort the tiles from top to bottom and left to right. 
# -get all the (i,j) indices of the tiles: rx = re.compile(r'[_.]') ij = np.array([map(int, rx.split(t['name'])[1:3]) for t in tiles]) # -find i_min, i_max, j_min and j_max. Since the tiles are consecutive # (on row and column), these are enough to generate the desired order: tile_i_min, tile_j_min = ij.min(axis=0) tile_i_max, tile_j_max = ij.max(axis=0) row_offset = 0 for i in range(tile_i_min, tile_i_max + 1): col_offset = 0 for j in range(tile_j_min, tile_j_max + 1): # double-check that tile_i_j is in the list of tiles: idx = map(lambda _x, _y: _x['name'] == _y, tiles, len(tiles) * ['tile_' + str(i) + '_' + str(j) + '.ppm']) if not any(idx): raise RuntimeError("Missing tile" + 'tile_' + str(i) + '_' + str(j) + '.ppm') tile = tiles[idx.index(True)] ## print("Current tile:", tile['name']) # Idea: the current tile (i,j) might need to be extended with a stripe # of maximum args.wsize to the left and bottom. So we load (if they # are available) the tiles (i,j+1), (i+1,j) and (i+1,j+1) and extend # the current tile... # a tile from the image is in <path>/<scale>/tile_i_j.ppm im = imread(path + '/' + str(args.scale) + '/' + tile['name']) tile_height, tile_width, _ = im.shape ## print("Tile size:", tile_height, "x", tile_width) # The scanning (sliding) windows will start at (row_offset, col_offset) # (in this tile's coordinate system). We want to have an integer number # of windows so, if needed (and possible) we will extend the current # tile with a block of pixels from the neighboring tiles. # number of windows on the horizontal need_expand_right = False right_pad = 0 right_tile = None if j < tile_j_max: # then we could eventually expand if (tile_width - col_offset) % args.wsize != 0: need_expand_right = True nh = int(mh.ceil((tile_width - col_offset) / args.wsize)) right_pad = nh * args.wsize - (tile_width - col_offset) tile_name = 'tile_' + str(i) + '_' + str(j + 1) + '.ppm' idx = map(lambda _x, _y: _x['name'] == _y, tiles, len(tiles) * [tile_name]) assert (any(idx)) right_tile = tiles[idx.index(True)] # number of windows on the vertical need_expand_bot = False bot_pad = 0 bot_tile = None if i < tile_i_max: if (tile_height - row_offset) % args.wsize != 0: need_expand_bot = True nv = int(mh.ceil((tile_height - row_offset) / args.wsize)) bot_pad = nv * args.wsize - (tile_height - row_offset) tile_name = 'tile_' + str(i + 1) + '_' + str(j) + '.ppm' idx = map(lambda _x, _y: _x['name'] == _y, tiles, len(tiles) * [tile_name]) assert (any(idx)) bot_tile = tiles[idx.index(True)] ## print("Expand: right=", need_expand_right, "bottom=", need_expand_bot) ## print("...by: right=", right_pad, "bottom=", bot_pad, "pixels") rb_tile = None if need_expand_right and need_expand_bot: # this MUST exist if the right and bottom tiles above exist: tile_name = 'tile_' + str(i + 1) + '_' + str(j + 1) + '.ppm' idx = map(lambda _x, _y: _x['name'] == _y, tiles, len(tiles) * [tile_name]) assert (any(idx)) rb_tile = tiles[idx.index(True)] ## if right_tile is not None: ## print("Expansion tile right:", right_tile['name']) ## if bot_tile is not None: ## print("Expansion tile bottom:", bot_tile['name']) ## if rb_tile is not None: ## print("Expansion tile bottom-right:", rb_tile['name']) # expand the image to the right and bottom only if there is a neighboring tile in # that direction r = 1 if right_tile is not None else 0 b = 1 if bot_tile is not None else 0 next_row_offset, next_col_offset = 0, 0 if r + b > 0: # we need to (and we can) pad the image with pixels from neighbors # Enlarge the image to the right and bottom: # 
The following line gives an error. (TypeError: 'unicode' object is not callable) Why? # im = np.pad(im, ((0, bot_pad), (0, right_pad), (0, 0)), mode='constant') im_tmp = np.zeros((tile_height + b * bot_pad, tile_width + r * right_pad, im.shape[2])) im_tmp[0:tile_height, 0:tile_width, :] = im im = im_tmp if right_tile is not None: # a tile from the image is in <path>/<scale>/tile_i_j.ppm im_tmp = imread(path + '/' + str(args.scale) + '/' + right_tile['name']) im[0:tile_height, tile_width:tile_width + right_pad, :] = im_tmp[0:tile_height, 0:right_pad, :] next_col_offset = right_pad if bot_tile is not None: # a tile from the image is in <path>/<scale>/tile_i_j.ppm im_tmp = imread(path + '/' + str(args.scale) + '/' + bot_tile['name']) im[tile_height:tile_height + bot_pad, 0:tile_width, :] = im_tmp[0:bot_pad, 0:tile_width, :] next_row_offset = bot_pad if rb_tile is not None: # a tile from the image is in <path>/<scale>/tile_i_j.ppm im_tmp = imread(path + '/' + str(args.scale) + '/' + rb_tile['name']) im[tile_height:tile_height + bot_pad, tile_width:tile_width + right_pad, :] = im_tmp[0:bot_pad, 0:right_pad, :] im_tmp = None # discard # From the current tile (padded), we need to process the region # (row_offset, col_offset) -> (im.height, im.width) (with new # height and width). But there might still be some restrictions # due to the region of interest (row_min, col_min) -> (row_max, col_max). # These last coordinates are in global coordinate system! So, first we # convert them to (rmn, cmn) -> (rmx, cmx), and lower bound them to # the offset: rmn = max(row_min - int(tile['y0']), row_offset) rmx = min(row_max - int(tile['y0']) + 1, im.shape[0]) cmn = max(col_min - int(tile['x0']), col_offset) cmx = min(col_max - int(tile['x0']) + 1, im.shape[1]) ## print("Final region of the image:", rmn, rmx, cmn, cmx) im = im[rmn:rmx, cmn:cmx, :] # image to process # tile contains the real coordinates of the region in the image crt_row_min = int(tile['y0']) crt_col_min = int(tile['x0']) col_offset = next_col_offset ## print("Next offsets:", row_offset, col_offset) ## print("=======================================================") ## print("=======================================================") # Finally, we have the image for analysis. Don't forget to transform the coordinates # from current tile system to global image system when saving the results. if im.shape[0] < args.wsize or im.shape[1] < args.wsize: # (what is left of the) tile is smaller than the window size continue # get the H and E planes: h, e, _ = rgb2he2(im) if args.gabor: print("---------> Gabor descriptors:") g = GaborDescriptor() desc_label = 'gabor' print("------------> H plane") # on H-plane: img_iterator = sliding_window(h.shape, (args.wsize, args.wsize), step=(args.wsize, args.wsize)) dsc = get_local_desc(h, g, img_iterator, desc_label) id = np.zeros((1, len(dsc))) # we do not cluster here... 
# save clustering/contexts - remember, the coordinates are in the # current tile/image system -> should add back the shift z1 = desc_to_matrix( dsc, desc_label) # col 0: row_min, col 2: col_min z1[:, 0:2] += crt_row_min + rmn z1[:, 2:4] += crt_col_min + cmn z2 = np.matrix(id).transpose() z2 = np.hstack((z2, z1)) np.savetxt(pfx + '_' + tile['name'] + '_' + desc_label + '_h.dat', z2, delimiter="\t") if args.eosine: # repeat on E plane: print("------------> E plane") img_iterator = sliding_window(h.shape, (args.wsize, args.wsize), step=(args.wsize, args.wsize)) dsc = get_local_desc(e, g, img_iterator, desc_label) id = np.zeros((1, len(dsc))) # we do not cluster here... # save clustering/contexts - remember, the coordinates are in the # current tile/image system -> should add back the shift z1 = desc_to_matrix( dsc, desc_label) # col 0: row_min, col 2: col_min z1[:, 0:2] += crt_row_min + rmn z1[:, 2:4] += crt_col_min + cmn z2 = np.matrix(id).transpose() z2 = np.hstack((z2, z1)) np.savetxt(pfx + '_' + tile['name'] + '_' + desc_label + '_e.dat', z2, delimiter="\t") print("OK") # end for j... row_offset = next_row_offset # end for i.... return
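Regarding the np.pad call flagged above (the commented-out line that raised TypeError: 'unicode' object is not callable): a plausible explanation, assuming the script runs on Python 2 with an older NumPy release, is that np.pad in those versions only recognised a mode given as a byte string (an isinstance(mode, str) check); a unicode mode string, for instance when from __future__ import unicode_literals is in effect, fell through to the branch that treats mode as a user-supplied padding function and tried to call it. The sketch below reproduces that situation and shows the usual workarounds; the array sizes are made up, and the manual np.zeros copy used in the script above remains a valid fallback:

    from __future__ import unicode_literals
    import numpy as np

    im = np.zeros((4, 4, 3))
    try:
        # 'constant' is a unicode literal here because of unicode_literals
        im = np.pad(im, ((0, 2), (0, 3), (0, 0)), mode='constant')
    except TypeError as exc:
        print(exc)   # on affected versions: 'unicode' object is not callable
        # workarounds: pass a byte string, or upgrade NumPy
        im = np.pad(im, ((0, 2), (0, 3), (0, 0)), mode=str('constant'))
    print(im.shape)  # (6, 7, 3)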
def main(): p = opt.ArgumentParser(description=""" Segments a number of rectangular contexts from a H&E slide. The contexts are clusters of similar regions of the image. The similarity is based on various textural descriptors. """) p.add_argument('img_file', action='store', help='RGB image file') p.add_argument('ctxt', action='store', help='Number of contexts to extract', type=int) p.add_argument('wsize', action='store', help='Size of the (square) regions', type=int) p.add_argument( 'roi', action='store', help='a file with ROI coordinates (and context descriptors)') p.add_argument('label', action='store', help='the cluster label of interest') p.add_argument('--prefix', action='store', help='optional prefix for the resulting files', default=None) p.add_argument( '--gabor', action='store_true', help='compute Gabor descriptors and generate the corresponding contexts' ) p.add_argument( '--lbp', action='store_true', help= 'compute LBP (local binary patterns) descriptors and generate the corresponding contexts' ) p.add_argument( '--mfs', action='store_true', help= 'compute fractal descriptors and generate the corresponding contexts') p.add_argument('--eosine', action='store_true', help='should also Eosine component be processed?') p.add_argument('--scale', action='store', type=float, default=1.0, help='scaling factor for ROI coordinates') args = p.parse_args() base_name = os.path.basename(args.img_file).split('.') if len(base_name) > 1: # at least 1 suffix .ext base_name.pop() # drop the extension base_name = '.'.join( base_name) # reassemble the rest of the list into file name if args.prefix is not None: pfx = args.prefix else: pfx = base_name ROIs = [] for l in file(args.roi).readlines(): # extract the coordinates and the label from each ROI # (one per row): lb, row_min, row_max, col_min, col_max = map(lambda _x: int(float(_x)), l.split('\t')[1:5]) row_min = int(mh.floor(row_min * args.scale)) row_max = int(mh.floor(row_max * args.scale)) col_min = int(mh.floor(col_min * args.scale)) col_max = int(mh.floor(col_max * args.scale)) if lb == args.label: ROIs.append([row_min, row_max, col_min, col_max]) im = imread(args.img_file) print("Original image size:", im.shape) # get the H and E planes: h, e, _ = rgb2he2(im) if args.gabor: print("---------> Gabor descriptors:") g = GaborDescriptor() desc_label = 'gabor' print("------------> H plane") # on H-plane: img_iterator = sliding_window_on_regions(h.shape, ROIs, (args.wsize, args.wsize), step=(args.wsize, args.wsize)) dsc = get_local_desc(h, g, img_iterator, desc_label) dst = pdist_gabor(dsc) cl = average(dst) id = fcluster(cl, t=args.ctxt, criterion='maxclust') # get the various contexts # save clustering/contexts - remember, the coordinates are in the # current image system which might have been cropped from the original -> # should add back the shift z1 = desc_to_matrix(dsc, desc_label) # col 0: row_min, col 2: col_min z1[:, 0] += row_min + dh z1[:, 2] += col_min + dw z2 = np.matrix(id).transpose() z2 = np.hstack((z2, z1)) np.savetxt(pfx + '_' + desc_label + '_h.dat', z2, delimiter="\t") # save visualizations for k in range(1, 1 + args.ctxt): i = np.where(id == k)[0] p = [dsc[j]['roi'] for j in i] im2 = enhance_patches(im, p) imsave(pfx + '_' + desc_label + '_h_' + str(k) + '.ppm', im2) if args.eosine: # repeat on E plane: print("------------> E plane") img_iterator = sliding_window_on_regions(h.shape, ROIs, (args.wsize, args.wsize), step=(args.wsize, args.wsize)) dsc = get_local_desc(e, g, img_iterator, desc_label) dst = pdist_gabor(dsc) cl = 
average(dst) id = fcluster(cl, t=args.ctxt, criterion='maxclust') # get the various contexts # save clustering/contexts - remember, the coordinates are in the # current image system which might have been cropped from the original -> # should add back the shift z1 = desc_to_matrix(dsc, desc_label) # col 0: row_min, col 2: col_min z1[:, 0] += row_min + dh z1[:, 2] += col_min + dw z2 = np.matrix(id).transpose() z2 = np.hstack((z2, z1)) np.savetxt(pfx + '_' + desc_label + '_e.dat', z2, delimiter="\t") # save visualizations for k in range(1, 1 + args.ctxt): i = np.where(id == k)[0] p = [dsc[j]['roi'] for j in i] im2 = enhance_patches(im, p) imsave(pfx + '_' + desc_label + '_e_' + str(k) + '.ppm', im2) print("OK") if args.haralick: print("---------> Haralick descriptors:") g = GLCMDescriptor() desc_label = 'haralick' print("------------> H plane") # on H-plane: img_iterator = sliding_window_on_regions(h.shape, ROIs, (args.wsize, args.wsize), step=(args.wsize, args.wsize)) dsc = get_local_desc(h, g, img_iterator, desc_label) dst = pdist_gabor(dsc) cl = average(dst) id = fcluster(cl, t=args.ctxt, criterion='maxclust') # get the various contexts # save clustering/contexts - remember, the coordinates are in the # current image system which might have been cropped from the original -> # should add back the shift z1 = desc_to_matrix(dsc, desc_label) # col 0: row_min, col 2: col_min z1[:, 0] += row_min + dh z1[:, 2] += col_min + dw z2 = np.matrix(id).transpose() z2 = np.hstack((z2, z1)) np.savetxt(pfx + '_' + desc_label + '_h.dat', z2, delimiter="\t") # save visualizations for k in range(1, 1 + args.ctxt): i = np.where(id == k)[0] p = [dsc[j]['roi'] for j in i] im2 = enhance_patches(im, p) imsave(pfx + '_' + desc_label + '_h_' + str(k) + '.ppm', im2) if args.eosine: # repeat on E plane: print("------------> E plane") img_iterator = sliding_window_on_regions(h.shape, ROIs, (args.wsize, args.wsize), step=(args.wsize, args.wsize)) dsc = get_local_desc(e, g, img_iterator, desc_label) dst = pdist_gabor(dsc) cl = average(dst) id = fcluster(cl, t=args.ctxt, criterion='maxclust') # get the various contexts # save clustering/contexts - remember, the coordinates are in the # current image system which might have been cropped from the original -> # should add back the shift z1 = desc_to_matrix(dsc, desc_label) # col 0: row_min, col 2: col_min z1[:, 0] += row_min + dh z1[:, 2] += col_min + dw z2 = np.matrix(id).transpose() z2 = np.hstack((z2, z1)) np.savetxt(pfx + '_' + desc_label + '_e.dat', z2, delimiter="\t") # save visualizations for k in range(1, 1 + args.ctxt): i = np.where(id == k)[0] p = [dsc[j]['roi'] for j in i] im2 = enhance_patches(im, p) imsave(pfx + '_' + desc_label + '_e_' + str(k) + '.ppm', im2) print("OK") if args.lbp: print("---------> LBP descriptors:") g = LBPDescriptor() desc_label = 'lbp' # on H-plane: print("------------> H plane") img_iterator = sliding_window_on_regions(h.shape, ROIs, (args.wsize, args.wsize), step=(args.wsize, args.wsize)) dsc = get_local_desc(h, g, img_iterator, desc_label) dst = pdist_lbp(dsc) cl = average(dst) id = fcluster(cl, t=args.ctxt, criterion='maxclust') # get the various contexts # save clustering/contexts - remember, the coordinates are in the # current image system which might have been cropped from the original -> # should add back the shift z1 = desc_to_matrix(dsc, desc_label) # col 0: row_min, col 2: col_min z1[:, 0] += row_min + dh z1[:, 2] += col_min + dw z2 = np.matrix(id).transpose() z2 = np.hstack((z2, z1)) np.savetxt(pfx + '_' + desc_label + 
'_h.dat', z2, delimiter="\t") # save visualizations for k in range(1, 1 + args.ctxt): i = np.where(id == k)[0] p = [dsc[j]['roi'] for j in i] im2 = enhance_patches(im, p) imsave(pfx + '_' + desc_label + '_h_' + str(k) + '.ppm', im2) if args.eosine: # repeat on E plane: print("------------> E plane") img_iterator = sliding_window_on_regions(h.shape, ROIs, (args.wsize, args.wsize), step=(args.wsize, args.wsize)) dsc = get_local_desc(e, g, img_iterator, desc_label) dst = pdist_lbp(dsc) cl = average(dst) id = fcluster(cl, t=args.ctxt, criterion='maxclust') # get the various contexts # save clustering/contexts - remember, the coordinates are in the # current image system which might have been cropped from the original -> # should add back the shift z1 = desc_to_matrix(dsc, desc_label) # col 0: row_min, col 2: col_min z1[:, 0] += row_min + dh z1[:, 2] += col_min + dw z2 = np.matrix(id).transpose() z2 = np.hstack((z2, z1)) np.savetxt(pfx + '_' + desc_label + '_e.dat', z2, delimiter="\t") # save visualizations for k in range(1, 1 + args.ctxt): i = np.where(id == k)[0] p = [dsc[j]['roi'] for j in i] im2 = enhance_patches(im, p) imsave(pfx + '_' + desc_label + '_e_' + str(k) + '.ppm', im2) print("OK") if args.mfs: print("---------> MFS descriptors:") g = MFSDescriptor() desc_label = 'mfs' # on H-plane: print("------------> H plane") img_iterator = sliding_window_on_regions(h.shape, ROIs, (args.wsize, args.wsize), step=(args.wsize, args.wsize)) dsc = get_local_desc(h, g, img_iterator, desc_label) dst = pdist_mfs(dsc) cl = average(dst) id = fcluster(cl, t=args.ctxt, criterion='maxclust') # get the various contexts # save clustering/contexts # save clustering/contexts - remember, the coordinates are in the # current image system which might have been cropped from the original -> # should add back the shift z1 = desc_to_matrix(dsc, desc_label) # col 0: row_min, col 2: col_min z1[:, 0] += row_min + dh z1[:, 2] += col_min + dw z2 = np.matrix(id).transpose() z2 = np.hstack((z2, z1)) np.savetxt(pfx + '_' + desc_label + '_h.dat', z2, delimiter="\t") # save visualizations for k in range(1, 1 + args.ctxt): i = np.where(id == k)[0] p = [dsc[j]['roi'] for j in i] im2 = enhance_patches(im, p) imsave(pfx + '_' + desc_label + '_h_' + str(k) + '.ppm', im2) if args.eosine: # repeat on E plane: print("------------> E plane") img_iterator = sliding_window_on_regions(h.shape, ROIs, (args.wsize, args.wsize), step=(args.wsize, args.wsize)) dsc = get_local_desc(e, g, img_iterator, desc_label) dst = pdist_mfs(dsc) cl = average(dst) id = fcluster(cl, t=args.ctxt, criterion='maxclust') # get the various contexts # save clustering/contexts - remember, the coordinates are in the # current image system which might have been cropped from the original -> # should add back the shift z1 = desc_to_matrix(dsc, desc_label) # col 0: row_min, col 2: col_min z1[:, 0] += row_min + dh z1[:, 2] += col_min + dw z2 = np.matrix(id).transpose() z2 = np.hstack((z2, z1)) np.savetxt(pfx + '_' + desc_label + '_e.dat', z2, delimiter="\t") # save visualizations for k in range(1, 1 + args.ctxt): i = np.where(id == k)[0] p = [dsc[j]['roi'] for j in i] im2 = enhance_patches(im, p) imsave(pfx + '_' + desc_label + '_e_' + str(k) + '.ppm', im2) print("OK") return
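The per-plane processing above always follows the same pattern: compute one descriptor per sliding window, build a pairwise distance matrix (pdist_gabor, pdist_lbp or pdist_mfs), run average-linkage hierarchical clustering, and cut the tree into at most ctxt clusters. A minimal, self-contained sketch of that clustering step, with plain Euclidean distances from scipy standing in for the project-specific pdist_* helpers and a synthetic feature matrix:

    import numpy as np
    from scipy.spatial.distance import pdist
    from scipy.cluster.hierarchy import average, fcluster

    np.random.seed(0)
    # toy feature matrix: one 8-dimensional descriptor per window, two "textures"
    X = np.vstack([np.random.normal(0.0, 1.0, (20, 8)),
                   np.random.normal(5.0, 1.0, (20, 8))])

    dst = pdist(X)                    # condensed pairwise distances (Euclidean here)
    cl = average(dst)                 # average-linkage hierarchical clustering
    labels = fcluster(cl, t=3, criterion='maxclust')   # 'id' in the script above
    print(np.bincount(labels))        # how many windows fall in each context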
def main(): p = opt.ArgumentParser(description=""" Assigns the regions of an image to the clusters of a codebook. """) p.add_argument('image', action='store', help='image file name') p.add_argument('config', action='store', help='a configuration file') p.add_argument( '-r', '--roi', action='store', nargs=4, type=int, help= 'region of interest from the image as: row_min row_max col_min col_max', default=None) args = p.parse_args() img_file = args.image cfg_file = args.config image_orig = skimage.io.imread(img_file) if image_orig.ndim == 3: im_h, _, _ = rgb2he2(image_orig) if args.roi is None: roi = (0, im_h.shape[0] - 1, 0, im_h.shape[1] - 1) else: roi = args.roi # Process configuration file: parser = SafeConfigParser() parser.read(cfg_file) if not parser.has_section('data'): raise RuntimeError('Section [data] is mandatory') wsize = (32, 32) if parser.has_option('data', 'window_size'): wsize = ast.literal_eval(parser.get('data', 'window_size')) if not parser.has_option('data', 'model'): raise RuntimeError('model file name is missing in [data] section') model_file = parser.get('data', 'model') with ModelPersistence(model_file, 'r', format='pickle') as mp: codebook = mp['codebook'] Xm = mp['shift'] Xs = mp['scale'] standardize = mp['standardize'] if parser.has_option('data', 'output'): out_file = parser.get('data', 'output') else: out_file = 'output.dat' descriptors = read_local_descriptors_cfg(parser) # For the moment, it is assumed tha only one type of local descriptors is # used - no composite feature vectors. This will change in the future but, # for the moment only the first type of descriptor in "descriptors" list # is used, and the codebook is assumed to be constructed using the same. desc = descriptors[0] print(img_file) print(wsize) print(roi[0], roi[1], roi[2], roi[3]) w_offset = (0, 0) if isinstance(desc, HaarLikeDescriptor): # this one works on integral images image = intg_image(im_h) # the sliding window should also be increased by 1: w_offset = (1, 1) wsize = (wsize[0] + w_offset[0], wsize[1] + w_offset[1]) else: image = im_h itw = sliding_window_on_regions(image.shape, [tuple(roi)], wsize, step=wsize) wnd = [] labels = [] buff_size = 10000 # every <buff_size> patches we do a classification X = np.zeros((buff_size, codebook.cluster_centers_[0].shape[0])) k = 0 if standardize: # placed here, to avoid testing inside the loop for r in itw: # adjust if needed: r2 = (r[0], r[1] - w_offset[1], r[2], r[3] - w_offset[0]) wnd.append(r2) X[k, :] = desc.compute(image[r[0]:r[1], r[2]:r[3]]) k += 1 if k == buff_size: X = (X - Xm) / Xs labels.extend(codebook.predict(X).tolist()) k = 0 # reset the block else: for r in itw: # adjust if needed: r2 = (r[0], r[1] - w_offset[1], r[2], r[3] - w_offset[0]) wnd.append(r2) X[k, :] = desc.compute(image[r[0]:r[1], r[2]:r[3]]) k += 1 if k == buff_size: labels.extend(codebook.predict(X).tolist()) k = 0 # reset the block if k != 0: # it means some data is accumulated in X but not yet classified if standardize: X[0:k + 1, ] = (X[0:k + 1, ] - Xm) / Xs labels.extend(codebook.predict(X[0:k + 1, ]).tolist()) with open(out_file, 'w') as f: n = len(wnd) # total number of descriptors of this type for k in range(n): s = '\t'.join([str(x_) for x_ in wnd[k]]) + '\t' + str(labels[k]) + '\n' f.write(s)
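A compact sketch of what the buffered loop above does per block: standardize the descriptors with the stored shift/scale and let the codebook assign each window to its nearest cluster. Everything below is made up (the real codebook, Xm and Xs come from the ModelPersistence file); it only illustrates the standardize-then-predict step. Incidentally, the trailing partial block is sliced as X[0:k + 1, ], which appears to include one more row than the k descriptors actually filled; X[0:k, ] would match exactly (the extra label is never written out, so the effect is harmless).

    import numpy as np
    from sklearn.cluster import MiniBatchKMeans

    rng = np.random.RandomState(0)

    # hypothetical codebook: 16 clusters over 32-dimensional local descriptors
    train = rng.rand(2000, 32)
    Xm, Xs = train.mean(axis=0), train.std(axis=0)   # the stored 'shift'/'scale'
    codebook = MiniBatchKMeans(n_clusters=16).fit((train - Xm) / Xs)

    # at assignment time, each block of window descriptors gets the same treatment:
    X = rng.rand(100, 32)                            # one row per window
    labels = codebook.predict((X - Xm) / Xs)
    print(labels[:10])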
def main(): p = opt.ArgumentParser(description=""" Constructs a dictionary for image representation based on a set of specified local descriptors. The dictionary is built from a set of images given as a list in an input file. """) p.add_argument('config', action='store', help='a configuration file') args = p.parse_args() cfg_file = args.config parser = SafeConfigParser() parser.read(cfg_file) #--------- # sampler: if not parser.has_section('sampler'): raise ValueError('"sampler" section is mandatory') if not parser.has_option('sampler', 'type'): raise ValueError('"sampler.type" is mandatory') tmp = parser.get('sampler', 'type').lower() if tmp not in ['random', 'sliding']: raise ValueError('Unkown sampling type') sampler_type = tmp if not parser.has_option('sampler', 'window_size'): raise ValueError('"sampler.window_size" is mandatory') wnd_size = ast.literal_eval(parser.get('sampler', 'window_size')) if type(wnd_size) != tuple: raise ValueError('"sampler.window_size" specification error') it_start = (0,0) it_step = (1,1) if sampler_type == 'sliding': if parser.has_option('sampler', 'start'): it_start = ast.literal_eval(parser.get('sampler','start')) if parser.has_option('sampler', 'step'): it_step = ast.literal_eval(parser.get('sampler','step')) nwindows = parser.getint('sampler', 'nwindows') local_descriptors = [] #--------- # haar: if parser.has_section('haar'): tmp = True if parser.has_option('haar', 'norm'): tmp = parser.getboolean('haar', 'norm') if len(parser.items('haar')) == 0: # empty section, use defaults h = HaarLikeDescriptor(HaarLikeDescriptor.haars1()) else: h = HaarLikeDescriptor([ast.literal_eval(v) for n, v in parser.items('haar') if n.lower() != 'norm'], _norm=tmp) local_descriptors.append(h) #--------- # identity: if parser.has_section('identity'): local_descriptors.append(IdentityDescriptor()) #--------- # stats: if parser.has_section('stats'): tmp = [] if parser.has_option('stats', 'mean') and parser.getboolean('stats', 'mean'): tmp.append('mean') if parser.has_option('stats', 'std') and parser.getboolean('stats', 'std'): tmp.append('std') if parser.has_option('stats', 'kurtosis') and parser.getboolean('stats', 'kurtosis'): tmp.append('kurtosis') if parser.has_option('stats', 'skewness') and parser.getboolean('stats', 'skewness'): tmp.append('skewness') if len(tmp) == 0: tmp = None local_descriptors.append(StatsDescriptor(tmp)) #--------- # hist: if parser.has_section('hist'): tmp = (0.0, 1.0) tmp2 = 10 if parser.has_option('hist', 'min_max'): tmp = ast.literal_eval(parser.get('hist', 'min_max')) if type(tmp) != tuple: raise ValueError('"hist.min_max" specification error') if parser.has_option('hist', 'nbins'): tmp2 = parser.getint('hist', 'nbins') local_descriptors.append(HistDescriptor(_interval=tmp, _nbins=tmp2)) #--------- # HoG if parser.has_section('hog'): tmp = 9 tmp2 = (128, 128) tmp3 = (4, 4) if parser.has_option('hog', 'norient'): tmp = parser.getint('hog', 'norient') if parser.has_option('hog', 'ppc'): tmp2 = ast.literal_eval(parser.get('hog', 'ppc')) if type(tmp2) != tuple: raise ValueError('"hog.ppc" specification error') if parser.has_option('hog', 'cpb'): tmp3 = ast.literal_eval(parser.get('hog', 'cpb')) if type(tmp3) != tuple: raise ValueError('"hog.cpb" specification error') local_descriptors.append(HOGDescriptor(_norient=tmp, _ppc=tmp2, _cpb=tmp3)) #--------- # LBP if parser.has_section('lbp'): tmp = 3 tmp2 = 8*tmp tmp3 = 'uniform' if parser.has_option('lbp', 'radius'): tmp = parser.getint('lbp', 'radius') if parser.has_option('lbp', 'npoints'): tmp2 
= parser.getint('lbp', 'npoints') if tmp2 == 0: tmp2 = 8* tmp if parser.has_option('lbp', 'method'): tmp3 = parser.get('lbp', 'method') local_descriptors.append(LBPDescriptor(radius=tmp, npoints=tmp2, method=tmp3)) #--------- # Gabor if parser.has_section('gabor'): tmp = np.array([0.0, np.pi / 4.0, np.pi / 2.0, 3.0 * np.pi / 4.0], dtype=np.double) tmp2 = np.array([3.0 / 4.0, 3.0 / 8.0, 3.0 / 16.0], dtype=np.double) tmp3 = np.array([1.0, 2 * np.sqrt(2.0)], dtype=np.double) if parser.has_option('gabor', 'theta'): tmp = ast.literal_eval(parser.get('gabor', 'theta')) if parser.has_option('gabor', 'freq'): tmp2 = ast.literal_eval(parser.get('gabor', 'freq')) if parser.has_option('gabor', 'sigma'): tmp3 = ast.literal_eval(parser.get('gabor', 'sigma')) local_descriptors.append(GaborDescriptor(theta=tmp, freq=tmp2, sigma=tmp3)) print('No. of descriptors: ', len(local_descriptors)) #--------- # data if not parser.has_section('data'): raise ValueError('Section "data" is mandatory.') data_path = parser.get('data', 'input_path') img_ext = parser.get('data', 'image_type') res_path = parser.get('data', 'output_path') img_files = glob.glob(data_path + '/*.' + img_ext) if len(img_files) == 0: return ## Process: sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0) # unbuferred output for img_name in img_files: print("Image: ", img_name, " ...reading... ", end='') im = imread(img_name) print("preprocessing... ", end='') # -preprocessing if im.ndim == 3: im_h, _, _ = rgb2he2(im) else: raise ValueError('Input image must be RGB.') # detect object region: # -try to load a precomputed mask: mask_file_name = data_path+'/mask/'+ \ os.path.splitext(os.path.split(img_name)[1])[0]+ \ '_tissue_mask.pbm' if os.path.exists(mask_file_name): print('(loading mask)...', end='') mask = imread(mask_file_name) mask = img_as_bool(mask) mask = remove_small_objects(mask, min_size=500, connectivity=1, in_place=True) else: print('(computing mask)...', end='') mask, _ = tissue_region_from_rgb(im, _min_area=500) row_min, col_min, row_max, col_max = bounding_box(mask) im_h[np.logical_not(mask)] = 0 # make sure background is 0 mask = None im = None im_h = im_h[row_min:row_max+1, col_min:col_max+1] print("growing the bag...", end='') # -image bag growing bag = None # bag for current image for d in local_descriptors: if bag is None: bag = grow_bag_from_new_image(im_h, d, wnd_size, nwindows, discard_empty=True) else: bag[d.name] = grow_bag_with_new_features(im_h, bag['regs'], d)[d.name] # save the results for each image, one file per descriptor desc_names = bag.keys() desc_names.remove('regs') # keep all keys but the regions # -save the ROI from the original image: res_file = res_path + '/' + 'roi-' + \ os.path.splitext(os.path.split(img_name)[1])[0] + '.dat' with open(res_file, 'w') as f: f.write('\t'.join([str(x_) for x_ in [row_min, row_max, col_min, col_max]])) for dn in desc_names: res_file = res_path + '/' + dn + '_bag-' + \ os.path.splitext(os.path.split(img_name)[1])[0] + '.dat' with open(res_file, 'w') as f: n = len(bag[dn]) # total number of descriptors of this type for i in range(n): s = '\t'.join([str(x_) for x_ in bag['regs'][i]]) + '\t' + \ '\t'.join([str(x_) for x_ in bag[dn][i]]) + '\n' f.write(s) print('OK') bag = None gc.collect() gc.collect()
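The dictionary-construction script above is driven entirely by an INI-style configuration file read with SafeConfigParser. A minimal example of what such a file could look like; the section and option names are the ones the parser checks for, while every value and path is a made-up placeholder (optional sections such as [haar], [identity], [stats], [hog] or [lbp] can be added in the same way):

    [sampler]
    type = sliding
    window_size = (64, 64)
    start = (0, 0)
    step = (32, 32)
    nwindows = 1000

    [gabor]
    theta = (0.0, 0.7853981633974483, 1.5707963267948966, 2.356194490192345)
    freq = (0.75, 0.375, 0.1875)
    sigma = (1.0, 2.8284271247461903)

    [hist]
    min_max = (0.0, 1.0)
    nbins = 16

    [data]
    input_path = /path/to/he_images
    image_type = ppm
    output_path = /path/to/bags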
def main():
    p = opt.ArgumentParser(description="""
        Segments a number of rectangular contexts from a H&E slide. The contexts
        are clusters of similar regions of the image. The similarity is based on
        various textural descriptors.
        """)
    p.add_argument('meta_file', action='store',
                   help='XML file describing the structure of the imported file')
    p.add_argument('scale', action='store',
                   help='which of the scales to be processed')
    p.add_argument('ctxt', action='store', type=int,
                   help='number of contexts to extract')
    p.add_argument('wsize', action='store', type=int,
                   help='size of the (square) regions')
    p.add_argument('--prefix', action='store', default=None,
                   help='optional prefix for the resulting files')
    p.add_argument('--gabor', action='store_true',
                   help='compute Gabor descriptors and generate the corresponding contexts')
    p.add_argument('--lbp', action='store_true',
                   help='compute LBP (local binary patterns) descriptors and generate the corresponding contexts')
    p.add_argument('--mfs', action='store_true',
                   help='compute fractal descriptors and generate the corresponding contexts')
    p.add_argument('--haralick', action='store_true',
                   help='compute Haralick descriptors and generate the corresponding contexts')
    p.add_argument('--row_min', action='store', type=int, default=0,
                   help='start row (rows start at 0)')
    p.add_argument('--col_min', action='store', type=int, default=0,
                   help='start column (columns start at 0)')
    p.add_argument('--row_max', action='store', type=int, default=0,
                   help='end row (maximum: image height-1)')
    p.add_argument('--col_max', action='store', type=int, default=0,
                   help='end column (maximum: image width-1)')
    p.add_argument('--eosine', action='store_true',
                   help='process the eosin component as well?')
    args = p.parse_args()

    xml_file = ET.parse(args.meta_file)
    xml_root = xml_file.getroot()

    # find the name of the image:
    base_name = os.path.basename(xml_root.find('file').text).split('.')
    if len(base_name) > 1:  # at least 1 suffix .ext
        base_name.pop()  # drop the extension
    base_name = '.'.join(base_name)  # reassemble the rest of the list into the file name

    if args.prefix is not None:
        pfx = args.prefix
    else:
        pfx = base_name

    path = os.path.dirname(args.meta_file)

    # Check if the required scale exists:
    vrs = [_x for _x in xml_root.findall('version')
           if _x.find('scale').text == args.scale]
    if len(vrs) == 0:
        raise ValueError('The requested scale does not exist.')
    if len(vrs) > 1:
        raise ValueError('Inconsistency detected for the requested scale.')
    all_tiles = vrs[0].findall('tile')

    # get the info about the full image:
    im_width = int(xml_root.find('original/width').text)
    im_height = int(xml_root.find('original/height').text)

    row_min = min(max(args.row_min, 0), im_height - 2)
    col_min = min(max(args.col_min, 0), im_width - 2)
    row_max = max(min(args.row_max, im_height - 1), 0)
    col_max = max(min(args.col_max, im_width - 1), 0)

    if row_max == 0:
        row_max = im_height - 1
    if col_max == 0:
        col_max = im_width - 1

    if row_max - row_min < args.wsize or col_max - col_min < args.wsize:
        raise ValueError('Window size too large for requested image size.')

    # keep only the tiles that overlap with the specified region
    tiles = [tl.attrib for tl in all_tiles
             if int(tl.attrib['x1']) >= col_min and col_max >= int(tl.attrib['x0'])
             and int(tl.attrib['y1']) >= row_min and row_max >= int(tl.attrib['y0'])]
    ## print("ROI covers", len(tiles), "tiles")

    # Sort the tiles from top to bottom and left to right.
    # -get all the (i,j) indices of the tiles:
    rx = re.compile(r'[_.]')
    ij = np.array([map(int, rx.split(t['name'])[1:3]) for t in tiles])
    # -find i_min, i_max, j_min and j_max. Since the tiles are consecutive
    # (on row and column), these are enough to generate the desired order:
    tile_i_min, tile_j_min = ij.min(axis=0)
    tile_i_max, tile_j_max = ij.max(axis=0)

    row_offset = 0
    for i in range(tile_i_min, tile_i_max + 1):
        col_offset = 0
        for j in range(tile_j_min, tile_j_max + 1):
            # double-check that tile_i_j is in the list of tiles:
            idx = map(lambda _x, _y: _x['name'] == _y, tiles,
                      len(tiles) * ['tile_' + str(i) + '_' + str(j) + '.ppm'])
            if not any(idx):
                raise RuntimeError("Missing tile " +
                                   'tile_' + str(i) + '_' + str(j) + '.ppm')
            tile = tiles[idx.index(True)]
            ## print("Current tile:", tile['name'])

            # Idea: the current tile (i,j) might need to be extended with a stripe
            # of maximum args.wsize to the left and bottom. So we load (if they
            # are available) the tiles (i,j+1), (i+1,j) and (i+1,j+1) and extend
            # the current tile...

            # a tile from the image is in <path>/<scale>/tile_i_j.ppm
            im = imread(path + '/' + str(args.scale) + '/' + tile['name'])
            tile_height, tile_width, _ = im.shape
            ## print("Tile size:", tile_height, "x", tile_width)

            # The scanning (sliding) windows will start at (row_offset, col_offset)
            # (in this tile's coordinate system). We want to have an integer number
            # of windows so, if needed (and possible), we will extend the current
            # tile with a block of pixels from the neighboring tiles.

            # number of windows on the horizontal
            need_expand_right = False
            right_pad = 0
            right_tile = None
            if j < tile_j_max:  # then we could eventually expand
                if (tile_width - col_offset) % args.wsize != 0:
                    need_expand_right = True
                    nh = int(mh.ceil((tile_width - col_offset) / args.wsize))
                    right_pad = nh * args.wsize - (tile_width - col_offset)
                    tile_name = 'tile_' + str(i) + '_' + str(j + 1) + '.ppm'
                    idx = map(lambda _x, _y: _x['name'] == _y, tiles,
                              len(tiles) * [tile_name])
                    assert (any(idx))
                    right_tile = tiles[idx.index(True)]

            # number of windows on the vertical
            need_expand_bot = False
            bot_pad = 0
            bot_tile = None
            if i < tile_i_max:
                if (tile_height - row_offset) % args.wsize != 0:
                    need_expand_bot = True
                    nv = int(mh.ceil((tile_height - row_offset) / args.wsize))
                    bot_pad = nv * args.wsize - (tile_height - row_offset)
                    tile_name = 'tile_' + str(i + 1) + '_' + str(j) + '.ppm'
                    idx = map(lambda _x, _y: _x['name'] == _y, tiles,
                              len(tiles) * [tile_name])
                    assert (any(idx))
                    bot_tile = tiles[idx.index(True)]

            ## print("Expand: right=", need_expand_right, "bottom=", need_expand_bot)
            ## print("...by: right=", right_pad, "bottom=", bot_pad, "pixels")

            rb_tile = None
            if need_expand_right and need_expand_bot:
                # this MUST exist if the right and bottom tiles above exist:
                tile_name = 'tile_' + str(i + 1) + '_' + str(j + 1) + '.ppm'
                idx = map(lambda _x, _y: _x['name'] == _y, tiles,
                          len(tiles) * [tile_name])
                assert (any(idx))
                rb_tile = tiles[idx.index(True)]

            ## if right_tile is not None:
            ##     print("Expansion tile right:", right_tile['name'])
            ## if bot_tile is not None:
            ##     print("Expansion tile bottom:", bot_tile['name'])
            ## if rb_tile is not None:
            ##     print("Expansion tile bottom-right:", rb_tile['name'])

            # expand the image to the right and bottom only if there is a
            # neighboring tile in that direction
            r = 1 if right_tile is not None else 0
            b = 1 if bot_tile is not None else 0

            next_row_offset, next_col_offset = 0, 0

            if r + b > 0:
                # we need to (and we can) pad the image with pixels from neighbors
                # Enlarge the image to the right and bottom.
                # The following line gives an error
                # (TypeError: 'unicode' object is not callable). Why?
                # im = np.pad(im, ((0, bot_pad), (0, right_pad), (0, 0)), mode='constant')
                im_tmp = np.zeros((tile_height + b * bot_pad,
                                   tile_width + r * right_pad,
                                   im.shape[2]))
                im_tmp[0:tile_height, 0:tile_width, :] = im
                im = im_tmp

                if right_tile is not None:
                    # a tile from the image is in <path>/<scale>/tile_i_j.ppm
                    im_tmp = imread(path + '/' + str(args.scale) + '/' + right_tile['name'])
                    im[0:tile_height, tile_width:tile_width + right_pad, :] = \
                        im_tmp[0:tile_height, 0:right_pad, :]
                    next_col_offset = right_pad

                if bot_tile is not None:
                    # a tile from the image is in <path>/<scale>/tile_i_j.ppm
                    im_tmp = imread(path + '/' + str(args.scale) + '/' + bot_tile['name'])
                    im[tile_height:tile_height + bot_pad, 0:tile_width, :] = \
                        im_tmp[0:bot_pad, 0:tile_width, :]
                    next_row_offset = bot_pad

                if rb_tile is not None:
                    # a tile from the image is in <path>/<scale>/tile_i_j.ppm
                    im_tmp = imread(path + '/' + str(args.scale) + '/' + rb_tile['name'])
                    im[tile_height:tile_height + bot_pad,
                       tile_width:tile_width + right_pad, :] = \
                        im_tmp[0:bot_pad, 0:right_pad, :]

                im_tmp = None  # discard

            # From the current tile (padded), we need to process the region
            # (row_offset, col_offset) -> (im.height, im.width) (with new
            # height and width). But there might still be some restrictions
            # due to the region of interest (row_min, col_min) -> (row_max, col_max).
            # These last coordinates are in the global coordinate system! So, first
            # we convert them to (rmn, cmn) -> (rmx, cmx), and lower bound them to
            # the offset:
            rmn = max(row_min - int(tile['y0']), row_offset)
            rmx = min(row_max - int(tile['y0']) + 1, im.shape[0])
            cmn = max(col_min - int(tile['x0']), col_offset)
            cmx = min(col_max - int(tile['x0']) + 1, im.shape[1])
            ## print("Final region of the image:", rmn, rmx, cmn, cmx)

            im = im[rmn:rmx, cmn:cmx, :]  # image to process

            # tile contains the real coordinates of the region in the image
            crt_row_min = int(tile['y0'])
            crt_col_min = int(tile['x0'])

            col_offset = next_col_offset
            ## print("Next offsets:", row_offset, col_offset)

            # Finally, we have the image for analysis. Don't forget to transform
            # the coordinates from the current tile system to the global image
            # system when saving the results.
            if im.shape[0] < args.wsize or im.shape[1] < args.wsize:
                # (what is left of the) tile is smaller than the window size
                continue

            # get the H and E planes:
            h, e, _ = rgb2he2(im)

            if args.gabor:
                print("---------> Gabor descriptors:")
                g = GaborDescriptor()
                desc_label = 'gabor'

                # on H-plane:
                print("------------> H plane")
                img_iterator = sliding_window(h.shape, (args.wsize, args.wsize),
                                              step=(args.wsize, args.wsize))
                dsc = get_local_desc(h, g, img_iterator, desc_label)
                id = np.zeros((1, len(dsc)))  # we do not cluster here...

                # save clustering/contexts - remember, the coordinates are in the
                # current tile/image system -> should add back the shift
                z1 = desc_to_matrix(dsc, desc_label)  # cols 0:4 are [row_min, row_max, col_min, col_max]
                z1[:, 0:2] += crt_row_min + rmn
                z1[:, 2:4] += crt_col_min + cmn
                z2 = np.matrix(id).transpose()
                z2 = np.hstack((z2, z1))
                np.savetxt(pfx + '_' + tile['name'] + '_' + desc_label + '_h.dat',
                           z2, delimiter="\t")

                if args.eosine:
                    # repeat on E plane:
                    print("------------> E plane")
                    img_iterator = sliding_window(h.shape, (args.wsize, args.wsize),
                                                  step=(args.wsize, args.wsize))
                    dsc = get_local_desc(e, g, img_iterator, desc_label)
                    id = np.zeros((1, len(dsc)))  # we do not cluster here...

                    # save clustering/contexts - remember, the coordinates are in the
                    # current tile/image system -> should add back the shift
                    z1 = desc_to_matrix(dsc, desc_label)  # cols 0:4 are [row_min, row_max, col_min, col_max]
                    z1[:, 0:2] += crt_row_min + rmn
                    z1[:, 2:4] += crt_col_min + cmn
                    z2 = np.matrix(id).transpose()
                    z2 = np.hstack((z2, z1))
                    np.savetxt(pfx + '_' + tile['name'] + '_' + desc_label + '_e.dat',
                               z2, delimiter="\t")

                print("OK")
        # end for j...
        row_offset = next_row_offset
    # end for i....
    return
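# A possible answer to the "Why?" left in the padding step above (an assumption,
# not verified against the NumPy version actually used): under Python 2 with
# `from __future__ import unicode_literals`, the literal 'constant' is a unicode
# object; older np.pad implementations recognized only byte-string mode names and
# otherwise treated `mode` as a user-supplied padding function, so they tried to
# call the string -- hence "TypeError: 'unicode' object is not callable".
# Forcing a byte string (or upgrading NumPy) should make np.pad usable and, unlike
# the np.zeros() buffer used above, it also preserves the image dtype. A minimal sketch:
import numpy as np

def pad_bottom_right(img, bot_pad, right_pad):
    """Zero-pad a H x W x C image on its bottom and right edges."""
    return np.pad(img,
                  ((0, bot_pad), (0, right_pad), (0, 0)),
                  mode=str('constant'))  # str() guards against unicode_literals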
def main():
    p = opt.ArgumentParser(description="""
        Segments a number of rectangular contexts from a H&E slide. The contexts
        are clusters of similar regions of the image. The similarity is based on
        various textural descriptors.
        """)
    p.add_argument('img_file', action='store', help='RGB image file')
    p.add_argument('ctxt', action='store', type=int,
                   help='number of contexts to extract')
    p.add_argument('wsize', action='store', type=int,
                   help='size of the (square) regions')
    p.add_argument('--prefix', action='store', default=None,
                   help='optional prefix for the resulting files')
    p.add_argument('--gabor', action='store_true',
                   help='compute Gabor descriptors and generate the corresponding contexts')
    p.add_argument('--lbp', action='store_true',
                   help='compute LBP (local binary patterns) descriptors and generate the corresponding contexts')
    p.add_argument('--mfs', action='store_true',
                   help='compute fractal descriptors and generate the corresponding contexts')
    p.add_argument('--haralick', action='store_true',
                   help='compute Haralick descriptors and generate the corresponding contexts')
    p.add_argument('--row_min', action='store', type=int, default=0,
                   help='start row (rows start at 0)')
    p.add_argument('--col_min', action='store', type=int, default=0,
                   help='start column (columns start at 0)')
    p.add_argument('--row_max', action='store', type=int, default=0,
                   help='end row (maximum: image height-1)')
    p.add_argument('--col_max', action='store', type=int, default=0,
                   help='end column (maximum: image width-1)')
    p.add_argument('--eosine', action='store_true',
                   help='process the eosin component as well?')
    args = p.parse_args()

    base_name = os.path.basename(args.img_file).split('.')
    if len(base_name) > 1:  # at least 1 suffix .ext
        base_name.pop()  # drop the extension
    base_name = '.'.join(base_name)  # reassemble the rest of the list into the file name

    if args.prefix is not None:
        pfx = args.prefix
    else:
        pfx = base_name

    im = imread(args.img_file)
    print("Original image size:", im.shape)

    row_min = min(max(args.row_min, 0), im.shape[0] - 2)
    col_min = min(max(args.col_min, 0), im.shape[1] - 2)
    row_max = max(min(args.row_max, im.shape[0] - 1), 0)
    col_max = max(min(args.col_max, im.shape[1] - 1), 0)

    if row_max == 0:
        row_max = im.shape[0] - 1
    if col_max == 0:
        col_max = im.shape[1] - 1

    if row_max - row_min < args.wsize or col_max - col_min < args.wsize:
        raise ValueError('Window size too large for requested image size.')

    im = im[row_min:row_max + 1, col_min:col_max + 1, :]

    # crop the image to a multiple of wsize:
    nh, nw = mh.floor(im.shape[0] / args.wsize), mh.floor(im.shape[1] / args.wsize)
    dh, dw = mh.floor((im.shape[0] - nh * args.wsize) / 2), \
        mh.floor((im.shape[1] - nw * args.wsize) / 2)
    im = im[dh:dh + nh * args.wsize, dw:dw + nw * args.wsize, :]
    print("Image cropped to:", im.shape)
    imsave(pfx + '_cropped.ppm', im)

    # get the H and E planes:
    h, e, _ = rgb2he2(im)

    if args.gabor:
        print("---------> Gabor descriptors:")
        g = GaborDescriptor()
        desc_label = 'gabor'

        # on H-plane:
        print("------------> H plane")
        img_iterator = sliding_window(h.shape, (args.wsize, args.wsize),
                                      step=(args.wsize, args.wsize))
        dsc = get_local_desc(h, g, img_iterator, desc_label)
        dst = pdist_gabor(dsc)
        cl = average(dst)
        id = fcluster(cl, t=args.ctxt, criterion='maxclust')  # get the various contexts

        # save clustering/contexts - remember, the coordinates are in the
        # current image system which might have been cropped from the original ->
        # should add back the shift
        z1 = desc_to_matrix(dsc, desc_label)  # col 0: row_min, col 2: col_min
        z1[:, 0] += row_min + dh
        z1[:, 2] += col_min + dw
        z2 = np.matrix(id).transpose()
        z2 = np.hstack((z2, z1))
        np.savetxt(pfx + '_' + desc_label + '_h.dat', z2, delimiter="\t")

        # save visualizations
        for k in range(1, 1 + args.ctxt):
            i = np.where(id == k)[0]
            p = [dsc[j]['roi'] for j in i]
            im2 = enhance_patches(im, p)
            imsave(pfx + '_' + desc_label + '_h_' + str(k) + '.ppm', im2)

        if args.eosine:
            # repeat on E plane:
            print("------------> E plane")
            img_iterator = sliding_window(h.shape, (args.wsize, args.wsize),
                                          step=(args.wsize, args.wsize))
            dsc = get_local_desc(e, g, img_iterator, desc_label)
            dst = pdist_gabor(dsc)
            cl = average(dst)
            id = fcluster(cl, t=args.ctxt, criterion='maxclust')  # get the various contexts

            # save clustering/contexts - remember, the coordinates are in the
            # current image system which might have been cropped from the original ->
            # should add back the shift
            z1 = desc_to_matrix(dsc, desc_label)  # col 0:4 [row_min, row_max, col_min, col_max]
            z1[:, 0:2] += row_min + dh
            z1[:, 2:4] += col_min + dw
            z2 = np.matrix(id).transpose()
            z2 = np.hstack((z2, z1))
            np.savetxt(pfx + '_' + desc_label + '_e.dat', z2, delimiter="\t")

            # save visualizations
            for k in range(1, 1 + args.ctxt):
                i = np.where(id == k)[0]
                p = [dsc[j]['roi'] for j in i]
                im2 = enhance_patches(im, p)
                imsave(pfx + '_' + desc_label + '_e_' + str(k) + '.ppm', im2)

        print("OK")

    if args.haralick:
        print("---------> Haralick descriptors:")
        g = GLCMDescriptor()
        desc_label = 'haralick'

        # on H-plane:
        print("------------> H plane")
        img_iterator = sliding_window(h.shape, (args.wsize, args.wsize),
                                      step=(args.wsize, args.wsize))
        dsc = get_local_desc(h, g, img_iterator, desc_label)
        dst = pdist_gabor(dsc)
        cl = average(dst)
        id = fcluster(cl, t=args.ctxt, criterion='maxclust')  # get the various contexts

        # save clustering/contexts - remember, the coordinates are in the
        # current image system which might have been cropped from the original ->
        # should add back the shift
        z1 = desc_to_matrix(dsc, desc_label)  # col 0:4 [row_min, row_max, col_min, col_max]
        z1[:, 0:2] += row_min + dh
        z1[:, 2:4] += col_min + dw
        z2 = np.matrix(id).transpose()
        z2 = np.hstack((z2, z1))
        np.savetxt(pfx + '_' + desc_label + '_h.dat', z2, delimiter="\t")

        # save visualizations
        for k in range(1, 1 + args.ctxt):
            i = np.where(id == k)[0]
            p = [dsc[j]['roi'] for j in i]
            im2 = enhance_patches(im, p)
            imsave(pfx + '_' + desc_label + '_h_' + str(k) + '.ppm', im2)

        if args.eosine:
            # repeat on E plane:
            print("------------> E plane")
            img_iterator = sliding_window(h.shape, (args.wsize, args.wsize),
                                          step=(args.wsize, args.wsize))
            dsc = get_local_desc(e, g, img_iterator, desc_label)
            dst = pdist_gabor(dsc)
            cl = average(dst)
            id = fcluster(cl, t=args.ctxt, criterion='maxclust')  # get the various contexts

            # save clustering/contexts - remember, the coordinates are in the
            # current image system which might have been cropped from the original ->
            # should add back the shift
            z1 = desc_to_matrix(dsc, desc_label)  # col 0:4 [row_min, row_max, col_min, col_max]
            z1[:, 0:2] += row_min + dh
            z1[:, 2:4] += col_min + dw
            z2 = np.matrix(id).transpose()
            z2 = np.hstack((z2, z1))
            np.savetxt(pfx + '_' + desc_label + '_e.dat', z2, delimiter="\t")

            # save visualizations
            for k in range(1, 1 + args.ctxt):
                i = np.where(id == k)[0]
                p = [dsc[j]['roi'] for j in i]
                im2 = enhance_patches(im, p)
                imsave(pfx + '_' + desc_label + '_e_' + str(k) + '.ppm', im2)

        print("OK")

    if args.lbp:
        print("---------> LBP descriptors:")
        g = LBPDescriptor()
        desc_label = 'lbp'

        # on H-plane:
        print("------------> H plane")
        img_iterator = sliding_window(h.shape, (args.wsize, args.wsize),
                                      step=(args.wsize, args.wsize))
        dsc = get_local_desc(h, g, img_iterator, desc_label)
        dst = pdist_lbp(dsc)
        cl = average(dst)
        id = fcluster(cl, t=args.ctxt, criterion='maxclust')  # get the various contexts

        # save clustering/contexts - remember, the coordinates are in the
        # current image system which might have been cropped from the original ->
        # should add back the shift
        z1 = desc_to_matrix(dsc, desc_label)  # col 0:4 [row_min, row_max, col_min, col_max]
        z1[:, 0:2] += row_min + dh
        z1[:, 2:4] += col_min + dw
        z2 = np.matrix(id).transpose()
        z2 = np.hstack((z2, z1))
        np.savetxt(pfx + '_' + desc_label + '_h.dat', z2, delimiter="\t")

        # save visualizations
        for k in range(1, 1 + args.ctxt):
            i = np.where(id == k)[0]
            p = [dsc[j]['roi'] for j in i]
            im2 = enhance_patches(im, p)
            imsave(pfx + '_' + desc_label + '_h_' + str(k) + '.ppm', im2)

        if args.eosine:
            # repeat on E plane:
            print("------------> E plane")
            img_iterator = sliding_window(h.shape, (args.wsize, args.wsize),
                                          step=(args.wsize, args.wsize))
            dsc = get_local_desc(e, g, img_iterator, desc_label)
            dst = pdist_lbp(dsc)
            cl = average(dst)
            id = fcluster(cl, t=args.ctxt, criterion='maxclust')  # get the various contexts

            # save clustering/contexts - remember, the coordinates are in the
            # current image system which might have been cropped from the original ->
            # should add back the shift
            z1 = desc_to_matrix(dsc, desc_label)  # col 0:4 [row_min, row_max, col_min, col_max]
            z1[:, 0:2] += row_min + dh
            z1[:, 2:4] += col_min + dw
            z2 = np.matrix(id).transpose()
            z2 = np.hstack((z2, z1))
            np.savetxt(pfx + '_' + desc_label + '_e.dat', z2, delimiter="\t")

            # save visualizations
            for k in range(1, 1 + args.ctxt):
                i = np.where(id == k)[0]
                p = [dsc[j]['roi'] for j in i]
                im2 = enhance_patches(im, p)
                imsave(pfx + '_' + desc_label + '_e_' + str(k) + '.ppm', im2)

        print("OK")

    if args.mfs:
        print("---------> MFS descriptors:")
        g = MFSDescriptor()
        desc_label = 'mfs'

        # on H-plane:
        print("------------> H plane")
        img_iterator = sliding_window(h.shape, (args.wsize, args.wsize),
                                      step=(args.wsize, args.wsize))
        dsc = get_local_desc(h, g, img_iterator, desc_label)
        dst = pdist_mfs(dsc)
        cl = average(dst)
        id = fcluster(cl, t=args.ctxt, criterion='maxclust')  # get the various contexts

        # save clustering/contexts - remember, the coordinates are in the
        # current image system which might have been cropped from the original ->
        # should add back the shift
        z1 = desc_to_matrix(dsc, desc_label)  # col 0:4 [row_min, row_max, col_min, col_max]
        z1[:, 0:2] += row_min + dh
        z1[:, 2:4] += col_min + dw
        z2 = np.matrix(id).transpose()
        z2 = np.hstack((z2, z1))
        np.savetxt(pfx + '_' + desc_label + '_h.dat', z2, delimiter="\t")

        # save visualizations
        for k in range(1, 1 + args.ctxt):
            i = np.where(id == k)[0]
            p = [dsc[j]['roi'] for j in i]
            im2 = enhance_patches(im, p)
            imsave(pfx + '_' + desc_label + '_h_' + str(k) + '.ppm', im2)

        if args.eosine:
            # repeat on E plane:
            print("------------> E plane")
            img_iterator = sliding_window(h.shape, (args.wsize, args.wsize),
                                          step=(args.wsize, args.wsize))
            dsc = get_local_desc(e, g, img_iterator, desc_label)
            dst = pdist_mfs(dsc)
            cl = average(dst)
            id = fcluster(cl, t=args.ctxt, criterion='maxclust')  # get the various contexts

            # save clustering/contexts - remember, the coordinates are in the
            # current image system which might have been cropped from the original ->
            # should add back the shift
            z1 = desc_to_matrix(dsc, desc_label)  # col 0:4 [row_min, row_max, col_min, col_max]
            z1[:, 0:2] += row_min + dh
            z1[:, 2:4] += col_min + dw
            z2 = np.matrix(id).transpose()
            z2 = np.hstack((z2, z1))
            np.savetxt(pfx + '_' + desc_label + '_e.dat', z2, delimiter="\t")

            # save visualizations
            for k in range(1, 1 + args.ctxt):
                i = np.where(id == k)[0]
                p = [dsc[j]['roi'] for j in i]
                im2 = enhance_patches(im, p)
                imsave(pfx + '_' + desc_label + '_e_' + str(k) + '.ppm', im2)

        print("OK")

    return
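# Each descriptor branch above follows the same pattern: compute pairwise distances
# between the local descriptors, build an average-linkage hierarchy, and cut it into
# `ctxt` flat clusters. A self-contained sketch of that pattern (an illustration only:
# it uses plain feature vectors and Euclidean distances via scipy's pdist instead of
# the project's dsc structures and the pdist_gabor/pdist_lbp/pdist_mfs helpers):
import numpy as np
from scipy.spatial.distance import pdist
from scipy.cluster.hierarchy import average, fcluster

def cluster_descriptors(features, n_contexts):
    """Assign each row of `features` to one of `n_contexts` contexts."""
    dst = pdist(features)                  # condensed pairwise distance matrix
    linkage = average(dst)                 # average-linkage dendrogram
    labels = fcluster(linkage, t=n_contexts, criterion='maxclust')
    return labels                          # labels take values 1..n_contexts

# Example: group 100 random 16-dimensional descriptors into 3 contexts
# labels = cluster_descriptors(np.random.rand(100, 16), 3)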