Example #1
0
def extract_descriptors_he(_img, w_size, _ncpus=None):
    """
    EXTRACT_DESCRIPTORS_HE: extracts a set of local descriptors of the image,
    one set per non-overlapping (w_size x w_size) window. The descriptors are
    computed by _worker2; according to the original description they are:
        - histogram of Hue values
        - histogram of haematoxylin and eosin planes
        - Gabor descriptors in haematoxylin and eosin spaces, respectively
        - local binary patterns in haematoxylin and eosin spaces, respectively

    :param _img: numpy.ndarray
        3-dimensional image array (an assertion enforces _img.ndim == 3);
        it is passed to rgb2hsv() and rgb2he2().

    :param w_size: int
        side of the square sliding window; also used as the step, so that
        windows do not overlap.

    :param _ncpus: int or None
        forwarded as max_workers to ProcessPoolExecutor (None lets the
        executor choose its default).

    :return: list
        the values returned by _worker2, one per window, in the order in
        which the windows were produced by sliding_window().
    """
    assert (_img.ndim == 3)

    img_iterator = sliding_window(_img.shape[:-1], (w_size, w_size),
                                  step=(w_size, w_size))  # non-overlapping windows
    gabor = GaborDescriptor()
    lbp = LBPDescriptor()

    # The hue plane is the same for every window: slice it once.
    hue = rgb2hsv(_img)[:, :, 0]
    h, e, _ = rgb2he2(_img)

    with ProcessPoolExecutor(max_workers=_ncpus) as executor:
        pending = [
            executor.submit(_worker2, hue, h, e, gabor, lbp, w_coords)
            for w_coords in img_iterator
        ]

    # Leaving the 'with' block waits for all tasks, so every future is done
    # here. Reading them in submission order gives a reproducible result
    # order (as_completed() would yield finished futures in arbitrary order).
    return [f.result() for f in pending]
Example #2
0
def tissue_region_from_rgb(_img, _min_area=150, _g_th=None):
    """
    TISSUE_REGION_FROM_RGB detects the region(s) of the image containing the
    tissue. The original image is supposed to represent a haematoxylin-eosin
    -stained pathology slide.

    The main purpose of this function is to detect the parts of a large image
    which most probably contain tissue material, and to discard the background.

    Usage:
        tissue_mask, g_th = tissue_region_from_rgb(img, _min_area=150, _g_th=None)

    Args:
        _img (numpy.ndarray): the original image in RGB color space
        _min_area (int, default: 150): any object with an area smaller than
            the indicated value, will be discarded
        _g_th (int, default: None): the processing is done on the GREEN channel
            and all pixels below _g_th are considered candidates for "tissue
            pixels". If no value is given to _g_th, one is computed by K-Means
            clustering (K=2) as 95% of the largest cluster center, and is
            returned.

    Returns:
        tuple (mask, threshold):
            numpy.ndarray: a binary image containing the mask of the regions
                considered to represent tissue fragments
            int: threshold used for GREEN channel (the given _g_th, or the
                computed one)
    """

    # The green channel is needed both for the threshold estimation and for
    # the thresholding itself: extract it only once.
    green = _G(_img)

    if _g_th is None:
        # Apply vector quantization to remove the "white" background - work in the
        # green channel:
        vq = MiniBatchKMeans(n_clusters=2)
        centers = vq.fit(green.reshape((-1, 1))).cluster_centers_.squeeze()
        _g_th = int(np.round(0.95 * np.max(centers)))

    mask = green < _g_th

    skm.binary_closing(mask, skm.disk(3), out=mask)
    mask = img_as_bool(mask)
    # NOTE(review): recent scikit-image releases replaced 'in_place' with
    # 'out' - confirm against the installed version.
    mask = skm.remove_small_objects(mask, min_size=_min_area, in_place=True)

    # Some hand-picked rules for a pixel to be kept as tissue:
    # - its H value is above the 5th percentile of the H plane, or its E value
    #   is above the 5th percentile of the E plane
    # - its third ("other components"/background) value is below the 50th
    #   percentile (median) of that plane

    h, e, b = rgb2he2(_img)

    mask &= (h > np.percentile(h, 5)) | (e > np.percentile(e, 5))
    mask &= (b < np.percentile(b, 50))

    mask = mh.close_holes(mask)

    return img_as_bool(mask), _g_th
Example #3
0
def tissue_region_from_rgb(_img, _min_area=150, _g_th=None):
    """
    TISSUE_REGION_FROM_RGB detects the region(s) of the image containing the
    tissue. The original image is supposed to represent a haematoxylin-eosin
    -stained pathology slide.

    The main purpose of this function is to detect the parts of a large image
    which most probably contain tissue material, and to discard the background.

    Usage:
        tissue_mask, g_th = tissue_region_from_rgb(img, _min_area=150, _g_th=None)

    Args:
        _img (numpy.ndarray): the original image in RGB color space
        _min_area (int, default: 150): any object with an area smaller than
            the indicated value, will be discarded
        _g_th (int, default: None): the processing is done on the GREEN channel
            and all pixels below _g_th are considered candidates for "tissue
            pixels". If no value is given to _g_th, one is computed by K-Means
            clustering (K=2) as 95% of the largest cluster center, and is
            returned.

    Returns:
        tuple (mask, threshold):
            numpy.ndarray: a binary image containing the mask of the regions
                considered to represent tissue fragments
            int: threshold used for GREEN channel (the given _g_th, or the
                computed one)
    """

    # Extract the green channel a single time; it serves both the automatic
    # threshold estimation and the thresholding.
    g_channel = _G(_img)

    if _g_th is None:
        # Apply vector quantization to remove the "white" background - work in the
        # green channel:
        vq = MiniBatchKMeans(n_clusters=2)
        vq.fit(g_channel.reshape((-1, 1)))
        brightest_center = np.max(vq.cluster_centers_.squeeze())
        _g_th = int(np.round(0.95 * brightest_center))

    mask = g_channel < _g_th

    skm.binary_closing(mask, skm.disk(3), out=mask)
    mask = img_as_bool(mask)
    # NOTE(review): recent scikit-image releases replaced 'in_place' with
    # 'out' - confirm against the installed version.
    mask = skm.remove_small_objects(mask, min_size=_min_area, in_place=True)

    # Some hand-picked rules for a pixel to be kept as tissue:
    # - H above the 5th percentile of the H plane, or E above the 5th
    #   percentile of the E plane
    # - third component ("other components"/background) below its 50th
    #   percentile (median)

    h, e, b = rgb2he2(_img)

    mask &= (h > np.percentile(h, 5)) | (e > np.percentile(e, 5))
    mask &= (b < np.percentile(b, 50))

    mask = mh.close_holes(mask)

    return img_as_bool(mask), _g_th
Example #4
0
def extract_descriptors_he(_img, w_size, _ncpus=None):
    """
    EXTRACT_DESCRIPTORS_HE: extracts a set of local descriptors of the image,
    one set per non-overlapping (w_size x w_size) window. The descriptors are
    computed by _worker2; according to the original description they are:
        - histogram of Hue values
        - histogram of haematoxylin and eosin planes
        - Gabor descriptors in haematoxylin and eosin spaces, respectively
        - local binary patterns in haematoxylin and eosin spaces, respectively

    :param _img: numpy.ndarray
        3-dimensional image array (an assertion enforces _img.ndim == 3);
        it is passed to rgb2hsv() and rgb2he2().

    :param w_size: int
        side of the square sliding window; also used as the step, so that
        windows do not overlap.

    :param _ncpus: int or None
        forwarded as max_workers to ProcessPoolExecutor (None lets the
        executor choose its default).

    :return: list
        the values returned by _worker2, one per window, in the order in
        which the windows were produced by sliding_window().
    """
    assert (_img.ndim == 3)

    img_iterator = sliding_window(_img.shape[:-1], (w_size, w_size),
                                  step=(w_size,
                                        w_size))  # non-overlapping windows
    gabor = GaborDescriptor()
    lbp = LBPDescriptor()

    # Loop-invariant: take the hue plane once instead of once per window.
    hue_plane = rgb2hsv(_img)[:, :, 0]
    h, e, _ = rgb2he2(_img)

    futures = []
    with ProcessPoolExecutor(max_workers=_ncpus) as executor:
        for w_coords in img_iterator:
            futures.append(
                executor.submit(_worker2, hue_plane, h, e, gabor, lbp,
                                w_coords))

    # All tasks have finished once the executor context is left. Collect the
    # results in submission order so that the output order is reproducible
    # (as_completed() gives no ordering guarantee for finished futures).
    return [fut.result() for fut in futures]
Example #5
0
def main():
    """
    Command-line entry point: compute local Gabor descriptors over a region
    of a tiled H&E image and save them, one tab-separated file per tile and
    per stain plane.

    The XML meta file describes the image (original width/height, available
    scales and, for each scale, the tiles with their 'name', 'x0', 'y0', 'x1',
    'y1' attributes). Tiles are read from <meta file dir>/<scale>/<tile name>.
    Each tile is scanned with non-overlapping wsize x wsize windows; when a
    tile does not hold an integer number of windows it is extended with pixels
    from its right / bottom / bottom-right neighbors, and the neighbor tiles
    are then scanned starting after the borrowed stripe.

    Only the --gabor descriptors are computed here; the other descriptor
    flags and 'ctxt' are parsed but not used by this function.

    Output files are named <prefix>_<tile name>_gabor_h.dat (and ..._e.dat
    with --eosine); the first column is a label (always 0, no clustering is
    done), followed by the descriptor matrix with window coordinates shifted
    to the global image system.

    Raises:
        ValueError: unknown/duplicated scale, window larger than the region,
            or no tile overlapping the region.
        RuntimeError: a tile expected in the tile grid is missing.
    """
    p = opt.ArgumentParser(description="""
            Segments a number of rectangular contexts from a H&E slide. The contexts are clusters
            of similar regions of the image. The similarity is based on various textural
            descriptors.
            """)
    p.add_argument(
        'meta_file',
        action='store',
        help='XML file describing the structure of the imported file')
    p.add_argument('scale',
                   action='store',
                   help='which of the scales to be processed')
    p.add_argument('ctxt',
                   action='store',
                   help='number of contexts to extract',
                   type=int)
    p.add_argument('wsize',
                   action='store',
                   help='size of the (square) regions',
                   type=int)
    p.add_argument('--prefix',
                   action='store',
                   help='optional prefix for the resulting files',
                   default=None)
    p.add_argument(
        '--gabor',
        action='store_true',
        help='compute Gabor descriptors and generate the corresponding contexts'
    )
    p.add_argument(
        '--lbp',
        action='store_true',
        help=
        'compute LBP (local binary patterns) descriptors and generate the corresponding contexts'
    )
    p.add_argument(
        '--mfs',
        action='store_true',
        help=
        'compute fractal descriptors and generate the corresponding contexts')
    p.add_argument(
        '--haralick',
        action='store_true',
        help=
        'compute Haralick descriptors and generate the corresponding contexts')
    p.add_argument('--row_min',
                   action='store',
                   type=int,
                   help='start row (rows start at 0)',
                   default=0)
    p.add_argument('--col_min',
                   action='store',
                   type=int,
                   help='start column (columns start at 0)',
                   default=0)
    p.add_argument('--row_max',
                   action='store',
                   type=int,
                   help='end row (maximum: image height-1)',
                   default=0)
    p.add_argument('--col_max',
                   action='store',
                   type=int,
                   help='end column (maximum: image width-1)',
                   default=0)
    p.add_argument('--eosine',
                   action='store_true',
                   help='should also Eosine component be processed?')

    args = p.parse_args()

    xml_file = ET.parse(args.meta_file)
    xml_root = xml_file.getroot()

    # find the name of the image and drop its last extension, if any.
    # (A name without any '.' is kept as it is, so that the prefix is always
    # a string.)
    file_name = os.path.basename(xml_root.find('file').text)
    stem, dot, _ = file_name.rpartition('.')
    base_name = stem if dot else file_name

    pfx = args.prefix if args.prefix is not None else base_name

    # a tile from the image is in <path>/<scale>/tile_i_j.ppm; os.path.join
    # keeps the path relative when the meta file has no directory component.
    path = os.path.dirname(args.meta_file)
    scale_dir = os.path.join(path, str(args.scale))

    # Check if the required scale exists:
    vrs = [
        _x for _x in xml_root.findall('version')
        if _x.find('scale').text == args.scale
    ]
    if len(vrs) == 0:
        raise ValueError('The requested scale does not exits.')
    if len(vrs) > 1:
        raise ValueError('Inconsistency detected for the requested scale.')
    all_tiles = vrs[0].findall('tile')

    # get the info about full image:
    im_width = int(xml_root.find('original/width').text)
    im_height = int(xml_root.find('original/height').text)

    row_min = min(max(args.row_min, 0), im_height - 2)
    col_min = min(max(args.col_min, 0), im_width - 2)
    row_max = max(min(args.row_max, im_height - 1), 0)
    col_max = max(min(args.col_max, im_width - 1), 0)

    # 0 (the default) means "up to the last row/column":
    if row_max == 0:
        row_max = im_height - 1
    if col_max == 0:
        col_max = im_width - 1

    if row_max - row_min < args.wsize or col_max - col_min < args.wsize:
        raise ValueError('Window size too large for requested image size.')

    # keep only the tiles that overlap with the specified region
    tiles = [
        tl.attrib for tl in all_tiles
        if int(tl.attrib['x1']) >= col_min and col_max >= int(tl.attrib['x0'])
        and int(tl.attrib['y1']) >= row_min and row_max >= int(tl.attrib['y0'])
    ]
    if not tiles:
        raise ValueError('No tile overlaps the requested region.')

    # Index the tiles by name (first occurrence wins) for direct lookup.
    tiles_by_name = {}
    for t in tiles:
        tiles_by_name.setdefault(t['name'], t)

    def find_tile(ti, tj):
        # Return the attributes of tile (ti, tj) or fail if it is not among
        # the tiles overlapping the region.
        name = 'tile_' + str(ti) + '_' + str(tj) + '.ppm'
        found = tiles_by_name.get(name)
        if found is None:
            raise RuntimeError("Missing tile: " + name)
        return found

    # Sort the tiles from top to bottom and left to right.
    # -get all the (i,j) indices of the tiles (names look like tile_i_j.ppm):
    rx = re.compile(r'[_.]')
    ij = np.array([[int(v) for v in rx.split(t['name'])[1:3]] for t in tiles])
    # -find i_min, i_max, j_min and j_max. Since the tiles are consecutive
    # (on row and column), these are enough to generate the desired order:
    tile_i_min, tile_j_min = ij.min(axis=0)
    tile_i_max, tile_j_max = ij.max(axis=0)

    wsize = args.wsize

    row_offset = 0
    for i in range(tile_i_min, tile_i_max + 1):
        col_offset = 0
        for j in range(tile_j_min, tile_j_max + 1):
            # double-check that tile_i_j is in the list of tiles:
            tile = find_tile(i, j)

            # Idea: the current tile (i,j) might need to be extended with a stripe
            # of maximum args.wsize to the right and bottom. So we load (if they
            # are available) the tiles (i,j+1), (i+1,j) and (i+1,j+1) and extend
            # the current tile...

            im = imread(os.path.join(scale_dir, tile['name']))
            tile_height, tile_width, _ = im.shape

            # The scanning (sliding) windows will start at (row_offset, col_offset)
            # (in this tile's coordinate system). We want to have an integer number
            # of windows so, if needed (and possible) we will extend the current
            # tile with a block of pixels from the neighboring tiles.

            # number of windows on the horizontal
            right_pad = 0
            right_tile = None
            span_w = tile_width - col_offset
            if j < tile_j_max and span_w % wsize != 0:
                # integer ceil(span_w / wsize), independent of division mode
                nh = -(-span_w // wsize)
                right_pad = nh * wsize - span_w
                right_tile = find_tile(i, j + 1)

            # number of windows on the vertical
            bot_pad = 0
            bot_tile = None
            span_h = tile_height - row_offset
            if i < tile_i_max and span_h % wsize != 0:
                nv = -(-span_h // wsize)
                bot_pad = nv * wsize - span_h
                bot_tile = find_tile(i + 1, j)

            rb_tile = None
            if right_tile is not None and bot_tile is not None:
                # this MUST exist if the right and bottom tiles above exist:
                rb_tile = find_tile(i + 1, j + 1)

            next_row_offset, next_col_offset = 0, 0

            # expand the image to the right and bottom only if there is a
            # neighboring tile in that direction
            if right_tile is not None or bot_tile is not None:
                # Enlarge the image to the right and bottom. The buffer keeps
                # the dtype of the tile so that padded and unpadded tiles
                # reach rgb2he2() with the same pixel type.
                padded = np.zeros((tile_height + bot_pad,
                                   tile_width + right_pad, im.shape[2]),
                                  dtype=im.dtype)
                padded[0:tile_height, 0:tile_width, :] = im
                im = padded

                if right_tile is not None:
                    neighbor = imread(
                        os.path.join(scale_dir, right_tile['name']))
                    im[0:tile_height, tile_width:tile_width +
                       right_pad, :] = neighbor[0:tile_height, 0:right_pad, :]
                    next_col_offset = right_pad

                if bot_tile is not None:
                    neighbor = imread(
                        os.path.join(scale_dir, bot_tile['name']))
                    im[tile_height:tile_height + bot_pad,
                       0:tile_width, :] = neighbor[0:bot_pad, 0:tile_width, :]
                    next_row_offset = bot_pad

                if rb_tile is not None:
                    neighbor = imread(
                        os.path.join(scale_dir, rb_tile['name']))
                    im[tile_height:tile_height + bot_pad,
                       tile_width:tile_width +
                       right_pad, :] = neighbor[0:bot_pad, 0:right_pad, :]

                neighbor = None  # discard
                padded = None

            # From the current tile (padded), we need to process the region
            # (row_offset, col_offset) -> (im.height, im.width) (with new
            # height and width). But there might still be some restrictions
            # due to the region of interest (row_min, col_min) -> (row_max, col_max).
            # These last coordinates are in global coordinate system! So, first we
            # convert them to (rmn, cmn) -> (rmx, cmx), and lower bound them to
            # the offset:
            rmn = max(row_min - int(tile['y0']), row_offset)
            rmx = min(row_max - int(tile['y0']) + 1, im.shape[0])
            cmn = max(col_min - int(tile['x0']), col_offset)
            cmx = min(col_max - int(tile['x0']) + 1, im.shape[1])

            im = im[rmn:rmx, cmn:cmx, :]  # image to process

            # tile contains the real coordinates of the region in the image
            crt_row_min = int(tile['y0'])
            crt_col_min = int(tile['x0'])

            # must be updated before the 'continue' below, so that the next
            # tile on this row starts after the borrowed stripe
            col_offset = next_col_offset

            # Finally, we have the image for analysis. Don't forget to transform the coordinates
            # from current tile system to global image system when saving the results.
            if im.shape[0] < wsize or im.shape[1] < wsize:
                # (what is left of the) tile is smaller than the window size
                continue
            # get the H and E planes:
            h, e, _ = rgb2he2(im)

            if args.gabor:
                print("---------> Gabor descriptors:")
                g = GaborDescriptor()
                desc_label = 'gabor'

                # H plane always; E plane only on request.
                planes = [('h', h, "------------> H plane")]
                if args.eosine:
                    planes.append(('e', e, "------------> E plane"))

                for suffix, plane, banner in planes:
                    print(banner)
                    # the window grid is derived from the H plane shape for
                    # both planes
                    img_iterator = sliding_window(h.shape, (wsize, wsize),
                                                  step=(wsize, wsize))
                    dsc = get_local_desc(plane, g, img_iterator, desc_label)
                    labels = np.zeros((1, len(dsc)))  # we do not cluster here...

                    # save clustering/contexts - remember, the coordinates are in the
                    # current tile/image system -> should add back the shift
                    z1 = desc_to_matrix(
                        dsc, desc_label)  # col 0: row_min, col 2: col_min
                    z1[:, 0:2] += crt_row_min + rmn
                    z1[:, 2:4] += crt_col_min + cmn
                    z2 = np.matrix(labels).transpose()
                    z2 = np.hstack((z2, z1))
                    np.savetxt(pfx + '_' + tile['name'] + '_' + desc_label +
                               '_' + suffix + '.dat',
                               z2,
                               delimiter="\t")
                print("OK")

        # end for j...
        row_offset = next_row_offset
    # end for i....

    return
Example #6
0
def main():
    p = opt.ArgumentParser(description="""
            Segments a number of rectangular contexts from a H&E slide. The contexts are clusters
            of similar regions of the image. The similarity is based on various textural
            descriptors.
            """)
    p.add_argument('img_file', action='store', help='RGB image file')
    p.add_argument('ctxt',
                   action='store',
                   help='Number of contexts to extract',
                   type=int)
    p.add_argument('wsize',
                   action='store',
                   help='Size of the (square) regions',
                   type=int)
    p.add_argument(
        'roi',
        action='store',
        help='a file with ROI coordinates (and context descriptors)')
    p.add_argument('label',
                   action='store',
                   help='the cluster label of interest')

    p.add_argument('--prefix',
                   action='store',
                   help='optional prefix for the resulting files',
                   default=None)
    p.add_argument(
        '--gabor',
        action='store_true',
        help='compute Gabor descriptors and generate the corresponding contexts'
    )
    p.add_argument(
        '--lbp',
        action='store_true',
        help=
        'compute LBP (local binary patterns) descriptors and generate the corresponding contexts'
    )
    p.add_argument(
        '--mfs',
        action='store_true',
        help=
        'compute fractal descriptors and generate the corresponding contexts')
    p.add_argument('--eosine',
                   action='store_true',
                   help='should also Eosine component be processed?')

    p.add_argument('--scale',
                   action='store',
                   type=float,
                   default=1.0,
                   help='scaling factor for ROI coordinates')

    args = p.parse_args()

    base_name = os.path.basename(args.img_file).split('.')
    if len(base_name) > 1:  # at least 1 suffix .ext
        base_name.pop()  # drop the extension
        base_name = '.'.join(
            base_name)  # reassemble the rest of the list into file name

    if args.prefix is not None:
        pfx = args.prefix
    else:
        pfx = base_name

    ROIs = []
    for l in file(args.roi).readlines():
        # extract the coordinates and the label from each ROI
        # (one per row):
        lb, row_min, row_max, col_min, col_max = map(lambda _x: int(float(_x)),
                                                     l.split('\t')[1:5])
        row_min = int(mh.floor(row_min * args.scale))
        row_max = int(mh.floor(row_max * args.scale))
        col_min = int(mh.floor(col_min * args.scale))
        col_max = int(mh.floor(col_max * args.scale))
        if lb == args.label:
            ROIs.append([row_min, row_max, col_min, col_max])

    im = imread(args.img_file)
    print("Original image size:", im.shape)

    # get the H and E planes:
    h, e, _ = rgb2he2(im)

    if args.gabor:
        print("---------> Gabor descriptors:")
        g = GaborDescriptor()
        desc_label = 'gabor'

        print("------------> H plane")
        # on H-plane:
        img_iterator = sliding_window_on_regions(h.shape,
                                                 ROIs,
                                                 (args.wsize, args.wsize),
                                                 step=(args.wsize, args.wsize))
        dsc = get_local_desc(h, g, img_iterator, desc_label)

        dst = pdist_gabor(dsc)

        cl = average(dst)
        id = fcluster(cl, t=args.ctxt,
                      criterion='maxclust')  # get the various contexts

        # save clustering/contexts - remember, the coordinates are in the
        # current image system which might have been cropped from the original ->
        # should add back the shift
        z1 = desc_to_matrix(dsc, desc_label)  # col 0: row_min, col 2: col_min
        z1[:, 0] += row_min + dh
        z1[:, 2] += col_min + dw
        z2 = np.matrix(id).transpose()
        z2 = np.hstack((z2, z1))
        np.savetxt(pfx + '_' + desc_label + '_h.dat', z2, delimiter="\t")

        # save visualizations
        for k in range(1, 1 + args.ctxt):
            i = np.where(id == k)[0]
            p = [dsc[j]['roi'] for j in i]
            im2 = enhance_patches(im, p)
            imsave(pfx + '_' + desc_label + '_h_' + str(k) + '.ppm', im2)

        if args.eosine:
            # repeat on E plane:
            print("------------> E plane")
            img_iterator = sliding_window_on_regions(h.shape,
                                                     ROIs,
                                                     (args.wsize, args.wsize),
                                                     step=(args.wsize,
                                                           args.wsize))
            dsc = get_local_desc(e, g, img_iterator, desc_label)

            dst = pdist_gabor(dsc)

            cl = average(dst)
            id = fcluster(cl, t=args.ctxt,
                          criterion='maxclust')  # get the various contexts

            # save clustering/contexts - remember, the coordinates are in the
            # current image system which might have been cropped from the original ->
            # should add back the shift
            z1 = desc_to_matrix(dsc,
                                desc_label)  # col 0: row_min, col 2: col_min
            z1[:, 0] += row_min + dh
            z1[:, 2] += col_min + dw
            z2 = np.matrix(id).transpose()
            z2 = np.hstack((z2, z1))
            np.savetxt(pfx + '_' + desc_label + '_e.dat', z2, delimiter="\t")

            # save visualizations
            for k in range(1, 1 + args.ctxt):
                i = np.where(id == k)[0]
                p = [dsc[j]['roi'] for j in i]
                im2 = enhance_patches(im, p)
                imsave(pfx + '_' + desc_label + '_e_' + str(k) + '.ppm', im2)

        print("OK")

    if args.haralick:
        print("---------> Haralick descriptors:")
        g = GLCMDescriptor()
        desc_label = 'haralick'

        print("------------> H plane")
        # on H-plane:
        img_iterator = sliding_window_on_regions(h.shape,
                                                 ROIs,
                                                 (args.wsize, args.wsize),
                                                 step=(args.wsize, args.wsize))
        dsc = get_local_desc(h, g, img_iterator, desc_label)

        dst = pdist_gabor(dsc)

        cl = average(dst)
        id = fcluster(cl, t=args.ctxt,
                      criterion='maxclust')  # get the various contexts

        # save clustering/contexts - remember, the coordinates are in the
        # current image system which might have been cropped from the original ->
        # should add back the shift
        z1 = desc_to_matrix(dsc, desc_label)  # col 0: row_min, col 2: col_min
        z1[:, 0] += row_min + dh
        z1[:, 2] += col_min + dw
        z2 = np.matrix(id).transpose()
        z2 = np.hstack((z2, z1))
        np.savetxt(pfx + '_' + desc_label + '_h.dat', z2, delimiter="\t")

        # save visualizations
        for k in range(1, 1 + args.ctxt):
            i = np.where(id == k)[0]
            p = [dsc[j]['roi'] for j in i]
            im2 = enhance_patches(im, p)
            imsave(pfx + '_' + desc_label + '_h_' + str(k) + '.ppm', im2)

        if args.eosine:
            # repeat on E plane:
            print("------------> E plane")
            img_iterator = sliding_window_on_regions(h.shape,
                                                     ROIs,
                                                     (args.wsize, args.wsize),
                                                     step=(args.wsize,
                                                           args.wsize))
            dsc = get_local_desc(e, g, img_iterator, desc_label)

            dst = pdist_gabor(dsc)

            cl = average(dst)
            id = fcluster(cl, t=args.ctxt,
                          criterion='maxclust')  # get the various contexts

            # save clustering/contexts - remember, the coordinates are in the
            # current image system which might have been cropped from the original ->
            # should add back the shift
            z1 = desc_to_matrix(dsc,
                                desc_label)  # col 0: row_min, col 2: col_min
            z1[:, 0] += row_min + dh
            z1[:, 2] += col_min + dw
            z2 = np.matrix(id).transpose()
            z2 = np.hstack((z2, z1))
            np.savetxt(pfx + '_' + desc_label + '_e.dat', z2, delimiter="\t")

            # save visualizations
            for k in range(1, 1 + args.ctxt):
                i = np.where(id == k)[0]
                p = [dsc[j]['roi'] for j in i]
                im2 = enhance_patches(im, p)
                imsave(pfx + '_' + desc_label + '_e_' + str(k) + '.ppm', im2)

        print("OK")

    if args.lbp:
        print("---------> LBP descriptors:")
        g = LBPDescriptor()
        desc_label = 'lbp'

        # on H-plane:
        print("------------> H plane")
        img_iterator = sliding_window_on_regions(h.shape,
                                                 ROIs,
                                                 (args.wsize, args.wsize),
                                                 step=(args.wsize, args.wsize))
        dsc = get_local_desc(h, g, img_iterator, desc_label)

        dst = pdist_lbp(dsc)

        cl = average(dst)
        id = fcluster(cl, t=args.ctxt,
                      criterion='maxclust')  # get the various contexts

        # save clustering/contexts - remember, the coordinates are in the
        # current image system which might have been cropped from the original ->
        # should add back the shift
        z1 = desc_to_matrix(dsc, desc_label)  # col 0: row_min, col 2: col_min
        z1[:, 0] += row_min + dh
        z1[:, 2] += col_min + dw
        z2 = np.matrix(id).transpose()
        z2 = np.hstack((z2, z1))
        np.savetxt(pfx + '_' + desc_label + '_h.dat', z2, delimiter="\t")

        # save visualizations
        for k in range(1, 1 + args.ctxt):
            i = np.where(id == k)[0]
            p = [dsc[j]['roi'] for j in i]
            im2 = enhance_patches(im, p)
            imsave(pfx + '_' + desc_label + '_h_' + str(k) + '.ppm', im2)

        if args.eosine:
            # repeat on E plane:
            print("------------> E plane")
            img_iterator = sliding_window_on_regions(h.shape,
                                                     ROIs,
                                                     (args.wsize, args.wsize),
                                                     step=(args.wsize,
                                                           args.wsize))
            dsc = get_local_desc(e, g, img_iterator, desc_label)

            dst = pdist_lbp(dsc)

            cl = average(dst)
            id = fcluster(cl, t=args.ctxt,
                          criterion='maxclust')  # get the various contexts

            # save clustering/contexts - remember, the coordinates are in the
            # current image system which might have been cropped from the original ->
            # should add back the shift
            z1 = desc_to_matrix(dsc,
                                desc_label)  # col 0: row_min, col 2: col_min
            z1[:, 0] += row_min + dh
            z1[:, 2] += col_min + dw
            z2 = np.matrix(id).transpose()
            z2 = np.hstack((z2, z1))
            np.savetxt(pfx + '_' + desc_label + '_e.dat', z2, delimiter="\t")

            # save visualizations
            for k in range(1, 1 + args.ctxt):
                i = np.where(id == k)[0]
                p = [dsc[j]['roi'] for j in i]
                im2 = enhance_patches(im, p)
                imsave(pfx + '_' + desc_label + '_e_' + str(k) + '.ppm', im2)

        print("OK")

    if args.mfs:
        print("---------> MFS descriptors:")
        g = MFSDescriptor()
        desc_label = 'mfs'

        # on H-plane:
        print("------------> H plane")
        img_iterator = sliding_window_on_regions(h.shape,
                                                 ROIs,
                                                 (args.wsize, args.wsize),
                                                 step=(args.wsize, args.wsize))
        dsc = get_local_desc(h, g, img_iterator, desc_label)

        dst = pdist_mfs(dsc)

        cl = average(dst)
        id = fcluster(cl, t=args.ctxt,
                      criterion='maxclust')  # get the various contexts

        # save clustering/contexts
        # save clustering/contexts - remember, the coordinates are in the
        # current image system which might have been cropped from the original ->
        # should add back the shift
        z1 = desc_to_matrix(dsc, desc_label)  # col 0: row_min, col 2: col_min
        z1[:, 0] += row_min + dh
        z1[:, 2] += col_min + dw
        z2 = np.matrix(id).transpose()
        z2 = np.hstack((z2, z1))
        np.savetxt(pfx + '_' + desc_label + '_h.dat', z2, delimiter="\t")

        # save visualizations
        for k in range(1, 1 + args.ctxt):
            i = np.where(id == k)[0]
            p = [dsc[j]['roi'] for j in i]
            im2 = enhance_patches(im, p)
            imsave(pfx + '_' + desc_label + '_h_' + str(k) + '.ppm', im2)

        if args.eosine:
            # repeat on E plane:
            print("------------> E plane")
            img_iterator = sliding_window_on_regions(h.shape,
                                                     ROIs,
                                                     (args.wsize, args.wsize),
                                                     step=(args.wsize,
                                                           args.wsize))
            dsc = get_local_desc(e, g, img_iterator, desc_label)

            dst = pdist_mfs(dsc)

            cl = average(dst)
            id = fcluster(cl, t=args.ctxt,
                          criterion='maxclust')  # get the various contexts

            # save clustering/contexts - remember, the coordinates are in the
            # current image system which might have been cropped from the original ->
            # should add back the shift
            z1 = desc_to_matrix(dsc,
                                desc_label)  # col 0: row_min, col 2: col_min
            z1[:, 0] += row_min + dh
            z1[:, 2] += col_min + dw
            z2 = np.matrix(id).transpose()
            z2 = np.hstack((z2, z1))
            np.savetxt(pfx + '_' + desc_label + '_e.dat', z2, delimiter="\t")

            # save visualizations
            for k in range(1, 1 + args.ctxt):
                i = np.where(id == k)[0]
                p = [dsc[j]['roi'] for j in i]
                im2 = enhance_patches(im, p)
                imsave(pfx + '_' + desc_label + '_e_' + str(k) + '.ppm', im2)

        print("OK")

    return
Example #7
0
def main():
    """Assign the regions of an image to the clusters of a codebook.

    Command line: ``image config [-r row_min row_max col_min col_max]``.

    The first plane returned by ``rgb2he2`` for the input image is scanned
    with a sliding window whose step equals the window size, restricted to
    the requested ROI (the whole image by default). The first descriptor
    returned by ``read_local_descriptors_cfg`` is computed on each window and
    the vectors are labelled, in blocks, with ``codebook.predict``. For every
    window one tab-separated line holding its coordinates followed by the
    label is written to the output file.

    Configuration, section ``[data]`` (mandatory):
        window_size: optional tuple literal, default ``(32, 32)``
        model: mandatory, model file holding 'codebook', 'shift', 'scale'
            and 'standardize'
        output: optional output file name, default ``output.dat``

    Raises:
        ValueError: if the image read from disk is not a 3-dimensional array.
        RuntimeError: if the ``[data]`` section or its ``model`` option is
            missing.
    """
    p = opt.ArgumentParser(description="""
    Assigns the regions of an image to the clusters of a codebook.
    """)
    p.add_argument('image', action='store', help='image file name')
    p.add_argument('config', action='store', help='a configuration file')
    p.add_argument(
        '-r',
        '--roi',
        action='store',
        nargs=4,
        type=int,
        help=
        'region of interest from the image as: row_min row_max col_min col_max',
        default=None)
    args = p.parse_args()
    img_file = args.image
    cfg_file = args.config

    image_orig = skimage.io.imread(img_file)
    if image_orig.ndim != 3:
        # Without this check im_h would stay unbound and the script would
        # only fail later, with an unhelpful NameError.
        raise ValueError('Input image must be RGB.')
    im_h, _, _ = rgb2he2(image_orig)

    if args.roi is None:
        roi = (0, im_h.shape[0] - 1, 0, im_h.shape[1] - 1)
    else:
        roi = args.roi

    # Process configuration file:
    parser = SafeConfigParser()
    parser.read(cfg_file)

    if not parser.has_section('data'):
        raise RuntimeError('Section [data] is mandatory')
    wsize = (32, 32)
    if parser.has_option('data', 'window_size'):
        wsize = ast.literal_eval(parser.get('data', 'window_size'))

    if not parser.has_option('data', 'model'):
        raise RuntimeError('model file name is missing in [data] section')
    model_file = parser.get('data', 'model')
    with ModelPersistence(model_file, 'r', format='pickle') as mp:
        codebook = mp['codebook']
        Xm = mp['shift']
        Xs = mp['scale']
        standardize = mp['standardize']

    if parser.has_option('data', 'output'):
        out_file = parser.get('data', 'output')
    else:
        out_file = 'output.dat'

    descriptors = read_local_descriptors_cfg(parser)

    # For the moment, it is assumed that only one type of local descriptors is
    # used - no composite feature vectors. This will change in the future but,
    # for the moment only the first type of descriptor in "descriptors" list
    # is used, and the codebook is assumed to be constructed using the same.
    desc = descriptors[0]

    print(img_file)
    print(wsize)
    print(roi[0], roi[1], roi[2], roi[3])

    w_offset = (0, 0)
    if isinstance(desc, HaarLikeDescriptor):
        # this one works on integral images
        image = intg_image(im_h)
        # the sliding window should also be increased by 1:
        w_offset = (1, 1)
        wsize = (wsize[0] + w_offset[0], wsize[1] + w_offset[1])
    else:
        image = im_h

    itw = sliding_window_on_regions(image.shape, [tuple(roi)],
                                    wsize,
                                    step=wsize)

    def classify(block):
        """Return the codebook labels of the descriptor rows in *block*."""
        if standardize:
            block = (block - Xm) / Xs
        return codebook.predict(block).tolist()

    wnd = []
    labels = []
    buff_size = 10000  # every <buff_size> patches we do a classification
    X = np.zeros((buff_size, codebook.cluster_centers_[0].shape[0]))
    k = 0  # number of rows of X filled and not yet classified
    for r in itw:
        # adjust the reported window if the sliding window was enlarged:
        wnd.append((r[0], r[1] - w_offset[1], r[2], r[3] - w_offset[0]))
        X[k, :] = desc.compute(image[r[0]:r[1], r[2]:r[3]])
        k += 1
        if k == buff_size:
            labels.extend(classify(X))
            k = 0  # reset the block

    if k != 0:
        # Some data is accumulated in X but not yet classified. Only the
        # first k rows are valid: rows k.. hold zeros or stale values from a
        # previous block and must not produce labels.
        labels.extend(classify(X[:k]))

    with open(out_file, 'w') as f:
        for window, label in zip(wnd, labels):
            f.write('\t'.join([str(x_) for x_ in window]) +
                    '\t' + str(label) + '\n')
Example #8
0
def _save_contexts(plane, plane_tag, img_shape, rois, wsize, n_ctxt,
                   descriptor, desc_label, pdist_fn, im, pfx):
    """Cluster the local descriptors of one stain plane and save the contexts.

    Local descriptors are computed on non-overlapping ``wsize`` x ``wsize``
    windows laid over *rois*, clustered by average-linkage hierarchical
    clustering into at most *n_ctxt* groups and saved to
    ``<pfx>_<desc_label>_<plane_tag>.dat`` (cluster id in the first column,
    followed by the columns returned by ``desc_to_matrix``). One image per
    cluster, produced by ``enhance_patches``, is saved as
    ``<pfx>_<desc_label>_<plane_tag>_<k>.ppm``.

    :param plane: image plane on which the descriptors are computed
    :param plane_tag: short tag of the plane used in file names ('h' or 'e')
    :param img_shape: shape passed to the sliding window generator
    :param rois: list of [row_min, row_max, col_min, col_max] regions
    :param wsize: int, side of the square windows
    :param n_ctxt: int, number of contexts (clusters) requested
    :param descriptor: local descriptor object
    :param desc_label: descriptor name, used as key and in file names
    :param pdist_fn: function computing the pairwise distances between
        the local descriptors
    :param im: original image, used for the visualizations
    :param pfx: prefix of the output files
    """
    img_iterator = sliding_window_on_regions(img_shape, rois, (wsize, wsize),
                                             step=(wsize, wsize))
    dsc = get_local_desc(plane, descriptor, img_iterator, desc_label)

    dst = pdist_fn(dsc)
    cl = average(dst)
    ctxt_id = fcluster(cl, t=n_ctxt, criterion='maxclust')  # get the various contexts

    # The windows are generated directly on planes of the full, uncropped
    # image, hence the coordinates are already in the image system and no
    # shift has to be added back.
    z1 = desc_to_matrix(dsc, desc_label)
    z2 = np.hstack((np.matrix(ctxt_id).transpose(), z1))
    np.savetxt(pfx + '_' + desc_label + '_' + plane_tag + '.dat', z2,
               delimiter="\t")

    # save visualizations, one image per context
    for k in range(1, 1 + n_ctxt):
        members = np.where(ctxt_id == k)[0]
        patches = [dsc[j]['roi'] for j in members]
        im2 = enhance_patches(im, patches)
        imsave(pfx + '_' + desc_label + '_' + plane_tag + '_' + str(k) + '.ppm',
               im2)


def _contexts_for_descriptor(title, desc_label, descriptor_cls, pdist_fn,
                             h, e, rois, args, im, pfx):
    """Extract the contexts for one descriptor type on H and, optionally, E."""
    print("---------> " + title + " descriptors:")
    descriptor = descriptor_cls()

    print("------------> H plane")
    _save_contexts(h, 'h', h.shape, rois, args.wsize, args.ctxt,
                   descriptor, desc_label, pdist_fn, im, pfx)

    if args.eosine:
        # repeat on E plane:
        print("------------> E plane")
        _save_contexts(e, 'e', h.shape, rois, args.wsize, args.ctxt,
                       descriptor, desc_label, pdist_fn, im, pfx)

    print("OK")


def main():
    """Segment rectangular contexts from labelled ROIs of an H&E slide.

    The ROI file is read line by line and only the regions carrying the
    requested cluster label are kept (their coordinates multiplied by
    ``--scale``). For each descriptor type selected on the command line
    (``--gabor``, ``--haralick``, ``--lbp``, ``--mfs``) the H plane - and the
    E plane as well, with ``--eosine`` - is processed by ``_save_contexts``.
    Output files are prefixed with ``--prefix`` or, by default, with the
    image file name stripped of its last extension.
    """
    p = opt.ArgumentParser(description="""
            Segments a number of rectangular contexts from a H&E slide. The contexts are clusters
            of similar regions of the image. The similarity is based on various textural
            descriptors.
            """)
    p.add_argument('img_file', action='store', help='RGB image file')
    p.add_argument('ctxt', action='store', help='Number of contexts to extract', type=int)
    p.add_argument('wsize', action='store', help='Size of the (square) regions', type=int)
    p.add_argument('roi', action='store', help='a file with ROI coordinates (and context descriptors)')
    p.add_argument('label', action='store', help='the cluster label of interest')

    p.add_argument('--prefix', action='store',
                   help='optional prefix for the resulting files',
                   default=None)
    p.add_argument('--gabor', action='store_true',
                   help='compute Gabor descriptors and generate the corresponding contexts')
    # the option was tested below but never declared, so every run ended
    # with an AttributeError
    p.add_argument('--haralick', action='store_true',
                   help='compute Haralick descriptors and generate the corresponding contexts')
    p.add_argument('--lbp', action='store_true',
                   help='compute LBP (local binary patterns) descriptors and generate the corresponding contexts')
    p.add_argument('--mfs', action='store_true',
                   help='compute fractal descriptors and generate the corresponding contexts')
    p.add_argument('--eosine', action='store_true', help='should also Eosine component be processed?')

    p.add_argument('--scale', action='store', type=float, default=1.0,
                   help='scaling factor for ROI coordinates')

    args = p.parse_args()

    # drop the last extension, if any; a name without extension is kept
    # as it is (and stays a string, not a list)
    name_parts = os.path.basename(args.img_file).split('.')
    if len(name_parts) > 1:
        name_parts.pop()
    base_name = '.'.join(name_parts)

    pfx = args.prefix if args.prefix is not None else base_name

    # the label comes as a string from the command line, while the labels
    # read from the file are converted to int: compare like with like
    target_label = int(float(args.label))

    ROIs = []
    with open(args.roi) as roi_file:
        for line in roi_file:
            if not line.strip():
                continue
            # extract the label and the coordinates from each ROI (one per row).
            # NOTE(review): assumes the layout written by the context
            # extraction step (label, row_min, row_max, col_min, col_max,
            # descriptor values...) - confirm against the producer of the
            # file. The previous slice [1:5] returned only four values.
            lb, row_min, row_max, col_min, col_max = \
                [int(float(x_)) for x_ in line.split('\t')[0:5]]
            if lb != target_label:
                continue
            ROIs.append([int(mh.floor(c * args.scale))
                         for c in (row_min, row_max, col_min, col_max)])

    im = imread(args.img_file)
    print("Original image size:", im.shape)

    # get the H and E planes:
    h, e, _ = rgb2he2(im)

    if args.gabor:
        _contexts_for_descriptor('Gabor', 'gabor', GaborDescriptor, pdist_gabor,
                                 h, e, ROIs, args, im, pfx)

    if args.haralick:
        # NOTE(review): the distance used here is pdist_gabor, as in the
        # original code - confirm it is appropriate for GLCM descriptors.
        _contexts_for_descriptor('Haralick', 'haralick', GLCMDescriptor, pdist_gabor,
                                 h, e, ROIs, args, im, pfx)

    if args.lbp:
        _contexts_for_descriptor('LBP', 'lbp', LBPDescriptor, pdist_lbp,
                                 h, e, ROIs, args, im, pfx)

    if args.mfs:
        _contexts_for_descriptor('MFS', 'mfs', MFSDescriptor, pdist_mfs,
                                 h, e, ROIs, args, im, pfx)

    return
Example #9
0
def _read_local_descriptors(parser):
    """Build the local descriptors declared in the configuration file.

    One descriptor is created for each of the sections 'haar', 'identity',
    'stats', 'hist', 'hog', 'lbp' and 'gabor' present in *parser*, in this
    order; options missing from a section fall back to the defaults coded
    below.

    :param parser: configuration parser that has already read the file
    :return: list of descriptor objects (possibly empty)
    :raises ValueError: if 'hist.min_max', 'hog.ppc' or 'hog.cpb' does not
        evaluate to a tuple
    """
    local_descriptors = []

    #---------
    # haar:
    if parser.has_section('haar'):
        norm = True
        if parser.has_option('haar', 'norm'):
            norm = parser.getboolean('haar', 'norm')
        if len(parser.items('haar')) == 0:
            # empty section, use defaults
            h = HaarLikeDescriptor(HaarLikeDescriptor.haars1())
        else:
            # every option but 'norm' is taken as a feature specification
            h = HaarLikeDescriptor([ast.literal_eval(v)
                                    for n, v in parser.items('haar')
                                    if n.lower() != 'norm'],
                                   _norm=norm)
        local_descriptors.append(h)

    #---------
    # identity:
    if parser.has_section('identity'):
        local_descriptors.append(IdentityDescriptor())

    #---------
    # stats:
    if parser.has_section('stats'):
        stats = [s for s in ('mean', 'std', 'kurtosis', 'skewness')
                 if parser.has_option('stats', s) and parser.getboolean('stats', s)]
        # no statistic enabled: pass None to the descriptor
        local_descriptors.append(StatsDescriptor(stats if stats else None))

    #---------
    # hist:
    if parser.has_section('hist'):
        interval = (0.0, 1.0)
        nbins = 10
        if parser.has_option('hist', 'min_max'):
            interval = ast.literal_eval(parser.get('hist', 'min_max'))
            if not isinstance(interval, tuple):
                raise ValueError('"hist.min_max" specification error')
        if parser.has_option('hist', 'nbins'):
            nbins = parser.getint('hist', 'nbins')
        local_descriptors.append(HistDescriptor(_interval=interval, _nbins=nbins))

    #---------
    # HoG
    if parser.has_section('hog'):
        norient = 9
        ppc = (128, 128)
        cpb = (4, 4)

        if parser.has_option('hog', 'norient'):
            norient = parser.getint('hog', 'norient')
        if parser.has_option('hog', 'ppc'):
            ppc = ast.literal_eval(parser.get('hog', 'ppc'))
            if not isinstance(ppc, tuple):
                raise ValueError('"hog.ppc" specification error')
        if parser.has_option('hog', 'cpb'):
            cpb = ast.literal_eval(parser.get('hog', 'cpb'))
            if not isinstance(cpb, tuple):
                raise ValueError('"hog.cpb" specification error')
        local_descriptors.append(HOGDescriptor(_norient=norient, _ppc=ppc, _cpb=cpb))

    #---------
    # LBP
    if parser.has_section('lbp'):
        radius = 3
        # NOTE(review): the default number of points is derived from the
        # default radius, not from a radius given in the configuration;
        # only an explicit "npoints = 0" yields 8 * <configured radius>.
        npoints = 8 * radius
        method = 'uniform'

        if parser.has_option('lbp', 'radius'):
            radius = parser.getint('lbp', 'radius')
        if parser.has_option('lbp', 'npoints'):
            npoints = parser.getint('lbp', 'npoints')
            if npoints == 0:
                npoints = 8 * radius
        if parser.has_option('lbp', 'method'):
            method = parser.get('lbp', 'method')
        local_descriptors.append(LBPDescriptor(radius=radius, npoints=npoints, method=method))

    #---------
    # Gabor
    if parser.has_section('gabor'):
        theta = np.array([0.0, np.pi / 4.0, np.pi / 2.0, 3.0 * np.pi / 4.0], dtype=np.double)
        freq = np.array([3.0 / 4.0, 3.0 / 8.0, 3.0 / 16.0], dtype=np.double)
        sigma = np.array([1.0, 2 * np.sqrt(2.0)], dtype=np.double)

        if parser.has_option('gabor', 'theta'):
            theta = ast.literal_eval(parser.get('gabor', 'theta'))
        if parser.has_option('gabor', 'freq'):
            freq = ast.literal_eval(parser.get('gabor', 'freq'))
        if parser.has_option('gabor', 'sigma'):
            sigma = ast.literal_eval(parser.get('gabor', 'sigma'))
        local_descriptors.append(GaborDescriptor(theta=theta, freq=freq, sigma=sigma))

    return local_descriptors


def main():
    """Compute bags of local descriptors for all the images in a folder.

    The single command line argument is a configuration file. Its 'sampler'
    section (mandatory) gives the window size and the number of windows, the
    descriptor sections select the local descriptors, and the 'data' section
    (mandatory) gives the input path, the image type (file extension) and the
    output path.

    For each image the first plane returned by ``rgb2he2`` is masked with the
    tissue mask (loaded from ``<input_path>/mask/<name>_tissue_mask.pbm`` if
    the file exists, otherwise computed), cropped to the bounding box of the
    mask and used for growing the bag of descriptors. The results are saved
    in the output path: the ROI in ``roi-<name>.dat`` and, per descriptor, the
    regions followed by the descriptor values in ``<descriptor>_bag-<name>.dat``.

    Raises:
        ValueError: for a missing/invalid mandatory configuration entry, when
            no local descriptor is configured, or when an image is not RGB.
    """
    p = opt.ArgumentParser(description="""
            Constructs a dictionary for image representation based on a set of specified local
            descriptors. The dictionary is built from a set of images given as a list in an
            input file.
            """)
    p.add_argument('config', action='store', help='a configuration file')
    args = p.parse_args()
    cfg_file = args.config

    parser = SafeConfigParser()
    parser.read(cfg_file)

    #---------
    # sampler:
    if not parser.has_section('sampler'):
        raise ValueError('"sampler" section is mandatory')
    if not parser.has_option('sampler', 'type'):
        raise ValueError('"sampler.type" is mandatory')
    sampler_type = parser.get('sampler', 'type').lower()
    if sampler_type not in ['random', 'sliding']:
        raise ValueError('Unkown sampling type')
    if not parser.has_option('sampler', 'window_size'):
        raise ValueError('"sampler.window_size" is mandatory')
    wnd_size = ast.literal_eval(parser.get('sampler', 'window_size'))
    if not isinstance(wnd_size, tuple):
        raise ValueError('"sampler.window_size" specification error')
    # NOTE(review): the sampler type, start and step are parsed (and thereby
    # validated) but are not forwarded to the bag-growing calls below.
    it_start = (0, 0)
    it_step = (1, 1)
    if sampler_type == 'sliding':
        if parser.has_option('sampler', 'start'):
            it_start = ast.literal_eval(parser.get('sampler', 'start'))
        if parser.has_option('sampler', 'step'):
            it_step = ast.literal_eval(parser.get('sampler', 'step'))
    nwindows = parser.getint('sampler', 'nwindows')

    local_descriptors = _read_local_descriptors(parser)
    print('No. of descriptors: ', len(local_descriptors))

    #---------
    # data
    if not parser.has_section('data'):
        raise ValueError('Section "data" is mandatory.')
    data_path = parser.get('data', 'input_path')
    img_ext = parser.get('data', 'image_type')
    res_path = parser.get('data', 'output_path')

    img_files = glob.glob(data_path + '/*.' + img_ext)
    if len(img_files) == 0:
        return

    if not local_descriptors:
        # fail before the costly preprocessing instead of crashing on an
        # empty bag after the first image has been processed
        raise ValueError('No local descriptor is specified in the configuration file.')

    def progress(*msg):
        """Print a progress message without newline and show it immediately."""
        # explicit flush: reopening stdout unbuffered in text mode is not
        # possible on Python 3
        print(*msg, end='')
        sys.stdout.flush()

    ## Process:
    for img_name in img_files:
        img_stem = os.path.splitext(os.path.split(img_name)[1])[0]

        progress("Image: ", img_name, " ...reading... ")
        im = imread(img_name)
        progress("preprocessing... ")
        # -preprocessing
        if im.ndim == 3:
            im_h, _, _ = rgb2he2(im)
        else:
            raise ValueError('Input image must be RGB.')

        # detect object region:
        # -try to load a precomputed mask:
        mask_file_name = data_path + '/mask/' + img_stem + '_tissue_mask.pbm'
        if os.path.exists(mask_file_name):
            progress('(loading mask)...')
            mask = imread(mask_file_name)
            mask = img_as_bool(mask)
            mask = remove_small_objects(mask, min_size=500, connectivity=1, in_place=True)
        else:
            progress('(computing mask)...')
            mask, _ = tissue_region_from_rgb(im, _min_area=500)

        row_min, col_min, row_max, col_max = bounding_box(mask)
        im_h[np.logical_not(mask)] = 0                       # make sure background is 0
        # drop the references to the large arrays that are no longer needed
        mask = None
        im = None
        im_h = im_h[row_min:row_max+1, col_min:col_max+1]

        progress("growing the bag...")
        # -image bag growing
        bag = None                               # bag for current image
        for d in local_descriptors:
            if bag is None:
                # the first descriptor also samples the regions...
                bag = grow_bag_from_new_image(im_h, d, wnd_size, nwindows, discard_empty=True)
            else:
                # ...which are reused for all the other descriptors
                bag[d.name] = grow_bag_with_new_features(im_h, bag['regs'], d)[d.name]

        # save the results for each image, one file per descriptor;
        # keep all keys but the regions (a list is built since key views
        # cannot be modified on Python 3)
        desc_names = [dn for dn in bag.keys() if dn != 'regs']
        # -save the ROI from the original image:
        res_file = res_path + '/' + 'roi-' + img_stem + '.dat'
        with open(res_file, 'w') as f:
            f.write('\t'.join([str(x_) for x_ in [row_min, row_max, col_min, col_max]]))

        for dn in desc_names:
            res_file = res_path + '/' + dn + '_bag-' + img_stem + '.dat'
            with open(res_file, 'w') as f:
                # one line per region: its coordinates, then the descriptor
                for reg, values in zip(bag['regs'], bag[dn]):
                    f.write('\t'.join([str(x_) for x_ in reg]) + '\t' +
                            '\t'.join([str(x_) for x_ in values]) + '\n')

        print('OK')
        sys.stdout.flush()

        bag = None
        gc.collect()
        gc.collect()
Example #10
0
def main():
    """Assign the windows of an image to the clusters of a saved codebook.

    Command line arguments:
        image     -- image file name
        config    -- configuration file
        -r/--roi  -- region of interest given as
                     ``row_min row_max col_min col_max`` (default: whole image)

    Configuration file, section ``[data]`` (mandatory):
        model        -- model file name (mandatory)
        window_size  -- window size as a Python literal (default ``(32, 32)``)
        output       -- output file name (default ``output.dat``)

    The first plane returned by ``rgb2he2`` is scanned with windows of size
    ``window_size`` stepped by ``window_size``.  For every window the first
    configured local descriptor is computed and the codebook predicts a
    label.  One line per window is written to the output file:
    the four window coordinates followed by the label, tab-separated.

    Raises:
        ValueError: if the input image does not have 3 dimensions.
        RuntimeError: if the ``[data]`` section or its ``model`` option is
            missing from the configuration file.
    """
    p = opt.ArgumentParser(description="""
    Assigns the regions of an image to the clusters of a codebook.
    """)
    p.add_argument('image', action='store', help='image file name')
    p.add_argument('config', action='store', help='a configuration file')
    p.add_argument('-r', '--roi', action='store', nargs=4, type=int,
                   help='region of interest from the image as: row_min row_max col_min col_max',
                   default=None)
    args = p.parse_args()
    img_file = args.image
    cfg_file = args.config

    image_orig = skimage.io.imread(img_file)
    if image_orig.ndim != 3:
        # Without this check im_h would stay undefined and the script would
        # die much later with a NameError.
        raise ValueError('Input image must be RGB.')
    im_h, _, _ = rgb2he2(image_orig)

    if args.roi is None:
        # default ROI: the whole image
        roi = (0, image_orig.shape[0]-1, 0, image_orig.shape[1]-1)
    else:
        roi = args.roi

    # Process configuration file:
    parser = SafeConfigParser()
    parser.read(cfg_file)

    if not parser.has_section('data'):
        raise RuntimeError('Section [data] is mandatory')
    wsize = (32, 32)
    if parser.has_option('data', 'window_size'):
        wsize = ast.literal_eval(parser.get('data', 'window_size'))

    if not parser.has_option('data', 'model'):
        raise RuntimeError('model file name is missing in [data] section')
    model_file = parser.get('data', 'model')
    # NOTE(review): the model is stored with format='pickle'; only load model
    # files that come from a trusted source.
    with ModelPersistence(model_file, 'r', format='pickle') as mp:
        codebook = mp['codebook']
        Xm = mp['shift']
        Xs = mp['scale']
        standardize = mp['standardize']

    if parser.has_option('data', 'output'):
        out_file = parser.get('data', 'output')
    else:
        out_file = 'output.dat'

    descriptors = read_local_descriptors_cfg(parser)

    # For the moment, it is assumed that only one type of local descriptors is
    # used - no composite feature vectors. This will change in the future but,
    # for the moment only the first type of descriptor in "descriptors" list
    # is used, and the codebook is assumed to be constructed using the same.
    desc = descriptors[0]

    print(img_file)
    print(wsize)
    print(roi[0], roi[1], roi[2], roi[3])

    w_offset = (0, 0)
    if isinstance(desc, HaarLikeDescriptor):
        # this one works on integral images
        image = intg_image(im_h)
        # the sliding window should also be increased by 1:
        w_offset = (1, 1)
        wsize = (wsize[0] + w_offset[0], wsize[1] + w_offset[1])
    else:
        image = im_h

    def _classify(block):
        # Label the rows of <block>, standardizing them first if the model
        # requires it. The test is done once per block, not once per window.
        if standardize:
            block = (block - Xm) / Xs
        return codebook.predict(block).tolist()

    itw = sliding_window_on_regions(image.shape, [tuple(roi)], wsize, step=wsize)
    wnd = []
    labels = []
    buff_size = 10000                  # every <buff_size> patches we do a classification
    X = np.zeros((buff_size, codebook.cluster_centers_[0].shape[0]))
    k = 0                              # number of rows of X currently filled
    for r in itw:
        # adjust if needed (undo the window enlargement in the reported coordinates):
        wnd.append((r[0], r[1] - w_offset[1], r[2], r[3] - w_offset[0]))
        X[k, :] = desc.compute(image[r[0]:r[1], r[2]:r[3]])
        k += 1
        if k == buff_size:
            labels.extend(_classify(X))
            k = 0                      # reset the block

    if k != 0:
        # Some data is accumulated in X but not yet classified. Exactly k rows
        # (indices 0..k-1) are valid; row k holds stale or zero data and must
        # not be classified.
        labels.extend(_classify(X[0:k, :]))

    with open(out_file, 'w') as f:
        for w, lb in zip(wnd, labels):
            f.write('\t'.join([str(x_) for x_ in w]) + '\t' + str(lb) + '\n')
Example #11
0
def main():
    """Compute bags of local descriptors for all images in a folder.

    Command line arguments:
        config -- configuration file

    Configuration file sections:
        [sampler] (mandatory): ``type`` ('random' or 'sliding'),
            ``window_size`` (tuple literal), ``nwindows`` (int) and, for the
            sliding sampler, optional ``start`` and ``step``.
        [haar], [identity], [stats], [hist], [hog], [lbp], [gabor]: each
            section that is present adds one local descriptor.
        [data] (mandatory): ``input_path``, ``image_type`` (file extension)
            and ``output_path``.

    For every image ``<input_path>/*.<image_type>`` the script writes into
    ``output_path``:
        roi-<image>.dat        -- row_min, row_max, col_min, col_max of the
                                  cropped region, tab-separated
        <desc>_bag-<image>.dat -- one line per sampled region: the region
                                  coordinates followed by the descriptor values

    Raises:
        ValueError: on a missing/invalid mandatory configuration entry, when
            no descriptor section is configured, or when an image does not
            have 3 dimensions.
    """
    p = opt.ArgumentParser(description="""
            Constructs a dictionary for image representation based on a set of specified local
            descriptors. The dictionary is built from a set of images given as a list in an
            input file.
            """)
    p.add_argument('config', action='store', help='a configuration file')
    args = p.parse_args()
    cfg_file = args.config

    parser = SafeConfigParser()
    parser.read(cfg_file)

    #---------
    # sampler:
    if not parser.has_section('sampler'):
        raise ValueError('"sampler" section is mandatory')
    if not parser.has_option('sampler', 'type'):
        raise ValueError('"sampler.type" is mandatory')
    sampler_type = parser.get('sampler', 'type').lower()
    if sampler_type not in ['random', 'sliding']:
        raise ValueError('Unkown sampling type')
    if not parser.has_option('sampler', 'window_size'):
        raise ValueError('"sampler.window_size" is mandatory')
    wnd_size = ast.literal_eval(parser.get('sampler', 'window_size'))
    if not isinstance(wnd_size, tuple):
        raise ValueError('"sampler.window_size" specification error')
    # NOTE: start/step are parsed (and thereby validated as literals) but are
    # not used further down in this script.
    it_start = (0, 0)
    it_step = (1, 1)
    if sampler_type == 'sliding':
        if parser.has_option('sampler', 'start'):
            it_start = ast.literal_eval(parser.get('sampler', 'start'))
        if parser.has_option('sampler', 'step'):
            it_step = ast.literal_eval(parser.get('sampler', 'step'))
    nwindows = parser.getint('sampler', 'nwindows')

    local_descriptors = []
    #---------
    # haar:
    if parser.has_section('haar'):
        haar_norm = True
        if parser.has_option('haar', 'norm'):
            haar_norm = parser.getboolean('haar', 'norm')
        haar_items = parser.items('haar')
        if len(haar_items) == 0:
            # empty section, use defaults
            h = HaarLikeDescriptor(HaarLikeDescriptor.haars1())
        else:
            # every option except 'norm' is a feature specification
            h = HaarLikeDescriptor(
                [ast.literal_eval(v) for n, v in haar_items if n.lower() != 'norm'],
                _norm=haar_norm)
        local_descriptors.append(h)

    #---------
    # identity:
    if parser.has_section('identity'):
        local_descriptors.append(IdentityDescriptor())

    #---------
    # stats:
    if parser.has_section('stats'):
        stats = [name for name in ('mean', 'std', 'kurtosis', 'skewness')
                 if parser.has_option('stats', name)
                 and parser.getboolean('stats', name)]
        # nothing selected: pass None, as before
        local_descriptors.append(StatsDescriptor(stats if stats else None))

    #---------
    # hist:
    if parser.has_section('hist'):
        hist_interval = (0.0, 1.0)
        hist_nbins = 10
        if parser.has_option('hist', 'min_max'):
            hist_interval = ast.literal_eval(parser.get('hist', 'min_max'))
            if not isinstance(hist_interval, tuple):
                raise ValueError('"hist.min_max" specification error')
        if parser.has_option('hist', 'nbins'):
            hist_nbins = parser.getint('hist', 'nbins')
        local_descriptors.append(
            HistDescriptor(_interval=hist_interval, _nbins=hist_nbins))

    #---------
    # HoG
    if parser.has_section('hog'):
        hog_norient = 9
        hog_ppc = (128, 128)
        hog_cpb = (4, 4)

        if parser.has_option('hog', 'norient'):
            hog_norient = parser.getint('hog', 'norient')
        if parser.has_option('hog', 'ppc'):
            hog_ppc = ast.literal_eval(parser.get('hog', 'ppc'))
            if not isinstance(hog_ppc, tuple):
                raise ValueError('"hog.ppc" specification error')
        if parser.has_option('hog', 'cpb'):
            hog_cpb = ast.literal_eval(parser.get('hog', 'cpb'))
            if not isinstance(hog_cpb, tuple):
                raise ValueError('"hog.cpb" specification error')
        local_descriptors.append(
            HOGDescriptor(_norient=hog_norient, _ppc=hog_ppc, _cpb=hog_cpb))

    #---------
    # LBP
    if parser.has_section('lbp'):
        lbp_radius = 3
        lbp_npoints = 0                # 0 means "derive from the radius"
        lbp_method = 'uniform'

        if parser.has_option('lbp', 'radius'):
            lbp_radius = parser.getint('lbp', 'radius')
        if parser.has_option('lbp', 'npoints'):
            lbp_npoints = parser.getint('lbp', 'npoints')
        if lbp_npoints == 0:
            # The default is derived from the radius actually in use, not from
            # the default radius (the value used to be computed before the
            # configured radius was read).
            lbp_npoints = 8 * lbp_radius
        if parser.has_option('lbp', 'method'):
            lbp_method = parser.get('lbp', 'method')
        local_descriptors.append(
            LBPDescriptor(radius=lbp_radius, npoints=lbp_npoints,
                          method=lbp_method))

    #---------
    # Gabor
    if parser.has_section('gabor'):
        gabor_theta = np.array(
            [0.0, np.pi / 4.0, np.pi / 2.0, 3.0 * np.pi / 4.0],
            dtype=np.double)
        gabor_freq = np.array([3.0 / 4.0, 3.0 / 8.0, 3.0 / 16.0],
                              dtype=np.double)
        gabor_sigma = np.array([1.0, 2 * np.sqrt(2.0)], dtype=np.double)

        if parser.has_option('gabor', 'theta'):
            gabor_theta = ast.literal_eval(parser.get('gabor', 'theta'))
        if parser.has_option('gabor', 'freq'):
            gabor_freq = ast.literal_eval(parser.get('gabor', 'freq'))
        if parser.has_option('gabor', 'sigma'):
            gabor_sigma = ast.literal_eval(parser.get('gabor', 'sigma'))
        local_descriptors.append(
            GaborDescriptor(theta=gabor_theta, freq=gabor_freq,
                            sigma=gabor_sigma))

    print('No. of descriptors: ', len(local_descriptors))

    #---------
    # data
    if not parser.has_section('data'):
        raise ValueError('Section "data" is mandatory.')
    data_path = parser.get('data', 'input_path')
    img_ext = parser.get('data', 'image_type')
    res_path = parser.get('data', 'output_path')

    img_files = glob.glob(data_path + '/*.' + img_ext)
    if len(img_files) == 0:
        return

    if not local_descriptors:
        # Without any descriptor the bag would never be created and the
        # script would fail with an AttributeError on the first image.
        raise ValueError('At least one local descriptor section is required.')

    ## Process:

    try:
        sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0)  # unbuffered output
    except ValueError:
        # Python 3 refuses unbuffered text streams; line buffering is the
        # closest available mode there.
        sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 1)

    for img_name in img_files:
        # file name without folder and extension, used for all derived files
        img_base = os.path.splitext(os.path.split(img_name)[1])[0]

        print("Image: ", img_name, " ...reading... ", end='')
        im = imread(img_name)
        print("preprocessing... ", end='')
        # -preprocessing
        if im.ndim == 3:
            im_h, _, _ = rgb2he2(im)
        else:
            raise ValueError('Input image must be RGB.')

        # detect object region:
        # -try to load a precomputed mask:
        mask_file_name = data_path + '/mask/' + img_base + '_tissue_mask.pbm'
        if os.path.exists(mask_file_name):
            print('(loading mask)...', end='')
            mask = imread(mask_file_name)
            mask = img_as_bool(mask)
            mask = remove_small_objects(mask,
                                        min_size=500,
                                        connectivity=1,
                                        in_place=True)
        else:
            print('(computing mask)...', end='')
            mask, _ = tissue_region_from_rgb(im, _min_area=500)

        row_min, col_min, row_max, col_max = bounding_box(mask)
        im_h[np.logical_not(mask)] = 0  # make sure background is 0
        # drop the references to the large arrays that are no longer needed
        mask = None
        im = None
        im_h = im_h[row_min:row_max + 1, col_min:col_max + 1]

        print("growing the bag...", end='')
        # -image bag growing: the first descriptor creates the bag (and the
        # sampled regions), the others are computed on the same regions
        bag = None  # bag for current image
        for d in local_descriptors:
            if bag is None:
                bag = grow_bag_from_new_image(im_h,
                                              d,
                                              wnd_size,
                                              nwindows,
                                              discard_empty=True)
            else:
                bag[d.name] = grow_bag_with_new_features(im_h, bag['regs'],
                                                         d)[d.name]

        # save the results for each image, one file per descriptor
        # (keys() may be a view without remove(), so build a new list)
        desc_names = [key for key in bag.keys() if key != 'regs']
        # -save the ROI from the original image:
        res_file = res_path + '/' + 'roi-' + img_base + '.dat'
        with open(res_file, 'w') as f:
            f.write('\t'.join(
                [str(x_) for x_ in [row_min, row_max, col_min, col_max]]))

        for dn in desc_names:
            res_file = res_path + '/' + dn + '_bag-' + img_base + '.dat'
            with open(res_file, 'w') as f:
                # one line per region: coordinates, then descriptor values
                for reg, values in zip(bag['regs'], bag[dn]):
                    f.write('\t'.join([str(x_) for x_ in reg]) + '\t' +
                            '\t'.join([str(x_) for x_ in values]) + '\n')

        print('OK')

        bag = None
        gc.collect()
        gc.collect()
Example #12
0
def main():
    """Compute local descriptors on the tiles of a tiled H&E slide.

    Command line arguments:
        meta_file -- XML file describing the imported (tiled) image
        scale     -- which of the scales to process
        ctxt      -- number of contexts to extract
        wsize     -- size of the (square) regions
        --prefix  -- prefix of the result files (default: image base name)
        --gabor, --lbp, --mfs, --haralick -- descriptor selection (only
                     --gabor is acted upon in this function)
        --row_min, --col_min, --row_max, --col_max -- region of interest in
                     global image coordinates (a maximum of 0 means "up to
                     the image border")
        --eosine  -- also process the E plane

    The tiles overlapping the region of interest are visited row by row.
    When a tile does not hold an integer number of windows it is extended
    with pixels from its right / bottom / bottom-right neighbours, and the
    next tile starts scanning after the borrowed stripe.  For each tile the
    descriptors are saved with ``np.savetxt`` to
    ``<prefix>_<tile name>_gabor_h.dat`` (and ``..._e.dat``), with the region
    coordinates shifted by the tile origin and the crop offset.

    Raises:
        ValueError: if the scale is missing or ambiguous, the window is too
            large for the region, or no tile overlaps the region.
        RuntimeError: if a tile of the rectangular tile range is missing.
    """
    p = opt.ArgumentParser(description="""
            Segments a number of rectangular contexts from a H&E slide. The contexts are clusters
            of similar regions of the image. The similarity is based on various textural
            descriptors.
            """)
    p.add_argument('meta_file', action='store', help='XML file describing the structure of the imported file')
    p.add_argument('scale', action='store', help='which of the scales to be processed')
    p.add_argument('ctxt', action='store', help='number of contexts to extract', type=int)
    p.add_argument('wsize', action='store', help='size of the (square) regions', type=int)
    p.add_argument('--prefix', action='store',
                   help='optional prefix for the resulting files',
                   default=None)
    p.add_argument('--gabor', action='store_true',
                   help='compute Gabor descriptors and generate the corresponding contexts')
    p.add_argument('--lbp', action='store_true',
                   help='compute LBP (local binary patterns) descriptors and generate the corresponding contexts')
    p.add_argument('--mfs', action='store_true',
                   help='compute fractal descriptors and generate the corresponding contexts')
    p.add_argument('--haralick', action='store_true',
                   help='compute Haralick descriptors and generate the corresponding contexts')
    p.add_argument('--row_min', action='store', type=int, help='start row (rows start at 0)', default=0)
    p.add_argument('--col_min', action='store', type=int, help='start column (columns start at 0)', default=0)
    p.add_argument('--row_max', action='store', type=int, help='end row (maximum: image height-1)', default=0)
    p.add_argument('--col_max', action='store', type=int, help='end column (maximum: image width-1)', default=0)
    p.add_argument('--eosine', action='store_true', help='should also Eosine component be processed?')

    args = p.parse_args()

    xml_file = ET.parse(args.meta_file)
    xml_root = xml_file.getroot()

    # find the name of the image:
    base_name = os.path.basename(xml_root.find('file').text).split('.')
    if len(base_name) > 1:             # at least 1 suffix .ext
        base_name.pop()                # drop the extension
    # Always reassemble into a string: a name without extension used to be
    # left as a list, which broke the file name concatenation below.
    base_name = '.'.join(base_name)

    if args.prefix is not None:
        pfx = args.prefix
    else:
        pfx = base_name

    path = os.path.dirname(args.meta_file)

    # Check if the required scale exists:
    vrs = [_x for _x in xml_root.findall('version') if _x.find('scale').text == args.scale]
    if len(vrs) == 0:
        raise ValueError('The requested scale does not exits.')
    if len(vrs) > 1:
        raise ValueError('Inconsistency detected for the requested scale.')
    all_tiles = vrs[0].findall('tile')

    # get the info about full image:
    im_width = int(xml_root.find('original/width').text)
    im_height = int(xml_root.find('original/height').text)

    # clamp the requested region to the image
    row_min = min(max(args.row_min, 0), im_height-2)
    col_min = min(max(args.col_min, 0), im_width-2)
    row_max = max(min(args.row_max, im_height-1), 0)
    col_max = max(min(args.col_max, im_width-1), 0)

    # 0 stands for "up to the border"
    if row_max == 0:
        row_max = im_height - 1
    if col_max == 0:
        col_max = im_width - 1

    if row_max - row_min < args.wsize or col_max - col_min < args.wsize:
        raise ValueError('Window size too large for requested image size.')

    # keep only the tiles that overlap with the specified region
    tiles = [tl.attrib for tl in all_tiles if int(tl.attrib['x1']) >= col_min
             and col_max >= int(tl.attrib['x0'])
             and int(tl.attrib['y1']) >= row_min
             and row_max >= int(tl.attrib['y0'])]
    if not tiles:
        raise ValueError('No tile overlaps the requested region.')

    # name -> tile attributes; the first occurrence of a name wins
    tiles_by_name = {}
    for t in tiles:
        tiles_by_name.setdefault(t['name'], t)

    # Sort the tiles from top to bottom and left to right.
    # -get all the (i,j) indices of the tiles (explicit lists, so that this
    # also works when map() is lazy):
    rx = re.compile(r'[_.]')
    ij = np.array([[int(v) for v in rx.split(t['name'])[1:3]] for t in tiles])
    # -find i_min, i_max, j_min and j_max. Since the tiles are consecutive
    # (on row and column), these are enough to generate the desired order:
    tile_i_min, tile_j_min = ij.min(axis=0)
    tile_i_max, tile_j_max = ij.max(axis=0)

    tile_dir = path + '/' + str(args.scale) + '/'   # a tile is in <path>/<scale>/tile_i_j.ppm

    row_offset = 0
    for i in range(tile_i_min, tile_i_max+1):
        col_offset = 0
        for j in range(tile_j_min, tile_j_max+1):
            # double-check that tile_i_j is in the list of tiles:
            tile_name = 'tile_'+str(i)+'_'+str(j)+'.ppm'
            if tile_name not in tiles_by_name:
                raise RuntimeError("Missing tile " + tile_name)
            tile = tiles_by_name[tile_name]

            # Idea: the current tile (i,j) might need to be extended with a stripe
            # of maximum args.wsize to the left and bottom. So we load (if they
            # are available) the tiles (i,j+1), (i+1,j) and (i+1,j+1) and extend
            # the current tile...
            im = imread(tile_dir + tile['name'])
            tile_height, tile_width, _ = im.shape

            # The scanning (sliding) windows will start at (row_offset, col_offset)
            # (in this tile's coordinate system). We want to have an integer number
            # of windows so, if needed (and possible) we will extend the current
            # tile with a block of pixels from the neighboring tiles.

            # number of windows on the horizontal
            need_expand_right = False
            right_pad = 0
            right_tile = None
            if j < tile_j_max:  # then we could eventually expand
                h_span = tile_width - col_offset
                if h_span % args.wsize != 0:
                    need_expand_right = True
                    # integer ceiling: independent of the division semantics
                    nh = -(-h_span // args.wsize)
                    right_pad = nh*args.wsize - h_span
                    tile_name = 'tile_'+str(i)+'_'+str(j+1)+'.ppm'
                    assert tile_name in tiles_by_name
                    right_tile = tiles_by_name[tile_name]

            # number of windows on the vertical
            need_expand_bot = False
            bot_pad = 0
            bot_tile = None
            if i < tile_i_max:
                v_span = tile_height - row_offset
                if v_span % args.wsize != 0:
                    need_expand_bot = True
                    nv = -(-v_span // args.wsize)
                    bot_pad = nv*args.wsize - v_span
                    tile_name = 'tile_'+str(i+1)+'_'+str(j)+'.ppm'
                    assert tile_name in tiles_by_name
                    bot_tile = tiles_by_name[tile_name]

            rb_tile = None
            if need_expand_right and need_expand_bot:
                # this MUST exist if the right and bottom tiles above exist:
                tile_name = 'tile_'+str(i+1)+'_'+str(j+1)+'.ppm'
                assert tile_name in tiles_by_name
                rb_tile = tiles_by_name[tile_name]

            # expand the image to the right and bottom only if there is a neighboring tile in
            # that direction
            r = 1 if right_tile is not None else 0
            b = 1 if bot_tile is not None else 0

            next_row_offset, next_col_offset = 0, 0

            if r+b > 0:  # we need to (and we can) pad the image with pixels from neighbors
                # Enlarge the image to the right and bottom. The canvas keeps
                # the dtype of the tile so that padded and non-padded tiles
                # are handed to rgb2he2 in the same representation.
                im_tmp = np.zeros((tile_height+b*bot_pad, tile_width+r*right_pad, im.shape[2]),
                                  dtype=im.dtype)
                im_tmp[0:tile_height, 0:tile_width, :] = im
                im = im_tmp

                if right_tile is not None:
                    im_tmp = imread(tile_dir + right_tile['name'])
                    im[0:tile_height, tile_width:tile_width+right_pad, :] = im_tmp[0:tile_height, 0:right_pad, :]
                    next_col_offset = right_pad

                if bot_tile is not None:
                    im_tmp = imread(tile_dir + bot_tile['name'])
                    im[tile_height:tile_height+bot_pad, 0:tile_width, :] = im_tmp[0:bot_pad, 0:tile_width, :]
                    next_row_offset = bot_pad

                if rb_tile is not None:
                    im_tmp = imread(tile_dir + rb_tile['name'])
                    im[tile_height:tile_height+bot_pad, tile_width:tile_width+right_pad, :] = im_tmp[0:bot_pad, 0:right_pad, :]

                im_tmp = None  # discard

            # From the current tile (padded), we need to process the region
            # (row_offset, col_offset) -> (im.height, im.width) (with new
            # height and width). But there might still be some restrictions
            # due to the region of interest (row_min, col_min) -> (row_max, col_max).
            # These last coordinates are in global coordinate system! So, first we
            # convert them to (rmn, cmn) -> (rmx, cmx), and lower bound them to
            # the offset:
            rmn = max(row_min - int(tile['y0']), row_offset)
            rmx = min(row_max - int(tile['y0']) + 1, im.shape[0])
            cmn = max(col_min - int(tile['x0']), col_offset)
            cmx = min(col_max - int(tile['x0']) + 1, im.shape[1])

            im = im[rmn:rmx, cmn:cmx, :]  # image to process

            # tile contains the real coordinates of the region in the image
            crt_row_min = int(tile['y0'])
            crt_col_min = int(tile['x0'])

            # must be updated before the 'continue' below
            col_offset = next_col_offset

            # Finally, we have the image for analysis. Don't forget to transform the coordinates
            # from current tile system to global image system when saving the results.
            if im.shape[0] < args.wsize or im.shape[1] < args.wsize:
                # (what is left of the) tile is smaller than the window size
                continue
            # get the H and E planes:
            h, e, _ = rgb2he2(im)

            if args.gabor:
                print("---------> Gabor descriptors:")
                g = GaborDescriptor()
                desc_label = 'gabor'

                # H plane always, E plane only on request
                planes = [('H', 'h', h)]
                if args.eosine:
                    planes.append(('E', 'e', e))

                for plane_label, suffix, plane in planes:
                    print("------------> " + plane_label + " plane")
                    # the window grid is laid out on the H plane's shape for
                    # both planes
                    img_iterator = sliding_window(h.shape, (args.wsize,args.wsize),
                                                  step=(args.wsize,args.wsize))
                    dsc = get_local_desc(plane, g, img_iterator, desc_label)
                    ctx_id = np.zeros((1, len(dsc)))   # we do not cluster here...

                    # save clustering/contexts - remember, the coordinates are in the
                    # current tile/image system -> should add back the shift
                    z1 = desc_to_matrix(dsc, desc_label)  # col 0: row_min, col 2: col_min
                    z1[:, 0:2] += crt_row_min + rmn
                    z1[:, 2:4] += crt_col_min + cmn
                    z2 = np.matrix(ctx_id).transpose()
                    z2 = np.hstack( (z2, z1) )
                    np.savetxt(pfx+'_'+tile['name']+'_'+desc_label+'_'+suffix+'.dat', z2, delimiter="\t")
                print("OK")

        # end for j...
        row_offset = next_row_offset
    # end for i....

    return
Example #13
0
def _save_plane_contexts(im, plane, grid_shape, descriptor, desc_label,
                         pdist_fn, n_ctxt, wsize, row_shift, col_shift,
                         out_stem):
    """
    Cluster the local descriptors of one stain plane and save the results.

    The plane is scanned with non-overlapping wsize x wsize windows, the
    descriptors are clustered by average-linkage hierarchical clustering into
    at most n_ctxt contexts and two kinds of files are written:
        - <out_stem>.dat: tab-separated, first column the context label,
          followed by the descriptor matrix
        - <out_stem>_<k>.ppm: for each context k, the result of
          enhance_patches() applied to the image and the regions of context k

    :param im: the (cropped) RGB image, used only for visualization
    :param plane: the intensity plane on which descriptors are computed
    :param grid_shape: shape passed to sliding_window() to generate windows
    :param descriptor: local descriptor object passed to get_local_desc()
    :param desc_label: str, label of the descriptor (e.g. 'gabor')
    :param pdist_fn: callable computing pairwise distances from descriptors
    :param n_ctxt: int, maximum number of contexts (clusters)
    :param wsize: int, side of the square windows
    :param row_shift: int, offset added to the row coordinates
    :param col_shift: int, offset added to the column coordinates
    :param out_stem: str, common prefix of all produced files
    """
    img_iterator = sliding_window(grid_shape, (wsize, wsize),
                                  step=(wsize, wsize))
    dsc = get_local_desc(plane, descriptor, img_iterator, desc_label)

    # average-linkage clustering, cut so that at most n_ctxt clusters result
    labels = fcluster(average(pdist_fn(dsc)), t=n_ctxt, criterion='maxclust')

    # The coordinates are in the system of the current image, which might have
    # been cropped from the original -> add back the shift. All four
    # coordinate columns must be shifted:
    # col 0:4 [row_min, row_max, col_min, col_max]
    z1 = desc_to_matrix(dsc, desc_label)
    z1[:, 0:2] += row_shift
    z1[:, 2:4] += col_shift
    z2 = np.hstack((np.asarray(labels).reshape(-1, 1), z1))
    np.savetxt(out_stem + '.dat', z2, delimiter="\t")

    # save visualizations: one image per context (labels start at 1)
    for k in range(1, 1 + n_ctxt):
        members = np.where(labels == k)[0]
        rois = [dsc[j]['roi'] for j in members]
        imsave(out_stem + '_' + str(k) + '.ppm', enhance_patches(im, rois))


def main():
    """
    Command line entry point: segment rectangular contexts from a H&E image.

    The image is optionally restricted to a region of interest, cropped
    (centered) to a multiple of the window size and saved as
    <prefix>_cropped.ppm. For each requested descriptor type (--gabor,
    --haralick, --lbp, --mfs) the haematoxylin plane (and, with --eosine, also
    the eosin plane) is processed: local descriptors are clustered into
    contexts and the results are saved as <prefix>_<descriptor>_<h|e>.dat and
    <prefix>_<descriptor>_<h|e>_<k>.ppm.

    :raises ValueError: if the window does not fit the requested region
    """
    parser = opt.ArgumentParser(description="""
            Segments a number of rectangular contexts from a H&E slide. The contexts are clusters
            of similar regions of the image. The similarity is based on various textural
            descriptors.
            """)
    parser.add_argument('img_file', action='store', help='RGB image file')
    parser.add_argument('ctxt',
                        action='store',
                        help='Number of contexts to extract',
                        type=int)
    parser.add_argument('wsize',
                        action='store',
                        help='Size of the (square) regions',
                        type=int)
    parser.add_argument('--prefix',
                        action='store',
                        help='optional prefix for the resulting files',
                        default=None)
    parser.add_argument(
        '--gabor',
        action='store_true',
        help='compute Gabor descriptors and generate the corresponding contexts'
    )
    parser.add_argument(
        '--lbp',
        action='store_true',
        help=
        'compute LBP (local binary patterns) descriptors and generate the corresponding contexts'
    )
    parser.add_argument(
        '--mfs',
        action='store_true',
        help=
        'compute fractal descriptors and generate the corresponding contexts')
    parser.add_argument(
        '--haralick',
        action='store_true',
        help=
        'compute Haralick descriptors and generate the corresponding contexts')
    parser.add_argument('--row_min',
                        action='store',
                        type=int,
                        help='start row (rows start at 0)',
                        default=0)
    parser.add_argument('--col_min',
                        action='store',
                        type=int,
                        help='start column (columns start at 0)',
                        default=0)
    parser.add_argument('--row_max',
                        action='store',
                        type=int,
                        help='end row (maximum: image height-1)',
                        default=0)
    parser.add_argument('--col_max',
                        action='store',
                        type=int,
                        help='end column (maximum: image width-1)',
                        default=0)
    parser.add_argument('--eosine',
                        action='store_true',
                        help='should also Eosine component be processed?')

    args = parser.parse_args()

    if args.wsize <= 0:
        # a non-positive window size would lead to a division by zero or to
        # meaningless crops below
        parser.error('wsize must be a positive integer')

    if args.prefix is not None:
        pfx = args.prefix
    else:
        # file name without directory and without the last extension; always
        # a string, even when the file name has no extension at all
        pfx = os.path.splitext(os.path.basename(args.img_file))[0]

    im = imread(args.img_file)
    print("Original image size:", im.shape)

    # clamp the requested region to the image; a value of 0 for row_max or
    # col_max (the default) selects the last row/column of the image
    row_min = min(max(args.row_min, 0), im.shape[0] - 2)
    col_min = min(max(args.col_min, 0), im.shape[1] - 2)
    row_max = max(min(args.row_max, im.shape[0] - 1), 0)
    col_max = max(min(args.col_max, im.shape[1] - 1), 0)

    if row_max == 0:
        row_max = im.shape[0] - 1
    if col_max == 0:
        col_max = im.shape[1] - 1

    if row_max - row_min < args.wsize or col_max - col_min < args.wsize:
        raise ValueError('Window size too large for requested image size.')

    im = im[row_min:row_max + 1, col_min:col_max + 1, :]

    # crop the image to multiple of wsize, keeping the central part; integer
    # arithmetic guarantees valid (int) slice indices
    nh, nw = im.shape[0] // args.wsize, im.shape[1] // args.wsize
    dh = (im.shape[0] - nh * args.wsize) // 2
    dw = (im.shape[1] - nw * args.wsize) // 2
    im = im[dh:dh + nh * args.wsize, dw:dw + nw * args.wsize, :]
    print("Image cropped to:", im.shape)
    imsave(pfx + '_cropped.ppm', im)

    # get the H and E planes:
    h, e, _ = rgb2he2(im)

    # planes to process: (name shown on screen, file name suffix, data)
    planes = [('H', 'h', h)]
    if args.eosine:
        planes.append(('E', 'e', e))

    def process(title, descriptor, desc_label, pdist_fn):
        # run the clustering pipeline for one descriptor on all planes
        print("---------> " + title + " descriptors:")
        for plane_name, suffix, plane in planes:
            print("------------> " + plane_name + " plane")
            _save_plane_contexts(im, plane, h.shape, descriptor, desc_label,
                                 pdist_fn, args.ctxt, args.wsize,
                                 row_min + dh, col_min + dw,
                                 pfx + '_' + desc_label + '_' + suffix)
        print("OK")

    if args.gabor:
        process("Gabor", GaborDescriptor(), 'gabor', pdist_gabor)

    if args.haralick:
        # NOTE(review): Haralick descriptors are compared with pdist_gabor,
        # exactly as before - confirm this is the intended distance.
        process("Haralick", GLCMDescriptor(), 'haralick', pdist_gabor)

    if args.lbp:
        process("LBP", LBPDescriptor(), 'lbp', pdist_lbp)

    if args.mfs:
        process("MFS", MFSDescriptor(), 'mfs', pdist_mfs)

    return