Exemple #1
0
    def __getitem__(self, item):
        """Return ``(image, label)`` for the sample at position ``item``.

        The image is read from ``self.data_dir`` at the ``self.tiff_scale``
        frame of the file, colour-converted, passed through ``tile`` and
        returned as a torch tensor with the last axis moved to the front.
        The label is a length-5 float32 vector whose entries at positions
        ``i < label`` are 1 and the remaining entries are 0.
        """
        image_path = os.path.join(self.data_dir, self.images[item])
        grade = self.labels[item]

        # One flag per threshold 0..4: set while the threshold is below grade.
        target = np.array([int(i < grade) for i in range(5)],
                          dtype=np.float32)

        slide = MultiImage(image_path)[self.tiff_scale]
        slide = cv.cvtColor(slide, cv.COLOR_RGB2BGR)

        tiles = tile(slide, self.sz, self.N, self.transforms, self.random)

        # Move the last axis first (channels-first, assuming an HWC array).
        tensor = torch.from_numpy(tiles.transpose(2, 0, 1))
        return tensor, target
Exemple #2
0
    def __getitem__(self, item):
        """Return the tiled image tensor for the sample at position ``item``.

        File names without an extension get ``.tiff`` appended. The frame at
        ``self.tiff_scale`` is colour-converted, reduced with
        ``get_minimal_image``, tiled, optionally augmented with
        ``self.transforms`` and returned with the last axis moved first.
        """
        filename = self.images[item]
        _, extension = os.path.splitext(filename)
        if not extension:
            filename = filename + ".tiff"

        slide = MultiImage(os.path.join(self.data_dir,
                                        filename))[self.tiff_scale]
        slide = cv.cvtColor(slide, cv.COLOR_RGB2BGR)

        # Only the image part of the helper's result is used here.
        cropped, _ = get_minimal_image(slide)
        tiled = tile(cropped, self.tile_size, self.num_tiles)

        if self.transforms is not None:
            tiled = self.transforms(image=tiled)["image"]

        return torch.from_numpy(tiled.transpose(2, 0, 1))
Exemple #3
0
def tif_txt_extract(input_files_path,
                    output_files_path,
                    ocr_path=None,
                    verbose=False):
    """Run OCR on every ``*.tif`` / ``*.TIF`` file of a directory.

    For each input file, the text of all its frames is concatenated and
    appended to ``<output_files_path>/<name without extension>.txt``.

    Args:
        input_files_path: Directory that is searched (non-recursively).
        output_files_path: Directory that receives the ``.txt`` files.
        ocr_path: Optional path to the tesseract executable; only applied
            on Windows and only when it is not ``None``.
        verbose: Forwarded to ``__verbose_print`` for progress messages.

    Any exception is caught and printed, so a failure stops the remaining
    files without propagating to the caller.
    """
    try:
        # Only override the tesseract command when a path was supplied;
        # assigning None would discard pytesseract's own default.
        if platform.system() == 'Windows' and ocr_path is not None:
            pytesseract.pytesseract.tesseract_cmd = ocr_path

        # The set removes duplicates when both patterns match the same file
        # (case-insensitive file systems); sorting makes the order stable.
        files = sorted(
            set(
                glob.glob(input_files_path + "/*.TIF") +
                glob.glob(input_files_path + "/*.tif")))

        total = len(files)
        for i, file in enumerate(files, start=1):
            stem = os.path.splitext(os.path.basename(file))[0]

            pages = MultiImage(file, plugin='pil')
            text = ''.join(
                pytesseract.image_to_string(Image.fromarray(page))
                for page in pages)

            out_path = os.path.join(output_files_path, stem + ".txt")
            # The context manager closes the file even if the write fails.
            with open(out_path, "a+", encoding="utf-8") as f:
                f.write(text)

            __verbose_print(
                str(i) + " of " + str(total) + " file(s) completed", verbose)
    except Exception as e:
        # Deliberate best-effort: report the problem instead of raising.
        print(e)
Exemple #4
0
    def __tifutil(self, file, output_path):
        """Save each frame of a TIFF file as a JPEG inside ``output_path``.

        Output files are named ``<basename of file>_<n>.jpg`` with ``n``
        counting frames from 1; the basename keeps its original extension.
        """
        prefix = output_path + os.sep + os.path.basename(file) + '_'

        page_no = 0
        for frame in MultiImage(file, plugin='pil'):
            page_no += 1
            target = prefix + str(page_no) + ".jpg"
            Image.fromarray(frame).save(target, 'JPEG')
    def __init__(self,
                 slide_fn,
                 level=2,
                 tile_size=128,
                 mask_fn=None,
                 data_provider=None):
        """Load a slide image and prepare it for tile sampling.

        Args:
            slide_fn: Path of the multi-frame slide image.
            level: Index of the frame that tiles are cut from.
            tile_size: Edge length of a tile, in pixels of ``level``.
            mask_fn: Optional path of the matching mask image.
            data_provider: Optional provider identifier stored alongside
                the mask.

        ``self.mask`` is only created when both ``mask_fn`` and
        ``data_provider`` are given.
        """
        self.slide_fn = slide_fn
        self.level = level
        self.tile_size = tile_size

        # Open the file once and take both frames from the same reader.
        slide = MultiImage(self.slide_fn)
        self.img = slide[self.level].copy()
        # (width, height) of the working frame.
        self.dims = np.array(self.img.shape[:2][::-1])
        # Frame 2 is always kept as the basis of the tissue mask.
        self.ds_img = slide[2].copy()
        self.tissue_mask = makeTissueMask(self.ds_img)

        self.mask_fn = mask_fn
        self.data_provider = data_provider
        if self.mask_fn is not None and self.data_provider is not None:
            # Collapse the mask's last axis into a single 2-D map.
            self.mask = MultiImage(mask_fn)[level].sum(axis=-1)

        # Filled in by the coordinate sampling step.
        self.tile_coords = None
class TestMultiImage():
    """Tests for ``MultiImage`` using the ``multipage.tif`` fixture."""

    def setUp(self):
        # The multipage TIF fixture was created with imagemagick:
        # convert im1.tif im2.tif -adjoin multipage.tif
        if not PIL_available:
            return
        self.img = MultiImage(os.path.join(data_dir, 'multipage.tif'))

    @skipif(not PIL_available)
    def test_len(self):
        assert len(self.img) == 2

    @skipif(not PIL_available)
    def test_getitem(self):
        count = len(self.img)
        for index in range(-count, count):
            assert type(self.img[index]) is np.ndarray
        # The most negative valid index must address the first frame.
        assert_array_almost_equal(self.img[0], self.img[-count])

        # assert_raises needs a callable, so wrap the indexing expression.
        fetch = lambda n: self.img[n]
        assert_raises(IndexError, fetch, count)
        assert_raises(IndexError, fetch, -count - 1)

    @skipif(not PIL_available)
    def test_files_property(self):
        assert isinstance(self.img.filename, six.string_types)

        # Assigning to the attribute is expected to be rejected.
        assert_raises(AttributeError, setattr, self.img, 'filename',
                      'newfile')

    @skipif(not PIL_available)
    def test_conserve_memory_property(self):
        assert isinstance(self.img.conserve_memory, bool)

        # Assigning to the attribute is expected to be rejected.
        assert_raises(AttributeError, setattr, self.img, 'conserve_memory',
                      True)

    @skipif(not PIL_available)
    def test_concatenate(self):
        stacked = self.img.concatenate()
        expected_shape = (len(self.img),) + self.img[0].shape
        assert_equal(stacked.shape, expected_shape)
Exemple #7
0
    def __cropPatchesFromImage(self, image_name, downsample_level=2):
        """Cut ``self.__patches_per_image`` square patches from an image.

        Patch positions are drawn at random from the coordinates returned
        by ``__getCellCoordinatesFromImage``. For every patch up to five
        positions are tried; the first one whose mean pixel value is below
        230 is kept, otherwise the fifth attempt is kept as it is.

        Returns a list of patches.
        """
        # downsample_level: 0, 1, 2
        # Resolution downsample levels: 1, 4, 16
        multi_image = MultiImage(image_name)
        source = multi_image[downsample_level]
        source_shape = source.shape
        scale = 4 ** (2 - downsample_level)
        size = self.__patch_size
        target_shape = (size, size)

        # Candidate positions to pick patch origins from
        coords = self.__getCellCoordinatesFromImage(
            multi_image, scale, source_shape)

        # Largest origin that still leaves room for a full patch
        limit_x = source_shape[1] - size
        limit_y = source_shape[0] - size

        patches = []
        for _ in range(self.__patches_per_image):
            for _attempt in range(5):
                pick = random.randint(0, coords.shape[1] - 1)

                # Scale the stored coordinates up to the cropped frame
                start_y, start_x = coords[:, pick] * scale
                start_x = max(0, min(start_x, limit_x))
                start_y = max(0, min(start_y, limit_y))
                end_x = start_x + size
                end_y = start_y + size

                patch = source[start_y:end_y, start_x:end_x]

                # Frames smaller than a patch are stretched to patch size
                if patch.shape[:2] != target_shape:
                    patch = cv2.resize(
                        patch, dsize=target_shape,
                        interpolation=cv2.INTER_CUBIC)

                # Accept once the patch is not almost entirely white
                if np.mean(patch) < 230:
                    break
            # After five tries the last candidate is used regardless
            patches.append(patch)
        return patches
class TestMultiImage():
    """Checks the basic ``MultiImage`` behaviour on ``multipage.tif``."""

    def setUp(self):
        # This multipage TIF file was created with imagemagick:
        # convert im1.tif im2.tif -adjoin multipage.tif
        if not PIL_available:
            return
        fixture = os.path.join(data_dir, 'multipage.tif')
        self.img = MultiImage(fixture)

    @skipif(not PIL_available)
    def test_len(self):
        assert len(self.img) == 2

    @skipif(not PIL_available)
    def test_getitem(self):
        n_frames = len(self.img)
        for position in range(-n_frames, n_frames):
            assert type(self.img[position]) is np.ndarray
        assert_array_almost_equal(self.img[0], self.img[-n_frames])

        # assert_raises expects a callable, so indexing goes via a lambda
        frame_at = lambda n: self.img[n]

        # One step past either end of the valid range must fail
        for out_of_range in (n_frames, -n_frames - 1):
            assert_raises(IndexError, frame_at, out_of_range)

    @skipif(not PIL_available)
    def test_files_property(self):
        assert isinstance(self.img.filename, six.string_types)

        # Writing to the attribute is expected to raise
        assert_raises(AttributeError, setattr, self.img, 'filename',
                      'newfile')

    @skipif(not PIL_available)
    def test_conserve_memory_property(self):
        assert isinstance(self.img.conserve_memory, bool)

        # Writing to the attribute is expected to raise
        assert_raises(AttributeError, setattr, self.img, 'conserve_memory',
                      True)

    @skipif(not PIL_available)
    def test_concatenate(self):
        combined = self.img.concatenate()
        wanted = (len(self.img), ) + self.img[0].shape
        assert_equal(combined.shape, wanted)
Exemple #9
0
 def __tile(self, img_path, mask_path, number_of_tiles=12):
     """Split an image and its mask into square tiles and keep a subset.

     The last frame of both files is padded (image with 255, mask with 0)
     to a multiple of ``self.__patch_size``, cut into tiles, and the
     ``number_of_tiles`` tiles with the smallest image pixel sum are
     returned as ``(image_tiles, mask_tiles)``. When fewer tiles exist,
     both stacks are topped up with constant tiles first.
     """
     size = self.__patch_size
     img = MultiImage(img_path)[-1]
     mask = MultiImage(mask_path)[-1]

     # Padding needed to reach the next multiple of the patch size,
     # split as evenly as possible between both sides of each axis.
     pad_h = (size - img.shape[0] % size) % size
     pad_w = (size - img.shape[1] % size) % size
     border = [[pad_h // 2, pad_h - pad_h // 2],
               [pad_w // 2, pad_w - pad_w // 2], [0, 0]]

     img = np.pad(img, border, constant_values=255)
     mask = np.pad(mask, border, constant_values=0)

     def to_tiles(array):
         # (rows*size, cols*size, 3) -> (rows*cols, size, size, 3)
         grid = array.reshape(array.shape[0] // size, size,
                              array.shape[1] // size, size, 3)
         return grid.transpose(0, 2, 1, 3, 4).reshape(-1, size, size, 3)

     img = to_tiles(img)
     mask = to_tiles(mask)

     # Top up both stacks with constant tiles when there are too few.
     shortfall = number_of_tiles - len(img)
     if shortfall > 0:
         filler = [[0, shortfall], [0, 0], [0, 0], [0, 0]]
         mask = np.pad(mask, filler, constant_values=0)
         img = np.pad(img, filler, constant_values=255)

     # Smallest pixel sum first; the same selection is applied to the mask.
     tile_sums = img.reshape(img.shape[0], -1).sum(-1)
     idxs = np.argsort(tile_sums)[:number_of_tiles]
     return img[idxs], mask[idxs]
Exemple #10
0
    def __getitem__(self, idx):
        """Return the sample at ``idx``.

        In train/validation mode a ``.png`` is read and
        ``(image, target)`` is returned, where ``target`` concatenates the
        encoded ISUP grade and both Gleason score components. Otherwise the
        last frame of the ``.tiff`` is resized and only the image is
        returned.
        """
        labelled = self.is_train or self.is_val
        base_path = self.image_path + self.image_id[idx]

        if labelled:
            image = cv2.imread(base_path + '.png')
        else:
            image = MultiImage(base_path + '.tiff')[-1]
            # NOTE(review): cv2.resize takes dsize as (width, height); the
            # order here only matters if HEIGHT != WIDTH - confirm.
            image = cv2.resize(image, (HEIGHT, WIDTH))

        # NOTE(review): the same BGR->RGB swap is applied to both sources;
        # confirm the TIFF frame really arrives in BGR order.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # NOTE(review): reshape reinterprets the buffer instead of moving
        # the channel axis - confirm this is intended (vs. a transpose).
        image = self.aug(image=image)['image'].reshape((3, HEIGHT, WIDTH))

        if not labelled:
            return FloatTensor(image)

        score = self.data.gleason_score[idx]
        target = np.concatenate(
            [
                cat([self.data.isup_grade[idx]], num_classes=6),
                cat([score[0]], num_classes=5),
                cat([score[1]], num_classes=5),
            ],
            axis=1)
        return FloatTensor(image), FloatTensor(target)
Exemple #11
0
import os

import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid import AxesGrid

from skimage.io import MultiImage
from skimage import data_dir

# Read the multi-frame example image shipped in the data directory
fname = os.path.join(data_dir, 'multipage.tif')
img = MultiImage(fname)

# One row with two axes to hold the frames side by side
fig = plt.figure()
grid = AxesGrid(fig, rect=(1, 1, 1), nrows_ncols=(1, 2), axes_pad=0.1)

# Draw every frame in grayscale, label it and hide the tick marks
for i, frame in enumerate(img):
    ax = grid[i]
    ax.imshow(frame, cmap=plt.cm.gray)
    ax.set_xlabel('Frame %s' % i)
    for clear_ticks in (ax.set_xticks, ax.set_yticks):
        clear_ticks([])

plt.show()
 def setUp(self):
     """Load the multipage fixture when PIL is available."""
     # The multipage TIF fixture was created with imagemagick:
     # convert im1.tif im2.tif -adjoin multipage.tif
     if not PIL_available:
         return
     fixture = os.path.join(data_dir, 'multipage.tif')
     self.img = MultiImage(fixture)
class Slide:
    ''' Slide image together with helpers to sample tile coordinates,
        cut tiles, classify them against a mask and visualize coverage.

        Attributes set by `__init__`: `img` (frame at `level`), `dims`
        (width, height of `img`), `ds_img` (frame 2), `tissue_mask`,
        optionally `mask`, and `tile_coords` (None until
        `getTileCoords` has run).
    '''

    def __init__(self,
                 slide_fn,
                 level=2,
                 tile_size=128,
                 mask_fn=None,
                 data_provider=None):
        ''' Load the slide frames and compute the tissue mask.

            slide_fn      = path of the multi-frame slide image
            level         = index of the frame tiles are cut from
            tile_size     = tile edge length in pixels of `level`
            mask_fn       = optional path of the matching mask image
            data_provider = optional provider identifier used when
                            classifying mask tiles

            `self.mask` is only created when both `mask_fn` and
            `data_provider` are given.
        '''
        self.slide_fn = slide_fn
        self.level = level
        self.tile_size = tile_size

        # Open the file once and take both frames from the same reader
        slide = MultiImage(self.slide_fn)
        self.img = slide[self.level].copy()
        self.dims = np.array(self.img.shape[:2][::-1])
        self.ds_img = slide[2].copy()
        self.tissue_mask = makeTissueMask(self.ds_img)

        self.mask_fn = mask_fn
        self.data_provider = data_provider
        if self.mask_fn is not None and self.data_provider is not None:
            self.mask = MultiImage(mask_fn)[level].sum(axis=-1)

        self.tile_coords = None

    def getTileCoords(self,
                      num_tiles: int,
                      sampling_method='skeleton',
                      tissue_th: tuple = (0.2, 0.7),
                      seed=None,
                      offset: tuple = (0, 0)):
        ''' 
            Find up to `num_tiles` tile coordinates and store them in
            `self.tile_coords` (top-left corners, in pixels of `self.level`).
            Nothing is returned.

            sampling_method = 'skeleton', 'tissue_pct' or 'slic'.

            `tissue_th`  is the slice of tissue percentage ~(min, max)
            within which we allow the search: sometimes the `max` might not give enough tiles 
            for the mosaic, so we can decrease it gradually until `min` is reached. 
            Only used by 'tissue_pct'.

            seed = optional seed for the random sub-selection; the global
            numpy seed is reset afterwards.

            offset = (x,y) coord offset in tile size fractions that is applied to sampled coords. This is meant as a form of data augmentation.
        '''

        # The message has to be part of the assert statement to be reported
        assert sampling_method in {'skeleton', 'tissue_pct', 'slic'}, (
            "`sampling_method` should be one of 'skeleton', 'tissue_pct' or 'slic'"
        )

        level_offset = 2 - self.level

        if sampling_method == 'skeleton':

            # Determine skeleton from filled tissue mask
            filled_tissue_mask = fillTissueMask(self.tissue_mask.copy())
            filled_tissue_mask = filled_tissue_mask // 255  # needs to be an array of 0s and 1s
            skeleton = skeletonize(filled_tissue_mask, method='lee')
            self.skeleton = np.uint8(np.where(skeleton != 0, 255, 0))

            skeleton = cv2.dilate(self.skeleton, None)
            contours, _ = cv2.findContours(skeleton, 0, cv2.CHAIN_APPROX_NONE)

            # Filter contours based on length
            arch_lens = []
            valid_indices = []
            radius = int(self.tile_size / (4**level_offset))
            for idx, arch in enumerate(contours):
                c = cv2.arcLength(arch, False)
                c = c / 2
                if c < radius / 4:
                    continue

                valid_indices.append(idx)
                arch_lens.append(c)

            if valid_indices:
                # Plain list selection: contours differ in length, so they
                # cannot be packed into one regular numpy array
                contours = [contours[i] for i in valid_indices]
            else:  #if no main skeletons were found, it could be that the tissue slide is just very small
                arch_lens = [1 / len(contours)] * len(contours)

            # Extract points from the accepted contours
            weights = np.array(arch_lens) / np.sum(arch_lens)
            points_per_arch = distributeIntToChunks(
                num_tiles, weights)  # <- number of points to be extracted
            for idx, arch in enumerate(contours):

                num_indices = points_per_arch[idx]
                output = np.zeros_like(skeleton)
                cv2.drawContours(output, [arch], -1, 1, 1)

                y_, x_ = np.where(output)

                # Simplify the shape by fitting circles
                arch = np.dstack([x_, y_])
                cps = estimateWithCircles(arch, radius)
                cx, cy = cps[..., 0], cps[..., 1]

                # Randomly select indices, in case too many; seed if needed
                if len(cx) > num_indices:

                    # Seed if needed
                    if seed is not None:
                        np.random.seed(seed)
                    indices = sorted(
                        np.random.choice(len(cx), num_indices, replace=False))
                    np.random.seed(None)  # Return clock seed

                    cx, cy = cx[indices], cy[indices]

                # Append to returnables
                if idx == 0:
                    intermediate_coords = np.dstack([cx, cy])
                else:
                    intermediate_coords = np.hstack(
                        [intermediate_coords,
                         np.dstack([cx, cy])])

            # To top-left-corner format
            final_coords = intermediate_coords.squeeze() * 4**level_offset
            if final_coords.shape == (2, ):
                final_coords = np.expand_dims(final_coords, 0)

            final_coords = final_coords - np.array(
                [self.tile_size // 2, self.tile_size // 2])

        elif sampling_method == 'tissue_pct':
            self.top_left_corners = getTopLeftCorners(self.dims,
                                                      self.tile_size)
            self.tissue_pcts = getTissuePercentages(
                self.tissue_mask,
                level_offset=level_offset,
                tile_size=self.tile_size,
                top_left_corners=self.top_left_corners)

            # Lower the threshold until enough tiles qualify or min is hit
            tth_min, tth = tissue_th
            while len(np.where(self.tissue_pcts > tth)[0]) < num_tiles:
                if tth <= tth_min:
                    break

                tth -= 0.05

            # Indices
            indices = np.where(self.tissue_pcts > tth)[0]

            # Randomly select indices, in case too many; seed if needed
            if len(indices) > num_tiles:
                if seed is not None:
                    np.random.seed(seed)
                indices = sorted(
                    np.random.choice(indices, num_tiles, replace=False))
                np.random.seed(None)  # Return clock seed

            final_coords = self.top_left_corners[indices].copy()

        elif sampling_method == 'slic':

            # Determine SLIC clusters from filled tissue mask
            filled_tissue_mask = fillTissueMask(self.tissue_mask.copy())
            filled_tissue_mask = filled_tissue_mask // 255  # needs to be an array of 0s and 1s

            segments = seg.slic(self.ds_img,
                                compactness=10,
                                seed_type='nplace',
                                mask=filled_tissue_mask,
                                n_segments=num_tiles,
                                multichannel=True,
                                recompute_seeds=True,
                                enforce_connectivity=True)
            indices = [k for k in np.unique(segments) if not k == -1]

            # Randomly select indices, in case too many; seed if needed
            if len(indices) > num_tiles:
                if seed is not None:
                    np.random.seed(seed)
                indices = sorted(
                    np.random.choice(indices, num_tiles, replace=False))
                np.random.seed(None)  # Return clock seed

            # One (1, 1, 2) centroid per entry. A list is used so that the
            # first sampled label does not have to be 0 for it to work.
            coord_chunks = []
            for i in indices:
                contours, _ = cv2.findContours(
                    np.uint8(np.where(segments == i, 255, 0)), 0, 1)
                contours = sorted(contours,
                                  key=lambda x: cv2.contourArea(x))[::-1]

                # Centroid of the largest contour of the cluster
                M = cv2.moments(contours[0])
                cx = np.int32(M['m10'] / M['m00'])
                cy = np.int32(M['m01'] / M['m00'])

                coord_chunks.append(np.dstack([cx, cy]))

            # Append more cluster contours if num_tiles has not been reached
            if len(indices) < num_tiles:
                enough_tiles = False
                for i in indices:
                    contours, _ = cv2.findContours(
                        np.uint8(np.where(segments == i, 255, 0)), 0, 1)
                    contours = sorted(contours,
                                      key=lambda x: cv2.contourArea(x))[::-1]

                    for j, cnt in enumerate(contours):
                        # accept a slic cluster contour if its area is at least 5% of tile area
                        if j != 0 and cv2.contourArea(cnt) > (
                                0.05 * self.tile_size / (4**level_offset))**2:
                            M = cv2.moments(cnt)
                            cx = np.int32(M['m10'] / M['m00'])
                            cy = np.int32(M['m01'] / M['m00'])

                            coord_chunks.append(np.dstack([cx, cy]))

                            # Stop at the requested count, not a fixed 12
                            if len(coord_chunks) >= num_tiles:
                                enough_tiles = True
                                break
                    if enough_tiles:
                        break

            # Raises ValueError when no cluster was found at all
            intermediate_coords = np.hstack(coord_chunks)

            # To top-left-corner format
            final_coords = intermediate_coords.squeeze() * 4**level_offset
            # Keep a single coordinate 2-D, as the skeleton branch does
            if final_coords.shape == (2, ):
                final_coords = np.expand_dims(final_coords, 0)
            final_coords = final_coords - np.array(
                [self.tile_size // 2, self.tile_size // 2])

        # apply offset to coordinates
        final_coords = final_coords + np.array(
            [int(self.tile_size * offset[0]),
             int(self.tile_size * offset[1])])

        self.tile_coords = final_coords.copy()

    def getTiles(self,
                 stack: bool = False,
                 sampling_method: str = 'skeleton',
                 mosaic_grid: tuple = (4, 3),
                 output_tile_size: int = 128,
                 tissue_th: tuple = (0.1, 0.7),
                 seed: int = None,
                 offset: tuple = (0, 0)):
        ''' Get tiles from the slide and stack into mosaic if needed 

        Samples m*n coordinates with `getTileCoords` (m, n = `mosaic_grid`),
        cuts one tile per coordinate and resizes it to `output_tile_size`.
        Slots without a tile stay white (255); a warning is issued then.

        Returns an array of tiles, or one mosaic image when `stack` is True.

        offset = (x,y) coord offset in tile size fractions that is applied to sampled coords. This is meant as a form of data augmentation.
        '''

        # Solve indices to be used in mosaic
        m, n = mosaic_grid
        self.getTileCoords(num_tiles=n * m,
                           sampling_method=sampling_method,
                           tissue_th=tissue_th,
                           seed=seed,
                           offset=offset)

        # Read regions
        output_img = np.ones([n * m, output_tile_size, output_tile_size, 3],
                             dtype='uint8') * 255

        for idx, coord in enumerate(self.tile_coords):
            x, y = coord
            # Clip the tile window to the image bounds
            left, right = np.int32(
                np.clip([x, x + self.tile_size], 0, self.dims[0]))
            bottom, top = np.int32(
                np.clip([y, y + self.tile_size], 0, self.dims[1]))

            tile = self.img[bottom:top, left:right].copy()
            tile = padIfNeeded(tile,
                               tgt_width=self.tile_size,
                               tgt_height=self.tile_size)
            tile = cv2.resize(tile, (output_tile_size, ) * 2)

            output_img[idx] = np.uint8(tile)

        if len(self.tile_coords) < m * n:
            warnings.warn("Could not find enough unique tiles for the slide "
                          "(tiles: %s/%s, slide: %s)" %
                          (len(self.tile_coords), m * n, self.slide_fn))

        # Stack to single array of (m,n) tiles if needed
        if stack:
            output_img = [
                np.hstack([output_img[i * n + j] for j in range(n)])
                for i in range(m)
            ]
            output_img = np.vstack(output_img)

        return np.array(output_img)

    def getTilesCancerStatus(self,
                             stack: bool = False,
                             mosaic_grid: tuple = (4, 3)):
        ''' Classify the mask region of every tile in `self.tile_coords`.

        Requires `self.tile_coords` to be set and, when there are
        coordinates, `self.mask` to exist. Returns a uint8 array with one
        entry per mosaic slot (0 where no tile exists), reshaped to (m, n)
        when `stack` is True.
        '''

        # Solve indices to be used in mosaic
        m, n = mosaic_grid

        # Read regions
        output_img = np.zeros([n * m], dtype='uint8')

        for idx, coord in enumerate(self.tile_coords):
            x, y = coord
            left, right = np.int32(
                np.clip([x, x + self.tile_size], 0, self.dims[0]))
            bottom, top = np.int32(
                np.clip([y, y + self.tile_size], 0, self.dims[1]))

            tile = self.mask[bottom:top, left:right].copy()
            tile_cat = tileClassification(tile, self.data_provider)

            output_img[idx] = tile_cat

        # Stack to single array of (m,n) tiles if needed
        if stack:
            output_img = [
                np.hstack([output_img[i * n + j] for j in range(n)])
                for i in range(m)
            ]
            output_img = np.vstack(output_img)

        return np.array(output_img)

    def visualizeCoverage(self, figsize=(16, 16)):
        ''' Visualize the coverage of indices on a slide 

        Paints every tile of `self.tile_coords` green on a copy of
        `self.ds_img`, blends it with the original and shows the figure.
        '''

        background = self.ds_img.copy()
        foreground = background.copy()

        level_offset = 2 - self.level

        for coord in self.tile_coords:
            x, y = coord
            # Scale the clipped window from `level` down to frame 2
            left, right = np.int32(
                np.clip([x, x + self.tile_size], 0, self.dims[0]) /
                (4**level_offset))
            bottom, top = np.int32(
                np.clip([y, y + self.tile_size], 0, self.dims[1]) /
                (4**level_offset))

            foreground[bottom:top, left:right] = (0, 255, 0)

        ## Visualize
        output = cv2.addWeighted(background, 0.7, foreground, 0.3, 0)

        plt.figure(figsize=figsize)
        plt.imshow(output)
        plt.show()
    def __cropPatchesFromImage(self, image_name, downsample_level=None):
        """Cut ``self.__patches_per_image`` square patches from an image.

        Args:
            image_name: Path of the multi-resolution image.
            downsample_level: 0, 1, 2 or ``None``. With ``None`` the first
                half of the patches is taken from level 0 (read through
                OpenSlide) and the second half from level 2 (last frame of
                the MultiImage). With an explicit level every patch is cut
                from that MultiImage frame.

        For every patch up to five random positions are tried; the first
        one whose mean pixel value is below 230 is kept, otherwise the
        fifth attempt is kept. Patches smaller than the patch size are
        padded with 255.

        Returns a list of patches.
        """
        patch_shape = (self.__patch_size, self.__patch_size)

        # downsample_level: 0, 1, 2, None (random)
        # Use only 2 or None (MultiImage is used for low resolution image,
        # OpenSlide for high resolution image (to save memory and faster
        # process, Openslide did not work for low resolution image))
        # Resolution downsample levels: 1, 4, 16
        multi_image = MultiImage(image_name)
        use_mixed_resolutions = downsample_level is None
        image_slide = OpenSlide(image_name) if use_mixed_resolutions else None

        try:
            if use_mixed_resolutions:
                image_to_crop = multi_image[-1]
            else:
                image_to_crop = multi_image[downsample_level]
                # (width, height), matching OpenSlide's level_dimensions
                image_shape = tuple(image_to_crop.shape[::-1][1:])
                resolution_relation = 4**(2 - downsample_level)

            # Find coordinates from where to select patch
            cell_coordinates = self.__getCellCoordinatesFromImage(multi_image)

            # Crop patches
            patches = []
            for i in range(self.__patches_per_image):

                # Choose mixed down sample level (low and high (not mid))
                if use_mixed_resolutions:
                    downsample_level = int(
                        i * 2 / self.__patches_per_image) * 2
                    image_shape = image_slide.level_dimensions[
                        downsample_level]
                    resolution_relation = 4**(2 - downsample_level)

                # Iterate good patch
                for j in range(5):
                    random_index = random.randint(
                        0, cell_coordinates.shape[1] - 1)

                    # Scale coordinates by the number of resolution relation
                    # between low-resolution image and high/mid-resolution.
                    # Take center of the cell coordinate by subtracting
                    # 0.5*patch_size.
                    start_y, start_x = (
                        cell_coordinates[:, random_index] *
                        resolution_relation - int(0.5 * self.__patch_size))
                    start_x = max(
                        0, min(start_x, image_shape[0] - self.__patch_size))
                    start_y = max(
                        0, min(start_y, image_shape[1] - self.__patch_size))
                    end_x, end_y = np.array([start_x, start_y
                                             ]) + self.__patch_size

                    # Level 0 is read through OpenSlide only in mixed mode;
                    # an explicitly requested level is already loaded in
                    # image_to_crop, so no OpenSlide handle exists for it.
                    if image_slide is not None and downsample_level == 0:
                        patch = np.array(
                            image_slide.read_region(
                                (int(start_x), int(start_y)), 0,
                                patch_shape))[..., :3]
                    else:
                        patch = image_to_crop[start_y:end_y, start_x:end_x]

                    # Pad if original image size was smaller than patch_size
                    if patch.shape[:2] != patch_shape:
                        padding = np.subtract(patch_shape, patch.shape[:2])
                        padding = ([0, padding[0]], [0, padding[1]], [0, 0])
                        patch = np.pad(patch, padding, constant_values=255)

                    # Patch has enough colored areas (not pure white)
                    # Otherwise iterate again
                    if np.mean(patch) < 230:
                        break
                patches.append(patch)
            return patches
        finally:
            # Release the OpenSlide handle even when cropping fails
            if image_slide is not None:
                image_slide.close()
 def setUp(self):
     """Open ``multipage.tif`` as a MultiImage if PIL is available."""
     # This multipage TIF file was created with imagemagick:
     # convert im1.tif im2.tif -adjoin multipage.tif
     if not PIL_available:
         return
     self.img = MultiImage(os.path.join(data_dir, 'multipage.tif'))
    def __getitem__(self, idx):
        """Return patches (and optionally mask patches) for slide ``idx``.

        The slide frame at ``self.level`` is scored on a grid of
        ``patch_size`` blocks; the highest scoring blocks are copied into
        ``num_patches`` patches (unused slots stay 255). The label is a
        length-5 tensor with the first ``isup_grade`` entries set to 1.

        Returns ``(image, label)`` or, when ``self.use_mask`` is set,
        ``(image, (mask, label))``, where ``mask`` has one 0/1 plane for
        each of the values 0..5. ``image`` is permuted to
        ``(patch, channel, row, column)`` order.
        """
        image_id = self.df['image_id'].iloc[idx]
        size = self.patch_size

        # Skimage seems to be slightly faster than OpenSlide here
        slide_path = os.path.join(self.root_path, 'train_images',
                                  image_id + '.tiff')
        image = MultiImage(slide_path, conserve_memory=False)[self.level]

        # Only look at regions that aren't empty space. The score is
        # computed on a subsampled image, since NumPy is slow otherwise.
        stride = size // 8

        def blank_score(x, axis):
            # Distance from white times variance: higher means more content
            return np.mean((x - 255)**2, axis=axis) * np.var(x, axis=axis)

        cells_per_patch = size // stride
        proportion_blank = block_reduce(image[::stride, ::stride],
                                        block_size=(cells_per_patch,
                                                    cells_per_patch, 3),
                                        func=blank_score)

        # Flat block indices, best score first, converted to pixel origins
        order = np.argsort(proportion_blank, axis=None)[::-1]
        n_cols = proportion_blank.shape[1]
        x = order % n_cols * size
        y = order // n_cols * size
        n_filled = min(self.num_patches, x.shape[0])

        patches = np.full((self.num_patches, size, size, 3),
                          255,
                          dtype=np.uint8)
        for i in range(n_filled):
            crop = image[y[i]:y[i] + size, x[i]:x[i] + size]
            # Crops at the image border may be smaller than a full patch
            patches[i, :crop.shape[0], :crop.shape[1]] = crop
        image = patches

        label = torch.zeros(5)
        label[:self.df['isup_grade'].iloc[idx]] = 1

        if not self.use_mask:
            if self.transforms:
                # Augmentation is applied to each patch separately
                for i in range(self.num_patches):
                    image[i] = self.transforms(image=image[i])['image']
            return torch.tensor(image).permute(0, 3, 1, 2), label

        mask_path = os.path.join(self.root_path, 'train_label_masks',
                                 image_id + '_mask.tiff')
        mask_frame = MultiImage(mask_path,
                                conserve_memory=False)[self.level]
        mask_frame = mask_frame[..., 0]

        mask = np.zeros((self.num_patches, size, size), dtype=np.uint8)
        for i in range(n_filled):
            crop = mask_frame[y[i]:y[i] + size, x[i]:x[i] + size]
            mask[i, :crop.shape[0], :crop.shape[1]] = crop

        # Different data providers have different mask formats, normalise
        # them to be the same. The order of the two remaps matters.
        if self.df['data_provider'].iloc[idx] == 'karolinska':
            mask[mask == 2] = 3
            mask[mask == 1] = 2

        if self.transforms:
            # Image and mask patch are augmented together, one at a time
            for i in range(self.num_patches):
                augmented = self.transforms(image=image[i], mask=mask[i])
                image[i] = augmented['image']
                mask[i] = augmented['mask']

        # One 0/1 plane per mask value 0..5, stored as float64
        values = np.arange(6).reshape(1, 6, 1, 1)
        mask_binary = (mask[:, None, :, :] == values).astype(np.float64)

        return torch.tensor(image).permute(0, 3, 1,
                                           2), (torch.tensor(mask_binary),
                                                label)
Exemple #17
0
import os
import json
import sys
sys.path.append("../")

from tqdm import tqdm
from skimage.io import MultiImage
import cv2 as cv

from utils.data_utils import get_tile
import matplotlib.pyplot as plt

# Folder holding the source slides and folder receiving the rendered PNGs.
images_dir = "../input/prostate-cancer-grade-assessment/train_images"
output_dir = "../input/256_36_hsv"

# JSON object whose keys are image names (without extension) and whose values
# are the boxes handed to get_tile for that image.
with open("../notebooks/256_36_hsv.json", 'r') as file:
    data = json.load(file)

os.makedirs(output_dir, exist_ok=True)


for path, boxes in tqdm(data.items()):
    source_file = os.path.join(images_dir, path) + ".tiff"
    target_file = os.path.join(output_dir, path) + ".png"

    # Entry 1 of the MultiImage is the one the tiles are cut from.
    img = MultiImage(source_file)[1]
    img = get_tile(img, boxes, 256, 36)

    # Reorder channels for OpenCV, then subtract the result from 255 before
    # writing it out.
    img = cv.cvtColor(img, cv.COLOR_RGB2BGR)
    img = 255 - img
    cv.imwrite(target_file, img)
    def _worker(paths: Tuple[Path, Optional[Path]], namespace) -> None:
        """Pre-process one slide and hand the resulting record to the writer.

        Args:
            paths: ``(image_path, mask_path)`` pair. Only ``image_path`` is
                actually used; the mask location is re-derived from it.
            namespace: Object exposing ``self`` (whose ``_writer`` receives
                the finished record) and ``train_meta`` (table queried by
                ``image_id`` for ``data_provider``, ``gleason_score`` and
                ``isup_grade``).

        Returns:
            None. Returns early without queueing anything when layer 0 of the
            image cannot be read. Any exception raised while building the
            record is printed together with the slide name and swallowed, so
            one bad slide does not abort the caller.
        """
        self = namespace.self
        train_meta = namespace.train_meta
        # The mask path supplied by the caller is ignored on purpose of the
        # original logic: it is always rebuilt from the image path below.
        image_path, _ = paths
        name = image_path.stem
        mask_path = Path(
            str(image_path).replace("train_images",
                                    "train_label_masks").replace(
                                        ".tiff", "_mask.tiff"))

        image_slide = MultiImage(str(image_path))
        mask_slide = MultiImage(str(mask_path))
        large_image = get_layer_safely(image_slide, layer=0)
        large_mask = get_layer_safely(
            mask_slide, layer=0, is_mask=True) if mask_path.exists() else None
        small_image = get_layer_safely(image_slide, layer=2)
        if large_image is None:
            return

        if small_image is None:
            # Layer 2 is unavailable: build a substitute by shrinking layer 0
            # by a factor of 16 on both axes.
            scale = 1 / 16
            small_image = cv2.resize(large_image,
                                     dsize=(0, 0),
                                     fx=scale,
                                     fy=scale,
                                     interpolation=cv2.INTER_LANCZOS4)

        try:
            pre_processor = ImagePreProcessor(reduce_memory=False)
            large_image = pre_processor.dual(large_image, small_image)
            if large_mask is not None:
                large_mask = pre_processor.single(large_mask)

            row = train_meta[train_meta.image_id == name].iloc[0]
            data_provider = row["data_provider"]
            gleason_score = row["gleason_score"]
            label = row["isup_grade"]

            # The slide is opened only to read metadata; the context manager
            # closes the underlying handle so it is not leaked per slide.
            with OpenSlide(str(image_path)) as slide:
                additional = {
                    "data_provider": data_provider,
                    "gleason_score": gleason_score,
                    "image_shape": large_image.shape[:2],
                    "source_image_shape": slide.dimensions,
                    "x_resolution":
                    float(slide.properties["tiff.XResolution"]),
                    "y_resolution":
                    float(slide.properties["tiff.YResolution"]),
                    "resolution_unit":
                    slide.properties["tiff.ResolutionUnit"]
                }

            if large_mask is None:
                visualization = None
            else:
                masked = draw_overlay_mask(
                    large_image,
                    large_mask,
                    color_map=get_color_map(data_provider, normalized=False))
                title_text = f"{data_provider} - id={name[:10]} isup={label} gleason={gleason_score}"
                visualization = plot_meta(
                    masked,
                    title_text,
                    color_map=get_color_map(data_provider, normalized=True),
                    classname_map=get_classname_map(data_provider),
                    show_keys=list(np.unique(large_mask)))

            record = Record(large_image,
                            large_mask,
                            visualization,
                            name,
                            label,
                            phase=Phase.TRAIN,
                            additional=additional)
            self._writer.put(record)

        except Exception as e:
            # Best-effort worker: report the failing slide and carry on.
            print(f"{name} - {e}")
Exemple #19
0
def gettextFrom_tiff_Image(file):
    """Print the OCR text of every frame found in the image file ``file``.

    Frames are read through ``MultiImage`` with the PIL plugin, wrapped in a
    PIL image and passed to ``pytesseract.image_to_string``. The recognised
    text of each frame is printed; nothing is returned.
    """
    frames = MultiImage(file, plugin='pil')
    for frame in frames:
        print(pytesseract.image_to_string(Image.fromarray(frame)))
Exemple #20
0
    return result_img, minimal_boxes


# In[6]:


names = list(os.listdir(IMAGES))
compact_representation = {}

# Running sum of (compact area / original area); divided by the number of
# images once the loop is done.
mean_ratio = 0

for name in tqdm(names):
    # The last entry of the MultiImage is the one that gets compacted.
    img = MultiImage(os.path.join(IMAGES, name))[-1]

    compact_image, minimal_boxes = get_minimal_image(img)
    original_size = img.shape[:2]
    compact_representation[name] = {
        "original_size": original_size,
        "rectangles": minimal_boxes,
    }

    compact_area = np.prod(compact_image.shape[:2])
    original_area = np.prod(original_size)
    mean_ratio += compact_area / original_area
print(f"Mean ratio: {mean_ratio / len(names)}")
#
#
# # In[7]:
#
#
# with open("../dataset/compact_representation.json", "w") as file:
#     json.dump(compact_representation, file)