def __getitem__(self, item):
    """Return ``(image, label)`` for the sample at position ``item``.

    The slide is read at pyramid level ``self.tiff_scale``, converted from
    RGB to BGR, cut into tiles by ``tile`` and moved to channel-first order.
    The label is a length-5 float32 vector whose first ``grade`` entries
    are 1 and the rest 0.
    """
    grade = self.labels[item]
    target = np.array(
        [int(step < grade) for step in range(5)],
        dtype=np.float32,
    )

    slide_path = os.path.join(self.data_dir, self.images[item])
    pixels = MultiImage(slide_path)[self.tiff_scale]
    pixels = cv.cvtColor(pixels, cv.COLOR_RGB2BGR)
    pixels = tile(pixels, self.sz, self.N, self.transforms, self.random)

    # HWC -> CHW before handing the array to torch
    channel_first = pixels.transpose(2, 0, 1)
    return torch.from_numpy(channel_first), target
def __getitem__(self, item):
    """Return the tiled, channel-first image tensor for position ``item``.

    Entries of ``self.images`` without a file extension get ``.tiff``
    appended. The slide is read at level ``self.tiff_scale``, converted
    RGB -> BGR, reduced with ``get_minimal_image``, tiled, and optionally
    passed through ``self.transforms``.
    """
    file_name = self.images[item]
    _, extension = os.path.splitext(file_name)
    if extension == "":
        file_name = file_name + ".tiff"

    slide = MultiImage(os.path.join(self.data_dir, file_name))
    pixels = cv.cvtColor(slide[self.tiff_scale], cv.COLOR_RGB2BGR)

    # Only the image is needed here; the second return value is discarded.
    pixels, _unused = get_minimal_image(pixels)
    pixels = tile(pixels, self.tile_size, self.num_tiles)

    if self.transforms is not None:
        transformed = self.transforms(image=pixels)
        pixels = transformed["image"]

    # HWC -> CHW
    return torch.from_numpy(pixels.transpose(2, 0, 1))
def tif_txt_extract(input_files_path, output_files_path, ocr_path=None, verbose=False):
    """OCR every ``*.tif`` / ``*.TIF`` file in a directory into text files.

    For each TIFF found directly inside ``input_files_path`` the text of all
    its pages is concatenated and appended to
    ``<output_files_path>/<file name without extension>.txt``.

    Args:
        input_files_path: Directory that is searched (non-recursively).
        output_files_path: Directory the ``.txt`` files are written to.
        ocr_path: Path of the tesseract executable; only applied when
            running on Windows.
        verbose: Forwarded to ``__verbose_print`` for progress messages.

    Returns:
        None. Any exception is caught and printed (best-effort behaviour
        kept from the original implementation).
    """
    try:
        if platform.system() == 'Windows':
            pytesseract.pytesseract.tesseract_cmd = ocr_path

        # A set removes duplicates on case-insensitive file systems where
        # both patterns match the same file; sorting makes the processing
        # order (and the progress output) deterministic.
        files = sorted(set(
            glob.glob(input_files_path + "/*.TIF")
            + glob.glob(input_files_path + "/*.tif")))
        total = len(files)

        for i, file in enumerate(files, start=1):
            file_name = os.path.basename(file)
            images = MultiImage(file, plugin='pil')

            # join() avoids the quadratic cost of repeated string +=
            img_str = ''.join(
                pytesseract.image_to_string(Image.fromarray(img))
                for img in images)

            stem = os.path.splitext(file_name)[0]
            out_path = output_files_path + os.sep + stem + ".txt"
            # Context manager guarantees the handle is closed even when
            # the write fails.
            with open(out_path, "a+", encoding="utf-8") as f:
                f.write(img_str)

            __verbose_print(
                str(i) + " of " + str(total) + " file(s) completed", verbose)
    except Exception as e:
        print(e)
def __tifutil(self, file, output_path):
    """Save every page of a multi-page TIFF as its own JPEG file.

    Page ``k`` (counted from 1) of ``file`` is written to
    ``<output_path>/<basename of file>_<k>.jpg``.
    """
    base_name = os.path.basename(file)
    pages = MultiImage(file, plugin='pil')

    page_number = 0
    for page in pages:
        page_number += 1
        target = os.sep.join(
            [output_path, base_name + '_' + str(page_number) + ".jpg"])
        Image.fromarray(page).save(target, 'JPEG')
def __init__(self,
             slide_fn,
             level=2,
             tile_size=128,
             mask_fn=None,
             data_provider=None):
    """Load a slide (and optionally its label mask) into memory.

    Args:
        slide_fn: Path of the multi-level slide image.
        level: Pyramid level that tiles are cut from.
        tile_size: Edge length of a tile, in pixels of ``level``.
        mask_fn: Optional path of the label mask belonging to the slide.
        data_provider: Optional data-provider identifier; the mask is only
            loaded when both ``mask_fn`` and ``data_provider`` are given.
    """
    self.slide_fn = slide_fn
    self.level = level
    self.tile_size = tile_size

    # Open the file once and read both required levels from it instead of
    # constructing a second MultiImage for the same path.
    slide = MultiImage(self.slide_fn)
    self.img = slide[self.level].copy()
    # (width, height) of the working level
    self.dims = np.array(self.img.shape[:2][::-1])
    # Level 2 is always kept as the down-sampled reference image
    self.ds_img = slide[2].copy()
    self.tissue_mask = makeTissueMask(self.ds_img)

    self.mask_fn = mask_fn
    self.data_provider = data_provider
    if self.mask_fn is not None and self.data_provider is not None:
        # Collapse the channel axis so the mask is 2-D
        self.mask = MultiImage(mask_fn)[level].sum(axis=-1)

    self.tile_coords = None
class TestMultiImage:
    """Checks for ``MultiImage`` using the two-page ``multipage.tif`` fixture."""

    def setUp(self):
        # The fixture was created with imagemagick:
        #   convert im1.tif im2.tif -adjoin multipage.tif
        if not PIL_available:
            return
        self.img = MultiImage(os.path.join(data_dir, 'multipage.tif'))

    @skipif(not PIL_available)
    def test_len(self):
        assert len(self.img) == 2

    @skipif(not PIL_available)
    def test_getitem(self):
        frame_count = len(self.img)
        for position in range(-frame_count, frame_count):
            assert type(self.img[position]) is np.ndarray
        assert_array_almost_equal(self.img[0], self.img[-frame_count])

        # assert_raises wants a callable; the bound __getitem__ serves as one
        fetch = self.img.__getitem__
        assert_raises(IndexError, fetch, frame_count)
        assert_raises(IndexError, fetch, -frame_count - 1)

    @skipif(not PIL_available)
    def test_files_property(self):
        assert isinstance(self.img.filename, six.string_types)

        # The property must be read-only
        assert_raises(AttributeError, setattr, self.img, 'filename',
                      'newfile')

    @skipif(not PIL_available)
    def test_conserve_memory_property(self):
        assert isinstance(self.img.conserve_memory, bool)

        # The property must be read-only
        assert_raises(AttributeError, setattr, self.img, 'conserve_memory',
                      True)

    @skipif(not PIL_available)
    def test_concatenate(self):
        stacked = self.img.concatenate()
        expected_shape = (len(self.img),) + self.img[0].shape
        assert_equal(stacked.shape, expected_shape)
def __cropPatchesFromImage(self, image_name, downsample_level=2):
    """Crop ``self.__patches_per_image`` square patches around cell coordinates.

    downsample_level: 0, 1, 2
    Resolution downsample levels: 1, 4, 16

    For every patch up to five random cell coordinates are tried; the first
    candidate whose mean pixel value is below 230 (not almost pure white)
    is kept, otherwise the fifth candidate is kept regardless.
    """
    size = self.__patch_size
    patch_shape = (size, size)

    multi_image = MultiImage(image_name)
    image_to_crop = multi_image[downsample_level]
    image_shape = image_to_crop.shape
    resolution_relation = 4 ** (2 - downsample_level)

    # Coordinates from which patch origins are drawn
    cell_coordinates = self.__getCellCoordinatesFromImage(
        multi_image, resolution_relation, image_shape)

    # Largest origin that still leaves room for a full patch
    max_start_x = image_shape[1] - size
    max_start_y = image_shape[0] - size

    patches = []
    for _ in range(self.__patches_per_image):
        for _attempt in range(5):
            column = random.randint(0, cell_coordinates.shape[1] - 1)

            # Scale the low-resolution coordinate up to the cropped level
            start_y, start_x = \
                cell_coordinates[:, column] * resolution_relation
            start_x = max(0, min(start_x, max_start_x))
            start_y = max(0, min(start_y, max_start_y))

            patch = image_to_crop[start_y:start_y + size,
                                  start_x:start_x + size]

            # Images smaller than a patch yield a short crop: resize it
            if patch.shape[:2] != patch_shape:
                patch = cv2.resize(
                    patch, dsize=patch_shape,
                    interpolation=cv2.INTER_CUBIC)

            # Enough coloured area: accept immediately
            if np.mean(patch) < 230:
                break
        patches.append(patch)
    return patches
class TestMultiImage:
    """Exercises ``MultiImage`` against the two-page ``multipage.tif`` file."""

    def setUp(self):
        # This multipage TIF file was created with imagemagick:
        #   convert im1.tif im2.tif -adjoin multipage.tif
        if not PIL_available:
            return
        fixture = os.path.join(data_dir, 'multipage.tif')
        self.img = MultiImage(fixture)

    @skipif(not PIL_available)
    def test_len(self):
        assert len(self.img) == 2

    @skipif(not PIL_available)
    def test_getitem(self):
        total = len(self.img)
        for index in range(-total, total):
            assert type(self.img[index]) is np.ndarray
        assert_array_almost_equal(self.img[0], self.img[-total])

        # assert_raises expects a callable, so wrap the indexing in a lambda
        assert_raises(IndexError, lambda n: self.img[n], total)
        assert_raises(IndexError, lambda n: self.img[n], -total - 1)

    @skipif(not PIL_available)
    def test_files_property(self):
        assert isinstance(self.img.filename, six.string_types)

        # Assigning to the property has to fail
        assert_raises(AttributeError, setattr, self.img, 'filename',
                      'newfile')

    @skipif(not PIL_available)
    def test_conserve_memory_property(self):
        assert isinstance(self.img.conserve_memory, bool)

        # Assigning to the property has to fail
        assert_raises(AttributeError, setattr, self.img, 'conserve_memory',
                      True)

    @skipif(not PIL_available)
    def test_concatenate(self):
        combined = self.img.concatenate()
        frame_shape = self.img[0].shape
        assert_equal(combined.shape, (len(self.img), ) + frame_shape)
def __tile(self, img_path, mask_path, number_of_tiles=12):
    """Cut an image and its mask into square tiles and keep the darkest ones.

    The last level of both files is padded (image with 255, mask with 0) to
    a multiple of ``self.__patch_size``, split into a grid of tiles and,
    if there are fewer than ``number_of_tiles``, topped up with blank
    tiles. The ``number_of_tiles`` tiles with the smallest pixel sum in the
    image are returned together with the matching mask tiles.
    """
    size = self.__patch_size
    img = MultiImage(img_path)[-1]
    mask = MultiImage(mask_path)[-1]

    # Amount needed to reach the next multiple of the tile size (0 if exact)
    pad_rows = -img.shape[0] % size
    pad_cols = -img.shape[1] % size
    spatial_padding = [
        [pad_rows // 2, pad_rows - pad_rows // 2],
        [pad_cols // 2, pad_cols - pad_cols // 2],
        [0, 0],
    ]
    img = np.pad(img, spatial_padding, constant_values=255)
    mask = np.pad(mask, spatial_padding, constant_values=0)

    def split_into_tiles(array):
        # (H, W, 3) -> (rows, size, cols, size, 3) -> (rows*cols, size, size, 3)
        grid = array.reshape(array.shape[0] // size, size,
                             array.shape[1] // size, size, 3)
        return grid.transpose(0, 2, 1, 3, 4).reshape(-1, size, size, 3)

    img = split_into_tiles(img)
    mask = split_into_tiles(mask)

    missing = number_of_tiles - len(img)
    if missing > 0:
        tile_padding = [[0, missing], [0, 0], [0, 0], [0, 0]]
        mask = np.pad(mask, tile_padding, constant_values=0)
        img = np.pad(img, tile_padding, constant_values=255)

    # Smallest pixel sum first, i.e. the least white tiles
    pixel_sums = img.reshape(img.shape[0], -1).sum(-1)
    keep = np.argsort(pixel_sums)[:number_of_tiles]
    return img[keep], mask[keep]
def __getitem__(self, idx):
    """Return the augmented image (and, for train/val, its one-hot target).

    Train and validation samples are read from ``.png`` with OpenCV, other
    samples from the last level of a ``.tiff``. For train/val the target is
    the concatenation of the categorical ISUP grade (6 classes) and both
    Gleason components (5 classes each).
    """
    has_labels = self.is_train or self.is_val
    stem = self.image_path + self.image_id[idx]

    if has_labels:
        image = cv2.imread(stem + '.png')
    else:
        image = MultiImage(stem + '.tiff')[-1]

    # NOTE(review): cv2.resize takes dsize as (width, height); passing
    # (HEIGHT, WIDTH) is only equivalent when both are equal - confirm.
    image = cv2.resize(image, (HEIGHT, WIDTH))
    # NOTE(review): the tiff branch does not come from cv2.imread, so it may
    # already be RGB before this swap - confirm the intended channel order.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    augmented = self.aug(image=image)['image']
    # NOTE(review): reshape reinterprets the buffer rather than moving the
    # channel axis; presumably self.aug already yields channel-first data -
    # verify against the augmentation pipeline.
    image = augmented.reshape((3, HEIGHT, WIDTH))

    if not has_labels:
        return FloatTensor(image)

    isup_grade = cat([self.data.isup_grade[idx]], num_classes=6)
    gleason = self.data.gleason_score[idx]
    gleason_parts = [cat([gleason[part]], num_classes=5) for part in (0, 1)]
    target = np.concatenate([isup_grade] + gleason_parts, axis=1)
    return FloatTensor(image), FloatTensor(target)
"""Show every frame of the bundled multi-page TIFF side by side."""
import os

import matplotlib.pyplot as plt
try:
    # ``mpl_toolkits.axes_grid`` was deprecated and later removed from
    # matplotlib; ``axes_grid1`` is its replacement.
    from mpl_toolkits.axes_grid1 import AxesGrid
except ImportError:  # very old matplotlib without axes_grid1
    from mpl_toolkits.axes_grid import AxesGrid
from skimage.io import MultiImage
from skimage import data_dir

# Load the multi-layer image
fname = os.path.join(data_dir, 'multipage.tif')
img = MultiImage(fname)

# Create an image grid with one column per frame, so the script keeps
# working if the file holds a different number of pages
fig = plt.figure()
grid = AxesGrid(fig,
                rect=(1, 1, 1),
                nrows_ncols=(1, len(img)),
                axes_pad=0.1)

# Plot the layers on the image grid
for i, frame in enumerate(img):
    grid[i].imshow(frame, cmap=plt.cm.gray)
    grid[i].set_xlabel('Frame %s' % i)
    grid[i].set_xticks([])
    grid[i].set_yticks([])

plt.show()
def setUp(self):
    """Open the two-page TIFF fixture when PIL is available."""
    # The fixture was created with imagemagick:
    #   convert im1.tif im2.tif -adjoin multipage.tif
    if not PIL_available:
        return
    fixture_path = os.path.join(data_dir, 'multipage.tif')
    self.img = MultiImage(fixture_path)
class Slide:
    '''
    A slide image loaded at one pyramid level, together with helpers that
    choose tile positions on it and cut the tiles out.
    '''

    # Accepted values of `sampling_method` in `getTileCoords`
    _SAMPLING_METHODS = ('skeleton', 'tissue_pct', 'slic')

    def __init__(self,
                 slide_fn,
                 level=2,
                 tile_size=128,
                 mask_fn=None,
                 data_provider=None):
        '''
        Load the slide (and optionally its label mask) into memory.

        slide_fn = path of the multi-level slide image
        level = pyramid level that tiles are cut from
        tile_size = edge length of a tile in pixels of `level`
        mask_fn = optional path of the label mask of the slide
        data_provider = optional provider identifier; the mask is only
            loaded when both `mask_fn` and `data_provider` are given
        '''
        self.slide_fn = slide_fn
        self.level = level
        self.tile_size = tile_size

        # Open the file once and read both levels from the same object
        slide = MultiImage(self.slide_fn)
        self.img = slide[self.level].copy()
        # (width, height) of the working level
        self.dims = np.array(self.img.shape[:2][::-1])
        # Level 2 is always kept as the down-sampled reference image
        self.ds_img = slide[2].copy()
        self.tissue_mask = makeTissueMask(self.ds_img)

        self.mask_fn = mask_fn
        self.data_provider = data_provider
        if self.mask_fn is not None and self.data_provider is not None:
            # Collapse the channel axis so the mask is 2-D
            self.mask = MultiImage(mask_fn)[level].sum(axis=-1)

        self.tile_coords = None

    @staticmethod
    def _seededChoice(population, size, seed):
        '''
        Draw `size` items from `population` without replacement, sorted.
        The global NumPy generator is seeded with `seed` when given and is
        always re-seeded from the clock afterwards.
        '''
        if seed is not None:
            np.random.seed(seed)
        picked = sorted(np.random.choice(population, size, replace=False))
        np.random.seed(None)  # Return clock seed
        return picked

    @staticmethod
    def _contourCentroid(contour):
        '''
        Return the integer centroid of `contour` as a (1, 1, 2) array, or
        None for a zero-area contour whose centroid is undefined.
        '''
        M = cv2.moments(contour)
        if M['m00'] == 0:
            return None
        cx = np.int32(M['m10'] / M['m00'])
        cy = np.int32(M['m01'] / M['m00'])
        return np.dstack([cx, cy])

    def _toTopLeft(self, chunks, level_offset):
        '''
        Merge per-contour centre coordinates (in level-2 pixels) into one
        (N, 2) array of top-left tile corners at the working level.
        '''
        if not chunks:
            return np.zeros((0, 2), dtype=np.int64)
        # reshape keeps the result 2-D even when only one point was found
        centres = np.hstack(chunks).reshape(-1, 2) * 4**level_offset
        return centres - np.array(
            [self.tile_size // 2, self.tile_size // 2])

    def _clippedTileBounds(self, coord):
        '''
        Return ([left, right], [bottom, top]) of the tile whose top-left
        corner is `coord`, clipped to the slide dimensions.
        '''
        x, y = coord
        x_bounds = np.clip([x, x + self.tile_size], 0, self.dims[0])
        y_bounds = np.clip([y, y + self.tile_size], 0, self.dims[1])
        return x_bounds, y_bounds

    @staticmethod
    def _stackMosaic(items, m, n):
        '''Arrange `items` (row-major) into a mosaic of `m` rows, `n` columns.'''
        rows = [
            np.hstack([items[i * n + j] for j in range(n)]) for i in range(m)
        ]
        return np.vstack(rows)

    def _skeletonCoords(self, num_tiles, level_offset, seed):
        '''Sample tile corners along the skeleton of the filled tissue mask.'''
        # Determine skeleton from filled tissue mask
        filled_tissue_mask = fillTissueMask(self.tissue_mask.copy())
        filled_tissue_mask = filled_tissue_mask // 255  # needs to be an array of 0s and 1s
        skeleton = skeletonize(filled_tissue_mask, method='lee')
        self.skeleton = np.uint8(np.where(skeleton != 0, 255, 0))
        skeleton = cv2.dilate(self.skeleton, None)
        contours, _ = cv2.findContours(skeleton, 0, cv2.CHAIN_APPROX_NONE)
        if len(contours) == 0:
            # Nothing to sample from; previously this divided by zero
            return self._toTopLeft([], level_offset)

        # Filter contours based on length
        radius = int(self.tile_size / (4**level_offset))
        arch_lens = []
        kept = []
        for arch in contours:
            half_length = cv2.arcLength(arch, False) / 2
            if half_length < radius / 4:
                continue
            kept.append(arch)
            arch_lens.append(half_length)
        if kept:
            # A plain list is used because contours have different lengths
            # and cannot be packed into a regular NumPy array
            contours = kept
        else:
            # if no main skeletons were found, it could be that the tissue
            # slide is just very small: weight all contours equally
            arch_lens = [1 / len(contours)] * len(contours)

        # Extract points from the accepted contours
        weights = np.array(arch_lens) / np.sum(arch_lens)
        points_per_arch = distributeIntToChunks(
            num_tiles, weights)  # <- number of points to be extracted
        chunks = []
        for idx, arch in enumerate(contours):
            num_indices = points_per_arch[idx]
            output = np.zeros_like(skeleton)
            cv2.drawContours(output, [arch], -1, 1, 1)
            y_, x_ = np.where(output)
            # Simplify the shape by fitting circles
            cps = estimateWithCircles(np.dstack([x_, y_]), radius)
            cx, cy = cps[..., 0], cps[..., 1]
            # Randomly select indices, in case too many; seed if needed
            if len(cx) > num_indices:
                indices = self._seededChoice(len(cx), num_indices, seed)
                cx, cy = cx[indices], cy[indices]
            chunks.append(np.dstack([cx, cy]))

        # To top-left-corner format
        return self._toTopLeft(chunks, level_offset)

    def _tissuePctCoords(self, num_tiles, level_offset, tissue_th, seed):
        '''Pick grid tiles whose tissue percentage exceeds a threshold.'''
        self.top_left_corners = getTopLeftCorners(self.dims, self.tile_size)
        self.tissue_pcts = getTissuePercentages(
            self.tissue_mask,
            level_offset=level_offset,
            tile_size=self.tile_size,
            top_left_corners=self.top_left_corners)
        # Lower the threshold in steps until enough tiles qualify or the
        # minimum is reached
        tth_min, tth = tissue_th
        while len(np.where(self.tissue_pcts > tth)[0]) < num_tiles:
            if tth <= tth_min:
                break
            tth -= 0.05
        indices = np.where(self.tissue_pcts > tth)[0]
        # Randomly select indices, in case too many; seed if needed
        if len(indices) > num_tiles:
            indices = self._seededChoice(indices, num_tiles, seed)
        return self.top_left_corners[indices].copy()

    def _slicCoords(self, num_tiles, level_offset, seed):
        '''Place tiles on the centroids of SLIC clusters of the tissue.'''
        # Determine SLIC clusters from filled tissue mask
        filled_tissue_mask = fillTissueMask(self.tissue_mask.copy())
        filled_tissue_mask = filled_tissue_mask // 255  # needs to be an array of 0s and 1s
        segments = seg.slic(self.ds_img,
                            compactness=10,
                            seed_type='nplace',
                            mask=filled_tissue_mask,
                            n_segments=num_tiles,
                            multichannel=True,
                            recompute_seeds=True,
                            enforce_connectivity=True)
        indices = [k for k in np.unique(segments) if not k == -1]
        # Randomly select indices, in case too many; seed if needed
        if len(indices) > num_tiles:
            indices = self._seededChoice(indices, num_tiles, seed)

        def contoursByArea(label):
            # Contours of one cluster, largest area first
            found, _ = cv2.findContours(
                np.uint8(np.where(segments == label, 255, 0)), 0, 1)
            return sorted(found, key=lambda x: cv2.contourArea(x))[::-1]

        # Computed once and reused by both passes below
        cluster_contours = [contoursByArea(label) for label in indices]

        # First pass: centroid of the largest contour of every cluster.
        # Results are collected by loop position; keying the first entry on
        # the label value being 0 left the accumulator undefined whenever
        # label 0 was absent.
        chunks = []
        for contours in cluster_contours:
            if not contours:
                continue
            centroid = self._contourCentroid(contours[0])
            if centroid is not None:
                chunks.append(centroid)

        # Append more cluster contours if num_tiles has not been reached
        if len(indices) < num_tiles:
            # NOTE(review): the original comment describes this as "at
            # least 5% of tile area", but the expression squares 5% of the
            # tile edge - confirm which one is intended.
            min_area = (0.05 * self.tile_size / (4**level_offset))**2
            for contours in cluster_contours:
                for cnt in contours[1:]:
                    if len(chunks) >= num_tiles:
                        break
                    if cv2.contourArea(cnt) > min_area:
                        centroid = self._contourCentroid(cnt)
                        if centroid is not None:
                            chunks.append(centroid)
                if len(chunks) >= num_tiles:
                    break

        # To top-left-corner format
        return self._toTopLeft(chunks, level_offset)

    def getTileCoords(self,
                      num_tiles: int,
                      sampling_method='skeleton',
                      tissue_th: tuple = (0.2, 0.7),
                      seed=None,
                      offset: tuple = (0, 0)):
        '''
        Find up to `num_tiles` tile positions and store their top-left
        corners (x, y) in `self.tile_coords`.

        `sampling_method` is one of 'skeleton', 'tissue_pct' or 'slic'.

        `tissue_th` is the slice of tissue percentage ~(min, max) within
        which we allow the search: sometimes the `max` might not give enough
        tiles for the mosaic, so we can decrease it gradually until `min` is
        reached. Only used by 'tissue_pct'.

        `seed` makes the random sub-selection reproducible when there are
        more candidates than `num_tiles`.

        offset = (x,y) coord offset in tile size fractions that is applied
        to sampled coords. This is meant as a form of data augmentation.

        Raises AssertionError for an unknown `sampling_method`.
        '''
        if sampling_method not in self._SAMPLING_METHODS:
            # Raised explicitly so the check also runs under `python -O`
            raise AssertionError(
                "`sampling_method` should be one of 'skeleton', "
                "'tissue_pct' or 'slic'")

        # Number of pyramid levels between the working level and level 2
        level_offset = 2 - self.level
        if sampling_method == 'skeleton':
            final_coords = self._skeletonCoords(num_tiles, level_offset,
                                                seed)
        elif sampling_method == 'tissue_pct':
            final_coords = self._tissuePctCoords(num_tiles, level_offset,
                                                 tissue_th, seed)
        else:
            final_coords = self._slicCoords(num_tiles, level_offset, seed)

        # apply offset to coordinates
        final_coords = final_coords + np.array(
            [int(self.tile_size * offset[0]),
             int(self.tile_size * offset[1])])
        self.tile_coords = final_coords.copy()

    def getTiles(self,
                 stack: bool = False,
                 sampling_method: str = 'skeleton',
                 mosaic_grid: tuple = (4, 3),
                 output_tile_size: int = 128,
                 tissue_th: tuple = (0.1, 0.7),
                 seed: int = None,
                 offset: tuple = (0, 0)):
        '''
        Get tiles from the slide and stack into mosaic if needed.

        Tile positions are (re)computed with `getTileCoords` for
        `mosaic_grid[0] * mosaic_grid[1]` tiles. Missing tiles stay white
        and a warning is issued.

        offset = (x,y) coord offset in tile size fractions that is applied
        to sampled coords. This is meant as a form of data augmentation.

        Returns a uint8 array: one entry per tile, or the stacked mosaic
        image when `stack` is True.
        '''
        # Solve indices to be used in mosaic
        m, n = mosaic_grid
        self.getTileCoords(num_tiles=n * m,
                           sampling_method=sampling_method,
                           tissue_th=tissue_th,
                           seed=seed,
                           offset=offset)

        # Read regions; slots without a tile remain white
        output_img = np.full(
            [n * m, output_tile_size, output_tile_size, 3],
            255,
            dtype='uint8')
        for idx, coord in enumerate(self.tile_coords):
            x_bounds, y_bounds = self._clippedTileBounds(coord)
            left, right = np.int32(x_bounds)
            bottom, top = np.int32(y_bounds)
            tile = self.img[bottom:top, left:right].copy()
            # Tiles clipped at the slide border are padded back to full size
            tile = padIfNeeded(tile,
                               tgt_width=self.tile_size,
                               tgt_height=self.tile_size)
            tile = cv2.resize(tile, (output_tile_size, ) * 2)
            output_img[idx] = np.uint8(tile)

        if len(self.tile_coords) < m * n:
            warnings.warn("Could not find enough unique tiles for the slide"
                          "(tiles: %s/%s, slide: %s" %
                          (len(self.tile_coords), m * n, self.slide_fn))

        # Stack to single array of (m,n) tiles if needed
        if stack:
            output_img = self._stackMosaic(output_img, m, n)
        return np.array(output_img)

    def getTilesCancerStatus(self,
                             stack: bool = False,
                             mosaic_grid: tuple = (4, 3)):
        '''
        Classify the mask region under every tile in `self.tile_coords`
        with `tileClassification`.

        Requires the slide to have been created with both `mask_fn` and
        `data_provider`, and tile coordinates to have been computed.

        Returns a uint8 array with one category per mosaic slot (0 for
        unused slots), reshaped to `mosaic_grid` when `stack` is True.
        '''
        mask = getattr(self, 'mask', None)
        if mask is None:
            raise AttributeError(
                "Slide has no mask: pass both `mask_fn` and `data_provider` "
                "to the constructor")

        # Solve indices to be used in mosaic
        m, n = mosaic_grid

        # Read regions
        output_img = np.zeros([n * m], dtype='uint8')
        for idx, coord in enumerate(self.tile_coords):
            x_bounds, y_bounds = self._clippedTileBounds(coord)
            left, right = np.int32(x_bounds)
            bottom, top = np.int32(y_bounds)
            tile = mask[bottom:top, left:right].copy()
            output_img[idx] = tileClassification(tile, self.data_provider)

        # Stack to single array of (m,n) tiles if needed
        if stack:
            output_img = self._stackMosaic(output_img, m, n)
        return np.array(output_img)

    def visualizeCoverage(self, figsize=(16, 16)):
        '''
        Visualize the coverage of the sampled tiles on the down-sampled
        slide by blending green rectangles over it.
        '''
        background = self.ds_img.copy()
        foreground = background.copy()
        # Tile coordinates live at the working level; scale them to level 2
        scale = 4**(2 - self.level)
        for coord in self.tile_coords:
            x_bounds, y_bounds = self._clippedTileBounds(coord)
            left, right = np.int32(x_bounds / scale)
            bottom, top = np.int32(y_bounds / scale)
            foreground[bottom:top, left:right] = (0, 255, 0)

        ## Visualize
        output = cv2.addWeighted(background, 0.7, foreground, 0.3, 0)
        plt.figure(figsize=figsize)
        plt.imshow(output)
        plt.show()
def __cropPatchesFromImage(self, image_name, downsample_level=None):
    """Crop ``self.__patches_per_image`` square patches around cell coordinates.

    downsample_level: 0, 1, 2, None (random)
    Use only 2 or None (MultiImage is used for low resolution image,
    OpenSlide for high resolution image (to save memory and faster
    process, Openslide did not work for low resolution image))
    Resolution downsample levels: 1, 4, 16

    With ``None`` the first half of the patches is read at level 0 through
    OpenSlide and the second half is cut from the last MultiImage level.
    With an explicit level every patch is cut from that MultiImage level.

    For each patch up to five random cell coordinates are tried; the first
    candidate whose mean is below 230 is kept, otherwise the fifth one.
    Crops that come out smaller than the patch size are padded with white.

    Returns:
        List of patch arrays.
    """
    patch_size = self.__patch_size
    patch_shape = (patch_size, patch_size)

    multi_image = MultiImage(image_name)
    use_mixed_resolutions = downsample_level is None
    image_slide = None
    if use_mixed_resolutions:
        image_slide = OpenSlide(image_name)
        image_to_crop = multi_image[-1]
    else:
        image_to_crop = multi_image[downsample_level]
        # (width, height) of the cropped level
        image_shape = tuple(image_to_crop.shape[::-1][1:])
        resolution_relation = 4**(2 - downsample_level)

    try:
        # Find coordinates from where to select patch
        cell_coordinates = self.__getCellCoordinatesFromImage(multi_image)

        # Crop patches
        patches = []
        for i in range(self.__patches_per_image):
            # Choose mixed down sample level (low and high (not mid))
            if use_mixed_resolutions:
                downsample_level = int(
                    i * 2 / self.__patches_per_image) * 2
                image_shape = image_slide.level_dimensions[downsample_level]
                resolution_relation = 4**(2 - downsample_level)

            # Iterate good patch
            for _attempt in range(5):
                random_index = random.randint(
                    0, cell_coordinates.shape[1] - 1)

                # Scale coordinates by the resolution relation between the
                # low-resolution image and the cropped level, and centre
                # the patch on the cell by subtracting half a patch.
                start_y, start_x = (
                    cell_coordinates[:, random_index] * resolution_relation
                    - int(0.5 * patch_size))
                start_x = max(0, min(start_x, image_shape[0] - patch_size))
                start_y = max(0, min(start_y, image_shape[1] - patch_size))
                end_x, end_y = np.array([start_x, start_y]) + patch_size

                # Level 0 is only read through OpenSlide when the slide was
                # actually opened; an explicit downsample_level=0 falls
                # back to the MultiImage level instead of failing on an
                # undefined slide handle.
                if image_slide is not None and downsample_level == 0:
                    patch = np.array(
                        image_slide.read_region(
                            (int(start_x), int(start_y)), 0,
                            patch_shape))[..., :3]
                else:
                    patch = image_to_crop[start_y:end_y, start_x:end_x]

                # Pad with white if the image was smaller than patch_size
                if patch.shape[:2] != patch_shape:
                    padding = np.subtract(patch_shape, patch.shape[:2])
                    padding = ([0, padding[0]], [0, padding[1]], [0, 0])
                    patch = np.pad(patch, padding, constant_values=255)

                # Patch has enough colored areas (not pure white)
                # Otherwise iterate again
                if np.mean(patch) < 230:
                    break
            patches.append(patch)
        return patches
    finally:
        # Release the native OpenSlide handle on every exit path
        if image_slide is not None:
            image_slide.close()
def __getitem__(self, idx):
    """Return the most tissue-rich patches of a slide with their targets.

    The slide is read at ``self.level``, scored block-wise on a strided
    copy, and the ``self.num_patches`` highest-scoring patches are copied
    into a white-initialised array.

    Returns ``(patches, label)``, or ``(patches, (mask, label))`` when
    ``self.use_mask`` is set. ``patches`` is permuted to
    (N, channels, H, W); ``label`` is a length-5 tensor whose first
    ``isup_grade`` entries are 1; ``mask`` holds one binary plane for each
    of the 6 mask values.
    """
    size = self.patch_size
    count = self.num_patches
    image_id = self.df['image_id'].iloc[idx]

    # Skimage seems to be slightly faster than openslide here
    slide_path = os.path.join(self.root_path, 'train_images',
                              image_id + '.tiff')
    slide = MultiImage(slide_path, conserve_memory=False)[self.level]

    # Only look at regions of the image that aren't empty space. Find those
    # regions using a subsampled image, since NumPy is slow.
    stride = size // 8

    def tissue_score(block, axis):
        # NOTE(review): on uint8 input `block - 255` wraps around instead
        # of going negative - confirm this is the intended score.
        return np.mean((block - 255)**2, axis=axis) * np.var(block, axis=axis)

    scores = block_reduce(slide[::stride, ::stride],
                          block_size=(size // stride, size // stride, 3),
                          func=tissue_score)
    ranked = np.argsort(scores, axis=None)[::-1]
    columns = scores.shape[1]
    x = ranked % columns * size
    y = ranked // columns * size
    used = min(count, x.shape[0])

    # Unused slots stay white
    image = np.full((count, size, size, 3), 255, dtype=np.uint8)
    for slot, (top, left) in enumerate(zip(y[:used], x[:used])):
        crop = slide[top:top + size, left:left + size]
        image[slot, :crop.shape[0], :crop.shape[1]] = crop

    label = torch.zeros(5)
    label[:self.df['isup_grade'].iloc[idx]] = 1

    if not self.use_mask:
        if self.transforms:
            # The transform works on one image at a time
            for slot in range(count):
                image[slot] = self.transforms(image=image[slot])['image']
        return torch.tensor(image).permute(0, 3, 1, 2), label

    mask_path = os.path.join(self.root_path, 'train_label_masks',
                             image_id + '_mask.tiff')
    full_mask = MultiImage(mask_path, conserve_memory=False)[self.level]
    full_mask = full_mask[..., 0]

    # Unused slots stay 0
    mask = np.zeros((count, size, size), dtype=np.uint8)
    for slot, (top, left) in enumerate(zip(y[:used], x[:used])):
        crop = full_mask[top:top + size, left:left + size]
        mask[slot, :crop.shape[0], :crop.shape[1]] = crop

    # Different data providers have different mask formats, normalise them
    # to be the same. The order matters: 2 must be moved before 1 becomes 2.
    if self.df['data_provider'].iloc[idx] == 'karolinska':
        mask[mask == 2] = 3
        mask[mask == 1] = 2

    if self.transforms:
        # The transform works on one image/mask pair at a time
        for slot in range(count):
            augmented = self.transforms(image=image[slot], mask=mask[slot])
            image[slot] = augmented['image']
            mask[slot] = augmented['mask']

    # Convert the mask to one binary plane per value, like the labels
    values = np.arange(6)[None, :, None, None]
    mask = (mask[:, None] == values).astype(np.float64)

    return torch.tensor(image).permute(0, 3, 1, 2), (torch.tensor(mask),
                                                     label)
"""Render the pre-computed tile boxes of every slide to an inverted PNG."""
import os
import json
import sys
sys.path.append("../")
from tqdm import tqdm
from skimage.io import MultiImage
import cv2 as cv
from utils.data_utils import get_tile
import matplotlib.pyplot as plt

images_dir = "../input/prostate-cancer-grade-assessment/train_images"
output_dir = "../input/256_36_hsv"

# Mapping of slide id -> boxes, as produced by the notebook
with open("../notebooks/256_36_hsv.json", 'r') as file:
    data = json.load(file)

os.makedirs(output_dir, exist_ok=True)

for slide_id, slide_boxes in tqdm(data.items()):
    source_path = os.path.join(images_dir, slide_id) + ".tiff"
    target_path = os.path.join(output_dir, slide_id) + ".png"

    # Level 1 of the pyramid is used for the export
    tiles = get_tile(MultiImage(source_path)[1], slide_boxes, 256, 36)

    # Convert for OpenCV writing and invert the intensities
    inverted = 255 - cv.cvtColor(tiles, cv.COLOR_RGB2BGR)
    cv.imwrite(target_path, inverted)
def _worker(paths: Tuple[Path, Optional[Path]], namespace) -> None:
    """Pre-process one slide and hand the resulting record to the writer.

    Args:
        paths: ``(image_path, mask_path)``. Only ``image_path`` is used;
            the mask path is always re-derived from it by replacing
            ``train_images`` with ``train_label_masks`` and ``.tiff`` with
            ``_mask.tiff``.
        namespace: Object exposing ``self`` (owner of ``_writer``) and
            ``train_meta`` (table with ``image_id``, ``data_provider``,
            ``gleason_score`` and ``isup_grade`` columns).

    Returns:
        None. Slides whose full-resolution layer cannot be read are skipped
        silently; any error during processing is printed together with the
        slide name and does not propagate.
    """
    self = namespace.self
    train_meta = namespace.train_meta
    image_path, _ = paths
    name = image_path.stem
    mask_path = Path(
        str(image_path).replace("train_images", "train_label_masks").replace(
            ".tiff", "_mask.tiff"))

    image_slide = MultiImage(str(image_path))
    large_image = get_layer_safely(image_slide, layer=0)
    if large_image is None:
        return

    # The mask file is only opened when it exists
    large_mask = None
    if mask_path.exists():
        large_mask = get_layer_safely(MultiImage(str(mask_path)),
                                      layer=0,
                                      is_mask=True)

    small_image = get_layer_safely(image_slide, layer=2)
    if small_image is None:
        # Level 2 is 16x smaller than level 0; rebuild it when missing
        scale = 1 / 16
        small_image = cv2.resize(large_image,
                                 dsize=(0, 0),
                                 fx=scale,
                                 fy=scale,
                                 interpolation=cv2.INTER_LANCZOS4)

    try:
        pre_processor = ImagePreProcessor(reduce_memory=False)
        large_image = pre_processor.dual(large_image, small_image)
        if large_mask is not None:
            large_mask = pre_processor.single(large_mask)

        row = train_meta[train_meta.image_id == name].iloc[0]
        data_provider = row["data_provider"]
        gleason_score = row["gleason_score"]
        label = row["isup_grade"]

        slide = OpenSlide(str(image_path))
        try:
            additional = {
                "data_provider": data_provider,
                "gleason_score": gleason_score,
                "image_shape": large_image.shape[:2],
                "source_image_shape": slide.dimensions,
                "x_resolution": float(slide.properties["tiff.XResolution"]),
                "y_resolution": float(slide.properties["tiff.YResolution"]),
                "resolution_unit": slide.properties["tiff.ResolutionUnit"]
            }
        finally:
            # Release the native handle even if a property is missing
            slide.close()

        if large_mask is None:
            visualization = None
        else:
            masked = draw_overlay_mask(
                large_image,
                large_mask,
                color_map=get_color_map(data_provider, normalized=False))
            title_text = f"{data_provider} - id={name[:10]} isup={label} gleason={gleason_score}"
            visualization = plot_meta(
                masked,
                title_text,
                color_map=get_color_map(data_provider, normalized=True),
                classname_map=get_classname_map(data_provider),
                show_keys=list(np.unique(large_mask)))

        record = Record(large_image,
                        large_mask,
                        visualization,
                        name,
                        label,
                        phase=Phase.TRAIN,
                        additional=additional)
        self._writer.put(record)
    except Exception as e:
        # Best effort: one broken slide must not stop the whole run
        print(f"{name} - {e}")
def gettextFrom_tiff_Image(file):
    """Run OCR on every page of a multi-page TIFF and print each result."""
    for page in MultiImage(file, plugin='pil'):
        page_text = pytesseract.image_to_string(Image.fromarray(page))
        print(page_text)
return result_img, minimal_boxes # In[6]: names = [name for name in os.listdir(IMAGES)] compact_representation = {} mean_ratio = 0 for name in tqdm(names): img_path = os.path.join(IMAGES, name) img = MultiImage(img_path)[-1] compact_image, minimal_boxes = get_minimal_image(img) compact_representation[name] = {"original_size": img.shape[:2], "rectangles": minimal_boxes} mean_ratio += np.prod(compact_image.shape[:2]) / np.prod(img.shape[:2]) print(f"Mean ratio: {mean_ratio / len(names)}") # # # # In[7]: # # # with open("../dataset/compact_representation.json", "w") as file: # json.dump(compact_representation, file)