def _load_tile_data(tx, ty):
    """Load the extract image for tile (tx, ty) into the shared data store.

    The selected image plane, its channel labels and the tile coordinates are
    written to ``db`` under the 'tile' key. The call is a no-op when a tile is
    already loaded and its stored coordinates equal ``(tx, ty)``.

    Args:
        tx: Tile x coordinate
        ty: Tile y coordinate
    Raises:
        ValueError: If the selected image is neither uint8 nor uint16
    """
    # Skip the read entirely when the requested tile is the one already stored
    already_loaded = _tile_loaded() and db.get('coords', 'tile') == (tx, ty)
    if already_loaded:
        return

    rel_path = cytokit_io.get_extract_image_path(cfg.region_index, tx, ty, cfg.extract_name)
    tile_path = osp.join(cfg.exp_data_dir, rel_path)
    stack, meta = cytokit_io.read_tile(tile_path, return_metadata=True)

    # Reduce both the image and its labels to the configured cycle and z plane
    plane = (cfg.extract_cycle, cfg.extract_z)
    img = stack[plane]
    labels = list(meta['structured_labels'][plane])

    logger.info(
        'Loaded tile image for tile x = %s, tile y = %s, shape = %s, dtype = %s',
        tx, ty, img.shape, img.dtype)

    if img.dtype not in (np.uint8, np.uint16):
        raise ValueError(
            'Only 8 or 16 bit images are supported (image type = {})'.format(img.dtype))

    # At this point the image is expected to be (C, H, W)
    db.put('images', 'tile', img)
    db.put('channels', 'tile', labels)
    db.put('coords', 'tile', (tx, ty))
def add_cell_images(g):
    """Join single cell image data onto one group of cytometry records.

    The region and tile coordinates are taken from the first row of ``g``; the
    cycle and z plane come from the enclosing ``kwargs`` (both default to 0).

    Args:
        g: Cytometry data frame for a single tile (must contain 'region_index',
            'tile_x', 'tile_y' and 'id')
    Returns:
        ``g`` left-joined on 'id' with the extracted cell image data frame
    """
    first_row = g.iloc[0]
    reg, tx, ty = first_row[['region_index', 'tile_x', 'tile_y']]

    # Read the extract and pick the 2D multi-channel image used both for isolating
    # cell objects and for rendering the cell images
    tile_path = osp.join(
        output_dir, cytokit_io.get_extract_image_path(reg, tx, ty, extract))
    stack, meta = cytokit_io.read_tile(tile_path, return_metadata=True)
    icyc = kwargs.get('cycle', 0)
    iz = kwargs.get('z', 0)
    img = stack[icyc, iz]
    channels = list(meta['structured_labels'][icyc, iz])

    # Build the display image from the raw image, then cut out per-cell records holding
    # the original cell id, the cell image and associated image properties
    processor = cvproc.get_image_processor(channels, ranges=ranges, colors=colors)
    display_img = processor.run(img)
    records = extract_single_cell_image_data(g, img, display_img, channels, image_size=image_size)
    cell_data = pd.DataFrame(records)

    # The two data frames are expected to share exactly one field ('id')
    n_shared = g.columns.isin(cell_data.columns).sum()
    assert n_shared == 1, \
        'Cell data frame should only have one overlapping field with cytometry data frame;' \
        '\nCell fields = {}\nCytometry fields = {}'.format(cell_data.columns, g.columns)

    # Keep every cytometry record, attaching cell image data where available
    return pd.merge(g, cell_data, how='left', on='id')
def _run(self, *args, **kwargs):
    """Load a single tile, either from individual raw images or from an assembled file.

    In 'raw' mode one 2D grayscale image is read per (cycle, channel, z plane) and the
    images are stacked into a tile with shape (cycles, z, channel, height, width). In any
    other (valid) mode the tile is assumed to be assembled already and is read directly.

    Returns:
        The tile array
    Raises:
        ValueError: If a raw image does not have exactly 2 dimensions
    """
    cfg = self.config
    ncyc, nz, nch = cfg.n_cycles, cfg.n_z_planes, cfg.n_channels_per_cycle

    _validate_mode(self.mode)

    # Anything other than "raw" means the tile already exists on disk in assembled form
    if self.mode != 'raw':
        tx, ty = self.config.get_tile_coordinates(self.tile_index)
        img_path = cytokit_io.get_img_path(self.path_fmt_name, self.region_index, tx, ty)
        return cytokit_io.read_tile(osp.join(self.data_dir, img_path))

    def read_plane(icyc, ich, iz):
        # Read and validate the grayscale image for one cycle/channel/z combination
        img_path = cytokit_io.get_raw_img_path(self.region_index, self.tile_index, icyc, ich, iz)
        img_path = osp.join(self.data_dir, img_path)
        img = cytokit_io.read_raw_microscope_image(img_path, self.raw_file_type)
        if img.ndim != 2:
            raise ValueError(
                'Expecting raw image at path "{}" to have 2 dims but found shape {}'
                .format(img_path, img.shape)
            )
        return img

    # Stack z planes first (axis 0), then channels (axis 1), then cycles (axis 0) so that
    # the result is (cycles, z, channel, height, width)
    return np.stack([
        np.stack([
            np.stack([read_plane(icyc, ich, iz) for iz in range(nz)], 0)
            for ich in range(nch)
        ], 1)
        for icyc in range(ncyc)
    ], 0)
def create_montage(output_dir, config, extract, name, region_indexes, prep_fn=None, compress=6):
    """Build and save one montage image per region from the tiles of an extract.

    Args:
        output_dir: Directory holding the extract images; montage files are written beneath it
        config: Experiment configuration (supplies region indexes, tile count and tile coordinates)
        extract: Name of the extract whose tiles are read
        name: Name used to build the montage file path
        region_indexes: Region indexes to process; if None, `config.region_indexes` is used
        prep_fn: Optional function applied to each region montage before it is saved
        compress: Compression setting forwarded to `cytokit_io.save_tile`
    """
    from cytokit.utils import ij_utils

    # Generate a montage for every requested region (all configured regions by default)
    if region_indexes is None:
        region_indexes = config.region_indexes
    n_regions = len(region_indexes)

    path = None
    for position, ireg in enumerate(region_indexes, start=1):
        # Report the region being processed separately from the loop position; the two only
        # coincide when every region is processed in order starting from the first
        logger.info('Generating montage for region %d (%d of %d)', ireg + 1, position, n_regions)
        tiles = []
        labels = None
        for itile in range(config.n_tiles_per_region):
            tx, ty = config.get_tile_coordinates(itile)
            path = cytokit_io.get_extract_image_path(ireg, tx, ty, extract)
            tile, meta = cytokit_io.read_tile(osp.join(output_dir, path), return_metadata=True)
            # Slice labels are taken from the first tile read for the region
            if labels is None:
                labels = meta['labels']
            tiles.append(tile)
        reg_img_montage = montage(tiles, config)
        if prep_fn is not None:
            reg_img_montage = prep_fn(reg_img_montage)
        path = osp.join(output_dir, cytokit_io.get_montage_image_path(ireg, name))
        logger.info('Saving montage to file "%s"', path)
        tags = [] if labels is None else ij_utils.get_slice_label_tags(labels)
        cytokit_io.save_tile(path, reg_img_montage, config=config,
                             infer_labels=False, extratags=tags, compress=compress)

    # `path` is still None here only if no region was processed
    logger.info('Montage generation complete; results saved to "%s"',
                None if path is None else osp.dirname(path))
def add_cell_images(g):
    """Join single cell image data onto one group of cytometry records, caching tile reads.

    Region and tile coordinates come from the first row of ``g``. The z plane is the
    enclosing ``z`` when given, otherwise the 'z' value of the first row.

    Args:
        g: Cytometry data frame for a single tile (must contain 'region_index',
            'tile_x', 'tile_y' and 'id', plus 'z' when no static z is configured)
    Returns:
        ``g`` left-joined on 'id' with the extracted cell image data frame
    """
    first_row = g.iloc[0]
    reg, tx, ty = first_row[['region_index', 'tile_x', 'tile_y']]

    # Only consult the 'z' column when no static z plane was supplied
    if z is None:
        iz = first_row['z']
    else:
        iz = z

    # Read each extract file at most once; later groups for the same tile reuse the cache
    tile_path = osp.join(
        output_dir, cytokit_io.get_extract_image_path(reg, tx, ty, extract))
    if tile_path not in tile_cache:
        tile_cache[tile_path] = cytokit_io.read_tile(tile_path, return_metadata=True)
    stack, meta = tile_cache[tile_path]

    # Pick the 2D multi-channel image used for both cell isolation and cell image display
    img = stack[cycle, iz]
    channels = list(meta['structured_labels'][cycle, iz])

    # Build the display image from the raw image, then cut out per-cell records holding
    # the original cell id, the cell image and associated image properties
    processor = cvproc.get_image_processor(channels, ranges=ranges, colors=colors)
    display_img = processor.run(img)
    records = extract_single_cell_image_data(
        g, img, display_img, channels, image_size=image_size, **kwargs)
    cell_data = pd.DataFrame(records)

    # The two data frames are expected to share exactly one field ('id')
    n_shared = g.columns.isin(cell_data.columns).sum()
    assert n_shared == 1, \
        'Cell data frame should only have one overlapping field with cytometry data frame;' \
        '\nCell fields = {}\nCytometry fields = {}'.format(cell_data.columns, g.columns)

    # Keep every cytometry record, attaching cell image data where available
    return pd.merge(g, cell_data, how='left', on='id')
def test_pipeline_01(self):
    """Run the processor, operator and analysis stages end to end and check their outputs.

    Uses the 'cellular-marker-small' test experiment and verifies processor records,
    cytometry statistics, single cell image generation, segmentation masks against the
    validation images, and the resulting montage.
    """
    out_dir = tempfile.mkdtemp(prefix='cytokit_test_pipeline_01_')
    print('Initialized output dir {} for pipeline test 01'.format(out_dir))

    exp_dir = osp.join(cytokit.test_data_dir, 'experiment', 'cellular-marker-small')
    raw_dir = osp.join(exp_dir, 'raw')
    val_dir = osp.join(exp_dir, 'validation')
    config_dir = osp.join(exp_dir, 'config')
    config = ck_config.load(config_dir)

    # Processing first, then extractions/aggregations on its output
    processor.Processor(data_dir=raw_dir, config_path=config_dir).run_all(output_dir=out_dir)
    operator.Operator(data_dir=out_dir, config_path=config_dir).run_all()
    analysis.Analysis(data_dir=out_dir, config_path=config_dir).run_all()

    # --------------------- #
    # Processor data checks #
    # --------------------- #
    drift = ck_fn.get_processor_data(out_dir)['drift_compensator']
    # Two cycles with one acting as reference --> exactly one drift compensation record
    self.assertEqual(len(drift), 1)
    # The synthetic data carries a translation of 12 rows and -3 columns
    self.assertEqual(drift.iloc[0]['translation'], [12, -3])

    focus = ck_fn.get_processor_data(out_dir)['focal_plane_selector']
    # Focal selection records are per-tile and the experiment has a single tile
    self.assertEqual(len(focus), 1)
    # The data was generated so that the second of the 3 z planes has the best focus
    self.assertEqual(focus.iloc[0]['best_z'], 1)

    # ---------------------- #
    # Cytometry stats checks #
    # ---------------------- #
    df = ck_fn.get_cytometry_data(out_dir, config, mode='best_z_plane')

    # Overall cell count and sizes must land in the expected ranges
    n_cells = len(df)
    self.assertTrue(
        20 <= n_cells <= 25,
        'Expecting between 20 and 25 cells, found {} instead'.format(n_cells))
    nuc_diam = df['nucleus_diameter'].mean()
    cell_diam = df['cell_diameter'].mean()
    self.assertTrue(
        4 < nuc_diam < 6,
        'Expecting mean nucleus diameter in [4, 6] um, found {} instead'.format(nuc_diam))
    self.assertTrue(
        8 < cell_diam < 10,
        'Expecting mean cell diameter in [8, 10] um, found {} instead'.format(cell_diam))

    # Drift-aligned DAPI channels should be nearly identical across cycles, but border cells end
    # up with dapi=0 (cval=0 in the drift compensation translation), so the comparison is made
    # against a threshold (the ratio is < .5 without drift compensation)
    dapi_ratio = df['ni:DAPI2'].mean() / df['ni:DAPI1'].mean()
    self.assertTrue(
        .8 < dapi_ratio <= 1,
        'Expecting cycle 2 DAPI averages to be similar to cycle 1 DAPI after drift compensation, '
        'found ratio {} (not in (.8, 1])'.format(dapi_ratio))

    # All records must belong to one z plane, namely the known best focus plane
    self.assertEqual(df['z'].nunique(), 1)
    self.assertEqual(int(df['z'].unique()[0]), 1)

    # Single cell image generation should yield a non-null 64x64 RGB image per record
    df = ck_fn.get_single_cell_image_data(out_dir, df, 'best_z_segm', image_size=(64, 64))
    self.assertEqual(df['image'].iloc[0].shape, (64, 64, 3))
    self.assertTrue(df['image'].notnull().all())

    # ------------------- #
    # Segmentation checks #
    # ------------------- #
    # Load the extract that contains the object masks
    extract_path = osp.join(
        out_dir, ck_io.get_extract_image_path(ireg=0, tx=0, ty=0, name='best_z_segm'))
    img, meta = ck_io.read_tile(extract_path, return_metadata=True)

    # The 8 channels configured for extraction should all be present in the hyperstack
    self.assertEqual(len(meta['labels']), 8)

    # Compare cell and nucleus masks to ground truth; IoU must exceed 80% for both
    img_seg_cell = img[0, 0, meta['labels'].index('cyto_cell_mask')]
    img_seg_nucl = img[0, 0, meta['labels'].index('cyto_nucleus_mask')]
    img_val_cell = sk_io.imread(osp.join(val_dir, 'cells.tif'))
    img_val_nucl = sk_io.imread(osp.join(val_dir, 'nuclei.tif'))

    def iou(im1, im2):
        # Intersection over union of the foreground (non-zero) pixels
        fg1, fg2 = im1 > 0, im2 > 0
        return (fg1 & fg2).sum() / (fg1 | fg2).sum()

    self.assertGreater(iou(img_seg_cell, img_val_cell), .8)
    self.assertGreater(iou(img_seg_nucl, img_val_nucl), .8)

    # -------------- #
    # Montage checks #
    # -------------- #
    # With a single tile the montage must match the extract image in shape and dtype
    montage_path = osp.join(out_dir, ck_io.get_montage_image_path(ireg=0, name='best_z_segm'))
    img_mntg = ck_io.read_tile(montage_path)
    self.assertEqual(img.shape, img_mntg.shape)
    self.assertEqual(img.dtype, img_mntg.dtype)
def _load_montage_data():
    """Load the configured montage image, downsample it and publish it to the data store.

    The image is reduced to the configured cycle and z plane, optionally subset to the
    configured channel names, resized to ``cfg.montage_target_shape`` and then written to
    ``db`` under the 'montage' key together with its channel labels, colors and ranges.

    Raises:
        ValueError: If a configured channel name is not present in the montage image or
            if the image is neither uint8 nor uint16
    """
    from skimage.transform import resize

    rel_path = cytokit_io.get_montage_image_path(cfg.region_index, cfg.montage_name)
    montage_path = osp.join(cfg.exp_data_dir, rel_path)
    stack, meta = cytokit_io.read_tile(montage_path, return_metadata=True)

    # Reduce both the image and its labels to the configured cycle and z plane
    plane = (cfg.montage_cycle, cfg.montage_z)
    img = stack[plane]
    labels = list(meta['structured_labels'][plane])

    # -------------------------- #
    # Montage display properties #
    # -------------------------- #
    channel_filter = cfg.montage_channel_names
    if channel_filter is not None:
        # Every configured channel name must exist; report the first one that does not
        missing = [c for c in channel_filter if c not in labels]
        if missing:
            raise ValueError(
                'Configured montage channel name "{}" does not exist in montage image '
                '(available channels = {}); Fix or remove this channel name from the (comma-separated) environment '
                'variable "{}" and run again'.format(
                    missing[0], labels, ENV_APP_MONTAGE_CHANNEL_NAMES))

        # Subset the image to the given channels, in the order given, so that it lines up
        # with the other per-channel montage options (colors/ranges)
        channel_idx = np.array([labels.index(c) for c in channel_filter])
        img = img[channel_idx]
        labels = channel_filter

    ranges = cfg.montage_channel_ranges
    colors = cfg.montage_channel_colors
    # Translate color names into rgb multipliers
    if colors is not None:
        colors = [color.map(c) for c in colors]

    # ------------------ #
    # Montage resampling #
    # ------------------ #
    logger.info('Loaded montage image with shape = %s, dtype = %s', img.shape, img.dtype)
    if img.dtype not in (np.uint8, np.uint16):
        raise ValueError(
            'Only 8 or 16 bit images are supported (image type = {})'.format(img.dtype))

    # Shrink the montage to something much smaller; the resize function expects channels
    # last (and preserves them if not specified in the target shape), so move the channel
    # axis to the end for the resize and back to the front afterwards
    source_dtype = img.dtype
    channels_last = np.moveaxis(img, 0, -1)
    resized = resize(
        channels_last, cfg.montage_target_shape,
        order=0, mode='constant', anti_aliasing=False, preserve_range=True)
    img = np.moveaxis(resized.astype(source_dtype), -1, 0)

    # At this point the image is expected to be (C, H, W)
    db.put('images', 'montage', img)
    db.put('channels', 'montage', labels)
    db.put('colors', 'montage', colors)
    db.put('ranges', 'montage', ranges)
def get_extract_image_meta(output_dir, extract, ireg=0, tx=0, ty=0):
    """Read the metadata stored with one tile image of an extract.

    By default the tile at region 0, x = 0, y = 0 is used; a different tile can be
    selected for experiments where that one is not available.

    Args:
        output_dir: Directory containing the extract images
        extract: Name of the extract
        ireg: Region index of the tile to read metadata from (default 0)
        tx: Tile x coordinate (default 0)
        ty: Tile y coordinate (default 0)
    Returns:
        The metadata object returned by `cytokit_io.read_tile` for that tile
    """
    path = osp.join(output_dir, cytokit_io.get_extract_image_path(ireg, tx, ty, extract))
    # The image itself is read as well but only the metadata is of interest here
    _, meta = cytokit_io.read_tile(path, return_metadata=True)
    return meta
def get_tile_montage(config, image_dir, hyperstack, icyc=0, iz=0, ich=0, ireg=0, bw=0, bv_fn=None,
                     allow_missing=False, imread_fn=None):
    """Stitch all tiles of one region into a montage for a single cycle, z-plane and channel

    Tiles may be read either from consolidated 5D hyperstacks or from raw, flattened 2D
    images, as selected by the `hyperstack` argument

    Args:
        config: Experiment configuration
        image_dir: Directory containing the tiled images; all z-planes, cycles, and channels
            are expected as individual tif files (e.g. the output of the pre-processing or
            segmentation pipelines)
        hyperstack: Whether images are 5D hyperstacks or flattened 2D images:
            - Hyperstacks typically result from some processing or segmentation step
            - Flattened 2D images are typically raw files produced directly by a microscope
        icyc: 0-based cycle index
        iz: 0-based z-plane index
        ich: 0-based channel index
        ireg: 0-based region index
        bw: Width (in pixels) of a border drawn on each tile in the montage, which is useful
            for locating tiles within the montage; values <= 0 disable the border
        bv_fn: Border value function with signature `fn(tile_x, tile_y) --> float`; when
            omitted all border pixels are set to 0
        allow_missing: Whether tiles with no file on disk are tolerated (and replaced by
            zeros); off by default and generally only useful when debugging missing data
        imread_fn: Custom reader for raw (non-hyperstack) files, e.g. for cases where raw
            single-channel files are actually 3 channel files with the first two channels
            blank (this happens w/ Keyence somehow). It receives an image path and must
            return a single 2D image with shape (rows, cols)
    Returns:
        A (usually very large) 2D array containing all tiles stitched together
    Raises:
        ValueError: If a raw image that was read is not 2D
    """
    tw, th = config.tile_width, config.tile_height

    # Slices addressing the top, bottom, left and right border strips of a tile
    border_slices = (np.s_[0:bw, :], np.s_[-bw:, :], np.s_[:, 0:bw], np.s_[:, -bw:])

    tiles = []
    for itile in range(config.n_tiles_per_region):
        tx, ty = config.get_tile_coordinates(itile)

        # Hyperstacks are addressed by tile coordinates, raw files by tile index and slice
        if hyperstack:
            rel_path = cytokit_io.get_processor_img_path(ireg, tx, ty)
        else:
            rel_path = cytokit_io.get_raw_img_path(ireg, itile, icyc, ich, iz)
        path = osp.join(image_dir, rel_path)

        if not osp.exists(path) and allow_missing:
            # Stand in for the missing tile with a blank image
            tile = np.zeros((th, tw))
        elif hyperstack:
            # Pull the requested 2D slice out of the 5D stack
            tile = cytokit_io.read_tile(path)
            tile = tile[icyc, iz, ich, :, :]
        else:
            # Raw acquisition files are loaded and then cropped before joining the montage
            tile = imread_fn(path) if imread_fn is not None else cytokit_io.read_image(path)
            if tile.ndim != 2:
                raise ValueError(
                    'Expecting 2D image at path "{}" but shape found is {}. Consider using the '
                    '`imread_fn` argument to specify a custom function to open files or if already using it, '
                    'make sure that results are 2D'
                    .format(path, tile.shape)
                )
            tile = tile_crop.apply_slice(tile, tile_crop.get_slice(config))

        # Overwrite the tile edges with the border value, if a border was requested
        if bw > 0:
            bv = bv_fn(tx, ty) if bv_fn is not None else 0
            for edge in border_slices:
                tile[edge] = bv

        tiles.append(tile)

    return core.montage(tiles, config)