def _get_z_slice_fn(z, data_dir): """Get array slice map to be applied to z dimension Args: z: String or 1-based index selector for z indexes constructed as any of the following: - "best": Indicates that z slices should be inferred based on focal quality - "all": Indicates that a slice for all z-planes should be used - str or int: A single value will be interpreted as a single index - tuple: A 2-item or 3-item tuple forming the slice (start, stop[, step]); stop is inclusive - list: A list of integers will be used as is data_dir: Data directory necessary to infer 'best' z planes Returns: A function with signature (region_index, tile_x, tile_y) -> slice_for_array where slice_for_array will either be a slice instance or a list of z-indexes (Note: all indexes are 0-based) """ if not z: raise ValueError( 'Z slice cannot be defined as empty value (given = {})'.format(z)) # Look for keyword strings if isinstance(z, str) and z == 'best': map = function_data.get_best_focus_coord_map(data_dir) return lambda ri, tx, ty: [map[(ri, tx, ty)]] if isinstance(z, str) and z == 'all': return lambda ri, tx, ty: slice(None) # Parse argument as 1-based index list and then convert to 0-based zi = cli.resolve_index_list_arg(z, zero_based=True) return lambda ri, tx, ty: zi
def montage(self, name, extract_name, region_indexes=None, crop=None):
    """Create a montage of extracted tiles

    Args:
        name: Name of montage to be created; This will be used to construct result path like
            EXP_DIR/output/montage/`name`
        extract_name: Name of extract to use to generate montage
        region_indexes: 1-based sequence of region indexes to process; can be specified as:
            - None: Region indexes will be inferred from experiment configuration
            - str or int: A single value will be interpreted as a single index
            - tuple: A 2-item or 3-item tuple forming the slice (start, stop[, step]); stop is inclusive
            - list: A list of integers will be used as is
        crop: Either None (default) or a 4-item list in the format (y_start, y_end, x_start, x_end)
            as bounding indices (0-based) that will be applied as a slice on the final montage
            (this is helpful for generating more reasonably sized montage subsets over large datasets)
    Raises:
        ValueError: If `crop` is given but does not contain exactly 4 items
    """
    # Fail fast on a malformed crop instead of hitting an IndexError (too few items) or
    # silently ignoring trailing items (too many) once the crop is finally applied
    if crop is not None and len(crop) != 4:
        raise ValueError(
            'Crop must contain exactly 4 items (y_start, y_end, x_start, x_end) (given = {})'.format(crop))

    logging.info('Creating montage "%s" from extraction "%s"', name, extract_name)
    region_indexes = cli.resolve_index_list_arg(region_indexes, zero_based=True)

    prep_fn = None
    if crop is not None:
        y_start, y_end, x_start, x_end = crop

        def _crop_image(tile):
            # Slices the 4th and 5th axes of the array handed to the callback
            # (presumably laid out as (cycles, z, channels, h, w) -- confirm)
            return tile[:, :, :, y_start:y_end, x_start:x_end]

        prep_fn = _crop_image

    core.create_montage(self.data_dir, self.config, extract_name, name, region_indexes, prep_fn=prep_fn)
def montage(self, name, extract_name, region_indexes=None, crop=None, scale=None):
    """Create a montage of extracted tiles

    Args:
        name: Name of montage to be created; This will be used to construct result path like
            EXP_DIR/output/montage/`name`
        extract_name: Name of extract to use to generate montage
        region_indexes: 1-based sequence of region indexes to process; can be specified as:
            - None: Region indexes will be inferred from experiment configuration
            - str or int: A single value will be interpreted as a single index
            - tuple: A 2-item or 3-item tuple forming the slice (start, stop[, step]); stop is inclusive
            - list: A list of integers will be used as is
        crop: Either None (default) or a 4-item list in the format (y_start, y_end, x_start, x_end)
            as bounding indices (0-based) that will be applied as a slice on the final montage
            (this is helpful for generating more reasonably sized montage subsets over large datasets)
        scale: Either None (default) or a float in (0, 1] used to define scale factor for XY dimensions
            (note that if this is supplied in addition to cropping, it is applied afterwards)
    Raises:
        ValueError: If `crop` is given but does not contain exactly 4 items
    """
    # Fail fast on a malformed crop instead of hitting an IndexError (too few items) or
    # silently ignoring trailing items (too many) once the crop is finally applied
    if crop is not None and len(crop) != 4:
        raise ValueError(
            'Crop must contain exactly 4 items (y_start, y_end, x_start, x_end) (given = {})'.format(crop))

    logging.info('Creating montage "%s" from extraction "%s"', name, extract_name)
    region_indexes = cli.resolve_index_list_arg(region_indexes, zero_based=True)

    def prep_fn(tile):
        # Crop first (on the two trailing axes), then rescale the cropped result
        if crop is not None:
            tile = tile[..., crop[0]:crop[1], crop[2]:crop[3]]
        if scale is not None:
            # Imported lazily so scikit-image is only required when scaling is requested
            from skimage import transform
            # Remember the input dtype so the rescaled result can be cast back to it
            source_dtype = tile.dtype
            # Transpose to (h, w, cyc, z, ch) and then back
            hw_first = np.transpose(tile, (3, 4, 0, 1, 2))
            # Only the two leading (h, w) axes get the scale factor; the rest use 1
            # NOTE(review): `multichannel` was deprecated in favor of `channel_axis` in
            # scikit-image 0.19 -- kept unchanged here; confirm against the pinned version
            rescaled = transform.rescale(
                hw_first, scale=(scale, scale, 1, 1, 1),
                anti_aliasing=True, multichannel=False, preserve_range=True,
                mode='constant', order=0)
            tile = np.transpose(rescaled, (2, 3, 4, 0, 1)).astype(source_dtype)
        return tile

    core.create_montage(self.data_dir, self.config, extract_name, name, region_indexes, prep_fn=prep_fn)
def extract(self, name, channels, z='best', region_indexes=None, tile_indexes=None, raw_dir=None):
    """Create a new data extraction include either raw, processed, or cytometric imaging data

    Args:
        name: Name of extraction to be created; This will be used to construct result path like
            EXP_DIR/output/extract/`name`
        channels: List of strings indicating channel names (case-insensitive) prefixed by source for that
            channel (e.g. proc_DAPI, raw_CD4, cyto_nucleus_boundary); Available sources are:
            - "raw": Raw data images
            - "proc": Data generated as a results of preprocessing
            - "cyto": Cytometric object data (nuclei and cell boundaries)
        z: String or 1-based index selector for z indexes constructed as any of the following:
            - "best": Indicates that z slices should be inferred based on focal quality (default option)
            - "all": Indicates that a slice for all z-planes should be used
            - str or int: A single value will be interpreted as a single index
            - tuple: A 2-item or 3-item tuple forming the slice (start, stop[, step]); stop is inclusive
            - list: A list of integers will be used as is
        region_indexes: 1-based sequence of region indexes to process; can be specified as:
            - None: Region indexes will be inferred from experiment configuration
            - str or int: A single value will be interpreted as a single index
            - tuple: A 2-item or 3-item tuple forming the slice (start, stop[, step]); stop is inclusive
            - list: A list of integers will be used as is
        tile_indexes: 1-based sequence of tile indexes to process; has same semantics as `region_indexes`
        raw_dir: If using any channels sourced from raw data, this directory must be specified and should
            be equivalent to the same raw directory used during processing (i.e. nearly all operations like
            this are run relative to an `output_dir` -- the result of processing -- but in this case the
            original raw data path is needed as well)
    Raises:
        ValueError: If any requested channel comes from the raw source and `raw_dir` is not provided
    """
    channel_map = _map_channels(self.config, channels).groupby('source')
    channel_sources = sorted(channel_map.groups.keys())

    z_slice_fn = _get_z_slice_fn(z, self.data_dir)
    region_indexes = cli.resolve_index_list_arg(region_indexes, zero_based=True)
    tile_indexes = cli.resolve_index_list_arg(tile_indexes, zero_based=True)

    logging.info('Creating extraction "%s"', name)

    tile_locations = _get_tile_locations(self.config, region_indexes, tile_indexes)

    # Validate once, before any tile is loaded, rather than inside the per-tile/per-source loop
    # (where the problem would only surface after other sources had already been read)
    if CH_SRC_RAW in channel_sources and not raw_dir:
        raise ValueError(
            'When extracting raw data channels, the `raw_dir` argument must be provided'
        )

    n_tiles = len(tile_locations)
    extract_path = None
    for i, loc in enumerate(tile_locations):
        logging.info('Extracting tile %s of %s', i + 1, n_tiles)
        extract_tile = []

        # Resolve the z selection (slice or index list) used to crop extracted volumes for this tile
        z_slice = z_slice_fn(loc.region_index, loc.tile_x, loc.tile_y)

        slice_labels = []
        for src in channel_sources:
            # Initialize tile generator for this data source (which are all the same except
            # for when using raw data, which does not have pre-assembled tiles available)
            is_raw = src == CH_SRC_RAW
            tile_gen_dir = raw_dir if is_raw else self.data_dir
            tile_gen_mode = 'raw' if is_raw else 'stack'
            generator = tile_generator.CytokitTileGenerator(
                self.config, tile_gen_dir, loc.region_index, loc.tile_index,
                mode=tile_gen_mode, path_fmt_name=PATH_FMT_MAP[src])
            tile = generator.run(None)

            # Crop raw images if necessary
            if is_raw:
                tile = tile_crop.CytokitTileCrop(self.config).run(tile)

            # Sort channels by name to make extract channel order deterministic
            for _, r in channel_map.get_group(src).sort_values('channel_name').iterrows():
                # Extract (z, h, w) subtile
                sub_tile = tile[r['cycle_index'], z_slice, r['channel_index']]
                logging.debug(
                    'Extraction for cycle %s, channel %s (%s), z slice %s, source "%s" complete (tile shape = %s)',
                    r['cycle_index'], r['channel_index'], r['channel_name'], z_slice, src, sub_tile.shape)
                assert sub_tile.ndim == 3, \
                    'Expecting sub_tile to have 3 dimensions but got shape {}'.format(sub_tile.shape)
                slice_labels.append('{}_{}'.format(src, r['channel_name']))
                extract_tile.append(sub_tile)

        # Stack the subtiles to give array with shape (z, channels, h, w) and then reshape to 5D
        # format like (cycles, z, channels, h, w)
        extract_tile = np.stack(extract_tile, axis=1)[np.newaxis]
        assert extract_tile.ndim == 5, \
            'Expecting extract tile to have 5 dimensions but got shape {}'.format(extract_tile.shape)

        extract_path = cytokit_io.get_extract_image_path(loc.region_index, loc.tile_x, loc.tile_y, name)
        extract_path = osp.join(self.data_dir, extract_path)
        logging.debug('Saving tile with shape %s (dtype = %s) to "%s"',
                      extract_tile.shape, extract_tile.dtype, extract_path)

        # Construct slice labels as repeats across z-dimension (there is only one time/cycle dimension)
        slice_label_tags = ij_utils.get_channel_label_tags(slice_labels, z=extract_tile.shape[1], t=1)
        cytokit_io.save_tile(extract_path, extract_tile, config=self.config,
                             infer_labels=False, extratags=slice_label_tags)

    # `extract_path` is still None when no tile locations were selected
    logging.info('Extraction complete (results saved to %s)',
                 osp.dirname(extract_path) if extract_path else None)
def run(self, output_dir,
        # Data subsets to process
        region_indexes=None, tile_indexes=None,
        # Execution parameters
        n_workers=None, gpus=None, memory_limit=48e9, tile_prefetch_capacity=1,
        # Processing flags
        run_tile_generator=True, run_crop=True, run_resize=False, run_deconvolution=False,
        run_best_focus=False, run_drift_comp=False, run_summary=False, run_cytometry=False,
        run_illumination_correction=False, run_spectral_unmixing=False,
        # Bookkeeping
        record_execution=True):
    """Run processing and cytometry pipeline

    This application can execute the following operations on either raw or already processed data:
        - Drift compensation
        - Deconvolution
        - Selection of best focal planes within z-stacks
        - Cropping of tile overlap
        - Cell segmentation and quantification
        - Illumination correction
        - Spectral Unmixing

    Nothing beyond an input data directory and an output directory are required (see arguments
    below), but GPU information should be provided via the `gpus` argument to ensure that all
    present devices are utilized.  Otherwise, all arguments have reasonable defaults that should
    only need to be changed in special scenarios.

    Args:
        output_dir: Directory to save results in; will be created if it does not exist
        region_indexes: 1-based sequence of region indexes to process; can be specified as:
            - None: Region indexes will be inferred from experiment configuration
            - str or int: A single value will be interpreted as a single index
            - tuple: A two-item tuple will be interpreted as a right-open range
                (e.g. '(1,4)' --> [1, 2, 3])
            - list: A list of integers will be used as is
            NOTE(review): other commands in this file describe the tuple `stop` as inclusive;
            the real behavior is whatever `cli.resolve_index_list_arg` implements -- confirm
        tile_indexes: 1-based sequence of tile indexes to process; has same semantics as `region_indexes`
        n_workers: Number of tiles to process in parallel; should generally match number of gpus and if
            the `gpus` argument is given, then the length of that list will be used as a default (otherwise
            default is 1)
        gpus: 0-based list of gpu indexes to use for processing; has same semantics as other integer
            list arguments like `region_indexes` and `tile_indexes` (i.e. can be a scalar, list, or 2-tuple)
        memory_limit: Maximum amount of memory to allow per-worker; defaults to 48G
        tile_prefetch_capacity: Number of input tiles to buffer into memory for processing; default is 1
            which is nearly always good as this means one tile will undergo processing while a second
            is buffered into memory asynchronously
        run_tile_generator: Flag indicating whether or not the source data to be processed is from
            un-assembled single images (typically raw microscope images) or from already assembled
            tiles (which would be the case if this pipeline has already been run once on raw source data)
        run_crop: Flag indicating whether or not overlapping pixels in raw images should be cropped off;
            this should generally only apply to raw images but will have no effect if images already
            appear to be cropped (though an annoying warning will be printed in that case so this should
            be set to False if not running on raw images with overlap)
        run_resize: Flag indicating whether or not 3D images should be resized for all further processing;
            if `run_crop` is True then this will operate on cropped volumes.  This can be useful for
            downsampling images to accelerate later steps like deconvolution
        run_deconvolution: Flag indicating whether or not to run deconvolution
        run_best_focus: Flag indicating that best focal plan selection operations should be executed
        run_drift_comp: Flag indicating that drift compensation should be executed
        run_summary: Flag indicating that tile summary statistics should be computed (eg mean, max, min, etc)
        run_cytometry: Flag indicating whether or not image tiles should be segmented and quantified
        run_illumination_correction: Flag indicating whether or not image tiles and cytometry data should be
            adjusted according to global illumination patterns across entire regions
        run_spectral_unmixing: Flag indicating whether or not cross-talk between fluorescent channels
            should be corrected via blind spectral unmixing
        record_execution: Flag indicating whether or not to store arguments and environment in
            a file within the output directory; defaults to True
    """
    # Persist the invocation details first so they exist even if a later step fails
    if record_execution:
        record_path = cli.record_execution(output_dir)
        logging.info('Execution arguments and environment saved to "%s"', record_path)

    # Normalize the arguments that accept several input forms
    region_indexes = cli.resolve_index_list_arg(region_indexes)
    tile_indexes = cli.resolve_index_list_arg(tile_indexes)
    gpus = cli.resolve_int_list_arg(gpus)

    # Derive the worker count when the caller left it unset
    if n_workers is None:
        if gpus is None:
            # Nothing is known about available gpus, so fall back to one worker
            n_workers = 1
        else:
            n_workers = len(gpus)

    # Gather the per-operation switches and hand them to the pipeline as keywords
    flag_values = dict(
        run_crop=run_crop,
        run_resize=run_resize,
        run_deconvolution=run_deconvolution,
        run_best_focus=run_best_focus,
        run_drift_comp=run_drift_comp,
        run_summary=run_summary,
        run_tile_generator=run_tile_generator,
        run_cytometry=run_cytometry,
        run_illumination_correction=run_illumination_correction,
        run_spectral_unmixing=run_spectral_unmixing,
    )
    flags = pipeline.OpFlags(**flag_values)

    pipeline_config = pipeline.PipelineConfig(
        self.config, region_indexes, tile_indexes, self.data_dir, output_dir,
        n_workers, gpus, memory_limit, flags,
        tile_prefetch_capacity=tile_prefetch_capacity
    )
    pipeline.run(pipeline_config, logging_init_fn=self._logging_init_fn)