예제 #1
0
def _get_z_slice_fn(z, data_dir):
    """Get array slice map to be applied to z dimension

    Args:
        z: String or 1-based index selector for z indexes constructed as any of the following:
            - "best": Indicates that z slices should be inferred based on focal quality
            - "all": Indicates that a slice for all z-planes should be used
            - str or int: A single value will be interpreted as a single index
            - tuple: A 2-item or 3-item tuple forming the slice (start, stop[, step]); stop is inclusive
            - list: A list of integers will be used as is
        data_dir: Data directory necessary to infer 'best' z planes
    Returns:
        A function with signature (region_index, tile_x, tile_y) -> slice_for_array where slice_for_array
        will either be a slice instance or a list of z-indexes (Note: all indexes are 0-based)
    """
    if not z:
        raise ValueError(
            'Z slice cannot be defined as empty value (given = {})'.format(z))

    # Look for keyword strings
    if isinstance(z, str) and z == 'best':
        map = function_data.get_best_focus_coord_map(data_dir)
        return lambda ri, tx, ty: [map[(ri, tx, ty)]]
    if isinstance(z, str) and z == 'all':
        return lambda ri, tx, ty: slice(None)

    # Parse argument as 1-based index list and then convert to 0-based
    zi = cli.resolve_index_list_arg(z, zero_based=True)
    return lambda ri, tx, ty: zi
예제 #2
0
    def montage(self, name, extract_name, region_indexes=None, crop=None):
        """Create a montage of extracted tiles

        Args:
            name: Name of montage to be created; This will be used to construct result path like
                EXP_DIR/output/montage/`name`
            extract_name: Name of extract to use to generate montage
            region_indexes: 1-based sequence of region indexes to process; can be specified as:
                - None: Region indexes will be inferred from experiment configuration
                - str or int: A single value will be interpreted as a single index
                - tuple: A 2-item or 3-item tuple forming the slice (start, stop[, step]); stop is inclusive
                - list: A list of integers will be used as is
            crop: Either None (default) or a 4-item list in the format (y_start, y_end, x_start, x_end) as
                bounding indices (0-based); when given, a preparation function slicing the last two axes of
                a 5D array with these bounds is passed to `core.create_montage` (this is helpful for
                generating more reasonably sized montage subsets over large datasets)
        """
        logging.info('Creating montage "%s" from extraction "%s"', name,
                     extract_name)
        region_indexes = cli.resolve_index_list_arg(region_indexes,
                                                    zero_based=True)

        # No preparation step is supplied unless cropping was requested
        prep_fn = None
        if crop is not None:

            def prep_fn(tile):
                # Keep the first three axes whole and cut only the trailing (y, x) axes
                return tile[:, :, :, crop[0]:crop[1], crop[2]:crop[3]]

        core.create_montage(self.data_dir,
                            self.config,
                            extract_name,
                            name,
                            region_indexes,
                            prep_fn=prep_fn)
예제 #3
0
    def montage(self,
                name,
                extract_name,
                region_indexes=None,
                crop=None,
                scale=None):
        """Assemble a montage from the tiles of an existing extraction

        Args:
            name: Name of the montage to create; used to construct a result path like
                EXP_DIR/output/montage/`name`
            extract_name: Name of the extract whose tiles are used to generate the montage
            region_indexes: 1-based sequence of region indexes to process; can be specified as:
                - None: Region indexes will be inferred from experiment configuration
                - str or int: A single value will be interpreted as a single index
                - tuple: A 2-item or 3-item tuple forming the slice (start, stop[, step]); stop is inclusive
                - list: A list of integers will be used as is
            crop: Either None (default) or a 4-item list in the format (y_start, y_end, x_start, x_end) as
                bounding indices (0-based) used to slice the last two axes of each array given to the
                preparation function (this is helpful for generating more reasonably sized montage
                subsets over large datasets)
            scale: Either None (default) or a float, intended to be in (0, 1] (not validated here), used as
                the scale factor for the XY dimensions (note that if this is supplied in addition to
                cropping, it is applied afterwards)
        """
        logging.info('Creating montage "%s" from extraction "%s"', name,
                     extract_name)
        regions = cli.resolve_index_list_arg(region_indexes, zero_based=True)

        def _prepare(tile):
            # Cropping comes first so that any rescaling only touches the retained pixels
            if crop is not None:
                y_bounds = slice(crop[0], crop[1])
                x_bounds = slice(crop[2], crop[3])
                tile = tile[..., y_bounds, x_bounds]
            if scale is None:
                return tile

            from skimage import transform
            # Move the spatial axes to the front, i.e. (h, w, cyc, z, ch), rescale only those
            # two axes and then restore the original axis order and dtype
            spatial_first = np.transpose(tile, (3, 4, 0, 1, 2))
            rescaled = transform.rescale(spatial_first,
                                         scale=(scale, scale, 1, 1, 1),
                                         anti_aliasing=True,
                                         multichannel=False,
                                         preserve_range=True,
                                         mode='constant',
                                         order=0)
            restored = np.transpose(rescaled, (2, 3, 4, 0, 1))
            return restored.astype(tile.dtype)

        core.create_montage(self.data_dir,
                            self.config,
                            extract_name,
                            name,
                            regions,
                            prep_fn=_prepare)
예제 #4
0
    def extract(self,
                name,
                channels,
                z='best',
                region_indexes=None,
                tile_indexes=None,
                raw_dir=None):
        """Build a new data extraction from raw, processed, and/or cytometric imaging data

        One 5D tile is saved per selected tile location, holding the requested channels
        restricted to the requested z planes.

        Args:
            name: Name of extraction to be created; This will be used to construct result path like
                EXP_DIR/output/extract/`name`
            channels: List of strings indicating channel names (case-insensitive) prefixed by source for that
                channel (e.g. proc_DAPI, raw_CD4, cyto_nucleus_boundary); Available sources are:
                - "raw": Raw data images
                - "proc": Data generated as a result of preprocessing
                - "cyto": Cytometric object data (nuclei and cell boundaries)
            z: String or 1-based index selector for z indexes constructed as any of the following:
                - "best": Indicates that z slices should be inferred based on focal quality (default option)
                - "all": Indicates that a slice for all z-planes should be used
                - str or int: A single value will be interpreted as a single index
                - tuple: A 2-item or 3-item tuple forming the slice (start, stop[, step]); stop is inclusive
                - list: A list of integers will be used as is
            region_indexes: 1-based sequence of region indexes to process; can be specified as:
                - None: Region indexes will be inferred from experiment configuration
                - str or int: A single value will be interpreted as a single index
                - tuple: A 2-item or 3-item tuple forming the slice (start, stop[, step]); stop is inclusive
                - list: A list of integers will be used as is
            tile_indexes: 1-based sequence of tile indexes to process; has same semantics as `region_indexes`
            raw_dir: If using any channels sourced from raw data, this directory must be specified and should
                be equivalent to the same raw directory used during processing (i.e. nearly all operations like
                this are run relative to an `output_dir` -- the result of processing -- but in this case
                the original raw data path is needed as well)
        Raises:
            ValueError: If a raw-sourced channel is requested without `raw_dir`
        """
        channels_by_source = _map_channels(self.config,
                                           channels).groupby('source')
        sources = sorted(list(channels_by_source.groups.keys()))

        select_z = _get_z_slice_fn(z, self.data_dir)
        region_indexes = cli.resolve_index_list_arg(region_indexes,
                                                    zero_based=True)
        tile_indexes = cli.resolve_index_list_arg(tile_indexes,
                                                  zero_based=True)

        logging.info('Creating extraction "%s"', name)

        locations = _get_tile_locations(self.config, region_indexes,
                                        tile_indexes)

        # Path of the most recently saved tile; stays None when nothing was extracted
        saved_path = None
        for position, loc in enumerate(locations, start=1):
            logging.info('Extracting tile {} of {}'.format(
                position, len(locations)))

            # Resolve the z selection to apply to every volume extracted for this tile
            z_slice = select_z(loc.region_index, loc.tile_x, loc.tile_y)

            sub_tiles = []
            labels = []
            for src in sources:
                from_raw = src == CH_SRC_RAW
                if from_raw and not raw_dir:
                    raise ValueError(
                        'When extracting raw data channels, the `raw_dir` argument must be provided'
                    )

                # Every source reads pre-assembled stacks from the data directory except raw
                # data, which is read from `raw_dir` in 'raw' mode instead
                generator = tile_generator.CytokitTileGenerator(
                    self.config,
                    raw_dir if from_raw else self.data_dir,
                    loc.region_index,
                    loc.tile_index,
                    mode='raw' if from_raw else 'stack',
                    path_fmt_name=PATH_FMT_MAP[src])
                tile = generator.run(None)

                # Raw tiles additionally go through the crop operation
                if from_raw:
                    tile = tile_crop.CytokitTileCrop(self.config).run(tile)

                # Visit channels in name order so the extract channel order is deterministic
                ordered_rows = channels_by_source.get_group(src).sort_values(
                    'channel_name')
                for _, row in ordered_rows.iterrows():
                    cycle_index = row['cycle_index']
                    channel_index = row['channel_index']

                    # Pull out the (z, h, w) volume for this cycle/channel pair
                    sub_tile = tile[cycle_index, z_slice, channel_index]
                    logging.debug(
                        'Extraction for cycle %s, channel %s (%s), z slice %s, source "%s" complete (tile shape = %s)',
                        cycle_index, channel_index, row['channel_name'],
                        z_slice, src, sub_tile.shape)
                    assert sub_tile.ndim == 3, \
                        'Expecting sub_tile to have 3 dimensions but got shape {}'.format(sub_tile.shape)
                    labels.append('{}_{}'.format(src, row['channel_name']))
                    sub_tiles.append(sub_tile)

            # Stacking along axis 1 gives (z, channels, h, w); the new leading axis then makes
            # the 5D layout (cycles, z, channels, h, w) with a single cycle
            extract_tile = np.stack(sub_tiles, axis=1)[np.newaxis]
            assert extract_tile.ndim == 5, \
                'Expecting extract tile to have 5 dimensions but got shape {}'.format(extract_tile.shape)

            relative_path = cytokit_io.get_extract_image_path(
                loc.region_index, loc.tile_x, loc.tile_y, name)
            saved_path = osp.join(self.data_dir, relative_path)
            logging.debug('Saving tile with shape %s (dtype = %s) to "%s"',
                          extract_tile.shape, extract_tile.dtype, saved_path)

            # Labels are built for the saved z extent with a single time/cycle point
            label_tags = ij_utils.get_channel_label_tags(
                labels, z=extract_tile.shape[1], t=1)
            cytokit_io.save_tile(saved_path,
                                 extract_tile,
                                 config=self.config,
                                 infer_labels=False,
                                 extratags=label_tags)

        logging.info('Extraction complete (results saved to %s)',
                     osp.dirname(saved_path) if saved_path else None)
예제 #5
0
    def run(self,
            output_dir,

            # Data subsets to process
            region_indexes=None,
            tile_indexes=None,

            # Execution parameters
            n_workers=None,
            gpus=None,
            memory_limit=48e9,
            tile_prefetch_capacity=1,

            # Processing flags
            run_tile_generator=True,
            run_crop=True,
            run_resize=False,
            run_deconvolution=False,
            run_best_focus=False,
            run_drift_comp=False,
            run_summary=False,
            run_cytometry=False,
            run_illumination_correction=False,
            run_spectral_unmixing=False,

            # Bookkeeping
            record_execution=True):
        """Run processing and cytometry pipeline

        This application can execute the following operations on either raw or already processed data:
            - Drift compensation
            - Deconvolution
            - Selection of best focal planes within z-stacks
            - Cropping of tile overlap
            - Cell segmentation and quantification
            - Illumination correction
            - Spectral Unmixing

        Nothing beyond an input data directory and an output directory are required (see arguments
        below), but GPU information should be provided via the `gpus` argument to ensure that
        all present devices are utilized.  Otherwise, all arguments have reasonable defaults that
        should only need to be changed in special scenarios.

        Args:
            output_dir: Directory to save results in; will be created if it does not exist
            region_indexes: 1-based sequence of region indexes to process; can be specified as:
                - None: Region indexes will be inferred from experiment configuration
                - str or int: A single value will be interpreted as a single index
                - tuple: A two-item tuple will be interpreted as a right-open range (e.g. '(1,4)' --> [1, 2, 3])
                - list: A list of integers will be used as is
            tile_indexes: 1-based sequence of tile indexes to process; has same semantics as `region_indexes`
            n_workers: Number of tiles to process in parallel; should generally match number of gpus and if
                the `gpus` argument is given, then the length of that list will be used as a default (otherwise
                default is 1)
            gpus: 0-based list of gpu indexes to use for processing; has same semantics as other integer
                list arguments like `region_indexes` and `tile_indexes` (i.e. can be a scalar, list, or 2-tuple)
            memory_limit: Maximum amount of memory to allow per-worker; defaults to 48G
            tile_prefetch_capacity: Number of input tiles to buffer into memory for processing; default is 1
                which is nearly always good as this means one tile will undergo processing while a second
                is buffered into memory asynchronously
            run_tile_generator: Flag indicating whether or not the source data to be processed is from un-assembled
                single images (typically raw microscope images) or from already assembled tiles (which would be the
                case if this pipeline has already been run once on raw source data)
            run_crop: Flag indicating whether or not overlapping pixels in raw images should be cropped off; this
                should generally only apply to raw images but will have no effect if images already appear to be
                cropped (though an annoying warning will be printed in that case so this should be set to False
                if not running on raw images with overlap)
            run_resize: Flag indicating whether or not 3D images should be resized for all further processing;
                if `run_crop` is True then this will operate on cropped volumes.  This can be useful for downsampling
                images to accelerate later steps like deconvolution
            run_deconvolution: Flag indicating whether or not to run deconvolution
            run_best_focus: Flag indicating that best focal plane selection operations should be executed
            run_drift_comp: Flag indicating that drift compensation should be executed
            run_summary: Flag indicating that tile summary statistics should be computed (eg mean, max, min, etc)
            run_cytometry: Flag indicating whether or not image tiles should be segmented and quantified
            run_illumination_correction: Flag indicating whether or not image tiles and cytometry data should be
                adjusted according to global illumination patterns across entire regions
            run_spectral_unmixing: Flag indicating whether or not cross-talk between fluorescent channels should
                be corrected via blind spectral unmixing
            record_execution: Flag indicating whether or not to store arguments and environment in
                a file within the output directory; defaults to True
        """
        # Record the execution environment and arguments before anything else is resolved
        if record_execution:
            record_path = cli.record_execution(output_dir)
            logging.info('Execution arguments and environment saved to "%s"', record_path)

        # Normalize the arguments that accept several input forms
        regions = cli.resolve_index_list_arg(region_indexes)
        tiles = cli.resolve_index_list_arg(tile_indexes)
        gpu_ids = cli.resolve_int_list_arg(gpus)

        # Without an explicit worker count, use one worker per gpu, or a single
        # worker when no gpus were specified
        if n_workers is None:
            n_workers = 1 if gpu_ids is None else len(gpu_ids)

        # Collect the operation switches and hand them to the pipeline as one object
        flag_values = dict(
            run_crop=run_crop,
            run_resize=run_resize,
            run_deconvolution=run_deconvolution,
            run_best_focus=run_best_focus,
            run_drift_comp=run_drift_comp,
            run_summary=run_summary,
            run_tile_generator=run_tile_generator,
            run_cytometry=run_cytometry,
            run_illumination_correction=run_illumination_correction,
            run_spectral_unmixing=run_spectral_unmixing,
        )
        op_flags = pipeline.OpFlags(**flag_values)

        pl_config = pipeline.PipelineConfig(
            self.config, regions, tiles, self.data_dir, output_dir,
            n_workers, gpu_ids, memory_limit, op_flags,
            tile_prefetch_capacity=tile_prefetch_capacity
        )
        pipeline.run(pl_config, logging_init_fn=self._logging_init_fn)