Beispiel #1
0
def load_tiles(q, task_config):
    """Load each configured tile and place it on a queue.

    Args:
        q: Queue-like object; every loaded tile is added through
            ``q.put((tile, region_index, tile_index), block=True, timeout=TIMEOUT)``
        task_config: Task configuration supplying ``op_flags``, ``exp_config``, ``data_dir``,
            ``region_indexes`` and ``tile_indexes`` (the latter two are iterated in lockstep)
    """
    # TILE_GEN_MODE_RAW is selected when the `run_tile_generator` flag is set,
    # TILE_GEN_MODE_STACK otherwise
    mode = (tile_generator.TILE_GEN_MODE_RAW
            if task_config.op_flags.run_tile_generator
            else tile_generator.TILE_GEN_MODE_STACK)

    index_pairs = zip(task_config.region_indexes, task_config.tile_indexes)
    for region_idx, tile_idx in index_pairs:
        tile_op = tile_generator.CytokitTileGenerator(
            task_config.exp_config,
            task_config.data_dir,
            region_idx,
            tile_idx,
            mode=mode)
        # The queue insertion happens while the generator op is still open
        with tile_op as op:
            loaded = op.run(None)
            # Indexes are logged with 1 added (presumably to present them as 1-based)
            logger.info('Loaded tile %s for region %s [shape = %s]',
                        tile_idx + 1, region_idx + 1, loaded.shape)
            q.put((loaded, region_idx, tile_idx), block=True, timeout=TIMEOUT)
Beispiel #2
0
    def extract(self,
                name,
                channels,
                z='best',
                region_indexes=None,
                tile_indexes=None,
                raw_dir=None):
        """Create a new data extraction including either raw, processed, or cytometric imaging data

        Args:
            name: Name of extraction to be created; This will be used to construct result path like
                EXP_DIR/output/extract/`name`
            channels: List of strings indicating channel names (case-insensitive) prefixed by source for that
                channel (e.g. proc_DAPI, raw_CD4, cyto_nucleus_boundary); Available sources are:
                - "raw": Raw data images
                - "proc": Data generated as a result of preprocessing
                - "cyto": Cytometric object data (nuclei and cell boundaries)
            z: String or 1-based index selector for z indexes constructed as any of the following:
                - "best": Indicates that z slices should be inferred based on focal quality (default option)
                - "all": Indicates that a slice for all z-planes should be used
                - str or int: A single value will be interpreted as a single index
                - tuple: A 2-item or 3-item tuple forming the slice (start, stop[, step]); stop is inclusive
                - list: A list of integers will be used as is
            region_indexes: 1-based sequence of region indexes to process; can be specified as:
                - None: Region indexes will be inferred from experiment configuration
                - str or int: A single value will be interpreted as a single index
                - tuple: A 2-item or 3-item tuple forming the slice (start, stop[, step]); stop is inclusive
                - list: A list of integers will be used as is
            tile_indexes: 1-based sequence of tile indexes to process; has same semantics as `region_indexes`
            raw_dir: If using any channels sourced from raw data, this directory must be specified and should
                be equivalent to the same raw directory used during processing (i.e. nearly all operations like
                this are run relative to an `output_dir` -- the result of processing -- but in this case
                the original raw data path is needed as well)

        Raises:
            ValueError: If any requested channel is sourced from raw data and `raw_dir` is not provided
        """
        channel_map = _map_channels(self.config, channels).groupby('source')
        channel_sources = sorted(channel_map.groups.keys())

        # Fail fast on a missing raw directory rather than discovering it part-way through
        # the tile loop, after other sources have already been loaded
        if CH_SRC_RAW in channel_sources and not raw_dir:
            raise ValueError(
                'When extracting raw data channels, the `raw_dir` argument must be provided'
            )

        # Sort channels by name to make extract channel order deterministic; this ordering is
        # identical for every tile so it is computed once up front
        source_channels = {
            src: channel_map.get_group(src).sort_values('channel_name')
            for src in channel_sources
        }

        z_slice_fn = _get_z_slice_fn(z, self.data_dir)
        region_indexes = cli.resolve_index_list_arg(region_indexes,
                                                    zero_based=True)
        tile_indexes = cli.resolve_index_list_arg(tile_indexes,
                                                  zero_based=True)

        logging.info('Creating extraction "%s"', name)

        tile_locations = _get_tile_locations(self.config, region_indexes,
                                             tile_indexes)
        n_tiles = len(tile_locations)

        extract_path = None
        for i, loc in enumerate(tile_locations):
            logging.info('Extracting tile %s of %s', i + 1, n_tiles)
            extract_tile = []

            # Create function used to crop out z-slices from extracted volumes
            z_slice = z_slice_fn(loc.region_index, loc.tile_x, loc.tile_y)

            slice_labels = []
            for src in channel_sources:

                # Initialize tile generator for this data source (which are all the same except
                # for when using raw data, which does not have pre-assembled tiles available)
                is_raw = src == CH_SRC_RAW
                tile_gen_dir = raw_dir if is_raw else self.data_dir
                tile_gen_mode = 'raw' if is_raw else 'stack'

                # Run the generator as a context manager so its exit hook is always invoked,
                # even if loading the tile fails
                with tile_generator.CytokitTileGenerator(
                        self.config,
                        tile_gen_dir,
                        loc.region_index,
                        loc.tile_index,
                        mode=tile_gen_mode,
                        path_fmt_name=PATH_FMT_MAP[src]) as generator:
                    tile = generator.run(None)

                # Crop raw images if necessary
                if is_raw:
                    tile = tile_crop.CytokitTileCrop(self.config).run(tile)

                for _, r in source_channels[src].iterrows():

                    # Extract (z, h, w) subtile
                    sub_tile = tile[r['cycle_index'], z_slice,
                                    r['channel_index']]
                    logging.debug(
                        'Extraction for cycle %s, channel %s (%s), z slice %s, source "%s" complete (tile shape = %s)',
                        r['cycle_index'], r['channel_index'],
                        r['channel_name'], z_slice, src, sub_tile.shape)
                    assert sub_tile.ndim == 3, \
                        'Expecting sub_tile to have 3 dimensions but got shape {}'.format(sub_tile.shape)
                    slice_labels.append('{}_{}'.format(src, r['channel_name']))
                    extract_tile.append(sub_tile)

            # Stack the subtiles to give array with shape (z, channels, h, w) and then reshape to 5D
            # format like (cycles, z, channels, h, w)
            extract_tile = np.stack(extract_tile, axis=1)[np.newaxis]
            assert extract_tile.ndim == 5, \
                'Expecting extract tile to have 5 dimensions but got shape {}'.format(extract_tile.shape)

            extract_path = cytokit_io.get_extract_image_path(
                loc.region_index, loc.tile_x, loc.tile_y, name)
            extract_path = osp.join(self.data_dir, extract_path)
            logging.debug('Saving tile with shape %s (dtype = %s) to "%s"',
                          extract_tile.shape, extract_tile.dtype, extract_path)

            # Construct slice labels as repeats across z-dimension (there is only one time/cycle dimension)
            slice_label_tags = ij_utils.get_channel_label_tags(
                slice_labels, z=extract_tile.shape[1], t=1)
            cytokit_io.save_tile(extract_path,
                                 extract_tile,
                                 config=self.config,
                                 infer_labels=False,
                                 extratags=slice_label_tags)

        # `extract_path` is still None here when no tile locations were processed
        logging.info('Extraction complete (results saved to %s)',
                     osp.dirname(extract_path) if extract_path else None)
Beispiel #3
0
    def test_quantification(self):
        """Validate quantification of objects in the "Random Shapes" test dataset"""

        exp_dir = osp.join(cytokit.test_data_dir, 'experiment',
                           'random-shapes')
        config_path = osp.join(exp_dir, 'config', 'experiment.yaml')
        config = ck_config.load(config_path)
        config.register_environment()

        # Pull shape of grid (i.e. region)
        region_shape = config.region_height, config.region_width

        #########################
        # Load tiles and original
        #########################

        # Load each tile for the experiment
        tiles = [
            tile_generator.CytokitTileGenerator(config,
                                                osp.join(exp_dir, 'raw'),
                                                region_index=0,
                                                tile_index=i).run()
            for i in range(config.n_tiles_per_region)
        ]
        self.assertEqual(
            tiles[0].ndim, 5,
            'Expecting 5D tiles, got shape {}'.format(tiles[0].shape))

        # Load original image used to create individual tile images (i.e. at region scale) and compare
        # to a montage generated from the tiles just loaded
        img_mtv = ck_io.read_image(
            osp.join(exp_dir, 'validation', 'original_shapes_image.tif'))
        # Create montage from first channel (which contains object ids for reference)
        img_mtg = ck_core.montage(tiles, config)[0, 0, 0]
        assert_array_equal(img_mtg, img_mtv)

        # Classify objects as either free or on border
        # * First create a stacked image containing cleared tiles
        img_clr = np.stack(
            [segmentation.clear_border(t[0, 0, 0]) for t in tiles])
        # Split ids into 2 groups based on the cleared stack image
        ids = np.unique(img_mtg[img_mtg > 0])
        ids_free = np.setdiff1d(np.unique(img_clr), [0])
        ids_brdr = np.setdiff1d(ids, ids_free)
        # Check that the background id is not included in any of the above
        self.assertTrue(
            np.all(ids_free > 0) and np.all(ids_brdr > 0) and np.all(ids > 0))

        ####################
        # Run quantification
        ####################

        def create_segments(im):
            # Create segmentation images as (z, ch, h, w): the label image twice followed by
            # its inner boundaries twice, with a leading singleton z axis
            imb = segmentation.find_boundaries(im, mode='inner')
            segments = np.stack([im, im, imb, imb])[np.newaxis]
            assert segments.ndim == 4
            return segments

        # Quantify each tile image and concatenate results
        df = pd.concat([
            cytometer.CytometerBase.quantify(
                tiles[i],
                create_segments(tiles[i][0, 0, 0]),
                channel_names=config.channel_names,
                cell_intensity=['mean', 'median', 'sum', 'var'],
                nucleus_intensity=False,
                cell_graph=True,
                border_features=True,
                morphology_features=True).assign(tile_x=c,
                                                 tile_y=r,
                                                 tile_index=i)
            for i, (r, c) in enumerate(np.ndindex(region_shape))
        ])

        #########################
        # Validate quantification
        #########################

        # Ensure that all objects in original image are also present in cytometry data
        self.assertTrue(
            len(np.intersect1d(ids, df['id'].unique())) == len(ids),
            'Object ids expected do not match those found\nIds found: {}\nIds expected: {}'
            .format(sorted(df['id'].unique()), sorted(ids)))

        # Check that objects classified as on border or not are correct
        assert_array_equal(sorted(df[df['cb:on_border'] > 0]['id'].unique()),
                           sorted(ids_brdr))
        assert_array_equal(sorted(df[df['cb:on_border'] < 1]['id'].unique()),
                           sorted(ids_free))

        # Loop through objects identified and validate each one
        for _, row in df.iterrows():
            # `iterrows` yields each row as a single-dtype Series, so integer columns may be
            # upcast to float; cast explicitly before using the value as a list index
            tile_index = int(row['tile_index'])

            # Fetch tile image from tile list and make sure that size of cell returned matches that in image
            area = (tiles[tile_index][0, 0, 0] == row['id']).sum()
            self.assertEqual(row['cm:size'], area)

            # For each channel validate that:
            # - mean and median equal the id times channel index (1-based)
            # - sum equals area times id times channel index
            # - variance is 0
            for j, c in enumerate(config.channel_names):
                for f in ['mean', 'median']:
                    self.assertEqual(row['id'] * (j + 1),
                                     row['ci:{}:{}'.format(c, f)])
                self.assertEqual(row['id'] * (j + 1) * area,
                                 row['ci:{}:sum'.format(c)])
                self.assertEqual(0, row['ci:{}:var'.format(c)])