Ejemplo n.º 1
0
    def run(self, spots: SpotFindingResults, *args) -> DecodedIntensityTable:
        """
        Assign a target to spots according to the (round, ch) key they are stored under.

        A lookup table keyed by ``(round, ch)`` is built from every truthy codebook entry. Each
        item of ``spots`` then has its spot attributes annotated with the target found under its
        own key, or with the string 'nan' when the lookup table has no such key.

        Parameters
        ----------
        spots: SpotFindingResults
            A Dict of tile indices and their corresponding measured spots
        *args
            Accepted but not used by this method.

        Returns
        -------
        DecodedIntensityTable :
            IntensityTable decoded and appended with Features.TARGET and values.

        """
        codebook = self.codebook

        # (round, ch) -> target name; when several targets share a key the one iterated last
        # overwrites the earlier ones.
        round_ch_to_target: Dict[Tuple, str] = {}
        for code_target in codebook[Features.TARGET]:
            target_name = str(code_target.values)
            for ch in codebook[Axes.CH.value]:
                for rnd in codebook[Axes.ROUND.value]:
                    if not codebook.loc[code_target, rnd, ch]:
                        continue
                    round_ch_to_target[(int(rnd), int(ch))] = target_name

        # annotate the spot attributes stored under each key in place
        for tile_key, tile_spots in spots.items():
            tile_spots.spot_attrs.data[Features.TARGET] = round_ch_to_target.get(
                tile_key, 'nan')

        return DecodedIntensityTable(build_traces_sequential(spots))
Ejemplo n.º 2
0
    def _assign(
        masks: BinaryMaskCollection,
        decoded_intensities: DecodedIntensityTable,
        in_place: bool,
    ) -> DecodedIntensityTable:
        """
        Label each feature of ``decoded_intensities`` with the name of the mask that contains it.

        A ``Features.CELL_ID`` entry is created on ``decoded_intensities`` and initialised to the
        string 'nan' for every feature. For each mask, the features inside the mask's bounding
        box are selected, looked up in the mask, and those that fall on a set pixel have their
        cell id set to the mask's name.

        Parameters
        ----------
        masks : BinaryMaskCollection
            Iterated as ``(_, mask)`` pairs; each mask must expose ``y`` and ``x`` coordinates
            and may expose a z coordinate.
        decoded_intensities : DecodedIntensityTable
            Features to label. This object is modified and returned.
        in_place : bool
            Not read by this function; ``decoded_intensities`` is always modified directly.

        Returns
        -------
        DecodedIntensityTable :
            The same ``decoded_intensities`` object with ``Features.CELL_ID`` filled in.

        Warns
        -----
        DeprecationWarning
            Once for every mask that has no z coordinate.

        """
        # every feature starts out unassigned; cell ids are stored as fixed-width 8-character
        # unicode strings
        cell_ids = (Features.AXIS,
                    np.full(decoded_intensities.sizes[Features.AXIS],
                            fill_value='nan',
                            dtype='<U8'))

        decoded_intensities[Features.CELL_ID] = cell_ids

        for _, mask in masks:
            # masks without a z coordinate are matched on y/x only, with an unbounded z range
            has_z_data = Axes.ZPLANE.value in mask.coords
            if has_z_data:
                z_min, z_max = float(mask.z.min()), float(mask.z.max())
            else:
                warnings.warn(
                    "AssignTargets will require 3D masks in the future.",
                    DeprecationWarning)
                # np.NINF was removed in NumPy 2.0; -np.inf is the portable spelling
                z_min, z_max = -np.inf, np.inf
            y_min, y_max = float(mask.y.min()), float(mask.y.max())
            x_min, x_max = float(mask.x.min()), float(mask.x.max())

            # restrict to the features lying inside the mask's bounding box so that the
            # coordinate lookup into the mask below only sees coordinates within its extent
            in_bbox = decoded_intensities.where(
                (decoded_intensities.z >= z_min)
                & (decoded_intensities.z <= z_max)
                & (decoded_intensities.y >= y_min)
                & (decoded_intensities.y <= y_max)
                & (decoded_intensities.x >= x_min)
                & (decoded_intensities.x <= x_max),
                drop=True)

            # look up the mask value at each remaining feature's coordinates
            selectors = {'y': in_bbox.y, 'x': in_bbox.x}
            if has_z_data:
                selectors['z'] = in_bbox.z
            in_mask = mask.sel(**selectors)

            # features whose mask value is set are assigned to this mask
            spot_ids = in_bbox[Features.SPOT_ID][in_mask.values]
            decoded_intensities[Features.CELL_ID].loc[decoded_intensities[
                Features.SPOT_ID].isin(spot_ids)] = mask.name

        return decoded_intensities
Ejemplo n.º 3
0
    def decode_per_round_max(
            self, intensities: IntensityTable) -> DecodedIntensityTable:
        """
        Decode features by matching their per-round brightest channel against the codebook.

        For every feature the channel with the maximum intensity is found in each round. The
        resulting per-round channel pattern is compared with the per-round argmax pattern of
        every code, and the feature receives the target of a code whose pattern is identical.

        This method is only compatible with one-hot codebooks, where exactly one channel is
        expected to contain fluorescence in each imaging round.

        Notes
        -----
        - A feature whose pattern matches no code is assigned 'nan' instead of a target value.
        - Rounds in which all channels of a feature have the same value (after NaNs are replaced
          by 0) are marked with a channel index one past the last channel.
        - Numpy's argmax breaks ties by picking the first of the tied values -- this can lead to
          unexpected results in low-precision images where some features with "tied" channels
          will decode, but others will be assigned 'nan'.

        Parameters
        ----------
        intensities : IntensityTable
            features to be decoded

        Returns
        -------
        DecodedIntensityTable :
            intensity table containing additional data variables for targets, distances and
            the pass/fail status of each feature

        """
        def _rows_as_scalars(array: np.ndarray) -> np.ndarray:
            """Reinterpret each row of a 2-dimensional array as one structured element.

            Giving every column its own field of a structured dtype lets whole rows (codes) be
            compared for equality with a single ``==``.

            Parameters
            ----------
            array : np.ndarray
                2-dimensional numpy array whose rows are either features or codes.

            Returns
            -------
            np.ndarray :
                view of ``array`` with one structured element per row

            """
            # unpacking into two names also rejects anything that is not 2-dimensional
            _, n_columns = array.shape
            field_names = [f'f{column}' for column in range(n_columns)]
            row_dtype = {
                'names': field_names,
                'formats': [array.dtype] * n_columns,
            }
            return array.view(row_dtype)

        self._validate_decode_intensity_input_matches_codebook_shape(
            intensities)

        # nothing to decode: attach empty metadata fields and return
        if intensities.sizes[Features.AXIS] == 0:
            return DecodedIntensityTable.from_intensity_table(
                intensities,
                targets=(Features.AXIS, np.empty(0, dtype='U')),
                distances=(Features.AXIS, np.empty(0, dtype=np.float64)),
                passes_threshold=(Features.AXIS, np.empty(0, dtype=bool)))

        ch_axis = Axes.CH.value
        n_features = intensities.shape[0]

        filled = intensities.fillna(0)
        brightest_ch = filled.argmax(ch_axis)

        # (feature, round) entries where every channel holds the same value have no meaningful
        # maximum; they get a channel number one past the largest valid one so that they
        # cannot match any code
        per_round_max = filled.reduce(np.amax, ch_axis)
        per_round_min = filled.reduce(np.amin, ch_axis)
        is_uniform = (per_round_max == per_round_min).values
        brightest_ch.values[is_uniform] = intensities.sizes[ch_axis]

        codes = self.argmax(ch_axis)

        # TODO ambrosejcarr, dganguli: explore this quality score further
        # distance score: mean over rounds of the fraction of each round's signal that lies
        # outside the maximal channel
        max_intensities = intensities.max(ch_axis)
        round_intensities = intensities.sum(ch_axis)
        max_fraction = max_intensities / round_intensities
        distance = 1 - max_fraction.mean(Axes.ROUND.value)

        code_rows = _rows_as_scalars(codes.values.reshape(self.shape[0], -1))
        feature_rows = _rows_as_scalars(
            brightest_ch.values.reshape(n_features, -1))

        targets = np.full(n_features, fill_value=np.nan, dtype=object)

        # decode: every feature row equal to a code row receives that code's target
        code_targets = codes[Features.TARGET]
        for code_index in range(codes.shape[0]):
            matching_features = np.where(
                code_rows[code_index] == feature_rows)[0]
            targets[matching_features] = code_targets[code_index]

        # a feature passes filters if it decoded successfully
        passes_filters = ~pd.isnull(targets)

        return DecodedIntensityTable.from_intensity_table(
            intensities,
            targets=(Features.AXIS, targets.astype('U')),
            distances=(Features.AXIS, distance),
            passes_threshold=(Features.AXIS, passes_filters))
Ejemplo n.º 4
0
    def decode_metric(
            self,
            intensities: IntensityTable,
            max_distance: Number,
            min_intensity: Number,
            norm_order: int,
            metric: str = 'euclidean',
            return_original_intensities: bool = False
    ) -> DecodedIntensityTable:
        """
        Assigns intensity patterns that have been extracted from an :py:class:`ImageStack` and
        stored in an :py:class:`IntensityTable` by a :py:class:`SpotFinder` to the gene targets that
        they encode.

        This method carries out the assignment by first normalizing both the codes and the
        recovered intensities with the norm selected by :code:`norm_order`, and then finding the
        closest code for each feature according to a distance metric (default=euclidean).

        Features greater than :code:`max_distance` from their nearest code, or whose norm is
        below :code:`min_intensity`, are flagged as not passing the thresholds.

        Parameters
        ----------
        intensities : IntensityTable
            features to be decoded
        max_distance : Number
            maximum distance between a feature and its closest code for which the feature
            passes the thresholds.
        min_intensity : Number
            minimum norm a feature must have to pass the thresholds
        norm_order : int
            the order of the norm used to normalize codes and intensities
        metric : str
            the metric string forwarded to the nearest-code search
        return_original_intensities: bool
            If True returns original intensity values in the DecodedIntensityTable instead of
            normalized ones (default=False)

        Notes
        -----
        The available norms for this function can be found at the following link:
        https://docs.scipy.org/doc/numpy-1.14.0/reference/generated/numpy.linalg.norm.html
        The available metrics for this function can be found at the following link:
        https://docs.scipy.org/doc/scipy-0.14.0/reference/spatial.distance.html

        Returns
        -------
        DecodedIntensityTable :
            Intensity table containing normalized (or, on request, original) intensities, target
            assignments, distances to the nearest code, and the filtering status of each
            feature.

        """

        self._validate_decode_intensity_input_matches_codebook_shape(
            intensities)

        # nothing to decode: attach empty metadata fields and return
        if intensities.sizes[Features.AXIS] == 0:
            return DecodedIntensityTable.from_intensity_table(
                intensities,
                targets=(Features.AXIS, np.empty(0, dtype='U')),
                distances=(Features.AXIS, np.empty(0, dtype=np.float64)),
                passes_threshold=(Features.AXIS, np.empty(0, dtype=bool)))

        # normalize both the intensities and the codebook
        norm_intensities, norms = self._normalize_features(
            intensities, norm_order=norm_order)
        norm_codes, _ = self._normalize_features(self, norm_order=norm_order)

        metric_outputs, targets = self._approximate_nearest_code(
            norm_codes, norm_intensities, metric=metric)

        # only targets with low distances and high intensities should be retained.
        # The builtin ``bool`` is used because the ``np.bool`` alias does not exist in
        # NumPy 1.24 through 1.26.
        passes_filters = np.logical_and(norms >= min_intensity,
                                        metric_outputs <= max_distance,
                                        dtype=bool)

        return_intensities = intensities if return_original_intensities else norm_intensities
        # norm_intensities is a DataArray, make it back into an IntensityTable
        return DecodedIntensityTable.from_intensity_table(
            return_intensities,
            targets=(Features.AXIS, targets),
            distances=(Features.AXIS, metric_outputs),
            passes_threshold=(Features.AXIS, passes_filters))
    def run(
        self,
        intensities: IntensityTable,
        n_processes: Optional[int] = None,
    ) -> Tuple[DecodedIntensityTable, ConnectedComponentDecodingResult]:
        """
        Merge adjacent pixels that decoded to the same target into single features.

        Parameters
        ----------
        intensities : IntensityTable
            Pixel intensities of an imaging experiment
        n_processes : Optional[int]
            Number of parallel processes, forwarded to the creation of the spot attributes

        Returns
        -------
        DecodedIntensityTable :
            Table whose features comprise sets of adjacent pixels that decoded to the same target
        ConnectedComponentDecodingResult :
            NamedTuple containing :
                region_properties :
                    the properties of each connected component, in the same order as the
                    IntensityTable
                label_image : np.ndarray
                    An image where all pixels of a connected component share the same integer ID
                decoded_image : np.ndarray
                    Image whose pixels correspond to the targets that the given position in the
                    ImageStack decodes to.

        """

        # Targets are mapped to integers so that they can be laid out as an image and handed to
        # a connected-component algorithm, which groups adjacent pixels sharing a target.
        target_map = TargetsMap(intensities[Features.TARGET].values)

        decoded_image = self._intensities_to_decoded_image(
            intensities, target_map, self._mask_filtered)

        # connected components of the decoded image become the output features
        label_image: np.ndarray = label(
            decoded_image, connectivity=self._connectivity)

        # per-component region properties
        region_props: List = regionprops(np.squeeze(label_image))

        # mean intensity trace over the pixels of each component
        mean_pixel_traces = self._calculate_mean_pixel_traces(
            label_image, intensities)

        # SpotAttributes for the components, plus each component's filtering outcome
        spot_attributes, passes_filter = self._create_spot_attributes(
            region_props,
            decoded_image,
            target_map,
            n_processes=n_processes,
        )

        # attach distances from the nearest codes and the filtering results
        spot_attributes.data[Features.DISTANCE] = mean_pixel_traces[
            Features.DISTANCE]
        spot_attributes.data[Features.PASSES_THRESHOLDS] = passes_filter

        # coordinates of the output table, reusing the ch/round indexes of the mean traces
        coords = IntensityTable._build_xarray_coords(
            spot_attributes,
            mean_pixel_traces.indexes[Axes.CH],
            mean_pixel_traces.indexes[Axes.ROUND],
        )

        decoded_table = DecodedIntensityTable(
            data=mean_pixel_traces,
            coords=coords,
            dims=(Features.AXIS, Axes.CH.value, Axes.ROUND.value),
        )

        # the non-IntensityTable results travel together in a NamedTuple
        component_result = ConnectedComponentDecodingResult(
            region_props, label_image, decoded_image)

        return decoded_table, component_result
Ejemplo n.º 6
0
    def run(self,
            spots: SpotFindingResults,
            n_processes: int = 1,
            *args) -> DecodedIntensityTable:
        """
        Decode spots by finding the set of nonoverlapping barcodes that have the minimum spatial
        variance within each barcode

        Decoding is repeated once for every allowed number of omitted rounds, from 0 up to
        ``self.errorRounds``. Spots that end up in an accepted barcode are removed before the
        next pass. The barcodes accepted in all passes are combined into one table.

        Parameters
        ----------
        spots: SpotFindingResults
            A Dict of tile indices and their corresponding measured spots

        n_processes: int
            Number of threads to run decoder in parallel with

        *args
            Accepted but not used by this method.

        Returns
        -------
        DecodedIntensityTable :
            IntensityTable decoded and appended with Features.TARGET and Features.QUALITY values.

        """

        # Rename n_processes (trying to stay consistent between starFISH's _ variables and my
        # camel case ones)
        numJobs = n_processes

        # If using a search radius exactly equal to a possible distance between two pixels
        # (ex: 1), some distances will be calculated as slightly less than their exact distance
        # (either due to rounding or precision errors) so the search radius needs to be slightly
        # increased to ensure this doesn't happen. The padded value is kept local so that
        # self.searchRadius does not grow every time run() is called.
        searchRadius = self.searchRadius + 0.001

        # Create dictionary where keys are round labels and the values are pandas dataframes
        # containing information on the spots found in that round
        spotTables = _merge_spots_by_round(spots)

        # Add one to channels labels (prevents collisions between hashes of barcodes later)
        for r in spots.round_labels:
            spotTables[r]['c'] += 1

        # Set list of round omission numbers to loop through
        roundOmits = range(self.errorRounds + 1)

        # Decode for each round omission number, collecting the resulting code tables
        codeFrames = []
        for currentRoundOmitNum in roundOmits:

            # Create necessary reference dictionaries
            neighborDict, channelDict, spotCoords = createRefDicts(spotTables, searchRadius)

            # Chooses best barcode for all spots in each round sequentially (possible barcode
            # space can become quite large which can increase memory needs so I do it this way so
            # we only need to store all potential barcodes that originate from one round at a
            # time)
            decodedTables = {}
            for r in range(len(spotTables)):
                roundData = deepcopy(spotTables[r])
                roundData = roundData.drop(['intensity', 'z', 'y', 'x', 'radius', 'c'], axis=1)
                roundData.index += 1

                # Create dictionary of dataframes (based on spotTables data) that contains
                # additional columns for each spot containing all the possible barcodes that
                # could be constructed from the neighbors of that spot
                roundData = buildBarcodes(roundData, neighborDict, currentRoundOmitNum,
                                          channelDict, r, numJobs)

                # Match possible barcodes to codebook and add new columns with info about barcodes
                # that had a codebook match
                roundData = decoder(roundData, self.codebook, currentRoundOmitNum, r, numJobs)

                # Choose most likely barcode for each spot in each round by finding the possible
                # decodable barcode with the least spatial variance between the spots that made up
                # the barcode
                roundData = distanceFilter(roundData, spotCoords, r, numJobs)

                # Assign to DecodedTables dictionary
                decodedTables[r] = roundData

            # Only do the following if barcodes were found
            totalSpots = sum(len(table) for table in decodedTables.values())
            if totalSpots:

                # Turn spot table dictionary into single table, filter barcodes by round frequency,
                # add additional information, and choose between barcodes that have overlapping
                # spots
                finalCodes = cleanup(decodedTables, spotCoords, channelDict)

                # If this is not the last round omission number to run, remove spots that have just
                # been found to be in passing barcodes from spotTables so they are not used for the
                # next round omission number
                if currentRoundOmitNum != roundOmits[-1]:
                    spotTables = removeUsedSpots(finalCodes, spotTables)

                # Keep the found codes; they are concatenated once after the loop
                codeFrames.append(finalCodes)

        # DataFrame.append was removed in pandas 2.0, so the per-pass tables are joined with a
        # single pd.concat, renumbering the rows 0..n-1.
        # NOTE(review): when no barcodes were found in any pass the table is empty and the
        # 'center' lookup below raises KeyError, as it did before this change.
        allCodes = pd.concat(codeFrames, ignore_index=True) if codeFrames else pd.DataFrame()

        # Create and fill in intensity table
        channels = spots.ch_labels
        rounds = spots.round_labels

        # create empty IntensityTable filled with np.nan
        data = np.full((len(allCodes), len(rounds), len(channels)), fill_value=np.nan)
        dims = (Features.AXIS, Axes.ROUND.value, Axes.CH.value)
        centers = allCodes['center']
        coords: Mapping[Hashable, Tuple[str, Any]] = {
            Features.SPOT_RADIUS: (Features.AXIS, np.full(len(allCodes), 1)),
            Axes.ZPLANE.value: (Features.AXIS, np.asarray([round(c[2]) for c in centers])),
            Axes.Y.value: (Features.AXIS, np.asarray([round(c[1]) for c in centers])),
            Axes.X.value: (Features.AXIS, np.asarray([round(c[0]) for c in centers])),
            Features.SPOT_ID: (Features.AXIS, np.arange(len(allCodes))),
            Features.AXIS: (Features.AXIS, np.arange(len(allCodes))),
            Axes.ROUND.value: (Axes.ROUND.value, rounds),
            Axes.CH.value: (Axes.CH.value, channels)
        }
        int_table = IntensityTable(data=data, dims=dims, coords=coords)

        # Fill in data values: one row per entry of the barcode, holding a 1 at the position
        # equal to that entry and 0 elsewhere. An entry that matches no position in
        # range(len(channels)) yields an all-zero row (a round that is not used).
        numChannels = len(channels)
        table_codes = [
            np.asarray([[1 if j == ch else 0 for j in range(numChannels)] for ch in barcode])
            for barcode in allCodes['best_barcodes']
        ]
        int_table.values = np.asarray(table_codes)
        int_table = transfer_physical_coords_to_intensity_table(intensity_table=int_table,
                                                                spots=spots)

        # Validate results are correct shape
        self.codebook._validate_decode_intensity_input_matches_codebook_shape(int_table)

        # Create DecodedIntensityTable
        result = DecodedIntensityTable.from_intensity_table(
            int_table,
            targets=(Features.AXIS, allCodes['best_targets'].astype('U')),
            distances=(Features.AXIS, allCodes["best_distances"]),
            passes_threshold=(Features.AXIS, np.full(len(allCodes), True)),
            rounds_used=(Features.AXIS, allCodes['rounds_used']))

        return result