def _calculate_mean_pixel_traces(
        label_image: np.ndarray,
        intensities: IntensityTable,
    ) -> IntensityTable:
        """
        Collapse per-pixel intensities into a single averaged "trace" per connected component.

        Every entry along the feature axis of ``intensities`` is tagged with the label found at
        the matching position of the flattened ``label_image``. Entries that share a label are
        then averaged, both for the intensity values and for the stored distance values.

        Parameters
        ----------
        label_image : np.ndarray
            Image in which all pixels of one connected component carry the same integer ID.
            It is flattened and paired positionally with the feature axis, so it is assumed to
            hold exactly one label per feature (not checked here).
        intensities : IntensityTable
            Decoded intensities. NOTE: a ``spot_id`` entry is assigned onto this object, so the
            caller's table is modified.

        Returns
        -------
        IntensityTable :
            One feature per connected component, holding that component's mean trace and mean
            distance. The component labelled 0 (background) is removed when present.

        """
        # Tag each feature with the ID of the component it belongs to.
        intensities['spot_id'] = (Features.AXIS, label_image.reshape(-1))

        # Mean trace per component, followed by the mean of the distance values per component.
        component_traces = intensities.groupby('spot_id').mean(Features.AXIS)
        component_distances = (
            intensities[Features.DISTANCE].groupby('spot_id').mean(Features.AXIS)
        )
        component_traces[Features.DISTANCE] = ('spot_id', np.ravel(component_distances))

        # Label 0 stands for background: strip it if it exists, otherwise hand back everything.
        try:
            return component_traces.drop(0, dim='spot_id')
        except KeyError:
            return component_traces
Esempio n. 2
0
    def decode_euclidean(self, intensities: IntensityTable) -> IntensityTable:
        """Label each feature with the gene whose code lies nearest in euclidean distance

        Features and codes are each rescaled to sum to one before they are compared. The
        quality written for a feature is one minus its distance to the nearest code.

        Parameters
        ----------
        intensities : IntensityTable
            features to be decoded; gene and quality entries are set on this object in-place

        Returns
        -------
        IntensityTable :
            the same table that was passed in, now carrying the gene assignment and quality of
            every feature

        """
        features_dim = IntensityTable.Constants.FEATURES.value
        codebook_gene_dim = Codebook.Constants.GENE.value

        def _unit_sum(values):
            """rescale values so that they sum to one"""
            return values / values.sum()

        # bring the features and the codes of this codebook onto a common scale
        scaled_features = intensities.groupby(features_dim).apply(_unit_sum)
        scaled_codes = self.groupby(codebook_gene_dim).apply(_unit_sum)

        def _distances_to_codes(observation: xr.DataArray) -> np.ndarray:
            """measure the euclidean distance from observation to every normalized code

            Parameters
            ----------
            observation : xr.DataArray
                2-dimensional DataArray of shape (n_ch, n_hyb)

            Returns
            -------
            np.ndarray :
                1-d vector containing the distance of each code to observation

            """
            deltas = scaled_codes - observation
            # NOTE: the original author suspected that the order of codes changes in this
            # reduction (automatic sorting?); gene names are therefore looked up from the
            # index of the resulting distances further down.
            return np.sqrt((deltas ** 2).sum((Indices.CH, Indices.HYB)))

        # one row of code distances per feature
        code_distances = scaled_features.groupby(features_dim).apply(_distances_to_codes)

        # quality of a decoded feature: one minus the distance to its nearest code
        nearest_distance = code_distances.min(codebook_gene_dim)
        quality_index = pd.Index(1 - nearest_distance)

        # translate the position of the nearest code into its gene name
        nearest_position = code_distances.argmin(codebook_gene_dim)
        gene_names = code_distances.indexes[codebook_gene_dim].values
        gene_index = pd.Index(gene_names[nearest_position.values])

        # write the results back onto the caller's table
        intensities[IntensityTable.Constants.GENE.value] = (features_dim, gene_index)
        intensities[IntensityTable.Constants.QUALITY.value] = (features_dim, quality_index)

        return intensities