Beispiel #1
0
    def decode_per_hyb_max(self,
                           intensities: IntensityTable) -> IntensityTable:
        """Decode each feature by selecting the per-hybridization round max-valued channel.

        The argmax over the channel axis is taken for both the codebook (``self``) and the
        features; a feature is assigned the gene of a code when the two argmax patterns are
        identical in every hybridization round.

        Notes
        -----
        - Features that match no code keep the initial np.nan fill. Because the gene array is
          cast to a unicode dtype before it is stored, those features end up labelled with the
          string 'nan' rather than a float NaN.
        - If several codes share the same argmax pattern, the code with the highest index
          wins, because later loop iterations overwrite earlier assignments.
        - `intensities` is modified in place and the same object is returned.

        Parameters
        ----------
        intensities : IntensityTable
            features to be decoded

        Returns
        -------
        IntensityTable :
            the input table, with gene assignments stored under
            ``IntensityTable.Constants.GENE.value``

        """
        def _view_row_as_element(array: np.ndarray) -> np.ndarray:
            """View every row of a 2-d array as one structured element.

            Casting each row to a structured dtype with one field per column lets whole rows
            (codes) be compared for equality with a single ``==`` instead of a per-column
            comparison followed by a reduction.

            Parameters
            ----------
            array : np.ndarray
                2-dimensional numpy array with one row per observation, where observations
                may be either features or codes.

            Returns
            -------
            np.ndarray :
                structured array of shape (n_observations, 1); the columns of each row are
                collapsed into a single element

            """
            # a dtype-changing view requires contiguous rows; for arrays that already are
            # C-contiguous this returns the input without copying
            array = np.ascontiguousarray(array)
            ncols = array.shape[1]
            dtype = {
                'names': ['f{}'.format(i) for i in range(ncols)],
                'formats': ncols * [array.dtype]
            }
            return array.view(dtype)

        max_channels = intensities.argmax(Indices.CH.value)
        codes = self.argmax(Indices.CH.value)

        # one structured element per code (a) and per feature (b)
        a = _view_row_as_element(codes.values.reshape(self.shape[0], -1))
        b = _view_row_as_element(
            max_channels.values.reshape(intensities.shape[0], -1))

        # object dtype so that np.nan placeholders and gene names can share one array
        genes = np.empty(intensities.shape[0], dtype=object)
        genes.fill(np.nan)

        for i in range(a.shape[0]):
            # a[i] broadcasts against b; the first axis of the match is the feature index
            genes[np.where(a[i] == b)[0]] = codes['gene_name'][i]

        # casting to unicode turns the remaining np.nan placeholders into the string 'nan'
        gene_index = pd.Index(genes.astype('U'))

        intensities[IntensityTable.Constants.GENE.value] = (
            IntensityTable.Constants.FEATURES.value, gene_index)

        return intensities
Beispiel #2
0
    def decode_per_round_max(self,
                             intensities: IntensityTable) -> IntensityTable:
        """Decode each feature by selecting the per-imaging-round max-valued channel.

        The argmax over the channel axis is taken for both the codebook (``self``) and the
        features; a feature is assigned the target of a code when the two argmax patterns are
        identical in every round. A distance score and a pass/fail flag are stored alongside
        the target.

        Notes
        -----
        - If no code matches the per-round maximum for a feature, it will be assigned 'nan'
          instead of a target value
        - Numpy's argmax breaks ties by picking the first channel -- this can lead to
          unexpected results where some features with "tied" channels will decode, but others
          will be assigned 'nan'.
        - If several codes share the same argmax pattern, the code with the highest index
          wins, because later loop iterations overwrite earlier assignments.
        - `intensities` is modified in place and the same object is returned.

        Parameters
        ----------
        intensities : IntensityTable
            features to be decoded

        Returns
        -------
        IntensityTable :
            the input table, with ``Features.TARGET``, ``Features.DISTANCE`` and
            ``Features.PASSES_THRESHOLDS`` set along ``Features.AXIS``

        """
        def _view_row_as_element(array: np.ndarray) -> np.ndarray:
            """View every row of a 2-d array as one structured element.

            Casting each row to a structured dtype with one field per column lets whole rows
            (codes) be compared for equality with a single ``==`` instead of a per-column
            comparison followed by a reduction.

            Parameters
            ----------
            array : np.ndarray
                2-dimensional numpy array with one row per observation, where observations
                may be either features or codes.

            Returns
            -------
            np.ndarray :
                structured array of shape (n_observations, 1); the columns of each row are
                collapsed into a single element

            """
            # a dtype-changing view requires contiguous rows; for arrays that already are
            # C-contiguous this returns the input without copying
            array = np.ascontiguousarray(array)
            ncols = array.shape[1]
            dtype = {
                'names': ['f{}'.format(i) for i in range(ncols)],
                'formats': ncols * [array.dtype]
            }
            return array.view(dtype)

        self._validate_decode_intensity_input_matches_codebook_shape(
            intensities)

        max_channels = intensities.argmax(Indices.CH.value)
        codes = self.argmax(Indices.CH.value)

        # TODO ambrosejcarr, dganguli: explore this quality score further
        # calculate distance scores by evaluating the fraction of signal in each round that is
        # found in the non-maximal channels.
        # NOTE(review): a round whose summed intensity is zero makes the ratio below 0/0;
        # how that propagates through the mean depends on the table type -- confirm.
        max_intensities = intensities.max(Indices.CH.value)
        round_intensities = intensities.sum(Indices.CH.value)
        distance = 1 - (max_intensities / round_intensities).mean(
            Indices.ROUND.value)

        # one structured element per code (a) and per feature (b)
        a = _view_row_as_element(codes.values.reshape(self.shape[0], -1))
        b = _view_row_as_element(
            max_channels.values.reshape(intensities.shape[0], -1))

        # object dtype so that np.nan placeholders and target names can share one array
        targets = np.full(intensities.shape[0],
                          fill_value=np.nan,
                          dtype=object)

        # decode the intensities
        for i in range(codes.shape[0]):
            # a[i] broadcasts against b; the first axis of the match is the feature index
            targets[np.where(a[i] == b)[0]] = codes[Features.TARGET][i]

        # a code passes filters if it decodes successfully; this must be evaluated before
        # the unicode cast below, which turns the np.nan placeholders into the string 'nan'
        passes_filters = ~pd.isnull(targets)

        intensities[Features.TARGET] = (Features.AXIS, targets.astype('U'))
        intensities[Features.DISTANCE] = (Features.AXIS, distance)
        intensities[Features.PASSES_THRESHOLDS] = (Features.AXIS,
                                                   passes_filters)

        return intensities