Example #1
0
def _get_annotation_map(
        spectrum_mz: np.ndarray, spectrum_intensity: np.ndarray,
        annotation_mz: List[float], fragment_tol_mass: float,
        fragment_tol_mode: str, peak_assignment: str = 'most_intense')\
        -> List[Tuple[int, int]]:
    """
    JIT helper function for `MsmsSpectrum.annotate_peaks`.

    For every theoretical fragment, the contiguous run of spectrum peaks
    whose mass difference to the fragment (as computed by `utils.mass_diff`)
    lies within the tolerance is located, and a single peak of that run is
    paired with the fragment.

    The start of the peak window only ever moves forward, so both the
    spectrum peaks and the annotations are presumably expected in ascending
    m/z order (confirm against the caller).

    Parameters
    ----------
    spectrum_mz : np.ndarray
        The mass-to-charge values of the spectrum fragment peaks.
    spectrum_intensity : np.ndarray
        The intensities of the spectrum fragment peaks.
    annotation_mz : List[float]
        The mass-to-charge values of the peptide fragment annotations.
    fragment_tol_mass : float
        Fragment mass tolerance used to match spectrum peaks against
        theoretical peaks.
    fragment_tol_mode : {'Da', 'ppm'}
        Fragment mass tolerance unit. Either 'Da' or 'ppm'.
    peak_assignment : {'most_intense', 'nearest_mz'}, optional
        Which peak to pick when several peaks fall inside the tolerance
        window of one theoretical peak:
        - 'most_intense': The most intense peak is picked (default).
        - 'nearest_mz':   The peak whose m/z is closest to the theoretical
                          m/z is picked.
        Any other value picks the first peak of the window.

    Returns
    -------
    A list of (peak index, annotation index) tuples.
    """
    tol_is_da = fragment_tol_mode == 'Da'
    n_peaks = len(spectrum_mz)
    matches = []
    window_lo = 0
    for annot_idx, theo_mz in enumerate(annotation_mz):
        # Skip the peaks that lie below the tolerance window.
        while (window_lo < n_peaks and
               utils.mass_diff(spectrum_mz[window_lo], theo_mz, tol_is_da)
               < -fragment_tol_mass):
            window_lo += 1
        # Extend the window over all peaks that are not above it.
        window_hi = window_lo
        while (window_hi < n_peaks and
               utils.mass_diff(spectrum_mz[window_hi], theo_mz, tol_is_da)
               <= fragment_tol_mass):
            window_hi += 1
        if window_hi == window_lo:
            # No peak within tolerance of this fragment.
            continue
        offset = 0
        if peak_assignment == 'nearest_mz':
            offset = np.argmin(
                np.abs(spectrum_mz[window_lo:window_hi] - theo_mz))
        elif peak_assignment == 'most_intense':
            offset = np.argmax(spectrum_intensity[window_lo:window_hi])
        matches.append((window_lo + offset, annot_idx))

    return matches
Example #2
0
    def remove_precursor_peak(self, fragment_tol_mass: float,
                              fragment_tol_mode: str):
        """
        Discard the fragment peak(s) that lie near the precursor m/z.

        A peak is kept only if the absolute mass difference between its m/z
        and the precursor m/z (as computed by `utils.mass_diff`) is strictly
        larger than the tolerance. The m/z, intensity, and annotation arrays
        are filtered in place with the same selection.

        Parameters
        ----------
        fragment_tol_mass : float
            Fragment mass tolerance around the precursor mass within which
            peaks are removed.
        fragment_tol_mode : {'Da', 'ppm'}
            Fragment mass tolerance unit. Either 'Da' or 'ppm'.

        Returns
        -------
        self : `MsmsSpectrum`
        """
        precursor_offsets = utils.mass_diff(self.mz, self.precursor_mz,
                                            fragment_tol_mode)
        outside_tolerance = np.abs(precursor_offsets) > fragment_tol_mass
        # Indices of the peaks that survive the filter.
        retained = np.nonzero(outside_tolerance)[0]
        self.mz = self.mz[retained]
        self.intensity = self.intensity[retained]
        self.annotation = self.annotation[retained]

        return self
Example #3
0
def _get_non_precursor_peak_mask(mz: np.ndarray, pep_mass: float,
                                 max_charge: int, isotope: int,
                                 fragment_tol_mass: float,
                                 fragment_tol_mode: str)\
        -> np.ndarray:
    """
    JIT helper function for `MsmsSpectrum.remove_precursor_peak`.

    A list of m/z values to remove is built for every charge from
    `max_charge` down to 1 and every isotope offset from 0 up to `isotope`.
    The peaks and these values are then walked in lockstep, and every peak
    whose mass difference to the current value (as computed by
    `utils.mass_diff`) is within the tolerance is flagged for removal.

    Both pointers only move forward, so `mz` is presumably expected in
    ascending order (confirm against the caller).

    Parameters
    ----------
    mz : np.ndarray
        The mass-to-charge ratios of the spectrum fragment peaks.
    pep_mass : float
        The mono-isotopic mass of the uncharged peptide.
    max_charge : int
        The maximum precursor loss charge.
    isotope : int
        The number of isotopic peaks to be checked.
    fragment_tol_mass : float
        Fragment mass tolerance around the precursor mass within which peaks
        are removed.
    fragment_tol_mode : {'Da', 'ppm'}
        Fragment mass tolerance unit. Either 'Da' or 'ppm'.

    Returns
    -------
    np.ndarray
        Boolean mask with the same shape as `mz`; True for peaks that are
        retained after precursor peak filtering.
    """
    targets = []
    for charge in range(max_charge, 0, -1):
        for iso in range(isotope + 1):
            # 1.0072766 is presumably the proton mass in Da (TODO confirm).
            targets.append((pep_mass + iso) / charge + 1.0072766)

    tol_is_da = fragment_tol_mode == 'Da'
    keep = np.full_like(mz, True, np.bool_)
    n_peaks, n_targets = len(mz), len(targets)
    peak_i = 0
    target_i = 0
    while peak_i < n_peaks and target_i < n_targets:
        diff = utils.mass_diff(mz[peak_i], targets[target_i], tol_is_da)
        if diff < -fragment_tol_mass:
            # Peak lies below the current target window: try the next peak.
            peak_i += 1
            continue
        if diff > fragment_tol_mass:
            # Peak lies above the current target window: try the next target.
            target_i += 1
            continue
        # Peak falls inside the window: drop it and move on to the next peak.
        keep[peak_i] = False
        peak_i += 1

    return keep
Example #4
0
def _linkage(mzs: np.ndarray, precursor_tol_mode: str) -> np.ndarray:
    """
    Hierarchically cluster a one-dimensional array of m/z values.

    The values are sorted once, after which only neighbouring clusters are
    ever compared, so no pairwise distance matrix is needed. The distance
    between two neighbouring clusters is the mass difference (as computed by
    `suu.mass_diff`) between the largest m/z of the right cluster and the
    smallest m/z of the left cluster. In each step the closest neighbouring
    pair is merged; on ties the pair with the lowest m/z wins.

    For information on the linkage output format, see:
    https://docs.scipy.org/doc/scipy/reference/generated/scipy.cluster.hierarchy.linkage.html

    Parameters
    ----------
    mzs : np.ndarray
        The precursor m/z values to cluster.
    precursor_tol_mode : str
        The unit of the precursor m/z tolerance ('Da' or 'ppm').

    Returns
    -------
    np.ndarray
        The hierarchical clustering encoded as a linkage matrix with one row
        per merge: the labels of the two merged clusters, their distance, and
        the number of elements in the merged cluster.
    """
    n_obs = mzs.shape[0]
    tol_is_da = precursor_tol_mode == 'Da'
    merges = np.zeros((n_obs - 1, 4), np.double)
    # Active clusters in ascending m/z order, each stored as
    # (lowest m/z, highest m/z, cluster label, number of members).
    active = [(mzs[j], mzs[j], j, 1) for j in np.argsort(mzs)]
    for step in range(n_obs - 1):
        best_gap, left = np.inf, -1
        for k in range(len(active) - 1):
            gap = suu.mass_diff(active[k + 1][1], active[k][0], tol_is_da)
            if gap < best_gap:
                best_gap, left = gap, k
        lower = active[left]
        upper = active[left + 1]
        merged_size = lower[3] + upper[3]
        merges[step, 0] = lower[2]
        merges[step, 1] = upper[2]
        merges[step, 2] = best_gap
        merges[step, 3] = merged_size
        # The merged cluster receives a fresh label past the original points.
        active[left] = (lower[0], upper[1], n_obs + step, merged_size)
        active.pop(left + 1)

    return merges
Example #5
0
    def annotate_peaks(self, fragment_tol_mass: float, fragment_tol_mode: str,
                       ion_types: str = 'by', max_ion_charge: int = None,
                       peak_assignment: str = 'most_intense'):
        """
        Annotate the spectrum peaks with matching theoretical fragments.

        The theoretical fragments of the spectrum's peptide are generated and
        each one is matched against the peaks that lie within the given mass
        tolerance. Any existing annotation is replaced by a fresh object
        array with one entry per peak; unmatched peaks are left unannotated.

        The start of the peak window only ever moves forward, so the peaks
        and the theoretical fragments are presumably expected in ascending
        m/z order (confirm against `_get_theoretical_peptide_fragments`).

        Parameters
        ----------
        fragment_tol_mass : float
            Fragment mass tolerance to match spectrum peaks against theoretical
            peaks.
        fragment_tol_mode : {'Da', 'ppm'}
            Fragment mass tolerance unit. Either 'Da' or 'ppm'.
        ion_types : str, optional
            Fragment type to annotate. Can be any combination of 'a', 'b', 'c',
            'x', 'y', and 'z' (the default is 'by', which means that b-ions and
            y-ions will be annotated).
        max_ion_charge : int, optional
            All fragments up to and including the given charge will be
            annotated (by default all fragments with a charge up to the
            precursor minus one will be annotated).
        peak_assignment : {'most_intense', 'nearest_mz'}, optional
            In case multiple peaks occur within the given mass window around a
            theoretical peak, only a single peak will be annotated with the
            fragment type:
            - 'most_intense': The most intense peak will be annotated
                              (default).
            - 'nearest_mz':   The peak whose m/z is closest to the theoretical
                              m/z will be annotated.

        Returns
        -------
        self : `MsmsSpectrum`

        Raises
        ------
        ValueError
            If no peptide sequence is available for the spectrum, or if
            `peak_assignment` is not one of the supported methods.
        """
        if self.peptide is None:
            raise ValueError('No peptide sequence available for the spectrum')
        # Validate before touching any state: an unsupported method would
        # otherwise only fail once a peak matched, with an unbound local
        # variable and after the existing annotation had been discarded.
        if peak_assignment not in ('most_intense', 'nearest_mz'):
            raise ValueError(
                'Unknown peak assignment method: {!r}'.format(peak_assignment))
        if max_ion_charge is None:
            # NOTE(review): a singly charged precursor gives a maximum ion
            # charge of 0 here -- confirm that this is intended.
            max_ion_charge = self.precursor_charge - 1

        theoretical_fragments = _get_theoretical_peptide_fragments(
            self.peptide, ion_types, max_ion_charge)
        self.annotation = np.empty(len(self.mz), object)
        peak_i_start = 0
        for fragment_annotation, fragment_mz in theoretical_fragments:
            # Skip the peaks that lie below the tolerance window.
            while (peak_i_start < len(self.mz) and
                   utils.mass_diff(self.mz[peak_i_start], fragment_mz,
                                   fragment_tol_mode) < -fragment_tol_mass):
                peak_i_start += 1
            # Extend the window over all peaks that are not above it.
            peak_i_stop = peak_i_start
            while (peak_i_stop < len(self.mz) and
                   utils.mass_diff(self.mz[peak_i_stop], fragment_mz,
                                   fragment_tol_mode) <= fragment_tol_mass):
                peak_i_stop += 1
            if peak_i_stop > peak_i_start:
                window = slice(peak_i_start, peak_i_stop)
                if peak_assignment == 'nearest_mz':
                    peak_annotation_i = np.argmin(
                        np.abs(self.mz[window] - fragment_mz))
                else:
                    peak_annotation_i = np.argmax(self.intensity[window])
                self.annotation[peak_i_start + peak_annotation_i] =\
                    fragment_annotation

        return self
Example #6
0
def _generate_pairs_negative(row_nums: np.ndarray, mzs: np.ndarray,
                             sequences: nb.typed.List,
                             fragments: nb.typed.List,
                             precursor_mz_tol: float, fragment_mz_tol: float,
                             matching_fragments_threshold: float) \
        -> Iterator[int]:
    """
    Numba utility function to efficiently generate row numbers for negative
    pairs.

    Each PSM is compared with the PSMs that follow it for as long as their
    precursor m/z difference stays within the tolerance; the scan stops at
    the first PSM that exceeds it, so `mzs` is presumably sorted (confirm
    against the caller). Two PSMs with different sequences form a negative
    pair when the number of fragments of the second peptide that have a
    neighbouring fragment of the first peptide within the fragment tolerance
    is below the threshold scaled by the smaller fragment count.

    Parameters
    ----------
    row_nums : np.ndarray
        A NumPy array of row numbers for each PSM.
    mzs : np.ndarray
        A NumPy array of precursor m/z values for each PSM.
    sequences : nb.typed.List
        A list of peptide sequences for each PSM.
    fragments: nb.typed.List
        Theoretical fragments of the peptides corresponding to each PSM.
        They are searched with `np.searchsorted`, so each fragment array is
        presumably sorted in ascending order (confirm against the caller).
    precursor_mz_tol : float
        Maximum precursor m/z tolerance in ppm for two PSMs to be considered a
        negative pair.
    fragment_mz_tol : float
        Maximum fragment m/z tolerance in Da for two fragments to be considered
        overlapping.
    matching_fragments_threshold : float
        Maximum ratio of matching fragments relative to the number of b and y
        ions of shortest peptide to be considered a negative pair.

    Returns
    -------
    Iterator[int]
        A generator of row numbers for the negative pairs, with row numbers `i`
        and `i + 1` forming pairs.
    """
    for first in range(len(row_nums)):
        for second in range(first + 1, len(mzs)):
            # `False` presumably selects the ppm mode of `suu.mass_diff`
            # (TODO confirm). Stop at the first PSM outside the tolerance.
            precursor_gap = abs(suu.mass_diff(mzs[first], mzs[second], False))
            if not (precursor_gap <= precursor_mz_tol):
                break
            if sequences[first] == sequences[second]:
                continue

            frags_a = fragments[first]
            frags_b = fragments[second]
            last_a = len(frags_a) - 1
            insert_at = np.searchsorted(frags_a, frags_b)
            n_shared = 0
            for k in range(len(frags_b)):
                frag_b = frags_b[k]
                # The neighbours of the insertion point in the first
                # peptide's fragments, clamped to the array bounds.
                below = frags_a[max(0, insert_at[k] - 1)]
                above = frags_a[min(insert_at[k], last_a)]
                if (abs(below - frag_b) < fragment_mz_tol
                        or abs(above - frag_b) < fragment_mz_tol):
                    n_shared += 1

            max_shared = matching_fragments_threshold * min(
                len(frags_a), len(frags_b))
            if n_shared < max_shared:
                yield row_nums[first]
                yield row_nums[second]