def _get_annotation_map(
        spectrum_mz: np.ndarray, spectrum_intensity: np.ndarray,
        annotation_mz: List[float], fragment_tol_mass: float,
        fragment_tol_mode: str, peak_assignment: str = 'most_intense')\
        -> List[Tuple[int, int]]:
    """
    JIT helper function for `MsmsSpectrum.annotate_peaks`.

    Both the spectrum peaks and the annotation m/z values are walked in a
    single forward pass (the start index of the search window never moves
    backwards), so both are expected to be in ascending m/z order.

    Parameters
    ----------
    spectrum_mz : np.ndarray
        The mass-to-charge values of the spectrum fragment peaks.
    spectrum_intensity : np.ndarray
        The intensities of the spectrum fragment peaks.
    annotation_mz : List[float]
        A list of mass-to-charge values of the peptide fragment annotations.
    fragment_tol_mass : float
        Fragment mass tolerance to match spectrum peaks against theoretical
        peaks.
    fragment_tol_mode : {'Da', 'ppm'}
        Fragment mass tolerance unit. Either 'Da' or 'ppm'.
    peak_assignment : {'most_intense', 'nearest_mz'}, optional
        In case multiple peaks occur within the given mass window around a
        theoretical peak, only a single peak will be annotated with the
        fragment type:

        - 'most_intense': The most intense peak will be annotated (default).
        - 'nearest_mz':   The peak whose m/z is closest to the theoretical m/z
          will be annotated.

        Any other value selects the first peak inside the window.

    Returns
    -------
    List[Tuple[int, int]]
        A list of (peak index, annotation index) tuples.
    """
    # Loop invariants, evaluated once rather than for every peak comparison.
    fragment_tol_mode_is_da = fragment_tol_mode == 'Da'
    num_peaks = len(spectrum_mz)

    annotation_i_map = []
    peak_i_start = 0
    for fragment_i, fragment_mz in enumerate(annotation_mz):
        # Advance past all peaks that lie below the tolerance window.
        while (peak_i_start < num_peaks and
               utils.mass_diff(spectrum_mz[peak_i_start], fragment_mz,
                               fragment_tol_mode_is_da)
               < -fragment_tol_mass):
            peak_i_start += 1
        # Extend the window over all peaks that are within tolerance.
        peak_i_stop = peak_i_start
        while (peak_i_stop < num_peaks and
               utils.mass_diff(spectrum_mz[peak_i_stop], fragment_mz,
                               fragment_tol_mode_is_da)
               <= fragment_tol_mass):
            peak_i_stop += 1
        # A non-empty window means at least one candidate peak was found.
        if peak_i_stop > peak_i_start:
            # Default offset (first candidate) is used for unknown modes.
            peak_annotation_i = 0
            if peak_assignment == 'nearest_mz':
                peak_annotation_i = np.argmin(
                    np.abs(spectrum_mz[peak_i_start:peak_i_stop]
                           - fragment_mz))
            elif peak_assignment == 'most_intense':
                peak_annotation_i = np.argmax(
                    spectrum_intensity[peak_i_start:peak_i_stop])
            annotation_i_map.append(
                (peak_i_start + peak_annotation_i, fragment_i))
    return annotation_i_map
def remove_precursor_peak(self, fragment_tol_mass: float,
                          fragment_tol_mode: str):
    """
    Remove fragment peak(s) close to the precursor mass-to-charge ratio.

    The spectrum is modified in place: `mz`, `intensity` and (when present)
    `annotation` are all filtered with the same peak mask.

    Parameters
    ----------
    fragment_tol_mass : float
        Fragment mass tolerance around the precursor mass to remove the
        precursor peak.
    fragment_tol_mode : {'Da', 'ppm'}
        Fragment mass tolerance unit. Either 'Da' or 'ppm'.

    Returns
    -------
    self : `MsmsSpectrum`
    """
    # NOTE(review): the mode string is forwarded unchanged to
    # `utils.mass_diff`, whereas the JIT helpers in this module pass
    # `fragment_tol_mode == 'Da'`. Confirm which form the helper expects.
    precursor_diff = utils.mass_diff(self.mz, self.precursor_mz,
                                     fragment_tol_mode)
    # Keep only the peaks that lie outside the tolerance window.
    peak_mask = np.abs(precursor_diff) > fragment_tol_mass
    self.mz = self.mz[peak_mask]
    self.intensity = self.intensity[peak_mask]
    # Presumably `annotation` can still be None when peaks were never
    # annotated (TODO confirm); indexing None would raise a TypeError.
    if self.annotation is not None:
        self.annotation = self.annotation[peak_mask]
    return self
def _get_non_precursor_peak_mask(mz: np.ndarray, pep_mass: float,
                                 max_charge: int, isotope: int,
                                 fragment_tol_mass: float,
                                 fragment_tol_mode: str)\
        -> np.ndarray:
    """
    JIT helper function for `MsmsSpectrum.remove_precursor_peak`.

    Builds the list of precursor m/z values for every charge from
    `max_charge` down to 1 and every isotope offset from 0 to `isotope`,
    then walks the peaks and that list side by side, flagging each peak that
    falls within the tolerance of one of those values.

    Parameters
    ----------
    mz : np.ndarray
        The mass-to-charge ratios of the spectrum fragment peaks.
    pep_mass : float
        The mono-isotopic mass of the uncharged peptide.
    max_charge : int
        The maximum precursor loss charge.
    isotope : int
        The number of isotopic peaks to be checked.
    fragment_tol_mass : float
        Fragment mass tolerance around the precursor mass to remove the
        precursor peak.
    fragment_tol_mode : {'Da', 'ppm'}
        Fragment mass tolerance unit. Either 'Da' or 'ppm'.

    Returns
    -------
    np.ndarray
        Boolean mask with the same shape as `mz`; False marks a peak that is
        to be removed, True a peak that is retained.
    """
    tol_in_da = fragment_tol_mode == 'Da'

    # Candidate precursor m/z values, highest charge (lowest m/z) first.
    # 1.0072766 is presumably the proton mass in Da.
    precursor_mzs = []
    for charge in range(max_charge, 0, -1):
        for iso_offset in range(isotope + 1):
            precursor_mzs.append(
                (pep_mass + iso_offset) / charge + 1.0072766)

    keep = np.full_like(mz, True, np.bool_)
    peak_i = 0
    target_i = 0
    # Merge-style scan: advance whichever side is currently lagging behind.
    while peak_i < len(mz) and target_i < len(precursor_mzs):
        delta = utils.mass_diff(mz[peak_i], precursor_mzs[target_i],
                                tol_in_da)
        if delta < -fragment_tol_mass:
            # Peak lies below the window of the current precursor m/z.
            peak_i += 1
        elif delta > fragment_tol_mass:
            # Peak lies above the window; try the next precursor m/z.
            target_i += 1
        else:
            # Peak is inside the window: drop it and inspect the next peak.
            keep[peak_i] = False
            peak_i += 1
    return keep
def _linkage(mzs: np.ndarray, precursor_tol_mode: str) -> np.ndarray:
    """
    Perform hierarchical clustering of a one-dimensional m/z array.

    Because the data is one-dimensional, no pairwise distance matrix needs to
    be computed, but rather sorting can be used: only clusters that are
    adjacent in sorted order are candidates for merging. The distance between
    two adjacent clusters is the mass difference between the maximum m/z of
    the right cluster and the minimum m/z of the left cluster.

    For information on the linkage output format, see:
    https://docs.scipy.org/doc/scipy/reference/generated/scipy.cluster.hierarchy.linkage.html

    Parameters
    ----------
    mzs : np.ndarray
        The precursor m/z's for which pairwise distances are computed.
    precursor_tol_mode : str
        The unit of the precursor m/z tolerance ('Da' or 'ppm').

    Returns
    -------
    np.ndarray
        The hierarchical clustering encoded as a linkage matrix with one row
        per merge. For fewer than two m/z values an empty array of shape
        (0, 4) is returned.
    """
    num_points = mzs.shape[0]
    # Clamp at zero so that an empty input yields an empty linkage matrix
    # instead of requesting an array with a negative dimension.
    num_merges = max(num_points - 1, 0)
    linkage = np.zeros((num_merges, 4), np.double)
    # Loop invariant, evaluated once rather than for every distance.
    tol_mode_is_da = precursor_tol_mode == 'Da'
    # min m/z, max m/z, cluster index, number of cluster elements
    clusters = [(mzs[i], mzs[i], i, 1) for i in np.argsort(mzs)]
    for it in range(num_merges):
        # Find the pair of neighbouring clusters with the smallest distance.
        min_dist, min_i = np.inf, -1
        for i in range(len(clusters) - 1):
            dist = suu.mass_diff(clusters[i + 1][1], clusters[i][0],
                                 tol_mode_is_da)
            if dist < min_dist:
                min_dist, min_i = dist, i
        n_points = clusters[min_i][3] + clusters[min_i + 1][3]
        linkage[it, :] = [clusters[min_i][2], clusters[min_i + 1][2],
                          min_dist, n_points]
        # The merged cluster gets a fresh index following the SciPy
        # convention (number of original points + merge iteration).
        clusters[min_i] = (clusters[min_i][0], clusters[min_i + 1][1],
                           num_points + it, n_points)
        del clusters[min_i + 1]
    return linkage
def annotate_peaks(self, fragment_tol_mass: float, fragment_tol_mode: str,
                   ion_types: str = 'by', max_ion_charge: int = None,
                   peak_assignment: str = 'most_intense'):
    """
    Annotate the spectrum peaks with the peptide's theoretical fragments.

    Any previous annotations are discarded: `self.annotation` is replaced by
    a fresh object array with one entry per peak, and every peak that matches
    a theoretical fragment within the given tolerance receives that
    fragment's annotation. The peaks and theoretical fragments are walked in
    a single forward pass, so both are expected to be in ascending m/z order.

    Parameters
    ----------
    fragment_tol_mass : float
        Fragment mass tolerance to match spectrum peaks against theoretical
        peaks.
    fragment_tol_mode : {'Da', 'ppm'}
        Fragment mass tolerance unit. Either 'Da' or 'ppm'.
    ion_types : str, optional
        Fragment type to annotate. Can be any combination of 'a', 'b', 'c',
        'x', 'y', and 'z' (the default is 'by', which means that b-ions and
        y-ions will be annotated).
    max_ion_charge : int, optional
        All fragments up to and including the given charge will be annotated
        (by default all fragments with a charge up to the precursor minus
        one, but at least charge 1, will be annotated).
    peak_assignment : {'most_intense', 'nearest_mz'}, optional
        In case multiple peaks occur within the given mass window around a
        theoretical peak, only a single peak will be annotated with the
        fragment type:

        - 'most_intense': The most intense peak will be annotated (default).
        - 'nearest_mz':   The peak whose m/z is closest to the theoretical m/z
          will be annotated.

    Returns
    -------
    self : `MsmsSpectrum`

    Raises
    ------
    ValueError
        If no peptide sequence is available for the spectrum, or if
        `peak_assignment` is not one of the supported values.
    """
    if self.peptide is None:
        raise ValueError('No peptide sequence available for the spectrum')
    # Reject unsupported modes up front; otherwise the selected peak index
    # would be unbound (or stale from a previous fragment) further below.
    if peak_assignment not in ('most_intense', 'nearest_mz'):
        raise ValueError('Unknown peak assignment method: {}'.format(
            peak_assignment))
    if max_ion_charge is None:
        # A singly-charged precursor would otherwise request fragments up
        # to charge 0 (presumably none at all), so use at least charge 1.
        max_ion_charge = max(1, self.precursor_charge - 1)

    theoretical_fragments = _get_theoretical_peptide_fragments(
        self.peptide, ion_types, max_ion_charge)
    self.annotation = np.empty(len(self.mz), object)
    num_peaks = len(self.mz)
    peak_i_start = 0
    for fragment_annotation, fragment_mz in theoretical_fragments:
        # NOTE(review): the mode string is forwarded unchanged to
        # `utils.mass_diff`, whereas `_get_annotation_map` passes
        # `fragment_tol_mode == 'Da'`. Confirm which form is expected.
        # Advance past all peaks that lie below the tolerance window.
        while (peak_i_start < num_peaks and
               utils.mass_diff(self.mz[peak_i_start], fragment_mz,
                               fragment_tol_mode) < -fragment_tol_mass):
            peak_i_start += 1
        # Extend the window over all peaks that are within tolerance.
        peak_i_stop = peak_i_start
        while (peak_i_stop < num_peaks and
               utils.mass_diff(self.mz[peak_i_stop], fragment_mz,
                               fragment_tol_mode) <= fragment_tol_mass):
            peak_i_stop += 1
        # A non-empty window means at least one candidate peak was found.
        if peak_i_stop > peak_i_start:
            if peak_assignment == 'nearest_mz':
                peak_annotation_i = np.argmin(np.abs(
                    self.mz[peak_i_start:peak_i_stop] - fragment_mz))
            else:
                # 'most_intense' (validated above).
                peak_annotation_i = np.argmax(
                    self.intensity[peak_i_start:peak_i_stop])
            self.annotation[peak_i_start + peak_annotation_i] = \
                fragment_annotation
    return self
def _generate_pairs_negative(row_nums: np.ndarray, mzs: np.ndarray,
                             sequences: nb.typed.List,
                             fragments: nb.typed.List,
                             precursor_mz_tol: float,
                             fragment_mz_tol: float,
                             matching_fragments_threshold: float) \
        -> Iterator[int]:
    """
    Numba utility function to efficiently generate row numbers for negative
    pairs.

    Each PSM is compared against the PSMs that follow it for as long as their
    precursor m/z difference stays within `precursor_mz_tol`. Two PSMs form a
    negative pair when their peptide sequences differ and the number of
    fragments they share is below the allowed fraction.

    Parameters
    ----------
    row_nums : np.ndarray
        A NumPy array of row numbers for each PSM.
    mzs : np.ndarray
        A NumPy array of precursor m/z values for each PSM.
    sequences : nb.typed.List
        A list of peptide sequences for each PSM.
    fragments: nb.typed.List
        Theoretical fragments of the peptides corresponding to each PSM.
    precursor_mz_tol : float
        Maximum precursor m/z tolerance in ppm for two PSMs to be considered a
        negative pair.
    fragment_mz_tol : float
        Maximum fragment m/z tolerance in Da for two fragments to be
        considered overlapping.
    matching_fragments_threshold : float
        Maximum ratio of matching fragments relative to the number of b and y
        ions of shortest peptide to be considered a negative pair.

    Returns
    -------
    Iterator[int]
        A generator of row numbers for the negative pairs; every two
        consecutively yielded row numbers form one pair.
    """
    for first in range(len(row_nums)):
        second = first + 1
        # Scan the following PSMs while the precursor m/z stays in tolerance
        # (`False` presumably selects ppm mode in `mass_diff`).
        while (second < len(mzs) and
               abs(suu.mass_diff(mzs[first], mzs[second], False))
               <= precursor_mz_tol):
            if sequences[first] != sequences[second]:
                frags_a = fragments[first]
                frags_b = fragments[second]
                # Insertion points of the second peptide's fragments in the
                # first peptide's fragments; the nearest neighbours are at
                # the insertion point and just before it.
                insert_at = np.searchsorted(frags_a, frags_b)
                last_a = len(frags_a) - 1
                num_shared = 0
                for pos, frag_b in zip(insert_at, frags_b):
                    below = frags_a[max(0, pos - 1)]
                    above = frags_a[min(pos, last_a)]
                    if (abs(below - frag_b) < fragment_mz_tol or
                            abs(above - frag_b) < fragment_mz_tol):
                        num_shared += 1
                max_shared = matching_fragments_threshold * min(
                    len(frags_a), len(frags_b))
                if num_shared < max_shared:
                    yield row_nums[first]
                    yield row_nums[second]
            second += 1