def isotopic_cluster(self, mz, charge=1, charge_carrier=PROTON,
                     truncate_after=TRUNCATE_AFTER, ignore_below=IGNORE_BELOW):
    """Build a theoretical isotopic pattern anchored at `mz` for `charge`,
    thresholded by cumulative signal and by minimum peak intensity.

    Parameters
    ----------
    mz : float
        The reference m/z used to derive the composition via :meth:`scale`
    charge : int, optional
        The reference charge state. Defaults to 1
    charge_carrier : float, optional
        The mass of the charge carrier. Defaults to `PROTON`
    truncate_after : float, optional
        The fraction of the theoretical signal to keep. Truncation is only
        applied when this is below 1.0. Defaults to `TRUNCATE_AFTER`
    ignore_below : float, optional
        Peaks whose intensity is below this number are omitted. Filtering is
        only applied when this is above 0. Defaults to `IGNORE_BELOW`

    Returns
    -------
    :class:`.TheoreticalIsotopicPattern`
        The generated and thresholded pattern
    """
    scaled_composition = self.scale(mz, charge, charge_carrier)
    peaks = isotopic_variants(scaled_composition, charge=charge)
    first_peak_mz = peaks[0].mz
    pattern = TheoreticalIsotopicPattern(peaks, first_peak_mz, 0)
    pattern.shift(mz)
    # Both thresholds are optional: a cutoff of 1.0 (or more) keeps every peak,
    # and a floor of 0 (or less) drops none.
    if truncate_after < 1.0:
        pattern.truncate_after(truncate_after)
    if ignore_below > 0:
        pattern.ignore_below(ignore_below)
    return pattern
def draw_tid(composition, charge, ax=None):
    """Draw the isotopic variants of `composition` at `charge` on an axes.

    When `ax` is None a new axes is created with ``plt.subplots(1)``. The peaks
    are drawn by `draw_peaklist`, after which the x-limits are widened by 0.5
    on each side. Returns the axes object handed back by `draw_peaklist`.
    """
    peaks = brainpy.isotopic_variants(composition, charge=charge)
    if ax is None:
        _, ax = plt.subplots(1)
    ax = draw_peaklist(peaks, ax=ax)
    left, right = ax.get_xlim()
    # Pad the view so the outermost peaks are not flush with the frame.
    ax.set_xlim(left - 0.5, right + 0.5)
    return ax
def make_tids(self):
    """Build one scaled theoretical isotopic pattern per (composition, charge).

    `self.compositions` and `self.charges` are walked in lockstep; each entry
    of `self.charges` is iterated as ``(charge, abundance)`` pairs. The charge
    is negated before it is passed to brainpy and before it is recorded.

    Returns
    -------
    tuple
        ``(tids, ions)`` where `tids` holds the patterns, each scaled with
        ``scale_raw(abundance * 100)``, and `ions` holds the matching
        ``(composition, -charge)`` tuples in the same order.
    """
    patterns = []
    ion_keys = []
    for composition, charge_states in zip(self.compositions, self.charges):
        for z, abundance in charge_states:
            signed_charge = -z
            peaks = brainpy.isotopic_variants(composition, charge=signed_charge)
            pattern = TheoreticalIsotopicPattern(peaks, peaks[0].mz)
            pattern.scale_raw(abundance * 100)
            patterns.append(pattern)
            ion_keys.append((composition, signed_charge))
    return patterns, ion_keys
def make_tids(self):
    """Build one scaled theoretical isotopic pattern per (composition, charge).

    `self.compositions` and `self.charges` are walked in lockstep; each entry
    of `self.charges` is iterated as ``(charge, abundance)`` pairs. The charge
    is negated before it is passed to brainpy and before it is recorded.

    Returns
    -------
    tuple
        ``(tids, ions)`` where `tids` holds the patterns, each scaled with
        ``scale_raw(abundance * 100)``, and `ions` holds the matching
        ``(composition, -charge)`` tuples in the same order.
    """
    patterns = []
    ion_keys = []
    for composition, charge_states in zip(self.compositions, self.charges):
        for z, abundance in charge_states:
            signed_charge = -z
            peaks = brainpy.isotopic_variants(composition, charge=signed_charge)
            pattern = TheoreticalIsotopicPattern(peaks, peaks[0].mz)
            pattern.scale_raw(abundance * 100)
            patterns.append(pattern)
            ion_keys.append((composition, signed_charge))
    return patterns, ion_keys
def test_neutral_mass(self):
    """Compare the neutral isotopic variants of a fixed composition to stored peaks.

    m/z and intensity are each checked to 3 decimal places. `zip` stops at the
    shorter sequence, so at most the four reference peaks are compared.
    """
    composition = {'H': 13, 'C': 8, 'O': 5, 'N': 1}
    observed = isotopic_variants(composition)
    expected = [
        Peak(mz=203.079373, intensity=0.901867, charge=0),
        Peak(mz=204.082545, intensity=0.084396, charge=0),
        Peak(mz=205.084190, intensity=0.012787, charge=0),
        Peak(mz=206.086971, intensity=0.000950, charge=0),
    ]
    for got, want in zip(observed, expected):
        self.assertAlmostEqual(got.mz, want.mz, places=3)
        self.assertAlmostEqual(got.intensity, want.intensity, places=3)
def test_neutral_mass(self):
    """Compare the neutral isotopic variants of a fixed composition to stored peaks.

    m/z and intensity are each checked to 3 decimal places. `zip` stops at the
    shorter sequence, so at most the four reference peaks are compared.
    """
    composition = {'H': 13, 'C': 8, 'O': 5, 'N': 1}
    observed = isotopic_variants(composition)
    expected = [
        Peak(mz=203.079373, intensity=0.901867, charge=0),
        Peak(mz=204.082545, intensity=0.084396, charge=0),
        Peak(mz=205.084190, intensity=0.012787, charge=0),
        Peak(mz=206.086971, intensity=0.000950, charge=0),
    ]
    for got, want in zip(observed, expected):
        self.assertAlmostEqual(got.mz, want.mz, places=3)
        self.assertAlmostEqual(got.intensity, want.intensity, places=3)
def isotopic_cluster(self, mz, charge=1, charge_carrier=PROTON,
                     truncate_after=0.95, ignore_below=0.0):
    """Build a thresholded theoretical isotopic pattern for `mz` at `charge`.

    The composition comes from ``self.scale(mz, charge, charge_carrier)``; its
    isotopic variants are wrapped in a `TheoreticalIsotopicPattern`, shifted
    with ``shift(mz, True)``, then optionally truncated and filtered.

    Parameters
    ----------
    mz : float
        The reference m/z passed to :meth:`scale` and to ``shift``
    charge : int, optional
        The charge state. Defaults to 1
    charge_carrier : float, optional
        The mass of the charge carrier. Defaults to `PROTON`
    truncate_after : float, optional
        Forwarded to ``truncate_after`` only when below 1.0. Defaults to 0.95
    ignore_below : float, optional
        Forwarded to ``ignore_below`` only when above 0. Defaults to 0.0

    Returns
    -------
    TheoreticalIsotopicPattern
        The shifted and thresholded pattern
    """
    scaled_composition = self.scale(mz, charge, charge_carrier)
    peaks = isotopic_variants(scaled_composition, charge=charge)
    pattern = TheoreticalIsotopicPattern(peaks)
    # NOTE: an earlier inline cumulative-intensity truncation was removed here;
    # thresholding is delegated to the pattern object's own methods below.
    pattern.shift(mz, True)
    if truncate_after < 1.0:
        pattern.truncate_after(truncate_after)
    if ignore_below > 0:
        pattern.ignore_below(ignore_below)
    return pattern
def generate_isotopic_pattern(self, charge, node_type=Unmodified):
    """Produce the theoretical isotopic pattern for this entity at `charge`.

    Without a known composition (``self.composition is None``) the pattern is
    requested from ``self.averagine.isotopic_cluster`` at the m/z computed from
    the chromatogram's neutral mass plus ``node_type.mass``.

    With a composition, the exact isotopic variants of
    ``self.composition + node_type.composition`` are generated and returned as
    a plain list, cut off after the first peak at which the running intensity
    total reaches 0.95.

    NOTE(review): the two paths return different kinds of object (a list vs.
    whatever `isotopic_cluster` returns) - confirm callers handle both.
    """
    if self.composition is None:
        return self.averagine.isotopic_cluster(
            mass_charge_ratio(
                self.chromatogram.neutral_mass + node_type.mass,
                charge,
                charge_carrier=self.charge_carrier),
            charge,
            charge_carrier=self.charge_carrier)

    peaks = isotopic_variants(
        self.composition + node_type.composition,
        charge=charge,
        charge_carrier=self.charge_carrier)
    kept = []
    cumulative = 0.
    for peak in peaks:
        kept.append(peak)
        cumulative += peak.intensity
        if cumulative >= 0.95:
            break
    return kept
def generate_isotopic_pattern(self, charge, node_type=Unmodified):
    """Produce the theoretical isotopic pattern for this entity at `charge`.

    Without a known composition (``self.composition is None``) the pattern is
    requested from ``self.averagine.isotopic_cluster`` at the m/z computed from
    the chromatogram's neutral mass plus ``node_type.mass``.

    With a composition, the exact isotopic variants of
    ``self.composition + node_type.composition`` are generated and returned as
    a plain list, cut off after the first peak at which the running intensity
    total reaches 0.95.

    NOTE(review): the two paths return different kinds of object (a list vs.
    whatever `isotopic_cluster` returns) - confirm callers handle both.
    """
    if self.composition is None:
        return self.averagine.isotopic_cluster(
            mass_charge_ratio(
                self.chromatogram.neutral_mass + node_type.mass,
                charge,
                charge_carrier=self.charge_carrier),
            charge,
            charge_carrier=self.charge_carrier)

    peaks = isotopic_variants(
        self.composition + node_type.composition,
        charge=charge,
        charge_carrier=self.charge_carrier)
    kept = []
    cumulative = 0.
    for peak in peaks:
        kept.append(peak)
        cumulative += peak.intensity
        if cumulative >= 0.95:
            break
    return kept
def isotopic_cluster(self, mz, charge=1, charge_carrier=PROTON,
                     truncate_after=0.95, ignore_below=0.0):
    """Build a theoretical isotopic pattern anchored at `mz` for `charge`,
    thresholded by cumulative signal and by minimum peak intensity.

    Parameters
    ----------
    mz : float
        The reference m/z used to derive the composition via :meth:`scale`
    charge : int, optional
        The reference charge state. Defaults to 1
    charge_carrier : float, optional
        The mass of the charge carrier. Defaults to the mass of a proton.
    truncate_after : float, optional
        The fraction of the theoretical signal to keep. Truncation is only
        applied when this is below 1.0. Defaults to 0.95, i.e. the first 95%
        of the signal
    ignore_below : float, optional
        Peaks whose intensity is below this number are omitted. Filtering is
        only applied when this is above 0. Defaults to 0.0

    Returns
    -------
    :class:`.TheoreticalIsotopicPattern`
        The generated and thresholded pattern
    """
    scaled_composition = self.scale(mz, charge, charge_carrier)
    peaks = isotopic_variants(scaled_composition, charge=charge)
    first_peak_mz = peaks[0].mz
    pattern = TheoreticalIsotopicPattern(peaks, first_peak_mz, 0)
    pattern.shift(mz)
    # Both thresholds are optional: a cutoff of 1.0 (or more) keeps every peak,
    # and a floor of 0 (or less) drops none.
    if truncate_after < 1.0:
        pattern.truncate_after(truncate_after)
    if ignore_below > 0:
        pattern.ignore_below(ignore_below)
    return pattern
def isotopic_cluster(self, mz, charge=1, charge_carrier=PROTON,
                     truncate_after=0.95, ignore_below=0.0):
    """Build a thresholded theoretical isotopic pattern for `mz` at `charge`.

    The composition comes from ``self.scale(mz, charge, charge_carrier)``; its
    isotopic variants are wrapped in a `TheoreticalIsotopicPattern`, shifted
    with ``shift(mz, True)``, then optionally truncated and filtered.

    Parameters
    ----------
    mz : float
        The reference m/z passed to :meth:`scale` and to ``shift``
    charge : int, optional
        The charge state. Defaults to 1
    charge_carrier : float, optional
        The mass of the charge carrier. Defaults to `PROTON`
    truncate_after : float, optional
        Forwarded to ``truncate_after`` only when below 1.0. Defaults to 0.95
    ignore_below : float, optional
        Forwarded to ``ignore_below`` only when above 0. Defaults to 0.0

    Returns
    -------
    TheoreticalIsotopicPattern
        The shifted and thresholded pattern
    """
    scaled_composition = self.scale(mz, charge, charge_carrier)
    peaks = isotopic_variants(scaled_composition, charge=charge)
    pattern = TheoreticalIsotopicPattern(peaks)
    # NOTE: an earlier inline cumulative-intensity truncation was removed here;
    # thresholding is delegated to the pattern object's own methods below.
    pattern.shift(mz, True)
    if truncate_after < 1.0:
        pattern.truncate_after(truncate_after)
    if ignore_below > 0:
        pattern.ignore_below(ignore_below)
    return pattern
def generate_isotopic_clusters_brainpy(gag, charge):
    """Return the isotopic variants of `gag` at the negated `charge`.

    `gag` is converted with `gag_to_formula`; the C, H, O, N and S attributes
    of the result form the composition handed to `isotopic_variants`, which is
    asked for `NUM_ISOTOPIC_PEAKS` peaks.
    """
    parsed = gag_to_formula(gag)
    element_counts = {
        symbol: getattr(parsed, symbol) for symbol in ('C', 'H', 'O', 'N', 'S')
    }
    return isotopic_variants(
        element_counts, n_peaks=NUM_ISOTOPIC_PEAKS, charge=-charge)
def brainpy_function(task_dict):
    """Score one precursor by correlating predicted isotopic patterns with MS1 data.

    Two predicted patterns are built: a "regular" one from the isotopic
    variants of a composition estimated from the observed precursor mass, and
    an "isostamp" one (the regular pattern passed through
    `isostamp_convolution` when `param.ISOSTAMP` is set, otherwise a copy of
    it). Each is scored against the observed data with `pearson_correlation`
    at integer shifts from -3 to 3, and the best score per pattern is reported.

    Parameters
    ----------
    task_dict : dict
        Must provide the keys 'idx', 'precursor_mz', 'precursor_charge',
        'obs_precursor_mass', 'base_peak_correlation' and
        'ms1_mz_intensity_df'.

    Returns
    -------
    list
        ``[idx, 0, 0, 0, 0]`` when the observed data is empty or sums to zero
        in the inspected windows; otherwise ``[idx, isostamp_propensity,
        regular_propensity, isostamp_precursor_mass, regular_precursor_mass,
        base_peak_correlation]``.
        NOTE(review): the early-exit lists have 5 elements while the normal
        result has 6 - confirm the caller tolerates both lengths.
    """
    # unpack input
    isostamp_idx = task_dict['idx']
    precursor_mz = task_dict['precursor_mz']
    precursor_charge = task_dict['precursor_charge']
    obs_precursor_mass = task_dict['obs_precursor_mass']
    base_peak_correlation = task_dict['base_peak_correlation']

    # extract info
    # pd_mode is the reciprocal of the charge; it is the unit (times MASS_H)
    # by which the patterns are shifted further down.
    pd_mode = 1. / precursor_charge
    mz_lower_bound = precursor_mz - param.PRECURSOR_MZ_WINDOW
    mz_upper_bound = precursor_mz + param.PRECURSOR_MZ_WINDOW
    data_md = ts_reindex_v2(task_dict['ms1_mz_intensity_df'], precision=1)

    # conditions that if met will return zero propensity
    if data_md.empty or data_md[mz_lower_bound:mz_upper_bound].sum() == 0:
        #logging.warning(f'{isostamp_idx} mz intensity spectrum is empty')
        return [isostamp_idx, 0, 0, 0, 0]
    # observed signal within +/- 2.5 * pd_mode of the precursor m/z
    match_intensity = data_md[(precursor_mz - 2.5 * pd_mode):(precursor_mz + 2.5 * pd_mode)]
    if match_intensity.sum() == 0:
        #logging.warning(f'{isostamp_idx} match_intensity is empty')
        return [isostamp_idx, 0, 0, 0, 0]

    # Brainpy predicts the "regular pattern" to compare against actual data
    # The averagine element counts are scaled by the mass ratio and truncated
    # to integers with int().
    fold_diff = obs_precursor_mass / get_averagine_mass()
    composition_estimate = {k: int(v * fold_diff) for k, v in get_averagine().items()}
    cluster = isotopic_variants(composition_estimate, charge=precursor_charge)
    # line profiler found that the first time accessing "cluster" takes ~16 ms per hit
    # despite that it is of type 'list'. explicitly cast to list
    cluster = list(cluster)
    regular_pattern = pd.Series({peak.mz: peak.intensity for peak in cluster}).sort_index().round(param.PRECISION)
    # remove peaks that are too low in intensity_array
    regular_pattern = regular_pattern[regular_pattern.values > (regular_pattern.max() * param.INTENSITY_MIN)]
    # re-anchor the index so the first remaining peak sits at precursor_mz
    regular_pattern.index = regular_pattern.index + precursor_mz - regular_pattern.index[0]
    if param.ISOSTAMP is not None:
        isostamp_pattern = isostamp_convolution(regular_pattern, precursor_charge, param.ISOSTAMP['element_type'], param.ISOSTAMP['element_count'], param.ISOSTAMP['ratio']).round(param.PRECISION)
    else:
        isostamp_pattern = regular_pattern.copy()
    # isostamp pattern
    isostamp_pattern = isostamp_pattern[isostamp_pattern.values > isostamp_pattern.max() * param.INTENSITY_MIN]
    # re-anchor so the THIRD remaining peak (position 2) sits at precursor_mz
    # NOTE(review): index[2] needs at least three peaks to survive the filter
    # above; with fewer this raises - confirm that cannot happen in practice.
    isostamp_pattern.index = isostamp_pattern.index + precursor_mz - isostamp_pattern.index[2]
    # scaling constant of the relative maximum peak
    regular_pattern = regular_pattern * (match_intensity.max() / regular_pattern.max())
    isostamp_pattern = isostamp_pattern * (match_intensity.max() / isostamp_pattern.max())
    # get the diagnostic m/z for later scoring
    # regular diagnostics: the first two isostamp offsets (relative to the
    # isostamp anchor) transplanted onto the regular anchor, then the regular
    # pattern's own m/z values
    regular_diag_mz = np.concatenate([isostamp_pattern.index[0:2] - isostamp_pattern.index[2] + regular_pattern.index[0], regular_pattern.index])
    isostamp_diag_mz = isostamp_pattern.index
    # test off-by-one hypothesis shift m/z array to +/- some pd_mode(s). report the best one
    shift_array = np.arange(-3, 4)
    regular_propensity_array = pd.Series(0, index=shift_array)
    isostamp_propensity_array = regular_propensity_array.copy()
    for shift in regular_propensity_array.index:
        # regular pattern: shift, keep positive peaks inside the precursor window, score
        regular_mz_array = np.array(regular_pattern.index + shift * pd_mode * MASS_H)
        regular_mask = ((regular_mz_array >= mz_lower_bound) & (regular_mz_array <= mz_upper_bound) & (regular_pattern.values > 0))
        regular_mz_int = pd.Series(regular_pattern.values, index=regular_mz_array)[regular_mask]
        regular_diag_mz_shifted = regular_diag_mz + shift * pd_mode * MASS_H
        regular_propensity_array.loc[shift] = pearson_correlation(ts_reindex_v2(regular_mz_int, 1), data_md, regular_diag_mz_shifted)
        # isostamp pattern: same procedure
        isostamp_mz_array = np.array(isostamp_pattern.index + shift * pd_mode * MASS_H)
        isostamp_mask = ((isostamp_mz_array >= mz_lower_bound) & (isostamp_mz_array <= mz_upper_bound) & (isostamp_pattern.values > 0))
        isostamp_mz_int = pd.Series(isostamp_pattern.values, index=isostamp_mz_array)[isostamp_mask]
        isostamp_diag_mz_shifted = isostamp_diag_mz + shift * pd_mode * MASS_H
        isostamp_propensity_array.loc[shift] = pearson_correlation(ts_reindex_v2(isostamp_mz_int, 1), data_md, isostamp_diag_mz_shifted)
    # The propensity arrays are indexed by the 7 values of shift_array, so the
    # `.empty` branches below are not reachable as written.
    # A non-zero best shift is only accepted when the best score exceeds
    # param.PRECURSOR_PROPENSITY_FLOOR; the best score is reported either way.
    if regular_propensity_array.empty:
        regular_propensity = 0
        regular_best_shift = 0
    elif regular_propensity_array.max() > param.PRECURSOR_PROPENSITY_FLOOR:
        regular_propensity = regular_propensity_array.max()
        regular_best_shift = regular_propensity_array.idxmax()
    else:
        regular_propensity = regular_propensity_array.max()
        regular_best_shift = 0
    # mass from the anchor m/z (first regular peak), corrected by the accepted shift
    regular_precursor_mass = (regular_pattern.index[0] - MASS_H + regular_best_shift * pd_mode * MASS_H) * precursor_charge
    if isostamp_propensity_array.empty:
        isostamp_propensity = 0
        isostamp_best_shift = 0
    elif isostamp_propensity_array.max() > param.PRECURSOR_PROPENSITY_FLOOR:
        isostamp_propensity = isostamp_propensity_array.max()
        isostamp_best_shift = isostamp_propensity_array.idxmax()
    else:
        isostamp_propensity = isostamp_propensity_array.max()
        isostamp_best_shift = 0
    # mass from the anchor m/z (third isostamp peak), corrected by the accepted shift
    isostamp_precursor_mass = (isostamp_pattern.index[2] - MASS_H + isostamp_best_shift * pd_mode * MASS_H) * precursor_charge
    return [isostamp_idx, isostamp_propensity, regular_propensity, isostamp_precursor_mass, regular_precursor_mass, base_peak_correlation]
def _peptide_composition(peptide):
    """Count the atoms of every residue of `peptide` using `std_aa_comp`."""
    return Counter(chain.from_iterable(std_aa_comp[aa].elements() for aa in peptide))


def _add_delta_chem(comps, delta_chem, count):
    """Add `count` copies of a modification's (H, C, N, O) deltas to `comps` in place."""
    for position, element in enumerate(("H", "C", "N", "O")):
        comps[element] += delta_chem[position] * count


def _add_tagged_mods(comps, mod_peptide, ptm_map):
    """Apply every `ptm_map` entry whose tag occurs in `mod_peptide`, once per occurrence."""
    for ptm in ptm_map:
        ptm_c = mod_peptide.count(ptm)
        if ptm_c >= 1:
            _add_delta_chem(comps, ptm_map[ptm]['deltaChem'], ptm_c)


def build_matched_modification(data, ptm_map, tol, moff_pride_flag, h_rt_w):
    """
    Compute the theoretical isotopic envelope of every PSM, taking its
    modifications into account.

    For each row the elemental composition of the peptide is assembled
    (residues + modifications + one water), the first three isotopic peaks are
    generated at the charge ``int(round(mass / mz))``, and one extra row is
    appended whose m/z mirrors the first isotope spacing on the other side of
    the first peak and whose ``ratio_iso`` is -1.

    :param data: table iterated with ``itertuples()``; rows must expose
        ``peptide``, ``mod_peptide``, ``mass``, ``mz`` and ``rt``
    :param ptm_map: mapping from modification tag to a record whose
        ``'deltaChem'`` entry holds the H, C, N, O deltas at positions 0..3
    :param tol: tolerance, stored as ``int(tol)`` in the ``tol`` column
    :param moff_pride_flag: when true ``rt`` is used as-is (minutes);
        otherwise it is divided by 60 (seconds to minutes)
    :param h_rt_w: half width of the retention-time window; ``ts``/``te`` are
        the (converted) rt minus/plus this value
    :return: DataFrame with one block of four rows per input row, after
        ``reset_index`` (so the per-block row position is kept in ``index``)
    """
    # The seed frame fixes the column order of the result and guarantees the
    # expected columns exist even when `data` has no rows.
    frames = [pd.DataFrame(
        columns=['peptide', 'mz', 'ratio_iso', 'tol', 'rt', 'matched', 'ts', 'te'])]

    # Sequence tag style: MQ sequences use (mod_tag), PS sequences use <mod_tag>.
    # The MQ path is currently switched off but kept for reference.
    mq_mod_flag = False

    for row in data.itertuples():
        comps = _peptide_composition(row.peptide)
        if mq_mod_flag:
            if '(' in row.mod_peptide:
                _add_tagged_mods(comps, row.mod_peptide, ptm_map)
                # add eventually fixed mod
                fix_mod_count = row.mod_peptide.count('C')
            else:
                # only fixed mod
                fix_mod_count = row.peptide.count('C')
            if fix_mod_count > 0:
                _add_delta_chem(comps, ptm_map['cC']['deltaChem'], fix_mod_count)
        elif '<' in row.mod_peptide or '-' in row.mod_peptide:
            # fixed and variable mod are both in the sequence; only scan the
            # tags when the sequence carries any modification marker.
            # for the future use the tag_mod_sequence_delimiter from moFF_setting
            _add_tagged_mods(comps, row.mod_peptide, ptm_map)
        # One water for the peptide termini.
        comps["H"] += 2
        comps["O"] += 1

        theoretical_isotopic_cluster = isotopic_variants(
            comps, charge=int(round(row.mass / float(row.mz))), npeaks=3)
        mz_iso = [peak.mz for peak in theoretical_isotopic_cluster]
        ratio_iso = [peak.intensity for peak in theoretical_isotopic_cluster]
        # Extra row: first peak displaced by (first - second), i.e. one isotope
        # spacing on the far side of the first peak, marked with ratio -1.
        delta = mz_iso[0] - mz_iso[1]
        mz_iso.append(mz_iso[0] + delta)
        ratio_iso.append(-1)

        isotopic_df = pd.DataFrame({'mz': mz_iso, 'ratio_iso': ratio_iso})
        isotopic_df.loc[:, 'exp_mz'] = row.mz
        isotopic_df.loc[:, 'peptide'] = row.mod_peptide
        isotopic_df.loc[:, 'tol'] = int(tol)
        isotopic_df.loc[:, 'rt'] = row.rt
        isotopic_df.loc[:, 'matched'] = 1
        if moff_pride_flag:
            # moffpridedata rt is in minutes
            isotopic_df['ts'] = (row.rt) - h_rt_w
            isotopic_df['te'] = (row.rt) + h_rt_w
        else:
            # not moffpridedata rt in second
            isotopic_df['ts'] = (row.rt / 60) - h_rt_w
            isotopic_df['te'] = (row.rt / 60) + h_rt_w
        frames.append(isotopic_df)

    # Concatenate once: growing the frame inside the loop re-copies every
    # previously accumulated row on each iteration.
    all_isotope_df = pd.concat(frames, join='outer', axis=0, sort=False)
    all_isotope_df.reset_index(inplace=True)
    return all_isotope_df
def _peptide_composition(peptide):
    """Count the atoms of every residue of `peptide` using `std_aa_comp`."""
    return Counter(chain.from_iterable(std_aa_comp[aa].elements() for aa in peptide))


def _add_delta_chem(comps, delta_chem, count):
    """Add `count` copies of a modification's (H, C, N, O) deltas to `comps` in place."""
    for position, element in enumerate(("H", "C", "N", "O")):
        comps[element] += delta_chem[position] * count


def _add_tagged_mods(comps, mod_peptide, ptm_map):
    """Apply every `ptm_map` entry whose tag occurs in `mod_peptide`, once per occurrence."""
    for ptm in ptm_map:
        ptm_c = mod_peptide.count(ptm)
        if ptm_c >= 1:
            _add_delta_chem(comps, ptm_map[ptm]['deltaChem'], ptm_c)


def build_matched_modification(data, ptm_map, tol, moff_pride_flag, h_rt_w):
    """
    Compute the theoretical isotopic envelope of every PSM, taking its
    modifications into account.

    For each row the elemental composition of the peptide is assembled
    (residues + modifications + one water), the first three isotopic peaks are
    generated at the charge ``int(round(mass / mz))``, and one extra row is
    appended whose m/z mirrors the first isotope spacing on the other side of
    the first peak and whose ``ratio_iso`` is -1.

    :param data: table iterated with ``itertuples()``; rows must expose
        ``peptide``, ``mod_peptide``, ``mass``, ``mz`` and ``rt``
    :param ptm_map: mapping from modification tag to a record whose
        ``'deltaChem'`` entry holds the H, C, N, O deltas at positions 0..3
    :param tol: tolerance, stored as ``int(tol)`` in the ``tol`` column
    :param moff_pride_flag: when true ``rt`` is used as-is (minutes);
        otherwise it is divided by 60 (seconds to minutes)
    :param h_rt_w: half width of the retention-time window; ``ts``/``te`` are
        the (converted) rt minus/plus this value
    :return: DataFrame with one block of four rows per input row, after
        ``reset_index`` (so the per-block row position is kept in ``index``)
    """
    # The seed frame fixes the column order of the result and guarantees the
    # expected columns exist even when `data` has no rows.
    frames = [pd.DataFrame(
        columns=['peptide', 'mz', 'ratio_iso', 'tol', 'rt', 'matched', 'ts', 'te'])]

    # Sequence tag style: MQ sequences use (mod_tag), PS sequences use <mod_tag>.
    # The MQ path is currently switched off but kept for reference.
    mq_mod_flag = False

    for row in data.itertuples():
        comps = _peptide_composition(row.peptide)
        if mq_mod_flag:
            if '(' in row.mod_peptide:
                _add_tagged_mods(comps, row.mod_peptide, ptm_map)
                # add eventually fixed mod
                fix_mod_count = row.mod_peptide.count('C')
            else:
                # only fixed mod
                fix_mod_count = row.peptide.count('C')
            if fix_mod_count > 0:
                _add_delta_chem(comps, ptm_map['cC']['deltaChem'], fix_mod_count)
        elif '<' in row.mod_peptide or '-' in row.mod_peptide:
            # fixed and variable mod are both in the sequence; only scan the
            # tags when the sequence carries any modification marker.
            # for the future use the tag_mod_sequence_delimiter from moFF_setting
            _add_tagged_mods(comps, row.mod_peptide, ptm_map)
        # One water for the peptide termini.
        comps["H"] += 2
        comps["O"] += 1

        theoretical_isotopic_cluster = isotopic_variants(
            comps, charge=int(round(row.mass / float(row.mz))), npeaks=3)
        mz_iso = [peak.mz for peak in theoretical_isotopic_cluster]
        ratio_iso = [peak.intensity for peak in theoretical_isotopic_cluster]
        # Extra row: first peak displaced by (first - second), i.e. one isotope
        # spacing on the far side of the first peak, marked with ratio -1.
        delta = mz_iso[0] - mz_iso[1]
        mz_iso.append(mz_iso[0] + delta)
        ratio_iso.append(-1)

        isotopic_df = pd.DataFrame({'mz': mz_iso, 'ratio_iso': ratio_iso})
        isotopic_df.loc[:, 'exp_mz'] = row.mz
        isotopic_df.loc[:, 'peptide'] = row.mod_peptide
        isotopic_df.loc[:, 'tol'] = int(tol)
        isotopic_df.loc[:, 'rt'] = row.rt
        isotopic_df.loc[:, 'matched'] = 1
        if moff_pride_flag:
            # moffpridedata rt is in minutes
            isotopic_df['ts'] = (row.rt) - h_rt_w
            isotopic_df['te'] = (row.rt) + h_rt_w
        else:
            # not moffpridedata rt in second
            isotopic_df['ts'] = (row.rt / 60) - h_rt_w
            isotopic_df['te'] = (row.rt / 60) + h_rt_w
        frames.append(isotopic_df)

    # Concatenate once: growing the frame inside the loop re-copies every
    # previously accumulated row on each iteration.
    all_isotope_df = pd.concat(frames, join='outer', axis=0, sort=False)
    all_isotope_df.reset_index(inplace=True)
    return all_isotope_df