def isotopic_cluster(self, mz, charge=1, charge_carrier=PROTON, truncate_after=TRUNCATE_AFTER,
                     ignore_below=IGNORE_BELOW):
    """Build the thresholded theoretical isotopic pattern expected at ``mz``.

    The composition returned by :meth:`scale` is expanded with
    :func:`isotopic_variants`, wrapped in a
    :class:`.TheoreticalIsotopicPattern`, shifted to ``mz`` and then
    optionally truncated and filtered.

    Parameters
    ----------
    mz : float
        The reference m/z used to calculate the neutral mass to interpolate from.
    charge : int, optional
        The reference charge state used to calculate the neutral mass. Defaults to 1.
    charge_carrier : float, optional
        The mass of the charge carrier. Defaults to ``PROTON``.
    truncate_after : float, optional
        The fraction of the signal in the theoretical isotopic pattern to keep.
        Only applied when it is below 1.0. Defaults to ``TRUNCATE_AFTER``.
    ignore_below : float, optional
        Omit theoretical peaks whose intensity is below this number.
        Only applied when it is greater than 0. Defaults to ``IGNORE_BELOW``.

    Returns
    -------
    :class:`.TheoreticalIsotopicPattern`
        The generated and thresholded pattern.
    """
    scaled_composition = self.scale(mz, charge, charge_carrier)
    variants = isotopic_variants(scaled_composition, charge=charge)
    # The first generated peak's m/z is passed as the pattern's second argument.
    pattern = TheoreticalIsotopicPattern(variants, variants[0].mz, 0)
    pattern.shift(mz)
    if truncate_after < 1.0:
        pattern.truncate_after(truncate_after)
    if ignore_below > 0:
        pattern.ignore_below(ignore_below)
    return pattern
Exemple #2
0
 def draw_tid(composition, charge, ax=None):
     """Draw the theoretical isotopic pattern of ``composition`` at ``charge``.

     A new axes object is created with ``plt.subplots`` when ``ax`` is not
     supplied. The x-limits chosen by ``draw_peaklist`` are widened by 0.5 on
     each side, and the axes object is returned.
     """
     peaks = brainpy.isotopic_variants(composition, charge=charge)
     if ax is None:
         # The figure handle is not needed; only the axes are used.
         _, ax = plt.subplots(1)
     ax = draw_peaklist(peaks, ax=ax)
     x_min, x_max = ax.get_xlim()
     ax.set_xlim(x_min - 0.5, x_max + 0.5)
     return ax
 def make_tids(self):
     """Build one scaled theoretical pattern per (composition, charge) pair.

     ``self.compositions`` and ``self.charges`` are walked in lockstep; each
     entry of ``self.charges`` is an iterable of ``(charge, abundance)``
     pairs. The charge is negated before the pattern is generated, and each
     pattern is rescaled by ``abundance * 100``.

     Returns a ``(patterns, ions)`` tuple of parallel lists, where every ion
     is ``(composition, -charge)``.
     """
     patterns, ion_keys = [], []
     for composition, charge_states in zip(self.compositions, self.charges):
         for magnitude, abundance in charge_states:
             signed_charge = -magnitude
             peaks = brainpy.isotopic_variants(composition, charge=signed_charge)
             pattern = TheoreticalIsotopicPattern(peaks, peaks[0].mz)
             pattern.scale_raw(abundance * 100)
             patterns.append(pattern)
             ion_keys.append((composition, signed_charge))
     return patterns, ion_keys
 def make_tids(self):
     """Generate scaled theoretical isotopic patterns for every configured ion.

     Pairs each composition in ``self.compositions`` with its list of
     ``(charge, abundance)`` tuples from ``self.charges``. Patterns are
     generated at the negated charge and scaled by ``abundance * 100``.

     Returns two parallel lists: the patterns and the matching
     ``(composition, -charge)`` keys.
     """
     tid_list = []
     ion_list = []
     paired = zip(self.compositions, self.charges)
     for comp, charge_table in paired:
         for z, rel_abundance in charge_table:
             negative_z = -z
             variants = brainpy.isotopic_variants(comp, charge=negative_z)
             envelope = TheoreticalIsotopicPattern(variants, variants[0].mz)
             envelope.scale_raw(rel_abundance * 100)
             tid_list.append(envelope)
             ion_list.append((comp, negative_z))
     return tid_list, ion_list
    def test_neutral_mass(self):
        """Check the neutral isotopic distribution of HexNAc against reference peaks.

        m/z and intensity are each compared to 3 decimal places for the
        leading peaks of the generated distribution.
        """
        hexnac = {'H': 13, 'C': 8, 'O': 5, 'N': 1}
        generated = isotopic_variants(hexnac)

        # (m/z, intensity) of the expected peaks, all at charge 0.
        expected_values = (
            (203.079373, 0.901867),
            (204.082545, 0.084396),
            (205.084190, 0.012787),
            (206.086971, 0.000950),
        )
        reference = [
            Peak(mz=mz, intensity=intensity, charge=0)
            for mz, intensity in expected_values
        ]
        for observed, expected in zip(generated, reference):
            self.assertAlmostEqual(observed.mz, expected.mz, 3)
            self.assertAlmostEqual(observed.intensity, expected.intensity, 3)
    def test_neutral_mass(self):
        """Verify ``isotopic_variants`` on HexNAc with no charge specified.

        The leading generated peaks must agree with the reference m/z and
        intensity values to 3 decimal places.
        """
        composition = {'H': 13, 'C': 8, 'O': 5, 'N': 1}
        dist = isotopic_variants(composition)

        reference_mzs = [203.079373, 204.082545, 205.084190, 206.086971]
        reference_intensities = [0.901867, 0.084396, 0.012787, 0.000950]
        reference = [
            Peak(mz=ref_mz, intensity=ref_intensity, charge=0)
            for ref_mz, ref_intensity in zip(reference_mzs, reference_intensities)
        ]
        for got, want in zip(dist, reference):
            self.assertAlmostEqual(got.mz, want.mz, 3)
            self.assertAlmostEqual(got.intensity, want.intensity, 3)
 def isotopic_cluster(self, mz, charge=1, charge_carrier=PROTON, truncate_after=0.95, ignore_below=0.0):
     """Generate the thresholded theoretical isotopic pattern for ``mz`` at ``charge``.

     The composition from ``self.scale`` is expanded with
     ``isotopic_variants`` and wrapped in a ``TheoreticalIsotopicPattern``,
     which is shifted to ``mz``. ``truncate_after`` is applied only when it is
     below 1.0 and ``ignore_below`` only when it is greater than 0.
     """
     scaled_composition = self.scale(mz, charge, charge_carrier)
     variants = isotopic_variants(scaled_composition, charge=charge)
     pattern = TheoreticalIsotopicPattern(variants)
     pattern.shift(mz, True)
     if truncate_after < 1.0:
         pattern.truncate_after(truncate_after)
     if ignore_below > 0:
         pattern.ignore_below(ignore_below)
     return pattern
 def generate_isotopic_pattern(self, charge, node_type=Unmodified):
     """Return the theoretical isotopic pattern for this entity at ``charge``.

     When ``self.composition`` is ``None`` the pattern is estimated by
     ``self.averagine.isotopic_cluster`` from the chromatogram's neutral mass
     plus ``node_type.mass``. Otherwise it is computed from the composition
     plus ``node_type.composition``, and a plain list of the leading peaks is
     returned, cut off once their cumulative intensity reaches 0.95.
     """
     if self.composition is None:
         # No explicit composition: fall back to the averagine estimate.
         reference_mz = mass_charge_ratio(
             self.chromatogram.neutral_mass + node_type.mass,
             charge, charge_carrier=self.charge_carrier)
         return self.averagine.isotopic_cluster(
             reference_mz,
             charge,
             charge_carrier=self.charge_carrier)

     variants = isotopic_variants(
         self.composition + node_type.composition,
         charge=charge, charge_carrier=self.charge_carrier)
     kept = []
     cumulative = 0.
     for peak in variants:
         kept.append(peak)
         cumulative += peak.intensity
         if cumulative >= 0.95:
             break
     return kept
Exemple #9
0
 def generate_isotopic_pattern(self, charge, node_type=Unmodified):
     """Produce the theoretical isotopic pattern at ``charge`` for ``node_type``.

     With a known ``self.composition``, the exact pattern of the composition
     plus ``node_type.composition`` is generated, and the leading peaks are
     returned as a list that stops once their summed intensity reaches 0.95.
     Without one, the result of ``self.averagine.isotopic_cluster`` is
     returned, evaluated at the m/z of the chromatogram's neutral mass plus
     ``node_type.mass``.
     """
     has_composition = self.composition is not None
     if not has_composition:
         target_mz = mass_charge_ratio(
             self.chromatogram.neutral_mass + node_type.mass,
             charge,
             charge_carrier=self.charge_carrier)
         averagine_pattern = self.averagine.isotopic_cluster(
             target_mz, charge, charge_carrier=self.charge_carrier)
         return averagine_pattern

     full_composition = self.composition + node_type.composition
     peaks = isotopic_variants(full_composition,
                               charge=charge,
                               charge_carrier=self.charge_carrier)
     selected = []
     running_total = 0.
     for peak in peaks:
         selected.append(peak)
         running_total += peak.intensity
         if running_total >= 0.95:
             break
     return selected
Exemple #10
0
    def isotopic_cluster(self,
                         mz,
                         charge=1,
                         charge_carrier=PROTON,
                         truncate_after=0.95,
                         ignore_below=0.0):
        """Build the thresholded theoretical isotopic pattern expected at ``mz``.

        Parameters
        ----------
        mz : float
            The reference m/z used to calculate the neutral mass to interpolate from.
        charge : int, optional
            The reference charge state used to calculate the neutral mass. Defaults to 1.
        charge_carrier : float, optional
            The mass of the charge carrier. Defaults to ``PROTON``.
        truncate_after : float, optional
            The fraction of the signal in the theoretical isotopic pattern to keep.
            Only applied when it is below 1.0. Defaults to 0.95.
        ignore_below : float, optional
            Omit theoretical peaks whose intensity is below this number.
            Only applied when it is greater than 0. Defaults to 0.0.

        Returns
        -------
        :class:`.TheoreticalIsotopicPattern`
            The generated and thresholded pattern.
        """
        scaled_composition = self.scale(mz, charge, charge_carrier)
        variants = isotopic_variants(scaled_composition, charge=charge)
        first_peak_mz = variants[0].mz
        pattern = TheoreticalIsotopicPattern(variants, first_peak_mz, 0)
        pattern.shift(mz)
        if truncate_after < 1.0:
            pattern.truncate_after(truncate_after)
        if ignore_below > 0:
            pattern.ignore_below(ignore_below)
        return pattern
Exemple #11
0
 def isotopic_cluster(self,
                      mz,
                      charge=1,
                      charge_carrier=PROTON,
                      truncate_after=0.95,
                      ignore_below=0.0):
     """Return the theoretical isotopic pattern for ``mz``/``charge`` after thresholding.

     The composition from ``self.scale`` is expanded by ``isotopic_variants``
     into a ``TheoreticalIsotopicPattern`` that is shifted to ``mz``. The
     pattern is truncated only when ``truncate_after`` is below 1.0, and low
     peaks are dropped only when ``ignore_below`` is greater than 0.
     """
     estimated_composition = self.scale(mz, charge, charge_carrier)
     peaks = isotopic_variants(estimated_composition, charge=charge)
     envelope = TheoreticalIsotopicPattern(peaks)
     envelope.shift(mz, True)
     if truncate_after < 1.0:
         envelope.truncate_after(truncate_after)
     if ignore_below > 0:
         envelope.ignore_below(ignore_below)
     return envelope
Exemple #12
0
def generate_isotopic_clusters_brainpy(gag, charge):
    """Return the theoretical isotopic cluster of ``gag`` at the negated ``charge``.

    The C, H, O, N and S counts are read from the formula object produced by
    ``gag_to_formula`` and passed to ``isotopic_variants`` together with
    ``NUM_ISOTOPIC_PEAKS``.
    """
    formula_obj = gag_to_formula(gag)
    # Same key order as the original literal: C, H, O, N, S.
    element_counts = {
        'C': formula_obj.C,
        'H': formula_obj.H,
        'O': formula_obj.O,
        'N': formula_obj.N,
        'S': formula_obj.S,
    }
    negative_charge = -charge
    return isotopic_variants(element_counts, n_peaks=NUM_ISOTOPIC_PEAKS, charge=negative_charge)
Exemple #13
0
    def brainpy_function(task_dict):
        """Score how well a precursor's MS1 signal matches a regular and an "isostamp" isotopic pattern.

        ``task_dict`` must provide the keys ``'idx'``, ``'precursor_mz'``,
        ``'precursor_charge'``, ``'obs_precursor_mass'``,
        ``'base_peak_correlation'`` and ``'ms1_mz_intensity_df'``.

        A theoretical pattern is predicted from an averagine-scaled composition,
        optionally convolved into an isostamp pattern (when ``param.ISOSTAMP``
        is set), and both patterns are correlated against the observed data at
        integer shifts from -3 to +3.

        Returns
        -------
        list
            On the normal path:
            ``[idx, isostamp_propensity, regular_propensity,
            isostamp_precursor_mass, regular_precursor_mass,
            base_peak_correlation]``.
            On the two early exits: ``[idx, 0, 0, 0, 0]``.

        NOTE(review): the early exits return 5 elements while the normal path
        returns 6 (``base_peak_correlation`` is missing) -- confirm callers
        cope with both shapes.
        """
        # unpack input
        isostamp_idx = task_dict['idx']
        precursor_mz = task_dict['precursor_mz']
        precursor_charge = task_dict['precursor_charge']
        obs_precursor_mass = task_dict['obs_precursor_mass']
        base_peak_correlation = task_dict['base_peak_correlation']

        # extract info
        # pd_mode is the reciprocal of the charge; multiplied by MASS_H below it
        # gives the m/z step used for the shift hypotheses.
        pd_mode = 1. / precursor_charge
        mz_lower_bound = precursor_mz - param.PRECURSOR_MZ_WINDOW
        mz_upper_bound = precursor_mz + param.PRECURSOR_MZ_WINDOW

        # presumably re-indexes the observed spectrum onto a regular m/z grid;
        # verify against ts_reindex_v2
        data_md = ts_reindex_v2(task_dict['ms1_mz_intensity_df'], precision=1)

        # conditions that if met will return zero propensity
        if data_md.empty or data_md[mz_lower_bound:mz_upper_bound].sum() == 0:
            #logging.warning(f'{isostamp_idx} mz intensity spectrum is empty')
            return [isostamp_idx, 0, 0, 0, 0]
        # observed signal within +/- 2.5 charge-spacings of the precursor m/z
        match_intensity = data_md[(precursor_mz - 2.5 * pd_mode):(precursor_mz + 2.5 * pd_mode)]
        if match_intensity.sum() == 0:
            #logging.warning(f'{isostamp_idx} match_intensity is empty')
            return [isostamp_idx, 0, 0, 0, 0]

        # Brainpy predicts the "regular pattern" to compare against actual data
        # Scale the averagine unit composition by observed mass / averagine mass,
        # truncating each element count to an int.
        fold_diff = obs_precursor_mass / get_averagine_mass()
        composition_estimate = {k: int(v * fold_diff) for k, v in get_averagine().items()}
        cluster = isotopic_variants(composition_estimate, charge=precursor_charge)
        # line profiler found that the first time accessing "cluster" takes ~16 ms per hit
        # despite that it is of type 'list'. explicitly cast to list
        cluster = list(cluster)
        regular_pattern = pd.Series({peak.mz: peak.intensity for peak in cluster}).sort_index().round(param.PRECISION)

        # remove peaks that are too low in intensity_array
        regular_pattern = regular_pattern[regular_pattern.values > (regular_pattern.max() * param.INTENSITY_MIN)]
        # re-anchor the pattern so its first remaining peak sits at precursor_mz
        regular_pattern.index = regular_pattern.index + precursor_mz - regular_pattern.index[0]
        if param.ISOSTAMP is not None:
            isostamp_pattern = isostamp_convolution(regular_pattern, precursor_charge, param.ISOSTAMP['element_type'],
                                                    param.ISOSTAMP['element_count'], param.ISOSTAMP['ratio']).round(param.PRECISION)
        else:
            isostamp_pattern = regular_pattern.copy()

        # isostamp pattern
        isostamp_pattern = isostamp_pattern[isostamp_pattern.values > isostamp_pattern.max() * param.INTENSITY_MIN]
        # re-anchor so the THIRD remaining peak (index[2]) sits at precursor_mz
        # NOTE(review): index[2] raises IndexError if fewer than 3 peaks survive
        # the intensity filter -- confirm this cannot happen in practice.
        isostamp_pattern.index = isostamp_pattern.index + precursor_mz - isostamp_pattern.index[2]

        # scaling constant of the relative maximum peak
        # (both patterns are rescaled so their maximum equals the observed maximum)
        regular_pattern = regular_pattern * (match_intensity.max() / regular_pattern.max())
        isostamp_pattern = isostamp_pattern * (match_intensity.max() / isostamp_pattern.max())

        # get the diagnostic m/z for later scoring
        # regular diagnostics = the first two isostamp positions, re-expressed
        # relative to the regular pattern's first peak, followed by every
        # regular peak position
        regular_diag_mz = np.concatenate([isostamp_pattern.index[0:2] - isostamp_pattern.index[2] + regular_pattern.index[0], regular_pattern.index])
        isostamp_diag_mz = isostamp_pattern.index

        # test off-by-one hypothesis shift m/z array to +/- some pd_mode(s). report the best one
        shift_array = np.arange(-3, 4)
        # NOTE(review): the series is created from the integer 0, so it starts
        # with an integer dtype while correlations are assigned into it --
        # confirm the dtype handling is as intended.
        regular_propensity_array = pd.Series(0, index=shift_array)
        isostamp_propensity_array = regular_propensity_array.copy()
        for shift in regular_propensity_array.index:
            # shift the regular pattern and keep only in-window, positive-intensity peaks
            regular_mz_array = np.array(regular_pattern.index + shift * pd_mode * MASS_H)
            regular_mask = ((regular_mz_array >= mz_lower_bound)
                            & (regular_mz_array <= mz_upper_bound)
                            & (regular_pattern.values > 0))
            regular_mz_int = pd.Series(regular_pattern.values, index=regular_mz_array)[regular_mask]
            regular_diag_mz_shifted = regular_diag_mz + shift * pd_mode * MASS_H
            regular_propensity_array.loc[shift] = pearson_correlation(ts_reindex_v2(regular_mz_int, 1),
                                                               data_md, regular_diag_mz_shifted)

            # same procedure for the isostamp pattern
            isostamp_mz_array = np.array(isostamp_pattern.index + shift * pd_mode * MASS_H)
            isostamp_mask = ((isostamp_mz_array >= mz_lower_bound)
                             & (isostamp_mz_array <= mz_upper_bound)
                             & (isostamp_pattern.values > 0))
            isostamp_mz_int = pd.Series(isostamp_pattern.values, index=isostamp_mz_array)[isostamp_mask]
            isostamp_diag_mz_shifted = isostamp_diag_mz + shift * pd_mode * MASS_H
            isostamp_propensity_array.loc[shift] = pearson_correlation(ts_reindex_v2(isostamp_mz_int, 1),
                                                              data_md, isostamp_diag_mz_shifted)

        # Pick the best shift only if its score clears the configured floor;
        # otherwise report the maximum score but keep shift 0.
        # NOTE(review): the propensity arrays always have 7 entries
        # (np.arange(-3, 4)), so the `.empty` branches look unreachable.
        if regular_propensity_array.empty:
            regular_propensity = 0
            regular_best_shift = 0
        elif regular_propensity_array.max() > param.PRECURSOR_PROPENSITY_FLOOR:
            regular_propensity = regular_propensity_array.max()
            regular_best_shift = regular_propensity_array.idxmax()
        else:
            regular_propensity = regular_propensity_array.max()
            regular_best_shift = 0
        # mass derived from the first regular peak, adjusted by the chosen shift
        regular_precursor_mass = (regular_pattern.index[0] - MASS_H + regular_best_shift * pd_mode * MASS_H) * precursor_charge

        if isostamp_propensity_array.empty:
            isostamp_propensity = 0
            isostamp_best_shift = 0
        elif isostamp_propensity_array.max() > param.PRECURSOR_PROPENSITY_FLOOR:
            isostamp_propensity = isostamp_propensity_array.max()
            isostamp_best_shift = isostamp_propensity_array.idxmax()
        else:
            isostamp_propensity = isostamp_propensity_array.max()
            isostamp_best_shift = 0
        # mass derived from the third isostamp peak (the anchor), adjusted by the chosen shift
        isostamp_precursor_mass = (isostamp_pattern.index[2] - MASS_H + isostamp_best_shift * pd_mode * MASS_H) * precursor_charge

        return [isostamp_idx, isostamp_propensity, regular_propensity, isostamp_precursor_mass,
                regular_precursor_mass, base_peak_correlation]
Exemple #14
0
def _add_delta_chem(comps, delta_chem, count):
    """Add ``count`` copies of a modification's H/C/N/O delta to ``comps`` in place.

    ``delta_chem`` is read at positions 0..3 as the H, C, N and O deltas.
    """
    for position, element in enumerate(("H", "C", "N", "O")):
        comps[element] += delta_chem[position] * count


def _add_matching_ptms(comps, ptm_map, mod_peptide):
    """Apply every ``ptm_map`` entry whose tag occurs in ``mod_peptide`` to ``comps``."""
    for ptm, ptm_info in ptm_map.items():
        occurrences = mod_peptide.count(ptm)
        if occurrences >= 1:
            _add_delta_chem(comps, ptm_info['deltaChem'], occurrences)


def build_matched_modification(data, ptm_map, tol, moff_pride_flag, h_rt_w):
    """
    Compute the theoretical isotopic envelope of each PSM, taking its modifications into account.

    For every row an elemental composition is built from the peptide sequence,
    adjusted by the modifications found in ``mod_peptide``, and expanded with
    ``isotopic_variants``. An extra sentinel row (``ratio_iso == -1``) is added
    one isotopic spacing below the first theoretical peak.

    :param data: DataFrame of PSMs; each row must expose ``peptide``,
        ``mod_peptide``, ``mass``, ``mz`` and ``rt``.
    :param ptm_map: mapping from modification tag to a dict whose
        ``'deltaChem'`` entry holds the H, C, N, O deltas (in that order).
    :param tol: tolerance, stored as ``int`` in the ``tol`` column.
    :param moff_pride_flag: if true ``rt`` is used as-is (minutes); otherwise
        it is treated as seconds and divided by 60 for ``ts``/``te``.
    :param h_rt_w: half-width subtracted from / added to the retention time to
        produce ``ts`` and ``te``.
    :return: DataFrame with one row per theoretical peak plus the sentinel row
        for each PSM; the pre-reset index is kept in an ``index`` column.
    """
    # Sequence tag styles: MQ uses "(mod_tag)", PS uses "<mod_tag>".
    # The MQ handling is hard-wired off, exactly as before; it is loop-invariant,
    # so it is set once here rather than per row.
    mq_mod_flag = False

    # Collect per-PSM frames and concatenate once at the end; concatenating
    # inside the loop re-copies the accumulated frame on every row.
    frames = [pd.DataFrame(
        columns=['peptide', 'mz', 'ratio_iso', 'tol', 'rt', 'matched', 'ts', 'te'])]

    for row in data.itertuples():
        comps = Counter(chain.from_iterable(
            std_aa_comp[aa].elements() for aa in row.peptide))

        if mq_mod_flag:
            if '(' in row.mod_peptide:
                # variable mods are tagged in the sequence; fixed mods are
                # counted on the tagged sequence
                _add_matching_ptms(comps, ptm_map, row.mod_peptide)
                fix_mod_count = row.mod_peptide.count('C')
            else:
                # only fixed mods
                fix_mod_count = row.peptide.count('C')
            if fix_mod_count > 0:
                _add_delta_chem(comps, ptm_map['cC']['deltaChem'], fix_mod_count)
        elif '<' in row.mod_peptide or '-' in row.mod_peptide:
            # fixed and variable mods are both in the sequence; scan only when
            # a modification marker is present
            _add_matching_ptms(comps, ptm_map, row.mod_peptide)

        # two H and one O are always added (presumably the terminal water)
        comps["H"] += 2
        comps["O"] += 1

        charge = int(round(row.mass / float(row.mz)))
        theoretical_isotopic_cluster = isotopic_variants(
            comps, charge=charge, npeaks=3)
        mz_iso = [peak.mz for peak in theoretical_isotopic_cluster]
        ratio_iso = [peak.intensity for peak in theoretical_isotopic_cluster]
        # sentinel peak: mirror the first isotopic spacing below the first peak
        delta = mz_iso[0] - mz_iso[1]
        mz_iso.append(mz_iso[0] + delta)
        ratio_iso.append(-1)

        # moffpridedata rt is in minutes; otherwise rt is in seconds
        rt_minutes = row.rt if moff_pride_flag else row.rt / 60
        frames.append(pd.DataFrame({
            'mz': mz_iso,
            'ratio_iso': ratio_iso,
            'exp_mz': row.mz,
            'peptide': row.mod_peptide,
            'tol': int(tol),
            'rt': row.rt,
            'matched': 1,
            'ts': rt_minutes - h_rt_w,
            'te': rt_minutes + h_rt_w,
        }))

    all_isotope_df = pd.concat(frames, join='outer', axis=0, sort=False)
    all_isotope_df.reset_index(inplace=True)

    return all_isotope_df
Exemple #15
0
def _add_delta_chem(comps, delta_chem, count):
    """Add ``count`` copies of a modification's H/C/N/O delta to ``comps`` in place.

    ``delta_chem`` is read at positions 0..3 as the H, C, N and O deltas.
    """
    for position, element in enumerate(("H", "C", "N", "O")):
        comps[element] += delta_chem[position] * count


def _add_matching_ptms(comps, ptm_map, mod_peptide):
    """Apply every ``ptm_map`` entry whose tag occurs in ``mod_peptide`` to ``comps``."""
    for ptm, ptm_info in ptm_map.items():
        occurrences = mod_peptide.count(ptm)
        if occurrences >= 1:
            _add_delta_chem(comps, ptm_info['deltaChem'], occurrences)


def build_matched_modification(data, ptm_map, tol, moff_pride_flag, h_rt_w):
    """
    Compute the theoretical isotopic envelope of each PSM, taking its modifications into account.

    For every row an elemental composition is built from the peptide sequence,
    adjusted by the modifications found in ``mod_peptide``, and expanded with
    ``isotopic_variants``. An extra sentinel row (``ratio_iso == -1``) is added
    one isotopic spacing below the first theoretical peak.

    :param data: DataFrame of PSMs; each row must expose ``peptide``,
        ``mod_peptide``, ``mass``, ``mz`` and ``rt``.
    :param ptm_map: mapping from modification tag to a dict whose
        ``'deltaChem'`` entry holds the H, C, N, O deltas (in that order).
    :param tol: tolerance, stored as ``int`` in the ``tol`` column.
    :param moff_pride_flag: if true ``rt`` is used as-is (minutes); otherwise
        it is treated as seconds and divided by 60 for ``ts``/``te``.
    :param h_rt_w: half-width subtracted from / added to the retention time to
        produce ``ts`` and ``te``.
    :return: DataFrame with one row per theoretical peak plus the sentinel row
        for each PSM; the pre-reset index is kept in an ``index`` column.
    """
    # Sequence tag styles: MQ uses "(mod_tag)", PS uses "<mod_tag>".
    # The MQ handling is hard-wired off, exactly as before; it is loop-invariant,
    # so it is set once here rather than per row.
    mq_mod_flag = False

    # Collect per-PSM frames and concatenate once at the end; concatenating
    # inside the loop re-copies the accumulated frame on every row.
    frames = [pd.DataFrame(
        columns=['peptide', 'mz', 'ratio_iso', 'tol', 'rt', 'matched', 'ts', 'te'])]

    for row in data.itertuples():
        comps = Counter(chain.from_iterable(
            std_aa_comp[aa].elements() for aa in row.peptide))

        if mq_mod_flag:
            if '(' in row.mod_peptide:
                # variable mods are tagged in the sequence; fixed mods are
                # counted on the tagged sequence
                _add_matching_ptms(comps, ptm_map, row.mod_peptide)
                fix_mod_count = row.mod_peptide.count('C')
            else:
                # only fixed mods
                fix_mod_count = row.peptide.count('C')
            if fix_mod_count > 0:
                _add_delta_chem(comps, ptm_map['cC']['deltaChem'], fix_mod_count)
        elif '<' in row.mod_peptide or '-' in row.mod_peptide:
            # fixed and variable mods are both in the sequence; scan only when
            # a modification marker is present
            _add_matching_ptms(comps, ptm_map, row.mod_peptide)

        # two H and one O are always added (presumably the terminal water)
        comps["H"] += 2
        comps["O"] += 1

        charge = int(round(row.mass / float(row.mz)))
        theoretical_isotopic_cluster = isotopic_variants(
            comps, charge=charge, npeaks=3)
        mz_iso = [peak.mz for peak in theoretical_isotopic_cluster]
        ratio_iso = [peak.intensity for peak in theoretical_isotopic_cluster]
        # sentinel peak: mirror the first isotopic spacing below the first peak
        delta = mz_iso[0] - mz_iso[1]
        mz_iso.append(mz_iso[0] + delta)
        ratio_iso.append(-1)

        # moffpridedata rt is in minutes; otherwise rt is in seconds
        rt_minutes = row.rt if moff_pride_flag else row.rt / 60
        frames.append(pd.DataFrame({
            'mz': mz_iso,
            'ratio_iso': ratio_iso,
            'exp_mz': row.mz,
            'peptide': row.mod_peptide,
            'tol': int(tol),
            'rt': row.rt,
            'matched': 1,
            'ts': rt_minutes - h_rt_w,
            'te': rt_minutes + h_rt_w,
        }))

    all_isotope_df = pd.concat(frames, join='outer', axis=0, sort=False)
    all_isotope_df.reset_index(inplace=True)

    return all_isotope_df