def getNewEnvelope(self, atomCnt_str, jP, precDigits):
    """Compute, cache and return the isotopic envelope of a formula.

    Parameters
    ----------
    atomCnt_str : formula string handed to ``self.formParser.parse``.
    jP : fourth argument forwarded to ``IsoSpecPy.IsoSpec``
        (presumably the joint probability coverage -- confirm against
        the IsoSpecPy version in use).
    precDigits : precision argument forwarded to ``agg_spec_proper``.

    Returns
    -------
    (masses, probs) : copies of the aggregated arrays; the originals are
        kept in ``self.isotopicEnvelopes`` under the key
        ``(atomCnt_str, jP, precDigits)``.
    """
    composition = self.formParser.parse(atomCnt_str)

    # One (count, isotope masses, isotope probabilities) row per element,
    # in the order the parser reports the elements.
    per_element = [
        (n_atoms, self.isoMasses[symbol], self.isoProbs[symbol])
        for symbol, n_atoms in composition.items()
    ]
    counts = [row[0] for row in per_element]
    isotope_masses = [row[1] for row in per_element]
    isotope_probs = [row[2] for row in per_element]

    # Keep a named reference to the IsoSpec object while its raw buffers
    # are being converted below.
    envelope = IsoSpecPy.IsoSpec(counts, isotope_masses, isotope_probs, jP)
    raw_masses, raw_logprobs, _ = envelope.getConfsRaw()
    mass_array = cdata2numpyarray(raw_masses)
    prob_array = np.exp(cdata2numpyarray(raw_logprobs))
    mass_array, prob_array = agg_spec_proper(mass_array, prob_array, precDigits)

    # Memoize the result; callers only ever receive copies.
    cache_key = (atomCnt_str, jP, precDigits)
    self.isotopicEnvelopes[cache_key] = (mass_array, prob_array)
    return mass_array.copy(), prob_array.copy()
def __getNewEnvelope(self, atomCnt_str, jP, prec_digits):
    """Compute, cache and return the isotopic envelope of a formula.

    The elapsed wall-clock time of the whole call is added to
    ``self.stats['Envelopes Generation Total T']``.

    Parameters
    ----------
    atomCnt_str : formula string handed to ``self.formParser.parse``.
    jP : fourth argument forwarded to ``IsoSpecPy.IsoSpec``
        (presumably the joint probability coverage -- confirm against
        the IsoSpecPy version in use).
    prec_digits : precision argument forwarded to ``aggregate_envelopes``.

    Returns
    -------
    (masses, probs) : copies of the aggregated arrays; the originals are
        kept in ``self.isotopicEnvelopes`` under the key
        ``(atomCnt_str, jP, prec_digits)``.
    """
    started = time()
    composition = self.formParser.parse(atomCnt_str)

    # One (count, isotope masses, isotope probabilities) row per element,
    # in the order the parser reports the elements.
    per_element = [
        (n_atoms, self.iso_masses[symbol], self.iso_probs[symbol])
        for symbol, n_atoms in composition.items()
    ]
    counts = [row[0] for row in per_element]
    isotope_masses = [row[1] for row in per_element]
    isotope_probs = [row[2] for row in per_element]

    # Keep a named reference to the IsoSpec object while its raw buffers
    # are being converted below.
    envelope = IsoSpecPy.IsoSpec(counts, isotope_masses, isotope_probs, jP)
    raw_masses, raw_logprobs, _ = envelope.getConfsRaw()
    mass_array = cdata2numpyarray(raw_masses)
    prob_array = np.exp(cdata2numpyarray(raw_logprobs))
    mass_array, prob_array = aggregate_envelopes(mass_array, prob_array, prec_digits)

    # Memoization.
    # TODO: get rid of it when IsoSpec using stats convolution.
    cache_key = (atomCnt_str, jP, prec_digits)
    self.isotopicEnvelopes[cache_key] = (mass_array, prob_array)

    finished = time()
    self.stats['Envelopes Generation Total T'] += finished - started
    return mass_array.copy(), prob_array.copy()
def main():
    """Entry point of IsoTool: compute and export an isotopic pattern.

    Reads a molecule CSV (columns ``element`` and ``n``) and an isotope
    table CSV (indexed by ``element_symbol``; columns ``atomic_mass``,
    ``abundance`` and ``mass_number`` are used), adds ``z`` hydrogen atoms
    to the molecule, lets IsoSpecPy enumerate the isotopologues up to the
    requested cumulative probability, converts masses to m/z, and writes

    * ``centroids.csv`` -- m/z, probability and isotope counts per
      isotopologue, and
    * ``spectrum.csv``  -- the profile spectrum returned by ``peak_shaper``

    into the chosen output folder.

    Raises
    ------
    ValueError
        If the charge is zero, or if an element of the molecule has no
        rows in the isotope table.
    """
    parser = GooeyParser(prog='IsoTool')
    # parser = argparse.ArgumentParser()
    parser.add_argument('Molecule', type=str, widget='FileChooser')

    # Both options write to the same destination; exactly one is required.
    g = parser.add_argument_group()
    isos = g.add_mutually_exclusive_group(required=True)
    isos.add_argument(
        '--terrestrial',
        metavar='Terrestrial abundances',
        default=os.path.join(data_folder, 'isotopes_terrestrial.csv'),
        dest='isotopes')
    isos.add_argument(
        '--custom',
        metavar='Custom isotope definitons',
        dest='isotopes',
        widget='FileChooser')

    parser.add_argument(
        '-o', '--output_folder',
        metavar='Output Folder',
        type=str,
        widget='DirChooser',
        default=folder)
    parser.add_argument(
        '-z', '--charge',
        metavar='Charge',
        help='z, adds protons and recalculates m/z accordingly',
        default=1,
        dest='z',
        type=int)
    parser.add_argument(
        '-r', '--resolution',
        metavar='Resolution',
        help='m/z*1/FWHM, determines Gaussian peak width',
        type=int,
        default=40000)
    parser.add_argument(
        '-bs', '--binsize',
        help='for summing Gaussian peaks of isotopologues',
        type=float,
        default=0.002)
    parser.add_argument(
        '-ps', '--points',
        help='no. of points for Gaussian peak',
        type=int,
        default=250)
    parser.add_argument(
        '-dmz', '--delta_mz',
        metavar='Delta m/z',
        help='two sided mass offset for Gaussian peaks',
        type=float,
        default=0.05)
    parser.add_argument(
        '-p', '--proba',
        metavar='Probability Cutoff',
        help='Cumulative probability threshold for isotopologue calculations',
        type=float,
        default=0.9999)
    args = parser.parse_args()

    # m/z is mass divided by z; a zero charge would silently yield inf.
    if args.z == 0:
        raise ValueError('Charge z must be non-zero')

    isotopes = pd.read_csv(args.isotopes, index_col='element_symbol').dropna()
    mol = pd.read_csv(args.Molecule).set_index('element')
    # The charge carriers are modelled as z extra hydrogen atoms.
    mol.loc['H', 'n'] += args.z

    elements = [
        e for e in mol.reset_index().element.values if mol.loc[e].n > 0
    ]
    atoms = [n for n in mol.n.values if n > 0]

    masses = []
    probs = []
    for e in elements:
        if e not in isotopes.index:
            raise ValueError('Element %s has no defined isotopes' % e)
        # List-based selection always returns a DataFrame, so elements with
        # a single isotope row are handled like all the others.
        rows = isotopes.loc[[e]]
        masses.append(tuple(rows.atomic_mass.values))
        probs.append(tuple(rows.abundance.values))

    i = IsoSpecPy.IsoSpec(atoms, masses, probs, args.proba)
    confs = i.getConfs()
    confs = pd.DataFrame(confs).transpose()
    confs.columns = ['mz', 'p', 'isotopologue']
    # IsoSpec reports log-probabilities.
    confs.p = confs.p.astype(np.float64).apply(np.exp)
    # z hydrogen *atoms* were added above, so z electron masses have to be
    # removed to obtain the mass of the z-fold charged ion.
    confs.mz = (confs.mz - args.z * m_electron) / args.z
    confs = confs.sort_values('mz')

    # One column per isotope, named <mass number><element symbol>.
    f = isotopes.loc[elements]
    iso_cols = [
        ''.join([str(y), str(x)]) for (x, y) in zip(f.index, f.mass_number)
    ]
    temp_df = pd.DataFrame.from_records(confs.isotopologue, columns=iso_cols)
    confs = pd.concat(
        [
            confs[['mz', 'p']].reset_index(drop=True),
            temp_df.reset_index(drop=True)
        ],
        axis=1,
    )
    print(confs.head().to_string())

    spectrum = peak_shaper(confs[['mz', 'p']].values,
                           args.resolution,
                           delta_mz=args.delta_mz,
                           binsize=args.binsize,
                           no_points=args.points,
                           normalize=True,
                           process_binning=True)
    spectrum = pd.DataFrame(spectrum, columns=['mz', 'int'])

    confs.to_csv(os.path.join(args.output_folder, 'centroids.csv'),
                 index=False)
    spectrum.to_csv(os.path.join(args.output_folder, 'spectrum.csv'),
                    index=False)
# Isotope counts of the first configuration in `confs`.
# NOTE(review): `confs` is expected to be defined earlier in the script;
# that part is not visible here -- confirm.
# The print statements were converted to print() calls so this code parses
# under Python 3; output text is unchanged.
print("Number of Protium atoms:", confs[0][2][0][0])
print("Number of Deuterium atoms", confs[0][2][0][1])
print("Number of O16 atoms:", confs[0][2][1][0])
print("Number of O17 atoms:", confs[0][2][1][1])
print("Number of O18 atoms:", confs[0][2][1][2])

print()
print("Now what if both isotopes of hydrogen were equally probable, while prob. of O16 was 50%, O17 at 30% and O18 at 20%?")

# Custom isotope probabilities and masses, one tuple per element.
hydrogen_probs = (0.5, 0.5)
oxygen_probs = (0.5, 0.3, 0.2)
hydrogen_masses = (1.00782503207, 2.0141017778)
oxygen_masses = (15.99491461956, 16.99913170, 17.9991610)
# Two hydrogen atoms and one oxygen atom.
atom_counts = (2, 1)

i = IsoSpecPy.IsoSpec(atom_counts,
                      (hydrogen_masses, oxygen_masses),
                      (hydrogen_probs, oxygen_probs),
                      0.9)

print("The isotopologue set containing at least 0.9 probability has",
      len(i), "element(s)")

confs = i.getConfs()

print("The first configuration has the following parameters:")
print("Mass:", confs[0][0])
print("log-prob:", confs[0][1])
print("probability:", exp(confs[0][1]))
print("Number of Protium atoms:", confs[0][2][0][0])
print("Number of Deuterium atoms", confs[0][2][0][1])
print("Number of O16 atoms:", confs[0][2][1][0])
print("Number of O17 atoms:", confs[0][2][1][1])
print("Number of O18 atoms:", confs[0][2][1][2])
def _cal_isotopologues(self, formula_dict, min_abundance, current_abundance, ms_dynamic_range):
    '''Yield the isotopologues of a monoisotopic molecular formula.

    Primary function to look for isotopologues based on a monoisotopic
    molecular formula. The enumeration is delegated to IsoSpecPy
    (https://github.com/MatteoLacki/IsoSpec).

    Hydrogen and the ion-type entry are not passed to IsoSpec; they are
    copied unchanged into every generated formula.

    Parameters
    ----------
    formula_dict : dict
        Monoisotopic formula, e.g. {'C': 10, 'H': 20, 'O': 2}, optionally
        with a Labels.ion_type entry. Atom labels need to follow the Atoms
        class labels.
    min_abundance : number
        Isotopologues whose predicted abundance
        (current_abundance * ratio) is not above this value are skipped.
    current_abundance : number
        Abundance of the monoisotopic peak.
    ms_dynamic_range : number
        Used to derive the probability coverage requested from IsoSpec,
        1 - 1/ms_dynamic_range.

    Yields
    ------
    (formula, ratio) : (dict, float)
        Isotopologue formula (zero counts removed) and its probability
        relative to the monoisotopic configuration. The monoisotopic
        configuration itself is never yielded.

    Notes
    -----
    * Use this function at runtime during the molecular identification
      algorithm only when a positive ID is observed for the monoisotopic
      ion, or to simulate a mass spectrum (which still needs a resolving
      power calculation to be fully operational).
    * It might break when adding non-conventional atoms (not yet tested).
    * Original implementation last updated on 05-26-2020 by Yuri E. Corilo.
    '''
    # Updated to reflect the minimum possible mass peak abundance.
    cut_off_to_IsoSpeccPy = 1 - (1 / ms_dynamic_range)

    atoms_labels = [atom for atom in formula_dict.keys()
                    if atom != Labels.ion_type and atom != 'H']

    atoms_count = []
    masses_list_tuples = []
    props_list_tuples = []
    all_atoms_list = []
    # Isotope counts of the monoisotopic configuration, aligned with
    # all_atoms_list: the full atom count on the light isotope, zero on
    # every heavy one.
    mono_counts = []

    for atom_label in atoms_labels:
        isotope_entry = Atoms.isotopes.get(atom_label)
        if len(isotope_entry) > 1:
            # The second item holds the labels of the heavy isotopes.
            heavy_labels = list(isotope_entry[1])
        else:
            # This atom has no heavy isotope.
            heavy_labels = []

        isotope_labels = [atom_label] + heavy_labels
        count = formula_dict.get(atom_label)

        all_atoms_list.extend(isotope_labels)
        atoms_count.append(count)
        masses_list_tuples.append(
            [Atoms.atomic_masses.get(label) for label in isotope_labels])
        props_list_tuples.append(
            [Atoms.isotopic_abundance.get(label) for label in isotope_labels])
        mono_counts.extend([count] + [0] * len(heavy_labels))

    iso = IsoSpecPy.IsoSpec(atoms_count, masses_list_tuples,
                            props_list_tuples, cut_off_to_IsoSpeccPy)
    conf = iso.getConfs()
    # IsoSpec reports log-probabilities.
    probs = exp(conf[1])
    molecular_formulas = conf[2]

    ion_type = formula_dict.get(Labels.ion_type)
    hydrogens = formula_dict.get('H')

    new_formulas = []
    index_mono = None
    for isotopologue_index in range(len(iso)):
        formula_list = molecular_formulas[isotopologue_index]
        # Recognise the monoisotopic configuration by its isotope counts,
        # which does not depend on how formula_dict spells optional keys.
        if index_mono is None and list(formula_list) == mono_counts:
            index_mono = isotopologue_index

        new_formula_dict = dict(zip(all_atoms_list, formula_list))
        new_formula_dict[Labels.ion_type] = ion_type
        if hydrogens:
            new_formula_dict['H'] = hydrogens
        new_formulas.append(
            {x: y for x, y in new_formula_dict.items() if y != 0})

    if index_mono is not None:
        mono_prob = probs[index_mono]
    else:
        # The monoisotopic configuration was not among those returned.
        # Its probability is the product, over the elements, of the light
        # isotope abundance raised to the atom count.
        mono_prob = 1.0
        for count, props in zip(atoms_count, props_list_tuples):
            mono_prob *= props[0] ** count

    for isotopologue_index, formulas in enumerate(new_formulas):
        if isotopologue_index == index_mono:
            # Never report the monoisotopic formula itself.
            continue
        # Ratio isotopologue / monoisotopic.
        prob = probs[isotopologue_index] / mono_prob
        theor_abundance = current_abundance * prob
        if theor_abundance > min_abundance:
            yield (formulas, prob)