def test_most_probable_isotopic_composition(self):
    # Each case compares the (composition, abundance) pair returned by
    # mass.most_probable_isotopic_composition with a hand-built expectation.

    # Expected abundance for 7 x F[6] and 3 x F[7]; 120 == C(10, 3).
    ten_fluorine_abundance = (0.3)**3 * (0.7)**7 * 120

    # Single F atom.
    single = mass.most_probable_isotopic_composition(
        formula='F', mass_data=self.mass_data)
    single_expected = (
        mass.Composition({'F[6]': 1, 'F[7]': 0}, mass_data=self.mass_data),
        0.7,
    )
    self.assertEqual(single, single_expected)

    # Ten F atoms.
    ten = mass.most_probable_isotopic_composition(
        formula='F10', mass_data=self.mass_data)
    ten_expected = (
        mass.Composition({'F[6]': 7, 'F[7]': 3}, mass_data=self.mass_data),
        ten_fluorine_abundance,
    )
    self.assertEqual(ten, ten_expected)

    # Mixed formula where only F is expanded into isotopes.
    mixed = mass.most_probable_isotopic_composition(
        formula='A20F10',
        elements_with_isotopes=['F'],
        mass_data=self.mass_data)
    mixed_expected = (
        mass.Composition({'A': 20, 'F[6]': 7, 'F[7]': 3},
                         mass_data=self.mass_data),
        ten_fluorine_abundance,
    )
    self.assertEqual(mixed, mixed_expected)
def aion_composition(self, n):
    """Return the composition built from the first ``n`` residues.

    The composition of ``self.stripped_seq[:n]`` is reduced by one C, two H
    and two O, and then every modification returned by ``self.mods()`` whose
    key is smaller than ``n`` is applied. A modification string starting
    with ``'-'`` is parsed without the sign and subtracted; any other
    string is parsed as a formula and added.
    """
    modifications = self.mods()
    result = mass.Composition(self.stripped_seq[:n])
    for atom, loss in (('C', 1), ('H', 2), ('O', 2)):
        result[atom] -= loss

    for position in modifications:
        if not position < n:
            continue
        spec = modifications[position]
        if spec[0] == '-':
            parsed = mass.Composition(formula=spec[1:])
            delta = {key: -parsed[key] for key in parsed}
        else:
            delta = mass.Composition(formula=spec)
        for atom in delta:
            if atom in result:
                result[atom] += delta[atom]
            else:
                result[atom] = delta[atom]
    return result
def test_Composition_sum(self):
    # Adding two Composition objects must add up their element counts.
    left = mass.Composition(sequence='XXY', aa_comp=self.aa_comp)
    right = mass.Composition(sequence='YZZ', aa_comp=self.aa_comp)
    expected = dict.fromkeys('ABCDE', 2)
    self.assertEqual(left + right, expected)
def get_charges():
    """Return ``(charge, composition)`` pairs for charges 1, 2, 3, 4 and 0.

    Each composition is ``mass.Composition({"H": charge})``.
    """
    return [(charge, mass.Composition({"H": charge}))
            for charge in (1, 2, 3, 4, 0)]
def test_Composition_mul(self):
    # Multiplying a Composition by an integer must work from both sides.
    doubled = dict.fromkeys('ABCDE', 2)

    left_multiplied = 2 * mass.Composition(sequence='XYZ',
                                           aa_comp=self.aa_comp)
    self.assertEqual(left_multiplied, doubled)

    right_multiplied = mass.Composition(sequence='XYZ',
                                        aa_comp=self.aa_comp) * 2
    self.assertEqual(right_multiplied, doubled)
def read_compounds(filename, separator="\t", calculate=True,
                   lib_adducts=None, filename_atoms=""):
    """Read a delimited compound table into a list of ordered records.

    Parameters
    ----------
    filename : str
        Path of the table, read with ``read_csv``. The columns
        ``molecular_formula``, ``compound_id`` and ``compound_name`` are
        read for every row; ``exact_mass`` is read when ``calculate`` is
        false; ``retention_time`` (or ``rt``) and ``adduct`` are optional.
    separator : str
        Column separator passed to ``read_csv``.
    calculate : bool
        If true, the exact mass is computed from the molecular formula with
        the bundled NIST table and rounded to 6 decimals; otherwise the
        ``exact_mass`` column is used.
    lib_adducts : object, optional
        Adduct library; ``lib_adducts.lib[adduct]["mass"]`` is added to the
        calculated exact mass when the table has an ``adduct`` column.
    filename_atoms : str
        Unused by this function; kept for interface compatibility.

    Returns
    -------
    list of collections.OrderedDict
        One record per row whose molecular formula yields a non-empty
        composition. Other rows are skipped and a warning is issued.
    """
    # Local import so the block does not depend on a file-level import.
    import warnings

    nist_database = None
    if calculate:
        path_nist_database = os.path.join(
            os.path.dirname(os.path.abspath(__file__)),
            'data', 'nist_database.txt')
        nist_database = nist_database_to_pyteomics(path_nist_database)

    df = read_csv(filename, sep=separator, float_precision="round_trip")
    chnops = ("C", "H", "N", "O", "P", "S")

    records = []
    for _, row in df.iterrows():
        formula = str(row.molecular_formula)
        comp = pyteomics_mass.Composition(formula)
        if not comp:
            # The original built a Warning instance and discarded it, so the
            # skip was silent; actually emit the warning.
            warnings.warn("{} Skipped".format(row))
            continue

        record = collections.OrderedDict()
        record["composition"] = collections.OrderedDict(
            (k, comp[k]) for k in order_composition_by_hill(comp.keys()))
        sum_CHNOPS = sum(comp[e] for e in comp if e in chnops)
        record["CHNOPS"] = sum_CHNOPS == sum(comp.values())

        if calculate:
            record["exact_mass"] = round(
                pyteomics_mass.calculate_mass(formula=formula,
                                              mass_data=nist_database), 6)
        else:
            record["exact_mass"] = float(row.exact_mass)

        record["compound_id"] = row.compound_id
        record["compound_name"] = row.compound_name
        record["molecular_formula"] = composition_to_string(comp)

        if "retention_time" in df.columns:
            record["retention_time"] = row.retention_time
        elif "rt" in df.columns:
            record["retention_time"] = row.rt

        if "adduct" in df.columns:
            record["adduct"] = row.adduct
            # The adduct mass shift is only applied to calculated masses.
            if lib_adducts and calculate:
                record["exact_mass"] += lib_adducts.lib[row.adduct]["mass"]

        records.append(record)

    return records
def test_composition(get_composition):
    """Check Composition construction from sequences against known values.

    ``get_composition`` is iterated as pairs of (sequence, expected
    composition).
    """
    assert mass.Composition("ACDE")
    assert mass.Composition("A") + mass.Composition("C")

    acde_expected = {'H': 22, 'C': 15, 'O': 8, 'N': 4, 'S': 1}
    assert mass.Composition(parsed_sequence="ACDE") == acde_expected

    for case in get_composition:
        sequence, expected = case[0], case[1]
        assert mass.Composition(sequence) == expected
def test_Composition_sseq(self):
    # A Composition can be built from a split sequence (list of tuples).
    split = [('X', ), ('Y', ), ('Z', )]
    built = mass.Composition(split_sequence=split, aa_comp=self.aa_comp)
    self.assertEqual(built, dict.fromkeys('ABC', 1))
def test_Composition_pseq(self):
    # A Composition can be built from a parsed sequence (list of labels).
    parsed = ['X', 'Y', 'Z']
    built = mass.Composition(parsed_sequence=parsed, aa_comp=self.aa_comp)
    self.assertEqual(built, dict.fromkeys('ABC', 1))
def _formula_parser(formula, session):
    '''
    Parse a unimod formula composed of elements, isotopes, and other bricks.

    The formula is split on single spaces. Each token has the shape
    ``[isotope]name[(count)]``: an optional leading isotope number, a name,
    and an optional (possibly negative) count in parentheses that defaults
    to 1.

    In order to look up a Brick's composition, this function must have
    access to a session: a name that matches a ``Brick`` row contributes
    that brick's composition times the count, any other name is added to
    the result directly.

    Returns the accumulated ``mass.Composition``.
    '''
    # The isotope prefix must be greedy: a lazy ``\d+?`` followed by
    # ``[^\(]+`` would split "13C" into isotope "1" and element "3C".
    token_pattern = re.compile(
        r"(?P<isotope>\d+)?(?P<element>[^\(]+)(?:\((?P<count>-?\d+)\))?")

    composition = mass.Composition()
    for token in formula.split(" "):
        match = token_pattern.search(token)
        if match is None:
            # Empty tokens (e.g. from doubled spaces) carry no information.
            continue

        isotope = match.group("isotope")
        element = match.group("element")
        count = match.group("count")
        count = 1 if count is None else int(count)

        if isotope is None:
            name = element
        else:
            name = mass._make_isotope_string(element, int(isotope))

        brick = session.query(Brick).filter(Brick.brick == name).first()
        if brick is None:
            composition[name] += count
        else:
            composition += brick.composition * count
    return composition
def test_Composition_positional(self):
    # The first positional argument may be a sequence or a formula.
    residue_comps = self.aa_comp.copy()
    residue_comps.update(self.mods)

    from_sequence = mass.Composition('aXbYZ', aa_comp=residue_comps)
    self.assertEqual(from_sequence,
                     {'A': 2, 'B': 2, 'C': 1, 'D': 1, 'E': 1})

    from_formula = mass.Composition('AB2C3', mass_data=self.mass_data)
    self.assertEqual(from_formula, {'A': 1, 'B': 2, 'C': 3})
def get_mods_composition(modifications):
    """Sum the compositions of a list of modifications.

    Parameters
    ----------
    modifications : list of str
        Modification names, looked up with ``UNIMOD_MODS.by_title``.

    Returns
    -------
    pyteomics.mass.Composition
        The accumulated composition change. A modification whose lookup
        raises ``KeyError``, ``AttributeError`` or ``TypeError`` is logged
        and does not stop the loop.
    """
    # ???: Should the Unimod lookup object be passed in as a parameter?
    combined = mass.Composition()
    for title in modifications:
        try:
            delta = UNIMOD_MODS.by_title(title)["composition"]
            combined += delta
            # Elements are checked one by one: a set comparison would not
            # work with elements written as isotopes.
            for symbol in (name for name in delta
                           if name not in USED_ELEMS):
                log.warning(f"{symbol} in ({title}) is not supported "
                            "in the computation of M0 and M1")
        except (KeyError, AttributeError, TypeError):
            log.warning(f"Unimod entry not found for : {title}")
    return combined
def calculateMassUncertainty(processedSpectrum, weighted=False,
                             dfOutput=True, show=False):
    """Average the m/z error of each chemical element over all formulas.

    For every row, the error ``formula_mz - observed_mz`` is attributed to
    each element of the row's formula (a single trailing ``'-'`` is
    stripped before parsing). The per-element errors are then averaged.

    Parameters
    ----------
    processedSpectrum : mapping of columns
        Must provide ``"formula"``, ``"formula_mz"`` and ``"observed_mz"``;
        assumed to be a pandas DataFrame -- TODO confirm against callers.
    weighted : bool
        If true, each error is scaled by the element's share of the atoms
        in the formula (count / total count); otherwise the factor is 1.
    dfOutput : bool
        If true, return a DataFrame with columns ``Element`` and
        ``Uncertainty``; otherwise return a dict keyed by element.
    show : bool
        If true, draw a bar chart of the averages on figure 1.
    """
    errors = processedSpectrum["formula_mz"] - processedSpectrum["observed_mz"]
    instance = pd.concat([processedSpectrum["formula"], errors],
                         axis=1, keys=["formula", "uncertainty"])

    contributions = {}
    for _, row in instance.iterrows():
        ion = row["formula"]
        # endswith() is safe on an empty string, unlike ion[-1].
        if ion.endswith('-'):
            ion = ion[:-1]
        composition = mass.Composition(formula=ion)
        error = row["uncertainty"]
        total = sum(composition.values())
        for element, count in composition.items():
            factor = count / total if weighted else 1
            contributions.setdefault(element, []).append(error * factor)

    elements = {element: sum(values) / len(values)
                for element, values in contributions.items()}

    if show:
        # Materialise the dict views; not every plotting backend version
        # accepts them directly.
        plt.figure(1)
        plt.bar(list(elements.keys()), list(elements.values()))
        plt.ylabel('Error')
        plt.xlabel('Elements')

    if dfOutput:
        return pd.DataFrame(list(elements.items()),
                            columns=['Element', 'Uncertainty'])
    return elements
def expand_isotopes(peptide, charge_states=[2, 3]):
    '''
    Expand one peptide into a DataFrame of isotopic peaks.

    ``peptide`` must provide 'sequence', one '<z>+' entry per requested
    charge state, and one entry per model listed in ``params.ion_models``.

    The isotopic cluster is computed with ``IsoSpecPy.IsoThreshold``
    (absolute threshold 0.005) from the formula of the sequence and is
    repeated for every charge state.

    Returned columns:
        mz       - m/z of the ion, from ``get_ions``
        ic       - isotope probability times the peptide's '<z>+' value
        z        - charge
        sequence - peptide sequence
        ic_XX    - 'ic' multiplied by the peptide's value for model XX
    '''
    sequence = peptide['sequence']
    composition = mass.Composition(sequence)
    formula = ''.join(
        f'{element}{count}' for element, count in composition.items())

    cluster = IsoSpecPy.IsoThreshold(formula=formula,
                                     threshold=0.005,
                                     absolute=True)
    neutral_masses = cluster.np_masses()
    probabilities = cluster.np_probs()
    peak_count = neutral_masses.shape[0]

    mz = np.concatenate([get_ions(neutral_masses, z) for z in charge_states])
    ic = np.concatenate(
        [probabilities * peptide['{}+'.format(z)] for z in charge_states])
    charge = np.concatenate(
        [np.repeat(z, peak_count) for z in charge_states])

    result = pd.DataFrame({'mz': mz, 'ic': ic, 'z': charge})
    result['sequence'] = sequence
    for model in params.ion_models:
        result['ic_{}'.format(model)] = result['ic'] * peptide[model]
    return result
def test_Composition_formula(self):
    # A Composition parsed from a formula must equal the reference dict.
    expected = self.d
    unit_mass_data = {atom: {0: (1.0, 1.0)} for atom in 'ABCDE'}
    parsed = mass.Composition(formula='ABCDE', mass_data=unit_mass_data)
    self.assertEqual(expected, parsed)
def get_mods():
    """Return ``(modification names, expected composition)`` pairs.

    The third entry contains the name "not_mod" and expects the same
    composition as the second entry.
    """
    acetyl_phospho = {'H': 3, 'C': 2, 'O': 4, "P": 1}
    return [
        (["Oxidation"], mass.Composition({"O": 1})),
        (["Acetyl", "Phospho"], mass.Composition(dict(acetyl_phospho))),
        (["Acetyl", "Phospho", "not_mod"],
         mass.Composition(dict(acetyl_phospho))),
        ([], mass.Composition()),
    ]
def test_calculate_mass(get_mass):
    """Check mass.calculate_mass against reference masses.

    ``get_mass`` is iterated as pairs of (sequence, expected mass).
    """
    acde_mass = pytest.approx(436.12639936, REL)
    assert mass.calculate_mass("ACDE") == acde_mass
    assert mass.calculate_mass(mass.Composition("ACDE")) == acde_mass

    parsed_mass = mass.calculate_mass(parsed_sequence="ACDE")
    assert parsed_mass == pytest.approx(418.115834, REL)

    assert mass.calculate_mass("A") == pytest.approx(89.04767846841, REL)

    for case in get_mass:
        sequence, expected = case[0], case[1]
        assert mass.calculate_mass(sequence) == pytest.approx(expected, REL)
def test_convert_atom_C_to_X():
    """Check that stfi.convert_atom_C_to_X reports carbon under 'X'."""
    cases = (
        ("ACDE", {'H': 24, 'O': 9, 'N': 4, 'S': 1, 'X': 15}),
        ("PEPTIDE", {'H': 53, 'O': 15, 'N': 7, 'X': 34}),
        ("ACDEFGHIKLMNPQRSTVWY",
         {'H': 159, 'O': 30, 'N': 29, 'S': 2, 'X': 107}),
    )
    for sequence, expected in cases:
        converted = stfi.convert_atom_C_to_X(sequence)
        assert converted == mass.Composition(expected)
def test_composition_objects_are_pickleable(self):
    # Compositions built through every constructor path must survive a
    # pickle round trip unchanged.
    unit_mass_data = {atom: {0: (1.0, 1.0)} for atom in 'ABCDE'}
    candidates = [
        mass.Composition(self.d, mass_data=self.mass_data),
        mass.Composition(formula='ABCDE', mass_data=unit_mass_data),
        mass.Composition(sequence='XYZ', aa_comp=self.aa_comp),
        mass.Composition(parsed_sequence=['X', 'Y', 'Z'],
                         aa_comp=self.aa_comp),
        mass.Composition(split_sequence=[('X', ), ('Y', ), ('Z', )],
                         aa_comp=self.aa_comp),
    ]
    for original in candidates:
        restored = pickle.loads(pickle.dumps(original))
        self.assertEqual(original, restored)
def composition(self):
    """Build a ``mass.Composition`` from ``self.elements``.

    Each item provides an ``element`` symbol, optionally prefixed with an
    isotope number, and a ``count``. Symbols with an isotope prefix are
    converted with ``mass._make_isotope_string``.
    """
    result = mass.Composition()
    for relation in self.elements:
        parts = re.search(r"(?P<isotope>\d*)?(?P<element>\S+)",
                          relation.element)
        isotope, element = parts.groups()
        if isotope == "":
            key = element
        else:
            key = mass._make_isotope_string(element, int(isotope))
        result[key] = relation.count
    return result
def test_computation_isotopologue():
    """Check compute_M0_nl / compute_M1_nl for "ACDE" at both abundances."""
    # Standard formula.
    test_composition = mass.Composition("ACDE")
    cases = (
        (stfi.compute_M0_nl, stfi.NATURAL_ABUNDANCE, 0.77662382),
        (stfi.compute_M0_nl, stfi.C12_ABUNDANCE, 0.911253268),
        (stfi.compute_M1_nl, stfi.NATURAL_ABUNDANCE, 0.1484942353),
        (stfi.compute_M1_nl, stfi.C12_ABUNDANCE, 0.0277650369575),
    )
    for compute, abundance, expected in cases:
        value = compute(test_composition, abundance)
        assert value == pytest.approx(expected, REL)
def validate(self):
    """Return True when the sequence and its modifications are usable.

    The check fails (returns False) when ``self.stripped_seq`` is empty,
    when it contains a character outside the 20 letters
    ``ACDEFGHIKLMNPQRSTVWY``, or when any value of ``self.mods()`` cannot
    be turned into a ``mass.Composition``.
    """
    if self.stripped_seq == "":
        return False

    allowed_chars = 'ACDEFGHIKLMNPQRSTVWY'
    if any(char not in allowed_chars for char in self.stripped_seq):
        return False

    # NOTE(review): the modification string is passed as-is, so a value
    # with a leading '-' is validated with the sign included -- confirm
    # this is intended.
    for mod in self.mods().values():
        try:
            mass.Composition(mod)
        except Exception:
            # Deliberately broad: any parsing failure means "invalid".
            # Unlike a bare ``except``, this lets KeyboardInterrupt and
            # SystemExit propagate.
            return False
    return True
def calculate_b_y_ion(sequence, ion_charge):
    """Return the b- and y-ion masses for every internal cleavage site.

    Masses are computed with ``mass.calculate_mass`` at charge
    ``ion_charge``. The standard residue compositions are used, except
    that 'C' is overridden with H8 C5 S1 O2 N2 (presumably
    carbamidomethylated cysteine -- confirm).

    Returns a pair of tuples ``(b_ions, y_ions)``. b ions cover the
    prefixes of length 1 .. len-1; y ions cover the matching suffixes and
    are returned in reverse order of the cleavage site, i.e. shortest
    suffix first.
    """
    residue_comps = dict(mass.std_aa_comp)
    residue_comps['C'] = mass.Composition(
        {'H': 8, 'C': 5, 'S': 1, 'O': 2, 'N': 2})

    def fragment_mass(fragment, ion_type):
        # Mass of one fragment with the shared charge and residue table.
        return mass.calculate_mass(fragment,
                                   ion_type=ion_type,
                                   charge=ion_charge,
                                   aa_comp=residue_comps)

    cut_sites = range(1, len(sequence))
    b_masses = [fragment_mass(sequence[:cut], 'b') for cut in cut_sites]
    y_masses = [fragment_mass(sequence[cut:], 'y') for cut in cut_sites]
    # Record y ions from the shortest fragment to the longest.
    return (tuple(b_masses), tuple(y_masses[::-1]))
def get_charge_composition(charge):
    """Return the composition carried by a given charge (H only).

    Parameters
    ----------
    charge : int
        Peptide charge.

    Returns
    -------
    pyteomics.mass.Composition
        A composition whose "H" entry is set to ``charge``.
    """
    protons = mass.Composition()
    protons["H"] = charge
    return protons
def test_deprecated_computation_isotopologue():
    """Check the compute_M0 / compute_M1 functions of seq_to_first_iso.

    The functions are first re-exposed on ``stfi`` and then called through
    those names.
    """
    test_composition = mass.Composition("ACDE")
    stfi.compute_M0 = stfi.seq_to_first_iso.compute_M0
    stfi.compute_M1 = stfi.seq_to_first_iso.compute_M1

    cases = (
        ("compute_M0", stfi.NATURAL_ABUNDANCE, 0.77662382),
        ("compute_M0", stfi.C12_ABUNDANCE, 0.911253268),
        ("compute_M1", stfi.NATURAL_ABUNDANCE, 0.1484942353),
        ("compute_M1", stfi.C12_ABUNDANCE, 0.0277650369575),
    )
    for name, abundance, expected in cases:
        value = getattr(stfi, name)(test_composition, abundance)
        assert value == pytest.approx(expected, REL)
def test_isotopologues(self):
    # mass.isotopologues must yield the same two states whichever way the
    # peptide is specified, and each state must have the expected
    # abundance.
    peptide = 'XYF'
    states = [
        {'F[6]': 1, 'A': 1, 'B': 1, 'D': 1, 'E': 1},
        {'F[7]': 1, 'A': 1, 'B': 1, 'D': 1, 'E': 1},
    ]
    abundances = [0.7, 0.3]
    shared = {
        'elements_with_isotopes': 'F',
        'aa_comp': self.aa_comp,
        'mass_data': self.mass_data,
    }
    # (positional args, extra keyword args) for each way of calling.
    call_specs = [
        ((peptide, ), {}),
        ((), {'sequence': 'XYF'}),
        ((), {'parsed_sequence':
              parser.parse('XYF', show_unmodified_termini=True)}),
        ((), {'split_sequence':
              parser.parse('XYF', show_unmodified_termini=True,
                           split=True)}),
        ((), {'formula': 'ABDEF'}),
        ((), {'composition':
              mass.Composition(sequence='XYF', aa_comp=self.aa_comp)}),
    ]
    for args, extra in call_specs:
        kwargs = dict(shared)
        kwargs.update(extra)
        for state in mass.isotopologues(*args, **kwargs):
            position = states.index(state)
            self.assertNotEqual(position, -1)
            self.assertAlmostEqual(
                abundances[position],
                mass.isotopic_composition_abundance(
                    state,
                    aa_comp=self.aa_comp,
                    mass_data=self.mass_data))
def convert_atom_C_to_X(sequence):
    """Move the carbon count of a composition to the element "X".

    Parameters
    ----------
    sequence : str or pyteomics.mass.Composition
        Sequence or composition.

    Returns
    -------
    pyteomics.mass.Composition
        Composition whose "C" entry has been removed and stored under "X"
        (0 is stored when there was no "C" entry).
    """
    # Normalise the input into a pyteomics.mass.Composition object.
    composition = mass.Composition(sequence)
    carbon_count = composition.pop("C", 0)
    composition["X"] = carbon_count
    return composition
def HC_HNOPS_rules(molecular_formula):
    """Evaluate the H/C and N,O,P,S-to-C ratio rules for a formula.

    Returns a dict with two 0/1 flags:

    * ``"HC"`` is 1 when both C and H are present and 0 < H/C < 6.
    * ``"NOPSC"`` is 1 when the ratios N/C, O/C, P/C and S/C all lie in
      [0, 4], [0, 3], [0, 2] and [0, 3] respectively. A ratio counts as 0
      when the element or C is absent from the composition.
    """
    composition = pyteomics_mass.Composition(molecular_formula)
    has_carbon = "C" in composition

    hc_flag = 0
    if has_carbon and "H" in composition:
        hc_ratio = float(composition['H']) / float(composition['C'])
        if 0 < hc_ratio < 6:
            hc_flag = 1

    upper_bounds = (('N', 4), ('O', 3), ('P', 2), ('S', 3))
    within_bounds = True
    for element, upper in upper_bounds:
        if element in composition and has_carbon:
            ratio = float(composition[element]) / float(composition['C'])
        else:
            ratio = 0.0
        if not 0 <= ratio <= upper:
            within_bounds = False

    return {"HC": hc_flag, "NOPSC": 1 if within_bounds else 0}
def read_molecular_formulae(filename, separator="\t", calculate=True,
                            filename_atoms=""):
    """Read a delimited table of molecular formulae into ordered records.

    Parameters
    ----------
    filename : str
        Path of the table, read with ``read_csv``. The column
        ``molecular_formula`` is read for every row; ``exact_mass`` is read
        when ``calculate`` is false.
    separator : str
        Column separator passed to ``read_csv``.
    calculate : bool
        If true, the exact mass is computed from the molecular formula with
        the bundled NIST table and rounded to 6 decimals; otherwise the
        ``exact_mass`` column is used.
    filename_atoms : str
        Unused by this function; kept for interface compatibility.

    Returns
    -------
    list of collections.OrderedDict
        One record per row whose formula yields a non-empty composition,
        holding the composition, the CHNOPS flag, the exact mass, the
        results of ``HC_HNOPS_rules`` and ``lewis_senior_rules`` and the
        double bond equivalents. Other rows are skipped and a warning is
        issued.
    """
    # Local import so the block does not depend on a file-level import.
    import warnings

    nist_database = None
    if calculate:
        path_nist_database = os.path.join(
            os.path.dirname(os.path.abspath(__file__)),
            'data', 'nist_database.txt')
        nist_database = nist_database_to_pyteomics(path_nist_database)

    df = read_csv(filename, sep=separator, float_precision="round_trip")
    chnops = ("C", "H", "N", "O", "P", "S")

    records = []
    for _, row in df.iterrows():
        formula = str(row.molecular_formula)
        comp = pyteomics_mass.Composition(formula)
        if not comp:
            # The original built a Warning instance and discarded it, so the
            # skip was silent; actually emit the warning.
            warnings.warn("{} Skipped".format(row))
            continue

        record = collections.OrderedDict()
        record["composition"] = collections.OrderedDict(
            (k, comp[k]) for k in order_composition_by_hill(comp.keys()))
        sum_CHNOPS = sum(comp[e] for e in comp if e in chnops)
        record["CHNOPS"] = sum_CHNOPS == sum(comp.values())

        if calculate:
            # Same entry point as read_compounds uses for mass calculation.
            record["exact_mass"] = round(
                pyteomics_mass.calculate_mass(formula=formula,
                                              mass_data=nist_database), 6)
        else:
            record["exact_mass"] = float(row.exact_mass)

        record.update(HC_HNOPS_rules(formula))
        record.update(lewis_senior_rules(formula))
        record["double_bond_equivalents"] = double_bond_equivalents(
            record["composition"])
        records.append(record)

    return records
def lewis_senior_rules(molecular_formula):
    """Evaluate two valence-based rules for a molecular formula.

    The valence sum is computed over C, H, N, O, P and S using the
    valences 4, 1, 3, 2, 3 and 2. Returns a dict with two 0/1 flags:

    * ``"lewis"`` is 1 when the valence sum is even.
    * ``"senior"`` is 1 when the valence sum is at least twice
      (total number of atoms - 1).
    """
    valence = {'C': 4, 'H': 1, 'N': 3, 'O': 2, 'P': 3, 'S': 2}
    composition = pyteomics_mass.Composition(molecular_formula)

    lewis_sum = sum(valence[element] * composition[element]
                    for element in valence if element in composition)
    senior_threshold = (sum(composition.values()) - 1) * 2

    return {
        "lewis": int(lewis_sum % 2 == 0),
        "senior": int(lewis_sum >= senior_threshold),
    }