def test_nist_database_to_pyteomics(self):
    """Verify the key-0 entry parsed from the NIST table for C, H, N, O, P and S.

    Each element's ``[0]`` entry is expected to be a ``(mass, 1.0)`` tuple.
    """
    database_path = os.path.join(
        self.path, "beamspy", "data", "nist_database.txt")
    nist_database = nist_database_to_pyteomics(database_path)

    # (element symbol, expected mass) pairs, checked in the listed order.
    expected_masses = (
        ("C", 12.0),
        ("H", 1.00782503223),
        ("N", 14.00307400443),
        ("O", 15.99491461957),
        ("P", 30.97376199842),
        ("S", 31.9720711744),
    )
    for element, expected_mass in expected_masses:
        self.assertEqual(nist_database[element][0], (expected_mass, 1.0))
def read_compounds(filename, separator="\t", calculate=True, lib_adducts=None, filename_atoms=""):
    """Read a delimited table of compounds into a list of ordered records.

    The table is loaded with ``read_csv`` and must provide the columns
    ``molecular_formula``, ``compound_id`` and ``compound_name``; an
    ``exact_mass`` column is additionally required when ``calculate`` is
    false. ``retention_time`` (or, failing that, ``rt``) and ``adduct``
    columns are copied into the record when present.

    :param filename: Path (or buffer) handed to ``read_csv``.
    :param separator: Column delimiter handed to ``read_csv``.
    :param calculate: When true, the exact mass is computed from the formula
        using the bundled ``data/nist_database.txt`` and rounded to 6
        decimals; otherwise the ``exact_mass`` column is used as a float.
    :param lib_adducts: Optional adduct library. When truthy, ``calculate``
        is true and the table has an ``adduct`` column, the value
        ``lib_adducts.lib[row.adduct]["mass"]`` is added to the exact mass
        (a missing adduct key propagates the lookup error).
    :param filename_atoms: Not used by this function.
    :return: List of ``collections.OrderedDict`` records, one per row whose
        formula parses to a non-empty composition.
    """
    # Function-scope import keeps this block self-contained.
    import warnings

    if calculate:
        path_nist_database = os.path.join(
            os.path.dirname(os.path.abspath(__file__)), 'data', 'nist_database.txt')
        nist_database = nist_database_to_pyteomics(path_nist_database)

    df = read_csv(filename, sep=separator, float_precision="round_trip")

    # Column presence does not change between rows, so check it once.
    has_retention_time = "retention_time" in df.columns
    has_rt = "rt" in df.columns
    has_adduct = "adduct" in df.columns
    chnops = ("C", "H", "N", "O", "P", "S")

    records = []
    for _, row in df.iterrows():
        formula = str(row.molecular_formula)
        comp = pyteomics_mass.Composition(formula)
        if not comp:
            # Previously a bare ``Warning(...)`` object was created and
            # discarded, so skipped rows went unreported.
            warnings.warn("{} Skipped".format(row), stacklevel=2)
            continue

        record = collections.OrderedDict()
        record["composition"] = collections.OrderedDict(
            (k, comp[k]) for k in order_composition_by_hill(comp.keys()))

        # True when every atom in the formula is one of C, H, N, O, P, S.
        sum_CHNOPS = sum(comp[e] for e in comp if e in chnops)
        record["CHNOPS"] = sum_CHNOPS == sum(comp.values())

        if calculate:
            record["exact_mass"] = round(
                pyteomics_mass.calculate_mass(
                    formula=formula, mass_data=nist_database), 6)
        else:
            record["exact_mass"] = float(row.exact_mass)

        record["compound_id"] = row.compound_id
        record["compound_name"] = row.compound_name
        # Reuse the composition parsed above instead of parsing it again.
        record["molecular_formula"] = composition_to_string(comp)

        # ``retention_time`` takes precedence over the shorter ``rt`` column.
        if has_retention_time:
            record["retention_time"] = row.retention_time
        elif has_rt:
            record["retention_time"] = row.rt

        if has_adduct:
            record["adduct"] = row.adduct
            if lib_adducts and calculate:
                record["exact_mass"] += lib_adducts.lib[row.adduct]["mass"]

        records.append(record)

    return records
def read_molecular_formulae(filename, separator="\t", calculate=True, filename_atoms=""):
    """Read a delimited table of molecular formulae into a list of records.

    The table is loaded with ``read_csv`` and must provide a
    ``molecular_formula`` column; an ``exact_mass`` column is additionally
    required when ``calculate`` is false.

    Each record holds the Hill-ordered composition, a ``CHNOPS`` flag, the
    exact mass, the key/value pairs returned by ``HC_HNOPS_rules`` and
    ``lewis_senior_rules`` for the formula, and ``double_bond_equivalents``
    computed from the ordered composition.

    :param filename: Path (or buffer) handed to ``read_csv``.
    :param separator: Column delimiter handed to ``read_csv``.
    :param calculate: When true, the exact mass is computed from the formula
        using the bundled ``data/nist_database.txt`` and rounded to 6
        decimals; otherwise the ``exact_mass`` column is used as a float.
    :param filename_atoms: Not used by this function.
    :return: List of ``collections.OrderedDict`` records, one per row whose
        formula parses to a non-empty composition.
    """
    # Function-scope import keeps this block self-contained.
    import warnings

    if calculate:
        path_nist_database = os.path.join(
            os.path.dirname(os.path.abspath(__file__)), 'data', 'nist_database.txt')
        nist_database = nist_database_to_pyteomics(path_nist_database)

    df = read_csv(filename, sep=separator, float_precision="round_trip")
    chnops = ("C", "H", "N", "O", "P", "S")

    records = []
    for _, row in df.iterrows():
        formula = str(row.molecular_formula)
        comp = pyteomics_mass.Composition(formula)
        if not comp:
            # Previously a bare ``Warning(...)`` object was created and
            # discarded, so skipped rows went unreported.
            warnings.warn("{} Skipped".format(row), stacklevel=2)
            continue

        record = collections.OrderedDict()
        record["composition"] = collections.OrderedDict(
            (k, comp[k]) for k in order_composition_by_hill(comp.keys()))

        # True when every atom in the formula is one of C, H, N, O, P, S.
        sum_CHNOPS = sum(comp[e] for e in comp if e in chnops)
        record["CHNOPS"] = sum_CHNOPS == sum(comp.values())

        if calculate:
            record["exact_mass"] = round(
                pyteomics_mass.mass.calculate_mass(
                    formula=formula, mass_data=nist_database), 6)
        else:
            record["exact_mass"] = float(row.exact_mass)

        record.update(HC_HNOPS_rules(formula))
        record.update(lewis_senior_rules(formula))
        record["double_bond_equivalents"] = double_bond_equivalents(
            record["composition"])

        records.append(record)

    return records