Beispiel #1
0
 def test_nist_database_to_pyteomics(self):
     # Load the bundled NIST table and check the first entry stored for
     # each of the elements C, H, N, O, P and S.
     database_file = os.path.join(
         self.path, "beamspy", "data", "nist_database.txt")
     nist_database = nist_database_to_pyteomics(database_file)

     expected_first_entries = (
         ("C", (12.0, 1.0)),
         ("H", (1.00782503223, 1.0)),
         ("N", (14.00307400443, 1.0)),
         ("O", (15.99491461957, 1.0)),
         ("P", (30.97376199842, 1.0)),
         ("S", (31.9720711744, 1.0)),
     )
     for element, first_entry in expected_first_entries:
         self.assertEqual(nist_database[element][0], first_entry)
def read_compounds(filename,
                   separator="\t",
                   calculate=True,
                   lib_adducts=[],
                   filename_atoms=""):
    """Read a delimited compound table into a list of ordered records.

    The table must provide the columns ``molecular_formula``,
    ``compound_id`` and ``compound_name``; ``exact_mass`` is required when
    ``calculate`` is false. The optional columns ``retention_time`` (or
    ``rt``) and ``adduct`` are copied into the record when present.

    :param filename: Path (or buffer) handed to ``read_csv``.
    :param separator: Column delimiter handed to ``read_csv``.
    :param calculate: When true, the exact mass is computed from the
        molecular formula with the bundled NIST table and rounded to six
        decimals; otherwise it is taken from the ``exact_mass`` column.
    :param lib_adducts: Optional object whose ``lib`` mapping is indexed by
        adduct name and yields a dict with a ``"mass"`` entry. When it is
        truthy, ``calculate`` is true and the table has an ``adduct``
        column, that mass is added to the computed exact mass. The default
        is never mutated here.
    :param filename_atoms: Not used by this function.
    :return: List of ``collections.OrderedDict`` records, one per row whose
        formula yields a non-empty composition.

    Rows whose composition is empty are skipped and reported through
    ``warnings.warn``.
    """
    # Local import so the block does not depend on a file-level import.
    import warnings

    if calculate:
        path_nist_database = os.path.join(
            os.path.dirname(os.path.abspath(__file__)), 'data',
            'nist_database.txt')
        nist_database = nist_database_to_pyteomics(path_nist_database)

    df = read_csv(filename, sep=separator, float_precision="round_trip")

    # Column presence is identical for every row, so resolve it once.
    if "retention_time" in df.columns:
        rt_column = "retention_time"
    elif "rt" in df.columns:
        rt_column = "rt"
    else:
        rt_column = None
    has_adduct = "adduct" in df.columns

    chnops = ("C", "H", "N", "O", "P", "S")
    records = []
    for _, row in df.iterrows():
        formula = str(row.molecular_formula)
        comp = pyteomics_mass.Composition(formula)
        if not comp:
            # The original built a Warning object and discarded it, so the
            # skip was silent; actually emit the warning.
            warnings.warn("{} Skipped".format(row))
            continue

        record = collections.OrderedDict()
        record["composition"] = collections.OrderedDict(
            (k, comp[k]) for k in order_composition_by_hill(comp.keys()))
        sum_CHNOPS = sum(comp[e] for e in comp if e in chnops)
        # True only when every atom belongs to C, H, N, O, P or S.
        record["CHNOPS"] = sum_CHNOPS == sum(comp.values())

        if calculate:
            record["exact_mass"] = round(
                pyteomics_mass.calculate_mass(formula=formula,
                                              mass_data=nist_database), 6)
        else:
            record["exact_mass"] = float(row.exact_mass)

        record["compound_id"] = row.compound_id
        record["compound_name"] = row.compound_name
        # Reuse the composition parsed above instead of parsing it again.
        record["molecular_formula"] = composition_to_string(comp)

        if rt_column is not None:
            record["retention_time"] = row[rt_column]
        if has_adduct:
            record["adduct"] = row.adduct
            if lib_adducts and calculate:
                # Added after rounding, as in the original implementation.
                record["exact_mass"] += lib_adducts.lib[row.adduct]["mass"]

        records.append(record)

    return records
def read_molecular_formulae(filename,
                            separator="\t",
                            calculate=True,
                            filename_atoms=""):
    """Read a delimited table of molecular formulae into ordered records.

    The table must provide a ``molecular_formula`` column; ``exact_mass``
    is required when ``calculate`` is false.

    :param filename: Path (or buffer) handed to ``read_csv``.
    :param separator: Column delimiter handed to ``read_csv``.
    :param calculate: When true, the exact mass is computed from the
        molecular formula with the bundled NIST table and rounded to six
        decimals; otherwise it is taken from the ``exact_mass`` column.
    :param filename_atoms: Not used by this function.
    :return: List of ``collections.OrderedDict`` records holding
        ``composition``, ``CHNOPS``, ``exact_mass``, whatever
        ``HC_HNOPS_rules`` and ``lewis_senior_rules`` return for the
        formula, and ``double_bond_equivalents``.

    Rows whose composition is empty are skipped and reported through
    ``warnings.warn``.
    """
    # Local import so the block does not depend on a file-level import.
    import warnings

    if calculate:
        path_nist_database = os.path.join(
            os.path.dirname(os.path.abspath(__file__)), 'data',
            'nist_database.txt')
        nist_database = nist_database_to_pyteomics(path_nist_database)

    df = read_csv(filename, sep=separator, float_precision="round_trip")

    chnops = ("C", "H", "N", "O", "P", "S")
    records = []
    for _, row in df.iterrows():
        formula = str(row.molecular_formula)
        comp = pyteomics_mass.Composition(formula)
        if not comp:
            # The original built a Warning object and discarded it, so the
            # skip was silent; actually emit the warning.
            warnings.warn("{} Skipped".format(row))
            continue

        record = collections.OrderedDict()
        record["composition"] = collections.OrderedDict(
            (k, comp[k]) for k in order_composition_by_hill(comp.keys()))
        sum_CHNOPS = sum(comp[e] for e in comp if e in chnops)
        # True only when every atom belongs to C, H, N, O, P or S.
        record["CHNOPS"] = sum_CHNOPS == sum(comp.values())

        if calculate:
            # NOTE(review): read_compounds reaches calculate_mass without
            # the extra ``.mass`` hop; access path kept unchanged here.
            record["exact_mass"] = round(
                pyteomics_mass.mass.calculate_mass(
                    formula=formula,
                    mass_data=nist_database), 6)
        else:
            record["exact_mass"] = float(row.exact_mass)

        record.update(HC_HNOPS_rules(formula))
        record.update(lewis_senior_rules(formula))
        record["double_bond_equivalents"] = double_bond_equivalents(
            record["composition"])
        records.append(record)

    return records