Esempio n. 1
0
 def GetDissociationConstants(self):
     """
         Return self.dissociation, creating it on the first call.

         Loading the pKas takes time, so the DissociationConstants object is
         only fetched from the public DB when self.dissociation is still None;
         later calls return the stored object.
     """
     if self.dissociation is not None:
         return self.dissociation
     self.dissociation = DissociationConstants.FromPublicDB()
     return self.dissociation
Esempio n. 2
0
def main():
    """
        Command-line entry point: build a GroupDecomposer from the groups
        file named in the parsed arguments, then keep calling
        DecomposeInputString until it returns a false value.
    """
    options = MakeOpts().parse_args()
    dissociation = DissociationConstants.FromPublicDB()
    decomposer = GroupDecomposer(
        GroupsData.FromGroupsFile(options.groups_species, transformed=False))

    # Each call handles one input; a false return value ends the session.
    while True:
        if not DecomposeInputString(decomposer, dissociation):
            break
Esempio n. 3
0
def dissociation_decomposition_test():
    """
        Verifies that the decomposition of the compounds in the dissociation
        table match the nH of each species, by handing every dissociation
        table to test_dissociation_table.

        Compounds whose KEGG entry has no atom bag are logged at debug level
        and skipped.
    """
    db = SqliteDatabase('../res/gibbs.sqlite')
    dissociation = DissociationConstants.FromPublicDB()
    group_decomposer = GroupDecomposer(GroupsData.FromDatabase(db))
    kegg = Kegg.getInstance()

    for cid in dissociation.GetAllCids():
        # Human-readable label used both in the log and by the checker.
        label = "C%05d (%s)" % (cid, kegg.cid2name(cid))
        if kegg.cid2compound(cid).get_atom_bag() is None:
            logging.debug('%s: has no explicit formula' % label)
            continue

        table = dissociation.GetDissociationTable(cid,
                                                  create_if_missing=False)
        test_dissociation_table(table, group_decomposer, label,
                                ignore_missing_smiles=True)
def main():
    """
        Transform the formation energies of the training species to fixed
        conditions (pH, I, pMg, T) and write them to a CSV file.

        For every compound in the training table, the single training
        species (nH, charge, nMg, dG0) is placed in the compound's
        dissociation table, the table is transformed, and one row is
        written to the result file.

        Raises Exception if a compound does not have exactly one training
        species, or if no dissociation table is returned for it.
    """
    pH, I, pMg, T = 7.0, 0.25, 14.0, 298.15

    dissociation = DissociationConstants.FromPublicDB()
    kegg = Kegg.getInstance()
    obs_fname = "../data/thermodynamics/formation_energies.csv"
    res_fname = "../res/formation_energies_transformed.csv"

    train_species = PsuedoisomerTableThermodynamics.FromCsvFile(
        obs_fname, label='testing')

    # Use a context manager so the result file is flushed and closed even
    # when one of the exceptions below aborts the loop.
    with open(res_fname, 'w') as res_file:
        csv_out = csv.writer(res_file)
        csv_out.writerow([
            'cid', 'name', "dG'0", 'pH', 'I', 'pMg', 'T', 'anchor',
            'compound_ref', 'remark'
        ])
        for cid in train_species.get_all_cids():
            pmap = train_species.cid2PseudoisomerMap(cid)
            source = train_species.cid2source_string[cid]
            # ToMatrix returns tuples of (nH, z, nMg, dG0)
            pmatrix = pmap.ToMatrix()
            if len(pmatrix) != 1:
                raise Exception("multiple training species for C%05d" % cid)
            nH, charge, nMg, dG0 = pmatrix[0]

            # Look the name up once; it is used for the log and the CSV row.
            compound_name = kegg.cid2name(cid)
            name = "%s (%d)" % (compound_name, nH)
            logging.info('Adding the formation energy of %s', name)
            diss_table = dissociation.GetDissociationTable(
                cid, create_if_missing=True)
            if diss_table is None:
                raise Exception("%s [C%05d, nH=%d, nMg=%d] does not have a "
                                "dissociation table" % (name, cid, nH, nMg))

            diss_table.SetFormationEnergyByNumHydrogens(dG0, nH, nMg)
            diss_table.SetCharge(nH, charge, nMg)
            dG0_prime = diss_table.Transform(pH, I, pMg, T)
            csv_out.writerow([
                cid,
                compound_name,
                "%.1f" % dG0_prime, pH, I, pMg, T, True, source, None
            ])
Esempio n. 5
0
    def __init__(self, use_pKa=True):
        """
            Load the Jankowski et al. table from HATZI_CSV_FNAME into
            self.cid2dG0_tag_dict and self.cid2charge_dict.

            use_pKa - when True, DissociationConstants are loaded from the
                      public DB into self.dissociation; otherwise
                      self.dissociation is set to None.
        """
        if use_pKa:
            Thermodynamics.__init__(self, "Jankowski et al. (+pKa)")
            self.dissociation = DissociationConstants.FromPublicDB()
        else:
            Thermodynamics.__init__(self, "Jankowski et al.")
            self.dissociation = None
        self.db = SqliteDatabase('../res/gibbs.sqlite', 'w')
        self.cid2pmap_dict = {}

        # the conditions in which Hatzimanikatis makes his predictions
        self.Hatzi_pH = 7.0
        self.Hatzi_I = 0.0
        self.Hatzi_pMg = 14.0
        self.Hatzi_T = 298.15

        self.kegg = Kegg.getInstance()

        # for some reason, Hatzimanikatis doesn't indicate that H+ is zero,
        # so we add it here
        H_pmap = PseudoisomerMap()
        H_pmap.Add(0, 0, 0, 0)
        self.SetPseudoisomerMap(80, H_pmap)

        self.cid2dG0_tag_dict = {80: 0}
        self.cid2charge_dict = {80: 0}

        # Read the table inside a context manager so the file handle is
        # released as soon as parsing finishes (or fails).
        with open(HATZI_CSV_FNAME, 'r') as hatzi_file:
            for row in csv.DictReader(hatzi_file):
                # ENTRY is one prefix character followed by the numeric id
                cid = int(row['ENTRY'][1:])
                # the source is recorded even for rows that are skipped below
                self.cid2source_string[cid] = 'Jankowski et al. 2008'
                if row['DELTAG'] == "Not calculated":
                    continue
                if cid == 3178:
                    # this compound, which is supposed to be
                    # "Tetrahydroxypteridine" seems to be mapped to something
                    # else by Hatzimanikatis
                    continue
                # presumably DELTAG is in calories and J_per_cal converts it
                # to Joules -- TODO confirm the units of the source table
                self.cid2dG0_tag_dict[cid] = float(row['DELTAG']) * J_per_cal
                self.cid2charge_dict[cid] = int(row['CHARGE'])
Esempio n. 6
0
import csv
import numpy as np
from pygibbs.dissociation_constants import DissociationConstants,\
    MissingDissociationConstantError


# Conditions passed to GetCid2nH_nMg below.
(pH, I, pMg, T) = (7.0, 0.1, 14, 298.15)

# Tab-separated output file for the NIST rows; the header row is written
# immediately.
# NOTE(review): the file object is not bound to a name, so nothing in the
# visible code closes it explicitly.
nist_csv = csv.writer(open('../res/nist_rt_data.tsv', 'w'), delimiter='\t')
nist_csv.writerow(('URL', "dG'0", 'pH', 'I', 'T', 'dG0', 'ddG0', 'match in iAF1260'))

# Second tab-separated output file, with its own header row (same note about
# the unnamed file object applies).
feist_csv = csv.writer(open('../res/nist_feist_match.tsv', 'w'), delimiter='\t')
feist_csv.writerow(('name', "dG'0", 'pH', 'I', 'T', 'dG0', 'ddG0'))

# Data sources used further down in the script.
nist = Nist()
dissociation = DissociationConstants.FromPublicDB()
# presumably maps each compound id to an (nH, nMg) pair at the conditions
# above -- TODO confirm against DissociationConstants.GetCid2nH_nMg
cid2nH_nMg = dissociation.GetCid2nH_nMg(pH, I, pMg, T)

for i, r in enumerate(Feist.FromFiles().reactions):
    if r.name in ['NTPP9', 'DHQS', 'AICART']: # these reactions were not in NIST when the UGCM model was created
        continue
    
    nist_rows = nist.SelectRowsFromNist(r, check_reverse=True)
    if nist_rows == []:
        continue

    try:
        print r.name
        
        dG0_list = []
        for row in nist_rows:
    def EstimateKeggCids(self):
        """
            Estimate a formation energy for every KEGG compound and store
            the results in the database.

            For each compound that has an entry in self.cid2nH_nMg, the dG0
            comes either from the total contributions (compounds listed in
            self.cids) or from the group contributions applied to the
            compound's flattened group vector. Compounds with neither are
            recorded in self.cid2error. The resulting pseudoisomer maps are
            registered with SetPseudoisomerMap.

            The rows of self.P_L_tot are then normalized, serialized as JSON
            and written to the conservations table, followed by the
            thermodynamics and errors tables.
        """
        contrib = self._GetContributionData(self.S.copy(), self.cids,
                                            self.b.copy(), self.anchored)

        group_dG0 = contrib['group_contributions']
        bad_cons = contrib['bad_conservations']
        pgc_cons = contrib['pgc_conservations']
        resid_groupvecs = contrib['pgc_groupvectors']
        total_dG0 = contrib['total_contributions']

        dissociation = DissociationConstants.FromPublicDB()
        kegg_cids = sorted(self.kegg.get_all_cids())

        n_bad = bad_cons.shape[0]
        n_pgc = pgc_cons.shape[0]
        # one column per KEGG compound; the first n_bad rows hold the "bad"
        # conservations and the remaining n_pgc rows the "pgc" ones
        self.P_L_tot = np.matrix(np.zeros((n_bad + n_pgc, len(kegg_cids))))

        for col, cid in enumerate(kegg_cids):
            if cid not in self.cid2nH_nMg:
                self.cid2error[cid] = "No pKa data"
                continue

            nH, nMg = self.cid2nH_nMg[cid]
            if cid in self.cids:
                idx = self.cids.index(cid)
                dG0 = total_dG0[0, idx]
                self.cid2source_string[cid] = "Unified Group Contribution"
                self.P_L_tot[:n_bad, col] = bad_cons[:, idx]
                self.P_L_tot[n_bad:, col] = pgc_cons * resid_groupvecs[idx, :].T
            elif self.cid2groupvec[cid] is not None:
                flat_gv = np.matrix(self.cid2groupvec[cid].Flatten())
                dG0 = float(group_dG0 * flat_gv.T)
                self.cid2source_string[cid] = "Group Contribution"
                self.P_L_tot[n_bad:, col] = pgc_cons * flat_gv.T
            else:
                self.cid2error[cid] = "no groupvector"
                continue

            table = dissociation.GetDissociationTable(cid)
            if table is None:
                # no dissociation table: fall back to a single species with
                # zero charge
                pmap = PseudoisomerMap()
                pmap.Add(nH=nH, z=0, nMg=nMg, dG0=dG0)
            else:
                table.SetFormationEnergyByNumHydrogens(dG0=dG0,
                                                       nH=nH,
                                                       nMg=nMg)
                pmap = table.GetPseudoisomerMap()
            self.SetPseudoisomerMap(cid, pmap)

        # collected as a set so that identical rows are stored only once
        unique_rows = set()
        for r in xrange(self.P_L_tot.shape[0]):
            row = self.P_L_tot[r, :]
            active_cols = sorted((abs(row) > self.epsilon).nonzero()[1].flat)
            if not active_cols:
                continue

            # normalize reaction such that the coefficient of the smallest
            # CID is 1
            row = (row * (1.0 / row[0, active_cols[0]])).round(10)

            sparse = {kegg_cids[c]: row[0, c] for c in active_cols}
            if not sparse:
                continue

            if r < n_bad:
                msg = 'missing structures and unknown reactant combination'
            else:
                msg = 'unknown reactant and group combination'
            unique_rows.add((msg, json.dumps(sparse)))

        self.db.CreateTable(self.CONSERVATIONS_TABLE_NAME,
                            'msg TEXT, json TEXT')
        for msg, json_str in sorted(unique_rows):
            self.db.Insert(self.CONSERVATIONS_TABLE_NAME, [msg, json_str])
        self.ToDatabase(self.db, self.THERMODYNAMICS_TABLE_NAME,
                        self.ERRORS_TABLE_NAME)
        self.db.Commit()
Esempio n. 8
0
 def GetDissociation(self):
     """
         Return self.dissociation, fetching it from the public DB the first
         time it is requested (i.e. while it is still None).
     """
     if self.dissociation is not None:
         return self.dissociation
     self.dissociation = DissociationConstants.FromPublicDB()
     return self.dissociation