Beispiel #1
0
def LoadAllEstimators():
    """Build every available thermodynamic estimator and return them by key.

    On success the returned dict holds 'hatzi_gc', 'UGC' and 'C1'.
    'alberty' and 'PRC' are added only when their pseudoisomer tables exist
    (a warning is logged otherwise), 'BGC' and 'PGC' only when their tables
    exist in the gibbs database, and 'merged' / 'merged_C1' only when 'PGC'
    was loaded.  Concentration bounds are loaded into every estimator
    before it is returned.

    Returns:
        dict mapping estimator key (str) to the estimator object.

    Raises:
        KeyError: if the 'alberty' table is missing, because the 'C1'
            estimator is constructed from estimators['alberty'].
    """
    db_public = SqliteDatabase('../data/public_data.sqlite')
    db_gibbs = SqliteDatabase('../res/gibbs.sqlite')

    # The regression is run only when the PRC table is absent -- presumably
    # Train() is what creates that table; confirm against NistRegression.
    if not db_gibbs.DoesTableExist('prc_pseudoisomers'):
        nist_regression = NistRegression(db_gibbs)
        nist_regression.Train()

    # key -> (database, table name, display name)
    tables = {
        'alberty': (db_public, 'alberty_pseudoisomers', 'Alberty'),
        'PRC': (db_gibbs, 'prc_pseudoisomers', 'our method (PRC)'),
    }
    estimators = {}
    # items() behaves the same as iteritems() here and also exists on
    # Python 3 dicts.
    for key, (db, table_name, thermo_name) in tables.items():
        if db.DoesTableExist(table_name):
            estimators[key] = PsuedoisomerTableThermodynamics.FromDatabase(
                db, table_name, name=thermo_name)
        else:
            logging.warning('The table %s does not exist in %s',
                            table_name, db)

    estimators['hatzi_gc'] = Hatzi(use_pKa=False)
    #estimators['hatzi_gc_pka'] = Hatzi(use_pKa=True)

    # Probe db_gibbs explicitly: the group-contribution estimators below are
    # built on it.  The loop variable `db` from above is whichever database
    # the dict happened to yield last, which is not guaranteed to be
    # db_gibbs.
    if db_gibbs.DoesTableExist('bgc_pseudoisomers'):
        estimators['BGC'] = GroupContribution(db=db_gibbs, transformed=True)
        estimators['BGC'].init()
        estimators['BGC'].name = 'our method (BGC)'

    if db_gibbs.DoesTableExist('pgc_pseudoisomers'):
        estimators['PGC'] = GroupContribution(db=db_gibbs, transformed=False)
        estimators['PGC'].init()
        estimators['PGC'].name = 'our method (PGC)'

    estimators['UGC'] = UnifiedGroupContribution(db=db_gibbs)
    estimators['UGC'].init()
    estimators['UGC'].name = 'our method (UGC)'

    estimators['C1'] = ReactionThermodynamics.FromCsv(
        '../data/thermodynamics/c1_reaction_thermodynamics.csv',
        estimators['alberty'])

    # The merged estimators pair a table-based estimator with PGC, so they
    # can only be built when PGC was loaded.
    if 'PGC' in estimators:
        estimators['merged'] = BinaryThermodynamics(estimators['alberty'],
                                                    estimators['PGC'])
        estimators['merged_C1'] = BinaryThermodynamics(estimators['C1'],
                                                       estimators['PGC'])

    for thermo in estimators.values():
        thermo.load_bounds('../data/thermodynamics/concentration_bounds.csv')

    return estimators
def LoadAllEstimators():
    """Create all thermodynamic estimators that can be loaded, keyed by name.

    Keys present on success: 'hatzi_gc', 'UGC', 'C1'; 'alberty' and 'PRC'
    when their pseudoisomer tables exist (otherwise a warning is logged);
    'BGC' and 'PGC' when their tables exist in the gibbs database;
    'merged' and 'merged_C1' when 'PGC' is present.  Every estimator has
    the concentration bounds file loaded before the dict is returned.

    Returns:
        dict mapping estimator key (str) to the estimator object.

    Raises:
        KeyError: if the 'alberty' table does not exist, since 'C1' is
            built from estimators['alberty'].
    """
    db_public = SqliteDatabase('../data/public_data.sqlite')
    db_gibbs = SqliteDatabase('../res/gibbs.sqlite')

    # Run the NIST regression only if its output table is missing
    # (presumably Train() writes 'prc_pseudoisomers' -- confirm).
    if not db_gibbs.DoesTableExist('prc_pseudoisomers'):
        nist_regression = NistRegression(db_gibbs)
        nist_regression.Train()

    # key -> (database, table name, display name)
    tables = {'alberty': (db_public, 'alberty_pseudoisomers', 'Alberty'),
              'PRC': (db_gibbs, 'prc_pseudoisomers', 'our method (PRC)')}
    estimators = {}
    # items() is equivalent to iteritems() for this loop and is also
    # available on Python 3.
    for key, (db, table_name, thermo_name) in tables.items():
        if db.DoesTableExist(table_name):
            estimators[key] = PsuedoisomerTableThermodynamics.FromDatabase(
                db, table_name, name=thermo_name)
        else:
            logging.warning('The table %s does not exist in %s',
                            table_name, db)

    estimators['hatzi_gc'] = Hatzi(use_pKa=False)
    #estimators['hatzi_gc_pka'] = Hatzi(use_pKa=True)

    # The table checks must target db_gibbs, the database the estimators
    # are built on.  Reusing the loop variable `db` here would test
    # whichever database the dict iteration ended on.
    if db_gibbs.DoesTableExist('bgc_pseudoisomers'):
        estimators['BGC'] = GroupContribution(db=db_gibbs, transformed=True)
        estimators['BGC'].init()
        estimators['BGC'].name = 'our method (BGC)'

    if db_gibbs.DoesTableExist('pgc_pseudoisomers'):
        estimators['PGC'] = GroupContribution(db=db_gibbs, transformed=False)
        estimators['PGC'].init()
        estimators['PGC'].name = 'our method (PGC)'

    estimators['UGC'] = UnifiedGroupContribution(db=db_gibbs)
    estimators['UGC'].init()
    estimators['UGC'].name = 'our method (UGC)'

    estimators['C1'] = ReactionThermodynamics.FromCsv(
        '../data/thermodynamics/c1_reaction_thermodynamics.csv',
        estimators['alberty'])

    # Merged estimators need PGC as their second component.
    if 'PGC' in estimators:
        estimators['merged'] = BinaryThermodynamics(estimators['alberty'],
                                                    estimators['PGC'])
        estimators['merged_C1'] = BinaryThermodynamics(estimators['C1'],
                                                       estimators['PGC'])

    for thermo in estimators.values():
        thermo.load_bounds('../data/thermodynamics/concentration_bounds.csv')

    return estimators
    def Initialize(self, db):
        """Populate this object from a UnifiedGroupContribution backed by `db`.

        Loads groups, observations, group vectors and data from the
        database, initializes the model, and stores on self:
        `groups_data`, `group_decomposer`, `g_pgc` (the
        'group_contributions' entry) and `P_L_pgc` (the 'pgc_conservations'
        entry) of the contribution data.
        """
        # Kept as a function-scope import, as in the original.
        from pygibbs.unified_group_contribution import UnifiedGroupContribution

        model = UnifiedGroupContribution(db)
        # The loaders run in this fixed order, each reading from the database.
        for loader_name in ('LoadGroups', 'LoadObservations',
                            'LoadGroupVectors', 'LoadData'):
            getattr(model, loader_name)(FromDatabase=True)
        model.init()

        self.groups_data = model.groups_data
        self.group_decomposer = model.group_decomposer

        # S and b are passed as copies, cids and anchored as-is.
        contribution_data = model._GetContributionData(
            model.S.copy(), model.cids, model.b.copy(), model.anchored)

        self.g_pgc = contribution_data['group_contributions']
        self.P_L_pgc = contribution_data['pgc_conservations']
    def Initialize(self, db):
        """Set up group data and PGC results from the database `db`.

        A UnifiedGroupContribution is created on `db`, loaded from the
        database and initialized.  Its `groups_data` and `group_decomposer`
        are copied onto self, and the 'group_contributions' and
        'pgc_conservations' entries of its contribution data are stored as
        `self.g_pgc` and `self.P_L_pgc`.
        """
        # Function-scope import, same as the original.
        from pygibbs.unified_group_contribution import UnifiedGroupContribution

        unified = UnifiedGroupContribution(db)
        # Loaders are called in this order, all with FromDatabase=True.
        load_steps = ('LoadGroups', 'LoadObservations',
                      'LoadGroupVectors', 'LoadData')
        for step in load_steps:
            getattr(unified, step)(FromDatabase=True)
        unified.init()

        self.groups_data = unified.groups_data
        self.group_decomposer = unified.group_decomposer

        # Copies of S and b are handed over; cids and anchored are shared.
        pgc_result = unified._GetContributionData(
            unified.S.copy(), unified.cids, unified.b.copy(),
            unified.anchored)

        self.g_pgc = pgc_result['group_contributions']
        self.P_L_pgc = pgc_result['pgc_conservations']
# Finish the current figure (iAF1260 value vs. UGCM estimation) and save it.
plt.xlabel('value in iAF1260 [kJ/mol]')
plt.ylabel('UGCM estimation [kJ/mol]')
plt.title('Unobserved data, N = %d, RMSE = %.1f [kJ/mol]' %
          (len(non_nist_idx), rms_feist_ugcm2))
plt.tight_layout()
# savefig() names its output-type argument `format` (without a leading
# dot); `fmt` is not a savefig parameter.
plt.savefig(FIG_FNAME + "_fig2.svg", format='svg')

# Histogram of the two error series, in 2 kJ/mol bins over [-30, 30).
plt.figure(figsize=(6, 6), dpi=90)
bins = np.arange(-30, 30, 2)
# `normed=False` was the default and the keyword no longer exists in newer
# matplotlib releases, so it is simply left out.
plt.hist([err_feist_nist, err_ugcm_nist], bins=bins, histtype='bar',
         cumulative=False)
plt.xlabel('Error in kJ/mol')
plt.ylabel('# of reactions')
plt.legend(['value in iAF1260', 'UGCM estimation'])
plt.savefig(FIG_FNAME + "_fig3.svg", format='svg')

# Load the unified group contribution model from the gibbs database.
db = SqliteDatabase('../res/gibbs.sqlite', 'w')
ugc = UnifiedGroupContribution(db)
# NOTE(review): the positional True is presumably the FromDatabase flag
# that other callers pass by keyword -- confirm against the signatures.
ugc.LoadGroups(True)
ugc.LoadObservations(True)
ugc.LoadGroupVectors(True)
ugc.LoadData(True)
ugc.init()

# Reactions to analyze; the commented-out lines are alternative inputs.
r_list = []
#r_list += [Reaction.FromFormula("C00036 + C00044 = C00011 + C00035 + C00074")]
#r_list += [Reaction.FromFormula("C00003 + C00037 + C00101 = C00004 + C00011 + C00014 + C00080 + C00143")] # glycine synthase
r_list += [Reaction.FromFormula("C00001 + C00002 + C00064 + C04376 => C00008 + C00009 + C00025 + C04640")]
#r_list += [Reaction.FromFormula("C00001 + 2 C00002 + C00064 + C00288 <=> 2 C00008 + C00009 + C00025 + C00169")]


kegg = Kegg.getInstance()
S, cids = kegg.reaction_list_to_S(r_list)
def CalculateThermo():
    """Estimate formation energies for every molecule in an SDF file.

    Options are parsed from sys.argv with MakeOpts(): `sdf_input_filename`
    names the input file and `csv_output_filename` the output file (stdout
    is used when it is None).  One CSV row is written per pseudoisomer of
    each molecule.  A molecule that raises UnknownReactionEnergyError gets
    a single row carrying the message in the 'error' column and empty
    values elsewhere.  Any other exception propagates to the caller; the
    output file, if one was opened, is closed either way.
    """
    options, _ = MakeOpts().parse_args(sys.argv)

    if options.csv_output_filename is not None:
        out_fp = open(options.csv_output_filename, 'w')
        # Parenthesised single argument: prints the same on Python 2 and 3.
        print("writing results to %s ... " % options.csv_output_filename)
    else:
        out_fp = sys.stdout

    try:
        csv_writer = csv.writer(out_fp)
        # Header order follows the data rows below, which are written as
        # nH, charge (p_z), nMg.  It previously listed 'nMg' before
        # 'charge', mislabelling both columns.
        csv_writer.writerow(
            ['ID', 'error', 'nH', 'charge', 'nMg', 'dG0', 'kernel'])

        db = SqliteDatabase('../res/gibbs.sqlite', 'w')
        ugc = UnifiedGroupContribution(db)
        ugc.LoadGroups(True)
        ugc.LoadObservations(True)
        ugc.LoadGroupVectors(True)
        ugc.LoadData(True)
        # NOTE(review): other users of UnifiedGroupContribution in this file
        # call ugc.init() before _GetContributionData; confirm that skipping
        # it here is intended.

        result_dict = ugc._GetContributionData(ugc.S.copy(), ugc.cids,
                                               ugc.b.copy(), ugc.anchored)

        g_pgc = result_dict['group_contributions']
        P_L_pgc = result_dict['pgc_conservations']

        sdfile = pybel.readfile("sdf", options.sdf_input_filename)
        for m in sdfile:
            try:
                # Each expected failure is translated into
                # UnknownReactionEnergyError so it becomes an error row.
                try:
                    mol = Molecule.FromOBMol(m.OBMol)
                except OpenBabelError:
                    raise UnknownReactionEnergyError(
                        "Cannot convert to OBMol object")

                mol.title = m.title
                mol.RemoveHydrogens()
                if mol.GetNumAtoms() > 200:
                    raise UnknownReactionEnergyError(
                        "Compound contains more than 200 atoms (n = %d)" %
                        mol.GetNumAtoms())

                try:
                    decomposition = ugc.group_decomposer.Decompose(
                        mol, ignore_protonations=False, strict=True)
                except GroupDecompositionError:
                    raise UnknownReactionEnergyError("cannot decompose")

                groupvec = decomposition.AsVector()
                # gv is an np.matrix, so `*` below is a matrix product.
                gv = np.matrix(groupvec.Flatten())
                dG0 = float(g_pgc * gv.T)
                nH = decomposition.Hydrogens()
                nMg = decomposition.Magnesiums()
                # Product with the conservation matrix, rounded to 10
                # decimals and flattened into a plain list.
                ker = list((P_L_pgc * gv.T).round(10).flat)
                try:
                    diss_table = mol.GetDissociationTable()
                    diss_table.SetFormationEnergyByNumHydrogens(
                        dG0=dG0, nH=nH, nMg=nMg)
                except MissingDissociationConstantError:
                    raise UnknownReactionEnergyError("missing pKa data")
                pmap = diss_table.GetPseudoisomerMap()
                for p_nH, p_z, p_nMg, p_dG0 in pmap.ToMatrix():
                    csv_writer.writerow([m.title, None, p_nH, p_z, p_nMg,
                                         round(p_dG0, 1), str(ker)])

            except UnknownReactionEnergyError as e:
                csv_writer.writerow(
                    [m.title, str(e), None, None, None, None, None])

            # Flush per molecule so rows are visible as they are produced.
            out_fp.flush()
    finally:
        # Close only a file this function opened; never close stdout.
        if out_fp is not sys.stdout:
            out_fp.close()
Beispiel #7
0
# savefig() names its output-type argument `format` (without a leading dot);
# `fmt` is not a savefig parameter.
plt.savefig(FIG_FNAME + "_fig2.svg", format='svg')

# Histogram of the two error series, in 2 kJ/mol bins over [-30, 30).
plt.figure(figsize=(6, 6), dpi=90)
bins = np.arange(-30, 30, 2)
# `normed=False` was the default and the keyword no longer exists in newer
# matplotlib releases, so it is simply left out.
plt.hist([err_feist_nist, err_ugcm_nist],
         bins=bins,
         histtype='bar',
         cumulative=False)
plt.xlabel('Error in kJ/mol')
plt.ylabel('# of reactions')
plt.legend(['value in iAF1260', 'UGCM estimation'])
plt.savefig(FIG_FNAME + "_fig3.svg", format='svg')

# Load the unified group contribution model from the gibbs database.
db = SqliteDatabase('../res/gibbs.sqlite', 'w')
ugc = UnifiedGroupContribution(db)
# NOTE(review): the positional True is presumably the FromDatabase flag
# that other callers pass by keyword -- confirm against the signatures.
ugc.LoadGroups(True)
ugc.LoadObservations(True)
ugc.LoadGroupVectors(True)
ugc.LoadData(True)
ugc.init()

# Reactions to analyze; the commented-out lines are alternative inputs.
r_list = []
#r_list += [Reaction.FromFormula("C00036 + C00044 = C00011 + C00035 + C00074")]
#r_list += [Reaction.FromFormula("C00003 + C00037 + C00101 = C00004 + C00011 + C00014 + C00080 + C00143")] # glycine synthase
r_list += [
    Reaction.FromFormula(
        "C00001 + C00002 + C00064 + C04376 => C00008 + C00009 + C00025 + C04640"
    )
]
#r_list += [Reaction.FromFormula("C00001 + 2 C00002 + C00064 + C00288 <=> 2 C00008 + C00009 + C00025 + C00169")]
Beispiel #8
0
def CalculateThermo():
    """Write estimated pseudoisomer formation energies for an SDF file as CSV.

    Options are parsed from sys.argv with MakeOpts().  Molecules are read
    from `options.sdf_input_filename`; rows go to
    `options.csv_output_filename`, or to stdout when that option is None.
    Each pseudoisomer of a molecule produces one row.  A molecule that
    raises UnknownReactionEnergyError produces one row with the message in
    the 'error' column and the other value columns empty.  Other
    exceptions propagate; an output file opened here is always closed.
    """
    options, _ = MakeOpts().parse_args(sys.argv)

    if options.csv_output_filename is not None:
        out_fp = open(options.csv_output_filename, 'w')
        # Parenthesised single argument: prints the same on Python 2 and 3.
        print("writing results to %s ... " % options.csv_output_filename)
    else:
        out_fp = sys.stdout

    try:
        csv_writer = csv.writer(out_fp)
        # The data rows below are written as nH, charge (p_z), nMg, so the
        # header lists them in that order.  It used to name 'nMg' before
        # 'charge', which swapped the labels of those two columns.
        csv_writer.writerow(
            ['ID', 'error', 'nH', 'charge', 'nMg', 'dG0', 'kernel'])

        db = SqliteDatabase('../res/gibbs.sqlite', 'w')
        ugc = UnifiedGroupContribution(db)
        ugc.LoadGroups(True)
        ugc.LoadObservations(True)
        ugc.LoadGroupVectors(True)
        ugc.LoadData(True)
        # NOTE(review): other users of UnifiedGroupContribution in this file
        # call ugc.init() before _GetContributionData; confirm that leaving
        # it out here is deliberate.

        result_dict = ugc._GetContributionData(ugc.S.copy(), ugc.cids,
                                               ugc.b.copy(), ugc.anchored)

        g_pgc = result_dict['group_contributions']
        P_L_pgc = result_dict['pgc_conservations']

        sdfile = pybel.readfile("sdf", options.sdf_input_filename)
        for m in sdfile:
            try:
                # Expected failures are re-raised as
                # UnknownReactionEnergyError and reported as an error row.
                try:
                    mol = Molecule.FromOBMol(m.OBMol)
                except OpenBabelError:
                    raise UnknownReactionEnergyError(
                        "Cannot convert to OBMol object")

                mol.title = m.title
                mol.RemoveHydrogens()
                if mol.GetNumAtoms() > 200:
                    raise UnknownReactionEnergyError(
                        "Compound contains more than 200 atoms (n = %d)" %
                        mol.GetNumAtoms())

                try:
                    decomposition = ugc.group_decomposer.Decompose(
                        mol, ignore_protonations=False, strict=True)
                except GroupDecompositionError:
                    raise UnknownReactionEnergyError("cannot decompose")

                groupvec = decomposition.AsVector()
                # gv is an np.matrix, so `*` below is a matrix product.
                gv = np.matrix(groupvec.Flatten())
                dG0 = float(g_pgc * gv.T)
                nH = decomposition.Hydrogens()
                nMg = decomposition.Magnesiums()
                # Product with the conservation matrix, rounded to 10
                # decimals and flattened into a plain list.
                ker = list((P_L_pgc * gv.T).round(10).flat)
                try:
                    diss_table = mol.GetDissociationTable()
                    diss_table.SetFormationEnergyByNumHydrogens(dG0=dG0,
                                                                nH=nH,
                                                                nMg=nMg)
                except MissingDissociationConstantError:
                    raise UnknownReactionEnergyError("missing pKa data")
                pmap = diss_table.GetPseudoisomerMap()
                for p_nH, p_z, p_nMg, p_dG0 in pmap.ToMatrix():
                    csv_writer.writerow([
                        m.title, None, p_nH, p_z, p_nMg,
                        round(p_dG0, 1),
                        str(ker)
                    ])

            except UnknownReactionEnergyError as e:
                csv_writer.writerow(
                    [m.title, str(e), None, None, None, None, None])

            # Flush per molecule so rows are visible as they are produced.
            out_fp.flush()
    finally:
        # Close only a file this function opened; never close stdout.
        if out_fp is not sys.stdout:
            out_fp.close()
Beispiel #9
0
# Read the group contribution data stored in the gibbs.sqlite database and
# save the 'group_contributions' and 'pgc_conservations' arrays with
# numpy.save in the directory the script was started from.

import os
import sys

# The source root is taken to be two directory levels above the start
# directory; it must be the cwd and on sys.path before the project imports.
orig_dir = os.getcwd()
pygibbs_path = os.path.dirname(orig_dir)
src_path = os.path.dirname(pygibbs_path)
os.chdir(src_path)
print(src_path)
sys.path.append(src_path)

import numpy as np
from pygibbs.unified_group_contribution import UnifiedGroupContribution
from toolbox.database import SqliteDatabase

db = SqliteDatabase('../res/gibbs.sqlite', 'r')
ugc = UnifiedGroupContribution(db)
# Run the loaders in their fixed order, each reading from the database.
for loader_name in ('LoadGroups', 'LoadObservations',
                    'LoadGroupVectors', 'LoadData'):
    getattr(ugc, loader_name)(FromDatabase=True)
ugc.init()
result_dict = ugc._GetContributionData(
    ugc.S.copy(), ugc.cids, ugc.b.copy(), ugc.anchored)

g_pgc = result_dict['group_contributions']
P_L_pgc = result_dict['pgc_conservations']

# Go back to the start directory so the output lands there.
os.chdir(orig_dir)
print(os.getcwd())
# numpy.save appends '.npy' to names that lack it and does not compress,
# so despite the '.gz' suffix these are plain .npy files.
np.save('g_pgc.gz', g_pgc)
np.save('P_L_g_pgc.gz', P_L_pgc)
Beispiel #10
0
# Read the group contribution data stored in the gibbs.sqlite database and
# write the 'group_contributions' and 'pgc_conservations' arrays with
# numpy.save into the directory the script was launched from.

import os
import sys

# Two levels above the launch directory is used as the source root; switch
# to it and add it to sys.path before importing the project modules.
orig_dir = os.getcwd()
pygibbs_path = os.path.dirname(orig_dir)
src_path = os.path.dirname(pygibbs_path)
os.chdir(src_path)
print(src_path)
sys.path.append(src_path)

import numpy as np
from pygibbs.unified_group_contribution import UnifiedGroupContribution
from toolbox.database import SqliteDatabase

db = SqliteDatabase('../res/gibbs.sqlite', 'r')
ugc = UnifiedGroupContribution(db)
# Loaders are called in this order, all with FromDatabase=True.
for loader_name in ('LoadGroups', 'LoadObservations',
                    'LoadGroupVectors', 'LoadData'):
    getattr(ugc, loader_name)(FromDatabase=True)
ugc.init()
result_dict = ugc._GetContributionData(
    ugc.S.copy(), ugc.cids, ugc.b.copy(), ugc.anchored)

g_pgc = result_dict['group_contributions']
P_L_pgc = result_dict['pgc_conservations']

# Return to the launch directory before writing the output files.
os.chdir(orig_dir)
print(os.getcwd())
# numpy.save appends '.npy' to names that lack it and does not compress,
# so despite the '.gz' suffix these are plain .npy files.
np.save('g_pgc.gz', g_pgc)
np.save('P_L_g_pgc.gz', P_L_pgc)