def ExportJSONFiles():
    """Export the NIST table of the public database to a CSV file.

    The database path, the table name and the output path are taken from the
    command-line options ``public_db``, ``nist_table`` and ``output_csv``
    (parsed by ``MakeOpts``).  Despite the function's name, the file written
    here is CSV, not JSON.
    """
    options, _ = MakeOpts().parse_args(sys.argv)
    print("Using the database file: " + options.public_db)
    print("Using the NIST table: " + options.nist_table)
    print("Saving the data to the CSV file: " + options.output_csv)

    # Columns copied from the database row to the CSV unchanged.
    plain_columns = ['url', 'reference_id', 'method', 'evaluation', 'ec',
                     'enzyme', 'kegg_reaction', 'reaction']
    # Numeric columns: (database key, CSV header, format string handed to
    # reformat_number_string).
    numeric_columns = [('K', 'K', '%.3e'),
                       ('K_tag', 'K_tag', '%.3e'),
                       ('T', 'T (K)', '%.2f'),
                       ('I', 'I (M)', '%.2f'),
                       ('pH', 'pH', '%.2f'),
                       ('pMg', 'pMg', '%.2f')]

    db = SqliteDatabase(options.public_db)
    # The 'with' block flushes and closes the output file even when a row
    # fails to convert, instead of leaving the handle to the garbage
    # collector.
    with open(options.output_csv, 'w') as csv_file:
        csv_writer = csv.writer(csv_file)
        csv_writer.writerow(
            plain_columns + [header for (key, header, fmt) in numeric_columns])
        for row in db.DictReader(options.nist_table):
            csvrow = [row[key] for key in plain_columns]
            csvrow += [reformat_number_string(row[key], fmt)
                       for (key, header, fmt) in numeric_columns]
            csv_writer.writerow(csvrow)
def run(self):
    """Compute the dissociation table of ``self.smiles`` and store it.

    The work is bracketed by ``self.semaphore`` and the database write by
    ``self.db_lock``.  Both are released in ``finally`` clauses so that an
    exception in one worker cannot leave the semaphore or the lock held
    forever; the exception itself still propagates.

    When no dissociation table is produced, a single row holding the
    compound ID, its name and ten ``None`` values is inserted instead.
    """
    from toolbox.molecule import Molecule

    self.semaphore.acquire()
    try:
        start_time = time.time()
        logging.debug("SMILES: " + self.smiles)
        diss_table = Molecule._GetDissociationTable(
            self.smiles, fmt='smiles', mid_pH=default_pH,
            min_pKa=0, max_pKa=14, T=default_T)
        # The table may be None (handled below), so only log its
        # attributes when it exists; dereferencing None here would raise
        # and make the fallback insert unreachable.
        if diss_table is not None:
            logging.debug("Min charge: %d" % diss_table.min_charge)
            logging.debug("Min nH: %d" % diss_table.min_nH)
        elapsed_time = time.time() - start_time

        self.db_lock.acquire()
        try:
            db = SqliteDatabase(self.options.db_file)
            kegg = Kegg.getInstance()
            name = kegg.cid2name(self.cid)
            if diss_table is not None:
                for row in diss_table.ToDatabaseRow():
                    db.Insert(self.options.table_name,
                              [self.cid, name] + row)
            else:
                db.Insert(self.options.table_name,
                          [self.cid, name] + [None] * 10)
            # Drop the database object before giving up the lock.
            del db
        finally:
            self.db_lock.release()

        logging.info("Completed C%05d, elapsed time = %.1f sec" %
                     (self.cid, elapsed_time))
    finally:
        self.semaphore.release()
def __init__(self, T_range=(298, 314)):
    """Open the public database, then load and balance the reactions.

    Args:
        T_range: stored as ``self.T_range``; defaults to ``(298, 314)``.

    The pH range and the I / pMg / T overrides all start out as ``None``.
    Construction finishes by calling ``self.FromDatabase()`` followed by
    ``self.BalanceReactions()``.
    """
    self.db = SqliteDatabase('../data/public_data.sqlite')
    self.kegg = Kegg.getInstance()

    # Filtering / override settings; None means "not set".
    self.T_range = T_range
    self.pH_range = None
    self.override_I = self.override_pMg = self.override_T = None

    self.FromDatabase()
    self.BalanceReactions()
def setUp(self):
    """Build two thermodynamics fixtures: one from CSV text, one from the DB.

    ``self.fake_thermo_csv`` is filled from ``CSV_DATA`` and
    ``self.fake_thermo_db`` from the ``fake_pseudoisomers`` table of
    ``PUBLIC_DB_FNAME``.  Both go through
    ``PsuedoisomerTableThermodynamics._FromDictReader`` with warnings for
    conflicting references disabled.
    """
    # Fixture 1: rows parsed from the in-memory CSV text.
    csv_rows = csv.DictReader(StringIO(CSV_DATA))
    empty_csv_thermo = PsuedoisomerTableThermodynamics()
    self.fake_thermo_csv = empty_csv_thermo
    self.fake_thermo_csv = PsuedoisomerTableThermodynamics._FromDictReader(
        csv_rows, empty_csv_thermo, warn_for_conflicting_refs=False)

    # Fixture 2: rows read from the database table.
    db_rows = SqliteDatabase(PUBLIC_DB_FNAME).DictReader('fake_pseudoisomers')
    empty_db_thermo = PsuedoisomerTableThermodynamics()
    self.fake_thermo_db = empty_db_thermo
    self.fake_thermo_db = PsuedoisomerTableThermodynamics._FromDictReader(
        db_rows, empty_db_thermo, warn_for_conflicting_refs=False)
def compare_charges():
    """Write an HTML table comparing the charges given by two estimators.

    For every compound known to either the 'hatzi' or the 'milo' estimator,
    the most abundant pseudoisomer is looked up at ``default_pH``, I = 0,
    pMg = 14 and ``default_T``.  The report, written to
    ``../res/groups_report.html``, lists the compound, its name, both
    charges and the error value read from the ``gc_errors`` table.
    """
    gibbs_db = SqliteDatabase('../res/gibbs.sqlite')
    print("Writing Compare Charges report to ../res/groups_report.html")
    report = HtmlWriter("../res/groups_report.html")
    kegg = Kegg.getInstance()

    # Note: I and pMg are fixed to 0 and 14 rather than the module defaults.
    pH, I, pMg, T = default_pH, 0, 14, default_T

    cid2error = dict((int(record['cid']), record['error'])
                     for record in gibbs_db.DictReader("gc_errors"))

    estimators = {}
    estimators['hatzi'] = Hatzi(use_pKa=False)
    estimators['milo'] = PsuedoisomerTableThermodynamics.FromDatabase(
        gibbs_db, 'gc_pseudoisomers', name='Milo Group Contribution')

    cid_lists = [estimator.get_all_cids() for estimator in estimators.values()]
    all_cids = set(lsum(cid_lists))

    table_rows = []
    for cid in all_cids:
        try:
            name = kegg.cid2name(cid)
            link = kegg.cid2compound(cid).get_link()
        except KeyError:
            # Compound unknown to KEGG: fall back to placeholders.
            name, link = "unknown", ""

        entry = {
            'cid': '<a href="%s">C%05d</a>' % (link, cid),
            'name': name,
            'error': cid2error.get(cid),
        }
        for key, estimator in estimators.iteritems():
            try:
                pmap = estimator.cid2PseudoisomerMap(cid)
                dG0, dG0_tag, nH, z, nMg = pmap.GetMostAbundantPseudoisomer(
                    pH, I, pMg, T)
            except MissingCompoundFormationEnergy:
                # This estimator has no value: leave the cells empty.
                dG0 = dG0_tag = nH = z = nMg = ""
            entry['nH_' + key] = nH
            entry['charge_' + key] = z
            entry['nMg_' + key] = nMg
            entry['dG0_' + key] = dG0
            entry['dG0_tag_' + key] = dG0_tag
        table_rows.append(entry)

    report.write_table(
        table_rows,
        headers=['cid', 'name', 'charge_hatzi', 'charge_milo', 'error'])
    report.close()
def LoadAllEstimators():
    """Create every available thermodynamic estimator.

    Returns:
        A dict mapping a short key ('alberty', 'PRC', 'hatzi_gc', 'BGC',
        'PGC', 'UGC', 'C1', 'merged', 'merged_C1') to an estimator object.
        'alberty', 'PRC', 'BGC' and 'PGC' are present only when their
        pseudoisomer table exists; 'merged' and 'merged_C1' only when 'PGC'
        is.  The concentration bounds file is loaded into every estimator.

    Raises:
        KeyError: if the 'alberty_pseudoisomers' table is missing, because
            the 'C1' estimator is built on top of ``estimators['alberty']``.

    If the 'prc_pseudoisomers' table is missing from the Gibbs database, a
    ``NistRegression`` is trained first.
    """
    db_public = SqliteDatabase('../data/public_data.sqlite')
    db_gibbs = SqliteDatabase('../res/gibbs.sqlite')

    if not db_gibbs.DoesTableExist('prc_pseudoisomers'):
        nist_regression = NistRegression(db_gibbs)
        nist_regression.Train()

    tables = {
        'alberty': (db_public, 'alberty_pseudoisomers', 'Alberty'),
        'PRC': (db_gibbs, 'prc_pseudoisomers', 'our method (PRC)'),
    }

    estimators = {}
    for key, (table_db, table_name, thermo_name) in tables.iteritems():
        if table_db.DoesTableExist(table_name):
            estimators[key] = PsuedoisomerTableThermodynamics.FromDatabase(
                table_db, table_name, name=thermo_name)
        else:
            logging.warning('The table %s does not exist in %s' %
                            (table_name, str(table_db)))

    estimators['hatzi_gc'] = Hatzi(use_pKa=False)
    #estimators['hatzi_gc_pka'] = Hatzi(use_pKa=True)

    # The group-contribution estimators are built from db_gibbs, so the
    # existence checks must query db_gibbs explicitly.  Relying on the
    # variable left over from the loop above would test whichever database
    # the dict happened to yield last.
    if db_gibbs.DoesTableExist('bgc_pseudoisomers'):
        estimators['BGC'] = GroupContribution(db=db_gibbs, transformed=True)
        estimators['BGC'].init()
        estimators['BGC'].name = 'our method (BGC)'

    if db_gibbs.DoesTableExist('pgc_pseudoisomers'):
        estimators['PGC'] = GroupContribution(db=db_gibbs, transformed=False)
        estimators['PGC'].init()
        estimators['PGC'].name = 'our method (PGC)'

    estimators['UGC'] = UnifiedGroupContribution(db=db_gibbs)
    estimators['UGC'].init()
    estimators['UGC'].name = 'our method (UGC)'

    estimators['C1'] = ReactionThermodynamics.FromCsv(
        '../data/thermodynamics/c1_reaction_thermodynamics.csv',
        estimators['alberty'])

    if 'PGC' in estimators:
        estimators['merged'] = BinaryThermodynamics(estimators['alberty'],
                                                    estimators['PGC'])
        estimators['merged_C1'] = BinaryThermodynamics(estimators['C1'],
                                                       estimators['PGC'])

    for thermo in estimators.values():
        thermo.load_bounds('../data/thermodynamics/concentration_bounds.csv')

    return estimators
def example_reductive(thermo):
    """Run a Pathologic search named "reductive" for 3 C00011 => C00022.

    Args:
        thermo: thermodynamics object handed to ``Pathologic``.

    Cofactor and redox reactions are added before searching; at most 15
    reactions are allowed, using the GLOBAL optimization method.
    """
    gibbs_db = SqliteDatabase('../res/gibbs.sqlite', 'r')
    public_db = SqliteDatabase('../data/public_data.sqlite')
    report = HtmlWriter('../res/pathologic.html')

    pl = Pathologic(
        db=gibbs_db,
        public_db=public_db,
        html_writer=report,
        thermo=thermo,
        max_solutions=None,
        max_reactions=15,
        maximal_dG=0.0,
        thermodynamic_method=OptimizationMethods.GLOBAL,
        update_file=None)
    add_cofactor_reactions(pl)
    add_redox_reactions(pl)

    # The target reaction is deliberately left unbalanced.
    target = Reaction.FromFormula("3 C00011 => C00022")
    pl.find_path("reductive", target)
def example_oxidative(thermo):
    """Run a Pathologic search named "oxidative" for C00022 => 3 C00011.

    Args:
        thermo: thermodynamics object handed to ``Pathologic``.

    Cofactor reactions and redox reactions (with ``NAD_only=False``) are
    added; at most 10 reactions are allowed, using the MAX_TOTAL
    optimization method.
    """
    settings = {
        'db': SqliteDatabase('../res/gibbs.sqlite', 'r'),
        'public_db': SqliteDatabase('../data/public_data.sqlite'),
        'html_writer': HtmlWriter('../res/pathologic.html'),
        'thermo': thermo,
        'max_solutions': None,
        'max_reactions': 10,
        'maximal_dG': 0,
        'thermodynamic_method': OptimizationMethods.MAX_TOTAL,
        'update_file': None,
    }
    pl = Pathologic(**settings)
    add_cofactor_reactions(pl)
    add_redox_reactions(pl, NAD_only=False)

    # The target reaction is deliberately left unbalanced.
    target = Reaction.FromFormula("C00022 => 3 C00011")
    pl.find_path("oxidative", target)
def __init__(self, html_fname):
    """Open the channeling database and set up the table-name constants.

    Args:
        html_fname: path passed to the ``HtmlWriter`` used for the report.
    """
    self.serv = None
    self.db = SqliteDatabase('channeling/channeling.sqlite', 'w')
    self.html_writer = HtmlWriter(html_fname)

    # Tables holding KEGG entities.
    self.COMPOUND_TABLE_NAME = 'kegg_compounds'
    self.GENE_TABLE_NAME = 'kegg_genes'
    self.REACTION_TABLE_NAME = 'kegg_reactions'
    self.EQUATION_TABLE_NAME = 'kegg_equations'
    self.COFACTOR_TABLE_NAME = 'kegg_cofactors'

    # Tables relating entities to one another.
    self.GENE_REACTION_TABLE_NAME = 'kegg_genes_to_reactions'
    self.STOICHIOMETRY_TABLE_NAME = 'kegg_stoichiometry'
    self.GENE_PAIRS_TABLE_NAME = 'kegg_gene_pairs'
    # The attribute name keeps its historical spelling; other code may
    # refer to it.
    self.FUNCTIONAL_INTERATCTIONS_TABLE = 'parkinson_functional_interactions'

    # Tables holding energy values.
    self.GIBBS_ENERGY_TABLE_NAME = 'kegg_gibbs_energies'
    self.GENE_ENERGY_TABLE_NAME = 'kegg_gene_energies'
def runBeta2Alpha(thermo, reactionList):
    """Run a Pathologic search named "Beta2Alpha" for C00099 => C01401.

    Args:
        thermo: thermodynamics object handed to ``Pathologic``.
        reactionList: iterable of reaction formula strings; each one is
            added to the search space under the name
            ``"Auto generate #<hash of the formula>"``.
    """
    gibbs_db = SqliteDatabase('../res/gibbs.sqlite', 'r')
    public_db = SqliteDatabase('../data/public_data.sqlite')
    report = HtmlWriter('../res/Beta2Alpha.html')

    pl = Pathologic(
        db=gibbs_db,
        public_db=public_db,
        html_writer=report,
        thermo=thermo,
        max_solutions=None,
        max_reactions=15,
        maximal_dG=0.0,
        thermodynamic_method=OptimizationMethods.GLOBAL,
        update_file=None)
    add_cofactor_reactions(pl)
    add_redox_reactions(pl)

    for formula in reactionList:
        extra = Reaction.FromFormula(formula,
                                     "Auto generate #%s" % hash(formula))
        pl.add_reaction(extra)

    target = Reaction.FromFormula("C00099 => C01401")
    pl.find_path("Beta2Alpha", target)
def GetC1Thermodynamics(
        html_writer,
        reaction_fname='../data/thermodynamics/c1_reaction_thermodynamics.csv'):
    """Build a ReactionThermodynamics object from the C1 reaction CSV file.

    Args:
        html_writer: writer that receives a heading and a table of the
            reactions that were read.
        reaction_fname: CSV file with the columns 'formula', 'enzyme',
            'dG0_r_prime', 'pH', 'I', 'pMg' and 'T'.  The HTML table also
            shows an 'acronym' column.

    Returns:
        The ``ReactionThermodynamics`` instance (named 'C1'), layered on the
        Alberty pseudoisomer table, after ``_Recalculate()`` has been called.
    """
    html_writer.write("<h1>C1 thermodynamics</h1>\n")

    db_public = SqliteDatabase('../data/public_data.sqlite')
    alberty = PsuedoisomerTableThermodynamics.FromDatabase(
        db_public, 'alberty_pseudoisomers', name='alberty')
    # Compound 101 is registered with a formation energy of zero.
    alberty.AddPseudoisomer(101, nH=23, z=0, nMg=0, dG0=0)

    reacthermo = ReactionThermodynamics(alberty, 'C1')
    reacthermo.pH = 7
    reacthermo.I = 0.1
    reacthermo.T = 298.15
    reacthermo.pMg = 14

    dict_list = []
    # 'with' closes the CSV file once all rows are consumed (or on error)
    # instead of leaking the handle.
    with open(reaction_fname, 'r') as reaction_file:
        for row in csv.DictReader(reaction_file):
            r = Reaction.FromFormula(row['formula'])
            r.Balance(balance_water=False)
            r.SetNames(row['enzyme'])
            dG0_r_prime = float(row['dG0_r_prime'])
            pH, I, pMg, T = [float(row[k]) for k in ['pH', 'I', 'pMg', 'T']]
            reacthermo.AddReaction(r, dG0_r_prime, pH=pH, I=I, pMg=pMg, T=T)

            # Replace the raw formula by its hypertext form for the report.
            row['formula'] = r.to_hypertext(show_cids=False)
            dict_list.append(row)

    html_writer.write_table(
        dict_list, headers=['acronym', 'enzyme', 'formula', 'dG0_r_prime'])

    reacthermo._Recalculate()
    return reacthermo
def MatchCRCDataToKEGG():
    """Map the CRC handbook pKa data to KEGG compound IDs.

    Reads the raw data collected from the CRC handbook
    (``../data/thermodynamics/pKa_from_CRC.csv``) and tries to map every
    compound there to a KEGG compound ID through its CAS number.  The
    results are written to the 'pKa_from_CRC' table of the public database.
    Rows whose CAS number is unknown to KEGG are stored with a ``None``
    compound ID and a warning is logged.
    """
    public_db = SqliteDatabase('../data/public_data.sqlite')
    kegg = Kegg.getInstance()

    cas2cid = {}
    for cid, comp in kegg.cid2compound_map.iteritems():
        if comp.cas:
            cas2cid[comp.cas] = cid

    public_db.CreateTable('pKa_from_CRC', [
        'cas TEXT', 'cid INT', 'name TEXT', 'formula TEXT', 'T REAL',
        'pKa REAL'
    ])

    # 'with' closes the CSV file after the last row (or on error) instead
    # of leaking the handle.
    with open('../data/thermodynamics/pKa_from_CRC.csv', 'r') as crc_file:
        for row_dict in csv.DictReader(crc_file):
            cas = row_dict['CAS']
            name = row_dict['name']
            formula = row_dict['formula']

            # The temperature column is added to 273.15 before storing; an
            # empty cell is stored as None.
            if row_dict['T']:
                T = 273.15 + float(row_dict['T'])
            else:
                T = None

            # A leading '~' is stripped before the value is parsed.
            # startswith() (unlike indexing [0]) is safe on an empty cell,
            # which then fails in float() with a descriptive ValueError.
            pKa_text = row_dict['pKa']
            if pKa_text.startswith('~'):
                pKa = float(pKa_text[1:])
            else:
                pKa = float(pKa_text)

            if cas not in cas2cid:
                logging.warning(
                    'Cannot find this CAS number (%s, %s) in KEGG' %
                    (cas, name))
                cid = None
            else:
                cid = cas2cid[cas]
                # Prefer the KEGG name over the CRC name when mapped.
                name = kegg.cid2name(cid)

            public_db.Insert(
                'pKa_from_CRC',
                [unicode(cas), cid, unicode(name), unicode(formula), T, pKa])

    public_db.Commit()
def example_lower_glycolysis(thermo):
    """Run a Pathologic search named "GAP => PYR" for C00118 => C00022.

    Args:
        thermo: thermodynamics object handed to ``Pathologic``.

    Cofactor and redox reactions are added; at most 8 reactions are
    allowed, using the GLOBAL optimization method.
    """
    gibbs_db = SqliteDatabase('../res/gibbs.sqlite', 'r')
    public_db = SqliteDatabase('../data/public_data.sqlite')
    report = HtmlWriter('../res/pathologic.html')

    pl = Pathologic(
        db=gibbs_db,
        public_db=public_db,
        html_writer=report,
        thermo=thermo,
        max_solutions=None,
        max_reactions=8,
        maximal_dG=0.0,
        thermodynamic_method=OptimizationMethods.GLOBAL,
        update_file=None)
    add_cofactor_reactions(pl)
    add_redox_reactions(pl)

    # Fully written-out alternative target:
    #   "C00003 + C00118 + C00001 => C00022 + C00004 + C00009"
    # The short form below is deliberately left unbalanced.
    target = Reaction.FromFormula("C00118 => C00022")
    pl.find_path("GAP => PYR", target)
def example_glycolysis(thermo):
    """Run the "GLC => 2 LAC, 3 ATP, No methylglyoxal" Pathologic search.

    Args:
        thermo: thermodynamics object handed to ``Pathologic``.

    The target is ``C00031 + 3 C00008 => 2 C00186 + 3 C00002``.  Cofactor
    reactions are added with ``free_ATP_hydrolysis=False`` and
    ``ban_toxic_compounds`` is applied before searching with at most 15
    reactions.
    """
    settings = {
        'db': SqliteDatabase('../res/gibbs.sqlite', 'r'),
        'public_db': SqliteDatabase('../data/public_data.sqlite'),
        'html_writer': HtmlWriter('../res/pathologic.html'),
        'thermo': thermo,
        'max_solutions': None,
        'max_reactions': 15,
        'maximal_dG': 0.0,
        'thermodynamic_method': OptimizationMethods.GLOBAL,
        'update_file': None,
    }
    pl = Pathologic(**settings)
    add_cofactor_reactions(pl, free_ATP_hydrolysis=False)
    ban_toxic_compounds(pl)

    # Alternative target that was tried: "C00031 => 6 C06265".
    # The target is deliberately left unbalanced.
    target = Reaction.FromFormula("C00031 + 3 C00008 => 2 C00186 + 3 C00002")
    pl.find_path("GLC => 2 LAC, 3 ATP, No methylglyoxal", target)
def example_rpi_bypass(thermo):
    """Run a Pathologic search named "rpi_bypass" for C00117 => C01182.

    Args:
        thermo: thermodynamics object handed to ``Pathologic``.

    Cofactor reactions are added (redox reactions are not), and the two
    isomerase reactions 1056 and 1081 are removed from the search space.
    At most 10 reactions are allowed.
    """
    gibbs_db = SqliteDatabase('../res/gibbs.sqlite', 'r')
    public_db = SqliteDatabase('../data/public_data.sqlite')
    report = HtmlWriter('../res/pathologic.html')

    pl = Pathologic(
        db=gibbs_db,
        public_db=public_db,
        html_writer=report,
        thermo=thermo,
        max_solutions=None,
        max_reactions=10,
        maximal_dG=0.0,
        thermodynamic_method=OptimizationMethods.GLOBAL,
        update_file=None)
    add_cofactor_reactions(pl)

    banned_reactions = (
        1056,  # ribose-phosphate isomerase
        1081,  # ribose isomerase
    )
    for rid in banned_reactions:
        pl.delete_reaction(rid)

    # The target reaction is deliberately left unbalanced.
    target = Reaction.FromFormula("C00117 => C01182")
    pl.find_path("rpi_bypass", target)
def example_three_acetate(thermo):
    """Run a Pathologic search named "three_acetate" for C00031 => 3 C00033.

    Args:
        thermo: thermodynamics object handed to ``Pathologic``.

    Cofactor reactions are added (redox reactions are not), and reactions
    761 and 1621 are removed from the search space.  At most 20 reactions
    are allowed.
    """
    gibbs_db = SqliteDatabase('../res/gibbs.sqlite', 'r')
    public_db = SqliteDatabase('../data/public_data.sqlite')
    report = HtmlWriter('../res/pathologic.html')

    pl = Pathologic(
        db=gibbs_db,
        public_db=public_db,
        html_writer=report,
        thermo=thermo,
        max_solutions=None,
        max_reactions=20,
        maximal_dG=0.0,
        thermodynamic_method=OptimizationMethods.GLOBAL,
        update_file=None)
    add_cofactor_reactions(pl)

    banned_reactions = (
        761,   # F6P + Pi = E4P + acetyl-P
        1621,  # X5P + Pi = GA3P + acetyl-P
    )
    for rid in banned_reactions:
        pl.delete_reaction(rid)

    # The target reaction is deliberately left unbalanced.
    target = Reaction.FromFormula("C00031 => 3 C00033")
    pl.find_path("three_acetate", target)
def example_more_than_two_pyruvate(thermo):
    """Run a Pathologic search named "more_than_two_pyr".

    Args:
        thermo: thermodynamics object handed to ``Pathologic``.

    The target, ``3 C00031 + 3 C00011 + C00003 => 7 C00022 + 3 C00001 +
    C00004``, is balanced before the search.  No cofactor or redox
    reactions are added and nothing is deleted; at most 20 reactions are
    allowed.
    """
    settings = {
        'db': SqliteDatabase('../res/gibbs.sqlite', 'r'),
        'public_db': SqliteDatabase('../data/public_data.sqlite'),
        'html_writer': HtmlWriter('../res/pathologic.html'),
        'thermo': thermo,
        'max_solutions': None,
        'max_reactions': 20,
        'maximal_dG': 0.0,
        'thermodynamic_method': OptimizationMethods.GLOBAL,
        'update_file': None,
    }
    pl = Pathologic(**settings)

    target = Reaction.FromFormula(
        "3 C00031 + 3 C00011 + C00003 => 7 C00022 + 3 C00001 + C00004")
    target.Balance()
    pl.find_path("more_than_two_pyr", target)
def example_glucose_to_ethanol_and_formate(thermo):
    """Run a Pathologic search named "glucose_to_ethanol_and_formate".

    Args:
        thermo: thermodynamics object handed to ``Pathologic``.

    The target, ``2 C00031 + 3 C00001 => 6 C00058 + 3 C00469``, is balanced
    before the search.  No cofactor or redox reactions are added and
    nothing is deleted; at most 15 reactions are allowed.
    """
    gibbs_db = SqliteDatabase('../res/gibbs.sqlite', 'r')
    public_db = SqliteDatabase('../data/public_data.sqlite')
    report = HtmlWriter('../res/pathologic.html')

    pl = Pathologic(
        db=gibbs_db,
        public_db=public_db,
        html_writer=report,
        thermo=thermo,
        max_solutions=None,
        max_reactions=15,
        maximal_dG=0.0,
        thermodynamic_method=OptimizationMethods.GLOBAL,
        update_file=None)

    target = Reaction.FromFormula("2 C00031 + 3 C00001 => 6 C00058 + 3 C00469")
    target.Balance()
    pl.find_path("glucose_to_ethanol_and_formate", target)
def main():
    """Train the NIST regression and write an HTML report of the results.

    The report path comes from the ``output_filename`` option.  When the
    ``use_prior`` option is set, the Alberty pseudoisomer table from the
    public database is passed to ``Train`` as a prior; otherwise ``None``
    is passed.  The report has three collapsible sections: the regression
    tables, the PRC results and the comparison against observed data.  The
    number of data points and the RMSE are logged at the end.
    """
    options, _ = MakeOpts().parse_args(sys.argv)
    db = SqliteDatabase("../res/gibbs.sqlite")
    public_db = SqliteDatabase("../data/public_data.sqlite")

    output_filename = os.path.abspath(options.output_filename)
    logging.info('Will write output to %s' % output_filename)
    html_writer = HtmlWriter(output_filename)

    nist = Nist(T_range=None)
    nist_regression = NistRegression(db, html_writer=html_writer, nist=nist)
    # the threshold over which to print an analysis of a reaction
    nist_regression.std_diff_threshold = 5

    html_writer.write("<h2>NIST regression:</h2>")

    prior_thermo = None
    if options.use_prior:
        logging.info('Using the data from Alberty as fixed prior')
        prior_thermo = PsuedoisomerTableThermodynamics.FromDatabase(
            public_db, 'alberty_pseudoisomers', name="Alberty")

    def begin_section(title_html):
        # Write the section title and open a toggled div after it.
        html_writer.write(title_html)
        html_writer.insert_toggle(start_here=True)

    begin_section('</br><b>Regression Tables</b>\n')
    nist_regression.Train(options.from_database, prior_thermo)
    html_writer.div_end()

    begin_section('</br><b>PRC results</b>\n')
    nist_regression.WriteDataToHtml(html_writer)
    html_writer.div_end()

    begin_section(
        '</br><b>Transformed reaction energies - PRC vs. Observed</b>\n')
    N, rmse = nist_regression.VerifyResults()
    html_writer.div_end()

    logging.info("Regression results for transformed data:")
    logging.info("N = %d, RMSE = %.1f" % (N, rmse))

    html_writer.close()
def test_single_modules(mids):
    """Run the thermodynamic pathway analysis on each given KEGG module.

    Args:
        mids: iterable of integer module IDs.

    For every module an ``<h2>`` heading is written to
    ``../res/thermodynamic_module_analysis.html`` and the module data is
    passed to ``thermodynamic_pathway_analysis``.
    """
    from pygibbs.groups import GroupContribution

    gibbs_db = SqliteDatabase('../res/gibbs.sqlite')
    report = HtmlWriter("../res/thermodynamic_module_analysis.html")
    gc = GroupContribution(gibbs_db, report)
    gc.init()

    for module_id in mids:
        report.write("<h2>M%05d</h2>\n" % module_id)
        S, rids, fluxes, cids = gc.kegg.get_module(module_id)
        thermodynamic_pathway_analysis(S, rids, fluxes, cids, gc, report)
def example_formate(thermo, product_cid=22, co2_conc=1e-5):
    """Run a Pathologic search from C06265 plus formate to a product.

    Args:
        thermo: thermodynamics object handed to ``Pathologic``.  Its
            ``bounds`` entries for compounds 11 and 288 are overwritten
            here.
        product_cid: integer KEGG compound ID of the product.
        co2_conc: concentration to which compound 11 is pinned.

    The concentration of compound 288 is pinned to ``co2_conc`` times
    ``exp(-dG0' / (R * default_T))``, where dG0' is the transformed energy
    of ``C00011 + C00001 => C00288`` according to ``thermo``.
    """
    co2_hydration = Reaction.FromFormula("C00011 + C00001 => C00288")
    co2_hydration_dG0_prime = float(
        thermo.GetTransfromedKeggReactionEnergies([co2_hydration]))
    carbonate_conc = co2_conc * np.exp(
        -co2_hydration_dG0_prime / (R * default_T))

    # Pin both concentrations by giving identical lower and upper bounds.
    thermo.bounds[11] = (co2_conc, co2_conc)
    thermo.bounds[288] = (carbonate_conc, carbonate_conc)

    gibbs_db = SqliteDatabase('../res/gibbs.sqlite', 'r')
    public_db = SqliteDatabase('../data/public_data.sqlite')
    report = HtmlWriter('../res/pathologic.html')
    pl = Pathologic(
        db=gibbs_db,
        public_db=public_db,
        html_writer=report,
        thermo=thermo,
        max_solutions=None,
        max_reactions=20,
        maximal_dG=0.0,
        thermodynamic_method=OptimizationMethods.GLOBAL,
        update_file=None)
    add_cofactor_reactions(pl, free_ATP_hydrolysis=True)
    add_redox_reactions(pl, NAD_only=False)

    banned_reactions = [
        134,   # formate:NADP+ oxidoreductase
        519,   # Formate:NAD+ oxidoreductase
        24,    # Rubisco
        581,   # L-serine:NAD+ oxidoreductase (deaminating)
        220,   # L-serine ammonia-lyase
        13,    # glyoxylate carboxy-lyase (dimerizing; tartronate-semialdehyde-forming)
        585,   # L-Serine:pyruvate aminotransferase
        1440,  # D-Xylulose-5-phosphate:formaldehyde glycolaldehydetransferase
        5338,  # 3-hexulose-6-phosphate synthase
    ]
    for rid in banned_reactions:
        pl.delete_reaction(rid)

    uptake_reactions = [
        ("C06265 => C00011", "CO2 uptake"),
        ("C06265 => C00288", "carbonate uptake"),
        ("C06265 => C00058", "formate uptake"),
    ]
    for formula, uptake_name in uptake_reactions:
        pl.add_reaction(Reaction.FromFormula(formula, name=uptake_name))

    # at least one formate to product; the target is left unbalanced
    target = Reaction.FromFormula("5 C06265 + C00058 => C%05d" % product_cid)

    kegg = Kegg.getInstance()
    pl.find_path("formate to %s" % kegg.cid2name(product_cid), target)
def ExportJSONFiles():
    """Export the nullspace vectors and the KEGG data as JSON files.

    Two thermodynamic sources are registered with the Kegg singleton, in
    priority order: the estimator selected by the ``thermodynamics_source``
    option, then the CSV file given by ``thermodynamics_csv``.  Four files
    are then written with ``WriteJSONFile``: the rows of the
    'ugc_conservations' table, and all KEGG compounds, reactions and
    enzymes.
    """
    estimators = LoadAllEstimators()
    options, _ = MakeOpts(estimators).parse_args(sys.argv)

    thermo_list = [
        estimators[options.thermodynamics_source],
        PsuedoisomerTableThermodynamics.FromCsvFile(
            options.thermodynamics_csv),
    ]

    # Make sure we have all the data.
    kegg = Kegg.getInstance()
    for priority, thermo in enumerate(thermo_list, 1):
        print("Priority %d - formation energies of: %s" %
              (priority, thermo.name))
        kegg.AddThermodynamicData(thermo, priority=priority)

    db = SqliteDatabase('../res/gibbs.sqlite')

    print('Exporting Group Contribution Nullspace matrix as JSON.')
    nullspace_vectors = []
    for row in db.DictReader('ugc_conservations'):
        vector = {'msg': row['msg']}
        sparse = json.loads(row['json'])
        # Each term is a [coefficient, "C<5-digit compound ID>"] pair.
        vector['reaction'] = [[coeff, "C%05d" % int(cid)]
                              for cid, coeff in sparse.iteritems()]
        nullspace_vectors.append(vector)
    WriteJSONFile(nullspace_vectors, options.nullspace_out_filename)

    print('Exporting KEGG compounds as JSON.')
    WriteJSONFile(kegg.AllCompounds(), options.compounds_out_filename)

    print('Exporting KEGG reactions as JSON.')
    WriteJSONFile(kegg.AllReactions(), options.reactions_out_filename)

    print('Exporting KEGG enzymes as JSON.')
    WriteJSONFile(kegg.AllEnzymes(), options.enzymes_out_filename)
def runPathologic(thermo, reactionList):
    """Run a Pathologic search named "MOG_finder" for 2 C00288 => C00048.

    Args:
        thermo: thermodynamics object handed to ``Pathologic``.
        reactionList: iterable of reaction formula strings; each one is
            added to the search space under the name
            ``"Auto generate #<hash of the formula>"``.

    The search uses ``maximal_dG=-3.0`` and at most 15 reactions.
    Reactions 134, 344, 575 and 212 are removed from the search space.
    """
    gibbs_db = SqliteDatabase('../res/gibbs.sqlite', 'r')
    public_db = SqliteDatabase('../data/public_data.sqlite')
    report = HtmlWriter('../res/mog_finder.html')

    pl = Pathologic(
        db=gibbs_db,
        public_db=public_db,
        html_writer=report,
        thermo=thermo,
        max_solutions=None,
        max_reactions=15,
        maximal_dG=-3.0,
        thermodynamic_method=OptimizationMethods.GLOBAL,
        update_file=None)
    add_cofactor_reactions(pl)
    add_redox_reactions(pl)

    for formula in reactionList:
        extra = Reaction.FromFormula(formula,
                                     "Auto generate #%s" % hash(formula))
        pl.add_reaction(extra)

    for rid in (134, 344, 575, 212):
        pl.delete_reaction(rid)

    # Reactions that were once added by hand here (currently disabled):
    #   343: 'C00149 + C00006 <=> C00036 + C00005 + C00080'
    #        (malate + NADP+ = oxaloacetate + NADPH)
    #   740: 'C00222 + C00010 + C00006 <=> C00083 + C00005'
    #        (malonate-semialdehyde + CoA + NADP+ = malonyl-CoA + NADPH)

    target = Reaction.FromFormula("2 C00288 => C00048")
    pl.find_path("MOG_finder", target)
def _write_csv_rows(fname, rows):
    """Write *rows* to the CSV file *fname* and close the file afterwards."""
    with open(fname, 'w') as csv_file:
        csv.writer(csv_file).writerows(rows)


def main():
    """Export Alberty-based reaction data to CSV files in ``../res/arren``.

    Formation energies are evaluated at pH 7.0, pMg 3, I = 0.1 and
    T = 298.15 for every compound in the Alberty table.  Every KEGG
    reaction whose compounds are all covered contributes one row to the
    stoichiometric matrix.  Four files are written:

    * ``stoichiomety.csv`` - the stoichiometric matrix, one reaction per row
    * ``reactions.csv``    - reaction ID and reaction energy
    * ``ec_numbers.csv``   - reaction ID and the fields of each EC number
    * ``compounds.csv``    - compound ID and formation energy
    """
    pH, pMg, I, T = (7.0, 3, 0.1, 298.15)
    # NOTE(review): 'db' is not used below; the call is kept in case opening
    # the database has side effects - confirm and remove.
    db = SqliteDatabase('../res/gibbs.sqlite')
    kegg = Kegg.getInstance()
    alberty = PsuedoisomerTableThermodynamics(
        '../data/thermodynamics/alberty_pseudoisomers.csv')

    cids = alberty.get_all_cids()
    n_compounds = len(cids)
    dG0_f = pylab.zeros((n_compounds, 1))
    for i, cid in enumerate(cids):
        dG0_f[i, 0] = alberty.cid2dG0_tag(cid, pH=pH, pMg=pMg, I=I, T=T)

    # Build the lookup structures once instead of once per reaction.
    # setdefault keeps the first position of a compound, like list.index.
    known_cids = set(cids)
    column_of = {}
    for j, cid in enumerate(cids):
        column_of.setdefault(cid, j)

    rids = []
    ec_numbers = []
    # Start from an empty (0 x n) block so the result has the right shape
    # even when no reaction qualifies; stack all rows in a single call.
    matrix_rows = [pylab.zeros((0, n_compounds))]
    for rid in kegg.get_all_rids():
        sparse = kegg.rid2sparse_reaction(rid)
        # Skip reactions involving a compound without a formation energy.
        if not known_cids.issuperset(sparse.keys()):
            continue

        rids.append(rid)
        ec_numbers.append(kegg.rid2ec_list(rid))
        S_row = pylab.zeros((1, n_compounds))
        for cid, coeff in sparse.iteritems():
            S_row[0, column_of[cid]] = coeff
        matrix_rows.append(S_row)
    S = pylab.vstack(matrix_rows)

    dG0_r = pylab.dot(S, dG0_f)

    stoichiometry_rows = []
    reaction_rows = [['rid', 'dG0_r']]
    ec_rows = [['rid', 'ec0', 'ec1', 'ec2', 'ec3']]
    for i in xrange(S.shape[0]):
        stoichiometry_rows.append(["%d" % x for x in S[i, :]])
        # A reaction may carry several ';'-separated EC numbers.
        for ec in ec_numbers[i].split(';'):
            ec_rows.append(['%d' % rids[i]] + ec.split('.'))
        reaction_rows.append(["%d" % rids[i], '%.1f' % dG0_r[i, 0]])

    compound_rows = [['cid', 'dG0_f']]
    for j in xrange(n_compounds):
        compound_rows.append(['%d' % cids[j], '%.1f' % dG0_f[j, 0]])

    util._mkdir('../res/arren')
    # The helper closes every file, so nothing is left unflushed.
    _write_csv_rows('../res/arren/stoichiomety.csv', stoichiometry_rows)
    _write_csv_rows('../res/arren/reactions.csv', reaction_rows)
    _write_csv_rows('../res/arren/ec_numbers.csv', ec_rows)
    _write_csv_rows('../res/arren/compounds.csv', compound_rows)
def ExportJSONFiles():
    """Export all KEGG compounds, with thermodynamic data, as one JSON file.

    The 'alberty' estimator is registered with the Kegg singleton at
    priority 1 and the estimator chosen by the ``thermodynamics_source``
    option at priority 2.  Group vectors from the 'pgc_groupvector' table
    are added as well, and the result is written to the file named by the
    ``out_filename`` option.
    """
    estimators = LoadAllEstimators()
    options, _ = MakeOpts(estimators).parse_args(sys.argv)

    chosen_thermo = estimators[options.thermodynamics_source]
    print("Using the thermodynamic estimations of: " + chosen_thermo.name)

    # Make sure we have all the data.
    kegg = Kegg.getInstance()
    kegg.AddThermodynamicData(estimators['alberty'], priority=1)
    kegg.AddThermodynamicData(chosen_thermo, priority=2)

    gibbs_db = SqliteDatabase('../res/gibbs.sqlite')
    kegg.AddGroupVectorData(gibbs_db, table_name='pgc_groupvector')

    print('Exporting KEGG compound pseudoisomers as JSON.')
    WriteJSONFile(kegg.AllCompounds(), options.out_filename)
def CreateDummyDB():
    """Open ``/tmp/dummy.sqlite`` and create the five tecan tables.

    Returns:
        The ``SqliteDatabase`` object.  Tables are created with
        ``drop_if_exists=False``.
    """
    # (table name, column definitions), in creation order.  The column
    # strings are part of the stored schema and are kept verbatim.
    schema = [
        ('tecan_readings',
         'exp_id TEXT, plate TEXT, reading_label TEXT, row INT, col INT, time INT, measurement REAL'),
        ('tecan_labels',
         'exp_id TEXT, plate INT, row INT, col INT, label TEXT'),
        ('tecan_plates',
         'exp_id TEXT, plate INT, description TEXT, owner TEXT, project TEXT'),
        ('tecan_experiments',
         'exp_id TEXT, serial_number TEXT, desciption TEXT'),
        ('tecan_scripts',
         'exp_id TEXT, script BLOB'),
    ]

    db = SqliteDatabase('/tmp/dummy.sqlite', 'w')
    for table_name, columns in schema:
        db.CreateTable(table_name, columns, drop_if_exists=False)
    return db
def dissociation_decomposition_test():
    """
        Verifies that the decomposition of the compounds in the dissociation
        table match the nH of each species.

        Compounds without an explicit formula are skipped with a debug
        message; missing SMILES strings are ignored.
    """
    gibbs_db = SqliteDatabase('../res/gibbs.sqlite')
    dissociation = DissociationConstants.FromPublicDB()
    group_decomposer = GroupDecomposer(GroupsData.FromDatabase(gibbs_db))
    kegg = Kegg.getInstance()

    for cid in dissociation.GetAllCids():
        label = "C%05d (%s)" % (cid, kegg.cid2name(cid))
        if kegg.cid2compound(cid).get_atom_bag() is None:
            logging.debug('%s: has no explicit formula' % label)
            continue

        diss = dissociation.GetDissociationTable(cid, create_if_missing=False)
        test_dissociation_table(diss, group_decomposer, label,
                                ignore_missing_smiles=True)
def main():
    """Export group-contribution estimates for KEGG compounds and reactions.

    Two CSV files are written, named by the ``compounds_out_filename`` and
    ``reactions_out_filename`` options:

    * compounds: one row per pseudoisomer (KEGG ID, nH, charge, nMg and
      formation energy); when the energy is missing, a single row carrying
      the error message.
    * reactions: the reaction energy predicted at the pH / I / pMg / T
      given by the options, or the error message when the reaction cannot
      be parsed, balanced or estimated.
    """
    estimators = LoadAllEstimators()
    args, _ = MakeOpts(estimators).parse_args(sys.argv)

    # Make sure we have all the data.
    db = SqliteDatabase('../res/gibbs.sqlite')
    G = GroupContribution(db=db, html_writer=NullHtmlWriter(),
                          transformed=args.transformed)
    G.init()

    print('Exporting KEGG compounds to %s' % args.compounds_out_filename)
    # 'with' guarantees each output file is flushed and closed, even if an
    # unexpected exception interrupts the export.
    with open(args.compounds_out_filename, 'w') as compounds_file:
        csv_writer = csv.writer(compounds_file)
        csv_writer.writerow(["KEGG ID", "nH", "CHARGE", "nMg", "dG0_f"])
        for cid in sorted(G.get_all_cids()):
            try:
                for nH, z, nMg, dG0 in G.cid2PseudoisomerMap(cid).ToMatrix():
                    csv_writer.writerow(
                        ["C%05d" % cid, nH, z, nMg, "%.1f" % dG0])
            except MissingCompoundFormationEnergy as e:
                csv_writer.writerow(["C%05d" % cid, None, None, None, str(e)])

    print('Exporting KEGG reactions to %s' % args.reactions_out_filename)
    with open(args.reactions_out_filename, 'w') as reactions_file:
        csv_writer = csv.writer(reactions_file)
        csv_writer.writerow(
            ["KEGG ID",
             "dG'0_r (pH=%.1f, I=%.2f, pMg=%.1f, T=%.1f)" %
             (args.ph, args.i_s, args.pmg, args.temp)])
        for rid in sorted(G.kegg.get_all_rids()):
            reaction = G.kegg.rid2reaction(rid)
            try:
                reaction.Balance(balance_water=True)
                dG0_r = reaction.PredictReactionEnergy(
                    G, pH=args.ph, pMg=args.pmg, I=args.i_s, T=args.temp)
                csv_writer.writerow(["R%05d" % rid, "%.1f" % dG0_r])
            except (KeggParseException, MissingCompoundFormationEnergy,
                    KeggReactionNotBalancedException, MissingReactionEnergy,
                    KeyError, OpenBabelError) as e:
                # Record the failure reason in place of the energy.
                csv_writer.writerow(["R%05d" % rid, str(e)])
def __init__(self, use_pKa=True):
    """Load the Jankowski et al. estimates from ``HATZI_CSV_FNAME``.

    Args:
        use_pKa: when true, dissociation constants are loaded from the
            public database and the estimator is named
            "Jankowski et al. (+pKa)"; otherwise ``self.dissociation`` is
            ``None`` and the name is "Jankowski et al.".

    For every CSV row, the 'DELTAG' value is multiplied by ``J_per_cal``
    and stored in ``self.cid2dG0_tag_dict``, and the 'CHARGE' value in
    ``self.cid2charge_dict``.  Rows marked "Not calculated" and compound
    3178 are skipped.
    """
    if use_pKa:
        Thermodynamics.__init__(self, "Jankowski et al. (+pKa)")
        self.dissociation = DissociationConstants.FromPublicDB()
    else:
        Thermodynamics.__init__(self, "Jankowski et al.")
        self.dissociation = None
    self.db = SqliteDatabase('../res/gibbs.sqlite', 'w')
    self.cid2pmap_dict = {}

    # the conditions in which Hatzimanikatis makes his predictions
    self.Hatzi_pH = 7.0
    self.Hatzi_I = 0.0
    self.Hatzi_pMg = 14.0
    self.Hatzi_T = 298.15

    self.kegg = Kegg.getInstance()

    # for some reason, Hatzimanikatis doesn't indicate that H+ is zero,
    # so we add it here
    H_pmap = PseudoisomerMap()
    H_pmap.Add(0, 0, 0, 0)
    self.SetPseudoisomerMap(80, H_pmap)

    self.cid2dG0_tag_dict = {80: 0}
    self.cid2charge_dict = {80: 0}

    # 'with' closes the CSV file after the last row (or on error) instead
    # of leaking the handle.
    with open(HATZI_CSV_FNAME, 'r') as hatzi_file:
        for row in csv.DictReader(hatzi_file):
            # The 'ENTRY' field is one prefix character followed by the ID.
            cid = int(row['ENTRY'][1:])
            self.cid2source_string[cid] = 'Jankowski et al. 2008'
            if row['DELTAG'] == "Not calculated":
                continue
            if cid == 3178:
                # this compound, which is supposed to be
                # "Tetrahydroxypteridine" seems to be mapped to something
                # else by Hatzimanikatis
                continue
            self.cid2dG0_tag_dict[cid] = float(row['DELTAG']) * J_per_cal
            self.cid2charge_dict[cid] = int(row['CHARGE'])
def nist_dissociation_test():
    """
        Verifies that all the compounds in NIST are covered by the
        dissociation table, including SMILES strings.

        Compounds without an explicit formula are skipped with a debug
        message.
    """
    gibbs_db = SqliteDatabase('../res/gibbs.sqlite')
    nist_regression = NistRegression(gibbs_db, html_writer=NullHtmlWriter())
    dissociation = nist_regression.dissociation
    group_decomposer = GroupDecomposer(GroupsData.FromDatabase(gibbs_db))
    kegg = Kegg.getInstance()
    nist = nist_regression.nist

    for cid in nist.GetAllCids():
        label = "C%05d (%s)" % (cid, kegg.cid2name(cid))
        if kegg.cid2compound(cid).get_atom_bag() is None:
            logging.debug('%s: has no explicit formula' % label)
            continue

        diss = dissociation.GetDissociationTable(cid, create_if_missing=False)
        test_dissociation_table(diss, group_decomposer, label,
                                ignore_missing_smiles=False)