Ejemplo n.º 1
0
def ExportJSONFiles():
    options, _ = MakeOpts().parse_args(sys.argv)
    print "Using the database file: " + options.public_db
    print "Using the NIST table: " + options.nist_table
    print "Saving the data to the CSV file: " + options.output_csv

    db = SqliteDatabase(options.public_db)
    csv_writer = csv.writer(open(options.output_csv, 'w'))
    csv_writer.writerow([
        'url', 'reference_id', 'method', 'evaluation', 'ec', 'enzyme',
        'kegg_reaction', 'reaction', 'K', 'K_tag', 'T (K)', 'I (M)', 'pH',
        'pMg'
    ])
    for row in db.DictReader(options.nist_table):
        csvrow = [
            row[t] for t in [
                'url', 'reference_id', 'method', 'evaluation', 'ec', 'enzyme',
                'kegg_reaction', 'reaction'
            ]
        ]
        csvrow += [
            reformat_number_string(row['K'], '%.3e'),
            reformat_number_string(row['K_tag'], '%.3e'),
            reformat_number_string(row['T'], '%.2f'),
            reformat_number_string(row['I'], '%.2f'),
            reformat_number_string(row['pH'], '%.2f'),
            reformat_number_string(row['pMg'], '%.2f')
        ]
        csv_writer.writerow(csvrow)
Ejemplo n.º 2
0
    def run(self):
        """Compute the dissociation table of ``self.smiles`` and store it.

        The computation is performed while holding ``self.semaphore``; the
        database insert is additionally guarded by ``self.db_lock``.  Both
        are released in ``finally`` clauses so that a failing job cannot
        leave other workers blocked.

        If no dissociation table is returned, a single placeholder row
        (compound id, name and ten ``None`` values) is inserted instead.
        """
        from toolbox.molecule import Molecule

        self.semaphore.acquire()
        try:
            start_time = time.time()

            logging.debug("SMILES: " + self.smiles)
            diss_table = Molecule._GetDissociationTable(self.smiles,
                                                        fmt='smiles',
                                                        mid_pH=default_pH,
                                                        min_pKa=0,
                                                        max_pKa=14,
                                                        T=default_T)
            # The table may be None (handled below), so only log its
            # attributes when it actually exists.
            if diss_table is not None:
                logging.debug("Min charge: %d" % diss_table.min_charge)
                logging.debug("Min nH: %d" % diss_table.min_nH)

            elapsed_time = time.time() - start_time

            self.db_lock.acquire()
            try:
                db = SqliteDatabase(self.options.db_file)
                kegg = Kegg.getInstance()
                name = kegg.cid2name(self.cid)

                if diss_table is not None:
                    for row in diss_table.ToDatabaseRow():
                        db.Insert(self.options.table_name,
                                  [self.cid, name] + row)
                else:
                    db.Insert(self.options.table_name,
                              [self.cid, name] + [None] * 10)
                # Drop the database object before giving up the lock.
                del db
            finally:
                self.db_lock.release()

            logging.info("Completed C%05d, elapsed time = %.1f sec" %
                         (self.cid, elapsed_time))
        finally:
            self.semaphore.release()
Ejemplo n.º 3
0
 def __init__(self, T_range=(298, 314)):
     """Set up default filters, open the public database and load the data.

     Args:
         T_range: stored as ``self.T_range``; defaults to ``(298, 314)``.
     """
     # No overrides and no pH range are configured at construction time.
     self.override_I = self.override_pMg = self.override_T = None
     self.pH_range = None
     self.T_range = T_range

     self.db = SqliteDatabase('../data/public_data.sqlite')
     self.kegg = Kegg.getInstance()

     # Load the data first, then balance the reactions.
     self.FromDatabase()
     self.BalanceReactions()
Ejemplo n.º 4
0
    def setUp(self):
        """Build one thermodynamic table from CSV text and one from the DB.

        ``self.fake_thermo_csv`` is filled from the in-memory ``CSV_DATA``
        and ``self.fake_thermo_db`` from the ``fake_pseudoisomers`` table of
        the public database.  Both go through ``_FromDictReader`` with
        ``warn_for_conflicting_refs`` switched off.
        """
        # Source 1: the CSV text, wrapped so that csv.DictReader can read it.
        csv_rows = csv.DictReader(StringIO(CSV_DATA))
        self.fake_thermo_csv = PsuedoisomerTableThermodynamics()
        self.fake_thermo_csv = PsuedoisomerTableThermodynamics._FromDictReader(
            csv_rows,
            self.fake_thermo_csv,
            warn_for_conflicting_refs=False)

        # Source 2: the same kind of rows, read from the public database.
        db = SqliteDatabase(PUBLIC_DB_FNAME)
        db_rows = db.DictReader('fake_pseudoisomers')
        self.fake_thermo_db = PsuedoisomerTableThermodynamics()
        self.fake_thermo_db = PsuedoisomerTableThermodynamics._FromDictReader(
            db_rows,
            self.fake_thermo_db,
            warn_for_conflicting_refs=False)
Ejemplo n.º 5
0
def compare_charges():
    #db_public = SqliteDatabase('../data/public_data.sqlite')
    db_gibbs = SqliteDatabase('../res/gibbs.sqlite')
    print "Writing Compare Charges report to ../res/groups_report.html"
    html_writer = HtmlWriter("../res/groups_report.html")
    kegg = Kegg.getInstance()

    #pH, I, pMg, T = default_pH, default_I, default_pMg, default_T
    pH, I, pMg, T = default_pH, 0, 14, default_T

    cid2error = {}
    for row_dict in db_gibbs.DictReader("gc_errors"):
        cid = int(row_dict['cid'])
        cid2error[cid] = row_dict['error']

    estimators = {}
    estimators['hatzi'] = Hatzi(use_pKa=False)
    estimators['milo'] = PsuedoisomerTableThermodynamics.FromDatabase(
        db_gibbs, 'gc_pseudoisomers', name='Milo Group Contribution')

    all_cids = set(lsum([e.get_all_cids() for e in estimators.values()]))
    dict_list = []
    for cid in all_cids:
        try:
            name = kegg.cid2name(cid)
            link = kegg.cid2compound(cid).get_link()
        except KeyError:
            name = "unknown"
            link = ""
        row_dict = {
            'cid': '<a href="%s">C%05d</a>' % (link, cid),
            'name': name,
            'error': cid2error.get(cid, None)
        }
        for key, est in estimators.iteritems():
            try:
                pmap = est.cid2PseudoisomerMap(cid)
                dG0, dG0_tag, nH, z, nMg = pmap.GetMostAbundantPseudoisomer(
                    pH, I, pMg, T)
            except MissingCompoundFormationEnergy:
                dG0, dG0_tag, nH, z, nMg = "", "", "", "", ""
            row_dict['nH_' + key] = nH
            row_dict['charge_' + key] = z
            row_dict['nMg_' + key] = nMg
            row_dict['dG0_' + key] = dG0
            row_dict['dG0_tag_' + key] = dG0_tag
        dict_list.append(row_dict)

    html_writer.write_table(
        dict_list,
        headers=['cid', 'name', 'charge_hatzi', 'charge_milo', 'error'])
    html_writer.close()
Ejemplo n.º 6
0
def LoadAllEstimators():
    """Build the dictionary of thermodynamic estimators, keyed by short name.

    The PRC table is trained first if it is missing from the gibbs database.
    Table-based estimators ('alberty', 'PRC') are loaded when their table
    exists; 'hatzi_gc' and 'UGC' are always created; 'BGC' and 'PGC' are
    created only when their tables exist in the gibbs database.  'C1' is
    built on top of 'alberty', and the merged estimators are added when 'PGC'
    is available.  Finally the concentration bounds are loaded into every
    estimator.

    Returns:
        dict mapping estimator key to estimator object.

    Raises:
        KeyError: if the 'alberty' estimator could not be loaded, since 'C1'
            depends on it.
    """
    db_public = SqliteDatabase('../data/public_data.sqlite')
    db_gibbs = SqliteDatabase('../res/gibbs.sqlite')

    if not db_gibbs.DoesTableExist('prc_pseudoisomers'):
        nist_regression = NistRegression(db_gibbs)
        nist_regression.Train()

    tables = {
        'alberty': (db_public, 'alberty_pseudoisomers', 'Alberty'),
        'PRC': (db_gibbs, 'prc_pseudoisomers', 'our method (PRC)')
    }
    estimators = {}
    for key, (table_db, table_name, thermo_name) in tables.iteritems():
        if table_db.DoesTableExist(table_name):
            estimators[key] = PsuedoisomerTableThermodynamics.FromDatabase(
                table_db, table_name, name=thermo_name)
        else:
            logging.warning('The table %s does not exist in %s' %
                            (table_name, str(table_db)))

    estimators['hatzi_gc'] = Hatzi(use_pKa=False)

    # The group-contribution estimators are built on db_gibbs, so that is
    # the database whose tables must be checked.  The original code tested
    # whichever database the loop above happened to visit last.
    if db_gibbs.DoesTableExist('bgc_pseudoisomers'):
        estimators['BGC'] = GroupContribution(db=db_gibbs, transformed=True)
        estimators['BGC'].init()
        estimators['BGC'].name = 'our method (BGC)'

    if db_gibbs.DoesTableExist('pgc_pseudoisomers'):
        estimators['PGC'] = GroupContribution(db=db_gibbs, transformed=False)
        estimators['PGC'].init()
        estimators['PGC'].name = 'our method (PGC)'

    estimators['UGC'] = UnifiedGroupContribution(db=db_gibbs)
    estimators['UGC'].init()
    estimators['UGC'].name = 'our method (UGC)'

    estimators['C1'] = ReactionThermodynamics.FromCsv(
        '../data/thermodynamics/c1_reaction_thermodynamics.csv',
        estimators['alberty'])

    if 'PGC' in estimators:
        estimators['merged'] = BinaryThermodynamics(estimators['alberty'],
                                                    estimators['PGC'])
        estimators['merged_C1'] = BinaryThermodynamics(estimators['C1'],
                                                       estimators['PGC'])

    for thermo in estimators.values():
        thermo.load_bounds('../data/thermodynamics/concentration_bounds.csv')

    return estimators
Ejemplo n.º 7
0
def example_reductive(thermo):
    """Run the "reductive" Pathologic search for ``3 C00011 => C00022``.

    Pathologic is configured with max_reactions=15, maximal_dG=0.0 and the
    GLOBAL optimization method, and reports to ``../res/pathologic.html``.
    Cofactor and redox reactions are added before the search.

    Args:
        thermo: thermodynamic data source handed to Pathologic.
    """
    gibbs_db = SqliteDatabase('../res/gibbs.sqlite', 'r')
    public_db = SqliteDatabase('../data/public_data.sqlite')
    report = HtmlWriter('../res/pathologic.html')
    pl = Pathologic(db=gibbs_db,
                    public_db=public_db,
                    html_writer=report,
                    thermo=thermo,
                    max_solutions=None,
                    max_reactions=15,
                    maximal_dG=0.0,
                    thermodynamic_method=OptimizationMethods.GLOBAL,
                    update_file=None)

    add_cofactor_reactions(pl)
    add_redox_reactions(pl)

    # The target reaction is used as written; it is not balanced here.
    target = Reaction.FromFormula("3 C00011 => C00022")
    pl.find_path("reductive", target)
Ejemplo n.º 8
0
def example_oxidative(thermo):
    """Run the "oxidative" Pathologic search for ``C00022 => 3 C00011``.

    Pathologic is configured with max_reactions=10, maximal_dG=0 and the
    MAX_TOTAL optimization method, and reports to ``../res/pathologic.html``.
    Cofactor reactions and redox reactions (with ``NAD_only=False``) are
    added before the search.

    Args:
        thermo: thermodynamic data source handed to Pathologic.
    """
    gibbs_db = SqliteDatabase('../res/gibbs.sqlite', 'r')
    public_db = SqliteDatabase('../data/public_data.sqlite')
    report = HtmlWriter('../res/pathologic.html')
    pl = Pathologic(db=gibbs_db,
                    public_db=public_db,
                    html_writer=report,
                    thermo=thermo,
                    max_solutions=None,
                    max_reactions=10,
                    maximal_dG=0,
                    thermodynamic_method=OptimizationMethods.MAX_TOTAL,
                    update_file=None)

    add_cofactor_reactions(pl)
    add_redox_reactions(pl, NAD_only=False)

    # The target reaction is used as written; it is not balanced here.
    target = Reaction.FromFormula("C00022 => 3 C00011")
    pl.find_path("oxidative", target)
Ejemplo n.º 9
0
 def __init__(self, html_fname):
     """Define the table-name constants and open the database and report.

     Args:
         html_fname: path handed to ``HtmlWriter`` for the HTML output.
     """
     # KEGG-related table names.
     self.COMPOUND_TABLE_NAME = 'kegg_compounds'
     self.COFACTOR_TABLE_NAME = 'kegg_cofactors'
     self.REACTION_TABLE_NAME = 'kegg_reactions'
     self.EQUATION_TABLE_NAME = 'kegg_equations'
     self.STOICHIOMETRY_TABLE_NAME = 'kegg_stoichiometry'
     self.GENE_TABLE_NAME = 'kegg_genes'
     self.GENE_PAIRS_TABLE_NAME = 'kegg_gene_pairs'
     self.GENE_REACTION_TABLE_NAME = 'kegg_genes_to_reactions'
     self.GIBBS_ENERGY_TABLE_NAME = 'kegg_gibbs_energies'
     self.GENE_ENERGY_TABLE_NAME = 'kegg_gene_energies'
     # The attribute name below is misspelled, but it is kept as it is
     # because other code may refer to it.
     self.FUNCTIONAL_INTERATCTIONS_TABLE = 'parkinson_functional_interactions'

     self.serv = None
     self.db = SqliteDatabase('channeling/channeling.sqlite', 'w')
     self.html_writer = HtmlWriter(html_fname)
Ejemplo n.º 10
0
def runBeta2Alpha(thermo, reactionList):
    """Run the "Beta2Alpha" Pathologic search for ``C00099 => C01401``.

    Pathologic is configured with max_reactions=15, maximal_dG=0.0 and the
    GLOBAL optimization method, and reports to ``../res/Beta2Alpha.html``.
    Cofactor reactions, redox reactions and the caller's extra reactions are
    added before the search.

    Args:
        thermo: thermodynamic data source handed to Pathologic.
        reactionList: iterable of reaction formula strings; each is added
            under a name derived from the hash of its formula.
    """
    gibbs_db = SqliteDatabase('../res/gibbs.sqlite', 'r')
    public_db = SqliteDatabase('../data/public_data.sqlite')
    report = HtmlWriter('../res/Beta2Alpha.html')
    pl = Pathologic(db=gibbs_db,
                    public_db=public_db,
                    html_writer=report,
                    thermo=thermo,
                    max_solutions=None,
                    max_reactions=15,
                    maximal_dG=0.0,
                    thermodynamic_method=OptimizationMethods.GLOBAL,
                    update_file=None)

    add_cofactor_reactions(pl)
    add_redox_reactions(pl)

    for formula in reactionList:
        label = "Auto generate #%s" % hash(formula)
        pl.add_reaction(Reaction.FromFormula(formula, label))

    target = Reaction.FromFormula("C00099 => C01401")
    pl.find_path("Beta2Alpha", target)
Ejemplo n.º 11
0
def GetC1Thermodynamics(
        html_writer,
        reaction_fname='../data/thermodynamics/c1_reaction_thermodynamics.csv'
):
    """Build a ReactionThermodynamics object from the C1 reactions CSV file.

    The reactions are layered on top of the Alberty pseudoisomer table of the
    public database.  Every CSV row is parsed into a reaction, balanced
    without balancing water, and added together with its ``dG0_r_prime`` and
    the conditions (pH, I, pMg, T) given in the same row.  A summary table is
    written to ``html_writer``.

    Args:
        html_writer: writer that receives the heading and the summary table.
        reaction_fname: path of the CSV file; the columns read are 'formula',
            'enzyme', 'dG0_r_prime', 'pH', 'I', 'pMg' and 'T'.  The summary
            table additionally lists 'acronym'.

    Returns:
        The recalculated ReactionThermodynamics object.
    """
    html_writer.write("<h1>C1 thermodynamics</h1>\n")

    dict_list = []
    db_public = SqliteDatabase('../data/public_data.sqlite')
    alberty = PsuedoisomerTableThermodynamics.FromDatabase(
        db_public, 'alberty_pseudoisomers', name='alberty')
    alberty.AddPseudoisomer(101, nH=23, z=0, nMg=0, dG0=0)
    reacthermo = ReactionThermodynamics(alberty, 'C1')
    reacthermo.pH = 7
    reacthermo.I = 0.1
    reacthermo.T = 298.15
    reacthermo.pMg = 14

    # Use a context manager so the CSV file is closed even if a row fails
    # to parse.
    with open(reaction_fname, 'r') as reaction_file:
        for row in csv.DictReader(reaction_file):
            r = Reaction.FromFormula(row['formula'])
            r.Balance(balance_water=False)
            r.SetNames(row['enzyme'])
            dG0_r_prime = float(row['dG0_r_prime'])
            pH, I, pMg, T = [float(row[k]) for k in ['pH', 'I', 'pMg', 'T']]
            reacthermo.AddReaction(r, dG0_r_prime, pH=pH, I=I, pMg=pMg, T=T)

            # Show the balanced reaction as hypertext in the report.
            row['formula'] = r.to_hypertext(show_cids=False)
            dict_list.append(row)

    html_writer.write_table(
        dict_list, headers=['acronym', 'enzyme', 'formula', 'dG0_r_prime'])

    reacthermo._Recalculate()
    return reacthermo
def MatchCRCDataToKEGG():
    """
        Reads the raw data collected from the CRC handbook and tries to map every
        compound name there to a KEGG compound ID.
        Then it writes the results to the 'Public' Database

        Compounds are matched by CAS number.  Rows whose CAS number is not
        known to KEGG are still inserted, with a NULL compound ID and the
        name taken from the CSV file.  Temperatures are shifted by 273.15
        before they are stored, and a leading '~' on a pKa value is dropped.
    """

    public_db = SqliteDatabase('../data/public_data.sqlite')
    kegg = Kegg.getInstance()

    cas2cid = {}
    for cid, comp in kegg.cid2compound_map.iteritems():
        if comp.cas:
            cas2cid[comp.cas] = cid

    public_db.CreateTable('pKa_from_CRC', [
        'cas TEXT', 'cid INT', 'name TEXT', 'formula TEXT', 'T REAL',
        'pKa REAL'
    ])
    # The context manager closes the CSV file even if a row fails to parse.
    with open('../data/thermodynamics/pKa_from_CRC.csv', 'r') as crc_file:
        for row_dict in csv.DictReader(crc_file):
            cas = row_dict['CAS']
            name = row_dict['name']
            formula = row_dict['formula']
            if row_dict['T']:
                T = 273.15 + float(row_dict['T'])
            else:
                T = None

            # startswith() is safe on an empty field, where indexing [0]
            # would raise IndexError; float() then reports the bad value.
            pKa_str = row_dict['pKa']
            if pKa_str.startswith('~'):
                pKa = float(pKa_str[1:])
            else:
                pKa = float(pKa_str)

            if cas not in cas2cid:
                logging.warning(
                    'Cannot find this CAS number (%s, %s) in KEGG' %
                    (cas, name))
                cid = None
            else:
                cid = cas2cid[cas]
                # Prefer the KEGG name over the one in the CSV file.
                name = kegg.cid2name(cid)
            public_db.Insert(
                'pKa_from_CRC',
                [unicode(cas), cid,
                 unicode(name),
                 unicode(formula), T, pKa])
    public_db.Commit()
Ejemplo n.º 13
0
def example_lower_glycolysis(thermo):
    """Run the "GAP => PYR" Pathologic search for ``C00118 => C00022``.

    Pathologic is configured with max_reactions=8, maximal_dG=0.0 and the
    GLOBAL optimization method, and reports to ``../res/pathologic.html``.
    Cofactor and redox reactions are added before the search.

    Args:
        thermo: thermodynamic data source handed to Pathologic.
    """
    gibbs_db = SqliteDatabase('../res/gibbs.sqlite', 'r')
    public_db = SqliteDatabase('../data/public_data.sqlite')
    report = HtmlWriter('../res/pathologic.html')
    pl = Pathologic(db=gibbs_db,
                    public_db=public_db,
                    html_writer=report,
                    thermo=thermo,
                    max_solutions=None,
                    max_reactions=8,
                    maximal_dG=0.0,
                    thermodynamic_method=OptimizationMethods.GLOBAL,
                    update_file=None)

    add_cofactor_reactions(pl)
    add_redox_reactions(pl)

    # Only the end points are given and the reaction is not balanced here.
    # A fully written alternative would be
    # "C00003 + C00118 + C00001 => C00022 + C00004 + C00009".
    target = Reaction.FromFormula("C00118 => C00022")
    pl.find_path("GAP => PYR", target)
Ejemplo n.º 14
0
def example_glycolysis(thermo):
    """Run the glycolysis Pathologic search.

    The target reaction is ``C00031 + 3 C00008 => 2 C00186 + 3 C00002`` and
    the search is titled "GLC => 2 LAC, 3 ATP, No methylglyoxal".
    Pathologic is configured with max_reactions=15, maximal_dG=0.0 and the
    GLOBAL optimization method, and reports to ``../res/pathologic.html``.
    Cofactor reactions are added with ``free_ATP_hydrolysis=False`` and
    ``ban_toxic_compounds`` is applied before the search.

    Args:
        thermo: thermodynamic data source handed to Pathologic.
    """
    gibbs_db = SqliteDatabase('../res/gibbs.sqlite', 'r')
    public_db = SqliteDatabase('../data/public_data.sqlite')
    report = HtmlWriter('../res/pathologic.html')
    pl = Pathologic(db=gibbs_db,
                    public_db=public_db,
                    html_writer=report,
                    thermo=thermo,
                    max_solutions=None,
                    max_reactions=15,
                    maximal_dG=0.0,
                    thermodynamic_method=OptimizationMethods.GLOBAL,
                    update_file=None)

    add_cofactor_reactions(pl, free_ATP_hydrolysis=False)
    ban_toxic_compounds(pl)

    # The target reaction is used as written; it is not balanced here.
    target = Reaction.FromFormula(
        "C00031 + 3 C00008 => 2 C00186 + 3 C00002")
    pl.find_path("GLC => 2 LAC, 3 ATP, No methylglyoxal", target)
Ejemplo n.º 15
0
def example_rpi_bypass(thermo):
    """Run the "rpi_bypass" Pathologic search for ``C00117 => C01182``.

    Pathologic is configured with max_reactions=10, maximal_dG=0.0 and the
    GLOBAL optimization method, and reports to ``../res/pathologic.html``.
    Cofactor reactions are added, and reactions 1056 and 1081 are deleted
    before the search.

    Args:
        thermo: thermodynamic data source handed to Pathologic.
    """
    gibbs_db = SqliteDatabase('../res/gibbs.sqlite', 'r')
    public_db = SqliteDatabase('../data/public_data.sqlite')
    report = HtmlWriter('../res/pathologic.html')
    pl = Pathologic(db=gibbs_db,
                    public_db=public_db,
                    html_writer=report,
                    thermo=thermo,
                    max_solutions=None,
                    max_reactions=10,
                    maximal_dG=0.0,
                    thermodynamic_method=OptimizationMethods.GLOBAL,
                    update_file=None)

    add_cofactor_reactions(pl)

    removed_reactions = (
        1056,  # ribose-phosphate isomerase
        1081,  # ribose isomerase
    )
    for rid in removed_reactions:
        pl.delete_reaction(rid)

    # The target reaction is used as written; it is not balanced here.
    target = Reaction.FromFormula("C00117 => C01182")
    pl.find_path("rpi_bypass", target)
Ejemplo n.º 16
0
def example_three_acetate(thermo):
    """Run the "three_acetate" Pathologic search for ``C00031 => 3 C00033``.

    Pathologic is configured with max_reactions=20, maximal_dG=0.0 and the
    GLOBAL optimization method, and reports to ``../res/pathologic.html``.
    Cofactor reactions are added, and reactions 761 and 1621 are deleted
    before the search.

    Args:
        thermo: thermodynamic data source handed to Pathologic.
    """
    gibbs_db = SqliteDatabase('../res/gibbs.sqlite', 'r')
    public_db = SqliteDatabase('../data/public_data.sqlite')
    report = HtmlWriter('../res/pathologic.html')
    pl = Pathologic(db=gibbs_db,
                    public_db=public_db,
                    html_writer=report,
                    thermo=thermo,
                    max_solutions=None,
                    max_reactions=20,
                    maximal_dG=0.0,
                    thermodynamic_method=OptimizationMethods.GLOBAL,
                    update_file=None)

    add_cofactor_reactions(pl)

    removed_reactions = (
        761,   # F6P + Pi = E4P + acetyl-P
        1621,  # X5P + Pi = GA3P + acetyl-P
    )
    for rid in removed_reactions:
        pl.delete_reaction(rid)

    # The target reaction is used as written; it is not balanced here.
    target = Reaction.FromFormula("C00031 => 3 C00033")
    pl.find_path("three_acetate", target)
Ejemplo n.º 17
0
def example_more_than_two_pyruvate(thermo):
    """Run the "more_than_two_pyr" Pathologic search.

    The target reaction is
    ``3 C00031 + 3 C00011 + C00003 => 7 C00022 + 3 C00001 + C00004`` and it
    is balanced before the search.  Pathologic is configured with
    max_reactions=20, maximal_dG=0.0 and the GLOBAL optimization method, and
    reports to ``../res/pathologic.html``.  No extra reactions are added or
    removed.

    Args:
        thermo: thermodynamic data source handed to Pathologic.
    """
    gibbs_db = SqliteDatabase('../res/gibbs.sqlite', 'r')
    public_db = SqliteDatabase('../data/public_data.sqlite')
    report = HtmlWriter('../res/pathologic.html')
    pl = Pathologic(db=gibbs_db,
                    public_db=public_db,
                    html_writer=report,
                    thermo=thermo,
                    max_solutions=None,
                    max_reactions=20,
                    maximal_dG=0.0,
                    thermodynamic_method=OptimizationMethods.GLOBAL,
                    update_file=None)

    target = Reaction.FromFormula(
        "3 C00031 + 3 C00011 + C00003 => 7 C00022 + 3 C00001 + C00004")
    target.Balance()
    pl.find_path("more_than_two_pyr", target)
Ejemplo n.º 18
0
def example_glucose_to_ethanol_and_formate(thermo):
    """Run the "glucose_to_ethanol_and_formate" Pathologic search.

    The target reaction is ``2 C00031 + 3 C00001 => 6 C00058 + 3 C00469``
    and it is balanced before the search.  Pathologic is configured with
    max_reactions=15, maximal_dG=0.0 and the GLOBAL optimization method, and
    reports to ``../res/pathologic.html``.  No extra reactions are added or
    removed.

    Args:
        thermo: thermodynamic data source handed to Pathologic.
    """
    gibbs_db = SqliteDatabase('../res/gibbs.sqlite', 'r')
    public_db = SqliteDatabase('../data/public_data.sqlite')
    report = HtmlWriter('../res/pathologic.html')
    pl = Pathologic(db=gibbs_db,
                    public_db=public_db,
                    html_writer=report,
                    thermo=thermo,
                    max_solutions=None,
                    max_reactions=15,
                    maximal_dG=0.0,
                    thermodynamic_method=OptimizationMethods.GLOBAL,
                    update_file=None)

    target = Reaction.FromFormula(
        "2 C00031 + 3 C00001 => 6 C00058 + 3 C00469")
    target.Balance()
    pl.find_path("glucose_to_ethanol_and_formate", target)
Ejemplo n.º 19
0
def main():
    """Train the NIST regression and write an HTML report of the results.

    The output path and the ``use_prior`` / ``from_database`` switches come
    from the command-line options.  The report has three toggled sections:
    the regression tables, the PRC results, and the comparison produced by
    ``VerifyResults``.  The N and RMSE returned by ``VerifyResults`` are
    logged at the end.
    """
    options, _ = MakeOpts().parse_args(sys.argv)

    db = SqliteDatabase("../res/gibbs.sqlite")
    public_db = SqliteDatabase("../data/public_data.sqlite")
    output_filename = os.path.abspath(options.output_filename)
    logging.info('Will write output to %s' % output_filename)

    html_writer = HtmlWriter(output_filename)

    def open_section(title):
        # Bold caption followed by the start of a toggled block.
        html_writer.write('</br><b>%s</b>\n' % title)
        html_writer.insert_toggle(start_here=True)

    nist_regression = NistRegression(db,
                                     html_writer=html_writer,
                                     nist=Nist(T_range=None))
    # the threshold over which to print an analysis of a reaction
    nist_regression.std_diff_threshold = 5

    html_writer.write("<h2>NIST regression:</h2>")
    prior_thermo = None
    if options.use_prior:
        logging.info('Using the data from Alberty as fixed prior')
        prior_thermo = PsuedoisomerTableThermodynamics.FromDatabase(
            public_db, 'alberty_pseudoisomers', name="Alberty")

    open_section('Regression Tables')
    nist_regression.Train(options.from_database, prior_thermo)
    html_writer.div_end()

    open_section('PRC results')
    nist_regression.WriteDataToHtml(html_writer)
    html_writer.div_end()

    open_section('Transformed reaction energies - PRC vs. Observed')
    N, rmse = nist_regression.VerifyResults()
    html_writer.div_end()

    logging.info("Regression results for transformed data:")
    logging.info("N = %d, RMSE = %.1f" % (N, rmse))

    html_writer.close()
Ejemplo n.º 20
0
def test_single_modules(mids):
    """Run the thermodynamic pathway analysis on each of the given modules.

    The results are written to ``../res/thermodynamic_module_analysis.html``,
    with one heading per module.

    Args:
        mids: iterable of integer module IDs.
    """
    from pygibbs.groups import GroupContribution

    gibbs_db = SqliteDatabase('../res/gibbs.sqlite')
    report = HtmlWriter("../res/thermodynamic_module_analysis.html")
    group_contribution = GroupContribution(gibbs_db, report)
    group_contribution.init()

    for mid in mids:
        report.write("<h2>M%05d</h2>\n" % mid)
        module = group_contribution.kegg.get_module(mid)
        S, rids, fluxes, cids = module
        thermodynamic_pathway_analysis(S, rids, fluxes, cids,
                                       group_contribution, report)
Ejemplo n.º 21
0
def example_formate(thermo, product_cid=22, co2_conc=1e-5):
    """Run a Pathologic search from formate to the given product.

    The bounds of compounds 11 and 288 in ``thermo`` are pinned first: 11 to
    ``co2_conc``, and 288 to the value derived from it using the transformed
    energy of ``C00011 + C00001 => C00288``.  A list of reactions is then
    removed, three uptake reactions from C06265 are added, and the search
    looks for ``5 C06265 + C00058 => <product>``.

    Args:
        thermo: thermodynamic data source; its ``bounds`` are modified.
        product_cid: integer compound ID of the product.
        co2_conc: concentration assigned to compound 11.
    """
    hydration = Reaction.FromFormula("C00011 + C00001 => C00288")
    hydration_dG0_prime = float(
        thermo.GetTransfromedKeggReactionEnergies([hydration]))
    carbonate_conc = co2_conc * np.exp(-hydration_dG0_prime / (R*default_T))
    thermo.bounds[11] = (co2_conc, co2_conc)
    thermo.bounds[288] = (carbonate_conc, carbonate_conc)

    gibbs_db = SqliteDatabase('../res/gibbs.sqlite', 'r')
    public_db = SqliteDatabase('../data/public_data.sqlite')
    report = HtmlWriter('../res/pathologic.html')
    pl = Pathologic(db=gibbs_db,
                    public_db=public_db,
                    html_writer=report,
                    thermo=thermo,
                    max_solutions=None,
                    max_reactions=20,
                    maximal_dG=0.0,
                    thermodynamic_method=OptimizationMethods.GLOBAL,
                    update_file=None)
    add_cofactor_reactions(pl, free_ATP_hydrolysis=True)
    add_redox_reactions(pl, NAD_only=False)

    removed_reactions = (
        134,   # formate:NADP+ oxidoreductase
        519,   # Formate:NAD+ oxidoreductase
        24,    # Rubisco
        581,   # L-serine:NAD+ oxidoreductase (deaminating)
        220,   # L-serine ammonia-lyase
        13,    # glyoxylate carboxy-lyase (dimerizing; tartronate-semialdehyde-forming)
        585,   # L-Serine:pyruvate aminotransferase
        1440,  # D-Xylulose-5-phosphate:formaldehyde glycolaldehydetransferase
        5338,  # 3-hexulose-6-phosphate synthase
    )
    for rid in removed_reactions:
        pl.delete_reaction(rid)

    uptake_reactions = (
        ("C06265 => C00011", "CO2 uptake"),
        ("C06265 => C00288", "carbonate uptake"),
        ("C06265 => C00058", "formate uptake"),
    )
    for formula, label in uptake_reactions:
        pl.add_reaction(Reaction.FromFormula(formula, name=label))

    # at least one formate to product; the reaction is not balanced here
    target = Reaction.FromFormula(
        "5 C06265 + C00058 => C%05d" % product_cid)

    kegg = Kegg.getInstance()
    pl.find_path("formate to %s" % kegg.cid2name(product_cid), target)
Ejemplo n.º 22
0
def ExportJSONFiles():
    estimators = LoadAllEstimators()
    options, _ = MakeOpts(estimators).parse_args(sys.argv)

    thermo_list = []
    thermo_list.append(estimators[options.thermodynamics_source])
    thermo_list.append(
        PsuedoisomerTableThermodynamics.FromCsvFile(
            options.thermodynamics_csv))

    # Make sure we have all the data.
    kegg = Kegg.getInstance()
    for i, thermo in enumerate(thermo_list):
        print "Priority %d - formation energies of: %s" % (i + 1, thermo.name)
        kegg.AddThermodynamicData(thermo, priority=(i + 1))

    db = SqliteDatabase('../res/gibbs.sqlite')

    print 'Exporting Group Contribution Nullspace matrix as JSON.'
    nullspace_vectors = []
    for row in db.DictReader('ugc_conservations'):
        d = {'msg': row['msg']}
        sparse = json.loads(row['json'])
        d['reaction'] = []
        for cid, coeff in sparse.iteritems():
            d['reaction'].append([coeff, "C%05d" % int(cid)])
        nullspace_vectors.append(d)
    WriteJSONFile(nullspace_vectors, options.nullspace_out_filename)

    print 'Exporting KEGG compounds as JSON.'
    WriteJSONFile(kegg.AllCompounds(), options.compounds_out_filename)

    print 'Exporting KEGG reactions as JSON.'
    WriteJSONFile(kegg.AllReactions(), options.reactions_out_filename)

    print 'Exporting KEGG enzymes as JSON.'
    WriteJSONFile(kegg.AllEnzymes(), options.enzymes_out_filename)
Ejemplo n.º 23
0
def runPathologic(thermo, reactionList):
    """Run the "MOG_finder" Pathologic search for ``2 C00288 => C00048``.

    Pathologic is configured with max_reactions=15, maximal_dG=-3.0 and the
    GLOBAL optimization method, and reports to ``../res/mog_finder.html``.
    Cofactor reactions, redox reactions and the caller's extra reactions are
    added, and reactions 134, 344, 575 and 212 are deleted before the search.

    Args:
        thermo: thermodynamic data source handed to Pathologic.
        reactionList: iterable of reaction formula strings; each is added
            under a name derived from the hash of its formula.
    """
    gibbs_db = SqliteDatabase('../res/gibbs.sqlite', 'r')
    public_db = SqliteDatabase('../data/public_data.sqlite')
    report = HtmlWriter('../res/mog_finder.html')
    pl = Pathologic(db=gibbs_db,
                    public_db=public_db,
                    html_writer=report,
                    thermo=thermo,
                    max_solutions=None,
                    max_reactions=15,
                    maximal_dG=-3.0,
                    thermodynamic_method=OptimizationMethods.GLOBAL,
                    update_file=None)

    add_cofactor_reactions(pl)
    add_redox_reactions(pl)

    for formula in reactionList:
        label = "Auto generate #%s" % hash(formula)
        pl.add_reaction(Reaction.FromFormula(formula, label))

    for rid in (134, 344, 575, 212):
        pl.delete_reaction(rid)

    # Disabled extra reactions, kept for reference:
    #pl.add_reaction(Reaction.FromFormula('C00149 + C00006 <=> C00036 + C00005 + C00080',
    #                                     'malate + NADP+ = oxaloacetate + NADPH',343))
    #pl.add_reaction(Reaction.FromFormula('C00222 + C00010 + C00006 <=> C00083 + C00005',
    #                                     'malonate-semialdehyde + CoA + NADP+ = malonyl-CoA + NADPH',740))
    target = Reaction.FromFormula("2 C00288 => C00048")
    pl.find_path("MOG_finder", target)
Ejemplo n.º 24
0
def main():
    """Export stoichiometry, reaction energies and EC numbers as CSV files.

    Formation energies are taken from the Alberty table at pH 7.0, pMg 3,
    I 0.1 and T 298.15.  Only KEGG reactions whose compounds are all covered
    by that table are exported.  Four files are written to ``../res/arren``:
    the stoichiometric matrix, the reaction energies, the EC numbers and the
    compound formation energies.
    """
    pH, pMg, I, T = (7.0, 3, 0.1, 298.15)

    # NOTE(review): db is not used below; the call is kept in case opening
    # the database has side effects that this script relies on.
    db = SqliteDatabase('../res/gibbs.sqlite')
    kegg = Kegg.getInstance()
    # NOTE(review): elsewhere this class is built with FromCsvFile() or
    # FromDatabase(); confirm that the constructor really accepts a CSV path.
    alberty = PsuedoisomerTableThermodynamics(
        '../data/thermodynamics/alberty_pseudoisomers.csv')

    cids = alberty.get_all_cids()
    n_cids = len(cids)
    dG0_f = pylab.zeros((n_cids, 1))

    for i, cid in enumerate(cids):
        dG0_f[i, 0] = alberty.cid2dG0_tag(cid, pH=pH, pMg=pMg, I=I, T=T)

    # Built once, instead of rebuilding set(cids) and scanning cids.index()
    # for every reaction.  setdefault keeps the first position of a compound,
    # which is what list.index() would return.
    known_cids = set(cids)
    cid2column = {}
    for column, cid in enumerate(cids):
        cid2column.setdefault(cid, column)

    rids = []
    ec_numbers = []
    S_rows = []

    for rid in kegg.get_all_rids():
        sparse = kegg.rid2sparse_reaction(rid)
        if not known_cids.issuperset(sparse.keys()):
            continue

        rids.append(rid)
        ec_numbers.append(kegg.rid2ec_list(rid))
        S_row = pylab.zeros((1, n_cids))
        for cid, coeff in sparse.iteritems():
            S_row[0, cid2column[cid]] = coeff
        S_rows.append(S_row)

    # Stack all the rows at once; the empty first block keeps the
    # (0, n_cids) shape when no reaction qualified.
    S = pylab.vstack([pylab.zeros((0, n_cids))] + S_rows)

    dG0_r = pylab.dot(S, dG0_f)

    util._mkdir('../res/arren')
    # Context managers make sure every output file is flushed and closed.
    with open('../res/arren/stoichiomety.csv', 'w') as s_file, \
            open('../res/arren/reactions.csv', 'w') as r_file, \
            open('../res/arren/ec_numbers.csv', 'w') as e_file:
        s_writer = csv.writer(s_file)
        r_writer = csv.writer(r_file)
        e_writer = csv.writer(e_file)
        r_writer.writerow(['rid', 'dG0_r'])
        e_writer.writerow(['rid', 'ec0', 'ec1', 'ec2', 'ec3'])
        for i, rid in enumerate(rids):
            s_writer.writerow(["%d" % x for x in S[i, :]])
            # One output row per EC number of the reaction.
            for ec in ec_numbers[i].split(';'):
                e_writer.writerow(['%d' % rid] + ec.split('.'))
            r_writer.writerow(["%d" % rid, '%.1f' % dG0_r[i, 0]])

    with open('../res/arren/compounds.csv', 'w') as c_file:
        c_writer = csv.writer(c_file)
        c_writer.writerow(['cid', 'dG0_f'])
        for j, cid in enumerate(cids):
            c_writer.writerow(['%d' % cid, '%.1f' % dG0_f[j, 0]])
Ejemplo n.º 25
0
def ExportJSONFiles():
    estimators = LoadAllEstimators()
    options, _ = MakeOpts(estimators).parse_args(sys.argv)

    thermo = estimators[options.thermodynamics_source]
    print "Using the thermodynamic estimations of: " + thermo.name

    # Make sure we have all the data.
    kegg = Kegg.getInstance()
    kegg.AddThermodynamicData(estimators['alberty'], priority=1)
    kegg.AddThermodynamicData(thermo, priority=2)

    db = SqliteDatabase('../res/gibbs.sqlite')
    kegg.AddGroupVectorData(db, table_name='pgc_groupvector')

    print 'Exporting KEGG compound pseudoisomers as JSON.'
    WriteJSONFile(kegg.AllCompounds(), options.out_filename)
Ejemplo n.º 26
0
def CreateDummyDB():
    """Open ``/tmp/dummy.sqlite`` for writing and create the tecan tables.

    Every table is created with ``drop_if_exists=False``.

    Returns:
        The opened SqliteDatabase object.
    """
    # (table name, column definitions)
    schemas = (
        ('tecan_readings',
         'exp_id TEXT, plate TEXT, reading_label TEXT, row INT, col INT, time INT, measurement REAL'),
        ('tecan_labels',
         'exp_id TEXT, plate INT, row INT, col INT, label TEXT'),
        ('tecan_plates',
         'exp_id TEXT, plate INT, description TEXT, owner TEXT, project TEXT'),
        ('tecan_experiments',
         'exp_id TEXT, serial_number TEXT, desciption TEXT'),
        ('tecan_scripts',
         'exp_id TEXT, script BLOB'),
    )

    db = SqliteDatabase('/tmp/dummy.sqlite', 'w')
    for table_name, columns in schemas:
        db.CreateTable(table_name, columns, drop_if_exists=False)
    return db
Ejemplo n.º 27
0
def dissociation_decomposition_test():
    """
        Verifies that the decomposition of the compounds in the dissociation table match the nH of each species.

        Compounds without an atom bag are only logged and skipped; missing
        SMILES are ignored.
    """
    gibbs_db = SqliteDatabase('../res/gibbs.sqlite')
    dissociation = DissociationConstants.FromPublicDB()
    decomposer = GroupDecomposer(GroupsData.FromDatabase(gibbs_db))
    kegg = Kegg.getInstance()

    for cid in dissociation.GetAllCids():
        label = "C%05d (%s)" % (cid, kegg.cid2name(cid))
        if kegg.cid2compound(cid).get_atom_bag() is None:
            logging.debug('%s: has no explicit formula' % label)
            continue

        diss_table = dissociation.GetDissociationTable(
            cid, create_if_missing=False)
        test_dissociation_table(diss_table,
                                decomposer,
                                label,
                                ignore_missing_smiles=True)
Ejemplo n.º 28
0
def main():
    estimators = LoadAllEstimators()
    args, _ = MakeOpts(estimators).parse_args(sys.argv)
    
    # Make sure we have all the data.
    db = SqliteDatabase('../res/gibbs.sqlite')
    G = GroupContribution(db=db, html_writer=NullHtmlWriter(),
                          transformed=args.transformed)
    G.init()
    
    print 'Exporting KEGG compounds to %s' % args.compounds_out_filename
    csv_writer = csv.writer(open(args.compounds_out_filename, 'w'))
    csv_writer.writerow(["KEGG ID", "nH", "CHARGE", "nMg", "dG0_f"])
    for cid in sorted(G.get_all_cids()):
        try:
            for nH, z, nMg, dG0 in G.cid2PseudoisomerMap(cid).ToMatrix():
                csv_writer.writerow(["C%05d" % cid, nH, z, nMg, "%.1f" % dG0])
        except MissingCompoundFormationEnergy as e:
            csv_writer.writerow(["C%05d" % cid, None, None, None, str(e)])
        
    print 'Exporting KEGG reactions to %s' % args.reactions_out_filename
    csv_writer = csv.writer(open(args.reactions_out_filename, 'w'))
    csv_writer.writerow(["KEGG ID", "dG'0_r (pH=%.1f, I=%.2f, pMg=%.1f, T=%.1f)" % 
                         (args.ph, args.i_s, args.pmg, args.temp)])
    for rid in sorted(G.kegg.get_all_rids()):
        reaction = G.kegg.rid2reaction(rid)
        try:
            reaction.Balance(balance_water=True)
            dG0_r = reaction.PredictReactionEnergy(G, pH=args.ph,
                        pMg=args.pmg, I=args.i_s, T=args.temp)
            csv_writer.writerow(["R%05d" % rid, "%.1f" % dG0_r])
        except (KeggParseException,
                MissingCompoundFormationEnergy, 
                KeggReactionNotBalancedException,
                MissingReactionEnergy,
                KeyError,
                OpenBabelError) as e:
            csv_writer.writerow(["R%05d" % rid, str(e)])
Ejemplo n.º 29
0
    def __init__(self, use_pKa=True):
        """Load the Jankowski et al. estimates from ``HATZI_CSV_FNAME``.

        Args:
            use_pKa: when true, dissociation constants are loaded from the
                public database and the estimator is named
                "Jankowski et al. (+pKa)".  Otherwise ``self.dissociation``
                is None and the name is "Jankowski et al.".
        """
        if use_pKa:
            Thermodynamics.__init__(self, "Jankowski et al. (+pKa)")
            self.dissociation = DissociationConstants.FromPublicDB()
        else:
            Thermodynamics.__init__(self, "Jankowski et al.")
            self.dissociation = None
        self.db = SqliteDatabase('../res/gibbs.sqlite', 'w')
        self.cid2pmap_dict = {}

        # the conditions in which Hatzimanikatis makes his predictions
        self.Hatzi_pH = 7.0
        self.Hatzi_I = 0.0
        self.Hatzi_pMg = 14.0
        self.Hatzi_T = 298.15

        self.kegg = Kegg.getInstance()

        # for some reason, Hatzimanikatis doesn't indicate that H+ is zero,
        # so we add it here
        H_pmap = PseudoisomerMap()
        H_pmap.Add(0, 0, 0, 0)
        self.SetPseudoisomerMap(80, H_pmap)

        self.cid2dG0_tag_dict = {80: 0}
        self.cid2charge_dict = {80: 0}

        # The context manager closes the CSV file once it has been read.
        with open(HATZI_CSV_FNAME, 'r') as hatzi_file:
            for row in csv.DictReader(hatzi_file):
                # ENTRY is a one-character prefix followed by the number.
                cid = int(row['ENTRY'][1:])
                self.cid2source_string[cid] = 'Jankowski et al. 2008'
                if row['DELTAG'] == "Not calculated":
                    continue
                if cid == 3178:
                    # this compound, which is supposed to be
                    # "Tetrahydroxypteridine" seems to be mapped to
                    # something else by Hatzimanikatis
                    continue
                self.cid2dG0_tag_dict[cid] = float(row['DELTAG']) * J_per_cal
                self.cid2charge_dict[cid] = int(row['CHARGE'])
Ejemplo n.º 30
0
def nist_dissociation_test():
    """
        Verifies that all the compounds in NIST are covered by the dissociation table, including SMILES strings.

        Compounds without an atom bag are only logged and skipped; missing
        SMILES are not ignored.
    """
    gibbs_db = SqliteDatabase('../res/gibbs.sqlite')
    nist_regression = NistRegression(gibbs_db, html_writer=NullHtmlWriter())
    dissociation = nist_regression.dissociation
    decomposer = GroupDecomposer(GroupsData.FromDatabase(gibbs_db))
    kegg = Kegg.getInstance()

    nist = nist_regression.nist
    for cid in nist.GetAllCids():
        label = "C%05d (%s)" % (cid, kegg.cid2name(cid))
        if kegg.cid2compound(cid).get_atom_bag() is None:
            logging.debug('%s: has no explicit formula' % label)
            continue

        diss_table = dissociation.GetDissociationTable(
            cid, create_if_missing=False)
        test_dissociation_table(diss_table,
                                decomposer,
                                label,
                                ignore_missing_smiles=False)