Beispiel #1
0
def main():
    """Run Nist.verify_formation for each selected estimator.

    The report is written through an HtmlWriter opened on
    ../res/formation_resolve.html, which is closed once every selected
    estimator has been processed.
    """
    report = HtmlWriter("../res/formation_resolve.html")
    all_estimators = LoadAllEstimators()
    # Only the 'alberty' estimator is selected here.
    selected_names = ['alberty']
    for name in selected_names:
        estimator = all_estimators[name]
        # A fresh Nist object is created for every estimator.
        Nist().verify_formation(html_writer=report,
                                thermodynamics=estimator,
                                name=name)
    report.close()
Beispiel #2
0
def test_single_modules(mids):
    """Run thermodynamic_pathway_analysis on each of the given modules.

    mids -- iterable of module IDs; each one is formatted with "M%05d"
            for its section heading and looked up via kegg.get_module.

    Output goes to ../res/thermodynamic_module_analysis.html.
    """
    from pygibbs.groups import GroupContribution
    database = SqliteDatabase('../res/gibbs.sqlite')
    report = HtmlWriter("../res/thermodynamic_module_analysis.html")
    group_contrib = GroupContribution(database, report)
    group_contrib.init()

    for module_id in mids:
        report.write("<h2>M%05d</h2>\n" % module_id)
        module = group_contrib.kegg.get_module(module_id)
        S, rids, fluxes, cids = module
        thermodynamic_pathway_analysis(S, rids, fluxes, cids,
                                       group_contrib, report)
Beispiel #3
0
def test_single_modules(mids):
    """Analyze each module in `mids` and append the result to an HTML report.

    For every module ID a "M%05d" heading is written, the module is fetched
    with kegg.get_module, and its contents are passed to
    thermodynamic_pathway_analysis.

    mids -- iterable of module IDs (formatted with %05d).
    """
    from pygibbs.groups import GroupContribution
    gibbs_db = SqliteDatabase('../res/gibbs.sqlite')
    writer = HtmlWriter("../res/thermodynamic_module_analysis.html")
    contribution = GroupContribution(gibbs_db, writer)
    contribution.init()

    for module_id in mids:
        heading = "<h2>M%05d</h2>\n" % module_id
        writer.write(heading)
        (S, rids, fluxes, cids) = contribution.kegg.get_module(module_id)
        thermodynamic_pathway_analysis(
            S, rids, fluxes, cids, contribution, writer)
Beispiel #4
0
def meta_regulated_rxns_cumul_plots(org, id, thermo):
    """Plot the MetaCyc regulation reversibility histogram for an organism.

    org    -- organism string; used for the MetaCyc instance, the plot title
              and the output file names.
    id     -- string appended to `org` in the output file names and in the
              id passed to the histogram calculation.
    thermo -- thermodynamics object passed to the histogram calculation.

    Writes ../res/<org><id>_regulation.html and saves the figure as
    ../res/<org><id>_regulation.png.
    """
    database = SqliteDatabase('../res/gibbs.sqlite')
    res_prefix = '../res/' + org + id
    report = HtmlWriter(res_prefix + '_regulation.html')
    metacyc = MetaCyc(org, database)
    mid_concentration = 1e-4
    concentration_map = GetConcentrationMap()
    pH = 7.0
    pMg = 14.0
    I = 0.25
    T = 298.15

    histogram = calculate_metacyc_regulation_reversibility_histogram(
        thermo, mid_concentration, pH, pMg, I, T, metacyc,
        cmap=concentration_map, id=(org + id))

    report.write('<h1>Constrained co-factors</h1>')
    plot_title = '%s Reactions: With constraints on co-factors' % org
    figure = plot_histogram(histogram, report, title=plot_title, xlim=20,
                            min_to_show=5, xmin=0, legend_loc='lower right')
    report.embed_matplotlib_figure(figure, width=640, height=480)
    pylab.savefig(res_prefix + '_regulation.png', figure=figure, format='png')
Beispiel #5
0
 def __init__(self, html_fname):
     """Set the table-name constants and open the database and HTML writer.

     html_fname -- file name passed to HtmlWriter for the output report.
     """
     self.serv = None

     # Table names for compounds, genes and reactions.
     self.COMPOUND_TABLE_NAME = 'kegg_compounds'
     self.GENE_TABLE_NAME = 'kegg_genes'
     self.GENE_REACTION_TABLE_NAME = 'kegg_genes_to_reactions'
     self.REACTION_TABLE_NAME = 'kegg_reactions'
     self.EQUATION_TABLE_NAME = 'kegg_equations'
     self.STOICHIOMETRY_TABLE_NAME = 'kegg_stoichiometry'

     # Table names for energies, interactions, gene pairs and cofactors.
     self.GIBBS_ENERGY_TABLE_NAME = 'kegg_gibbs_energies'
     self.GENE_ENERGY_TABLE_NAME = 'kegg_gene_energies'
     # NOTE(review): the attribute name is misspelled ("INTERATCTIONS");
     # it is kept as-is because other code may refer to it by this name.
     self.FUNCTIONAL_INTERATCTIONS_TABLE = 'parkinson_functional_interactions'
     self.GENE_PAIRS_TABLE_NAME = 'kegg_gene_pairs'
     self.COFACTOR_TABLE_NAME = 'kegg_cofactors'

     # The database is opened first, then the HTML writer.
     self.db = SqliteDatabase('channeling/channeling.sqlite', 'w')
     self.html_writer = HtmlWriter(html_fname)
Beispiel #6
0
def compare_charges():
    """Write an HTML table comparing the charges given by two estimators.

    For every compound known to the 'hatzi' or 'milo' estimator, the most
    abundant pseudoisomer (at default pH and T, I = 0, pMg = 14) is looked
    up per estimator. The table in ../res/groups_report.html shows the
    compound link, its name, both charges and the error read from the
    "gc_errors" table.
    """
    #db_public = SqliteDatabase('../data/public_data.sqlite')
    gibbs_db = SqliteDatabase('../res/gibbs.sqlite')
    print("Writing Compare Charges report to ../res/groups_report.html")
    report = HtmlWriter("../res/groups_report.html")
    kegg = Kegg.getInstance()

    #pH, I, pMg, T = default_pH, default_I, default_pMg, default_T
    pH, I, pMg, T = default_pH, 0, 14, default_T

    # Map compound ID -> error value from the "gc_errors" table.
    cid2error = dict((int(record['cid']), record['error'])
                     for record in gibbs_db.DictReader("gc_errors"))

    estimators = {}
    estimators['hatzi'] = Hatzi(use_pKa=False)
    estimators['milo'] = PsuedoisomerTableThermodynamics.FromDatabase(
        gibbs_db, 'gc_pseudoisomers', name='Milo Group Contribution')

    all_cids = set(lsum([est.get_all_cids() for est in estimators.values()]))
    table_rows = []
    for cid in all_cids:
        try:
            name = kegg.cid2name(cid)
            link = kegg.cid2compound(cid).get_link()
        except KeyError:
            name, link = "unknown", ""
        row = {'cid': '<a href="%s">C%05d</a>' % (link, cid),
               'name': name,
               'error': cid2error.get(cid)}
        for key, est in estimators.iteritems():
            try:
                pmap = est.cid2PseudoisomerMap(cid)
                species = pmap.GetMostAbundantPseudoisomer(pH, I, pMg, T)
            except MissingCompoundFormationEnergy:
                # Leave every column blank for this estimator.
                species = ("", "", "", "", "")
            dG0, dG0_tag, nH, z, nMg = species
            row['nH_' + key] = nH
            row['charge_' + key] = z
            row['nMg_' + key] = nMg
            row['dG0_' + key] = dG0
            row['dG0_tag_' + key] = dG0_tag
        table_rows.append(row)

    shown_columns = ['cid', 'name', 'charge_hatzi', 'charge_milo', 'error']
    report.write_table(table_rows, headers=shown_columns)
    report.close()
Beispiel #7
0
def AnalyzeConcentrationGradient(prefix,
                                 thermo,
                                 csv_output_fname,
                                 cid=13):  # default compound is PPi
    """Scan one compound's concentration and plot the resulting values.

    For each concentration the compound `cid` is pinned to that value via
    `override_bounds`, `pareto` is run on the pathways in
    ../data/thermodynamics/<prefix>.txt, and column 1 of the returned data
    is collected. The collected values are plotted against concentration
    and embedded in ../res/<prefix>.html.

    prefix           -- base name of the input .txt and the output .html.
    thermo           -- thermodynamics object; its kegg, I and T are read.
    csv_output_fname -- path of a CSV file to write one row per
                        concentration to; a false value disables CSV output.
    cid              -- compound ID whose concentration is scanned.
    """
    compound_name = thermo.kegg.cid2name(cid)
    kegg_file = ParsedKeggFile.FromKeggFile('../data/thermodynamics/%s.txt' %
                                            prefix)
    html_writer = HtmlWriter('../res/%s.html' % prefix)
    null_html_writer = NullHtmlWriter()

    # The CSV output is optional; keep the file handle so it can be closed.
    csv_file = None
    csv_output = None
    if csv_output_fname:
        csv_file = open(csv_output_fname, 'w')
        csv_output = csv.writer(csv_file)
        csv_output.writerow(['pH', 'I', 'T', '[C%05d]' % cid] +
                            kegg_file.entries())

    pH_vec = np.array(
        [7])  # this needs to be fixed so that the txt file will set the pH
    # logarithmic scale between 10 mM (1e-2 M) and 1 uM (1e-6 M)
    conc_vec = 10**(-np.arange(2, 6.0001, 0.25))
    override_bounds = {}

    fig = plt.figure(figsize=(6, 6), dpi=90)
    legend = []
    try:
        for pH in pH_vec.flat:
            obd_vec = []
            for conc in conc_vec.flat:
                override_bounds[cid] = (conc, conc)
                logging.info("pH = %g, [%s] = %.1e M" %
                             (pH, compound_name, conc))
                data, labels = pareto(kegg_file,
                                      null_html_writer,
                                      thermo,
                                      pH=pH,
                                      section_prefix="",
                                      balance_water=True,
                                      override_bounds=override_bounds)
                obd_vec.append(data[:, 1])
                # Without this guard a missing CSV file name crashed here.
                if csv_output is not None:
                    csv_output.writerow([pH, thermo.I, thermo.T, conc] +
                                        list(data[:, 1].flat))
            # one row per concentration (each row is that run's data[:, 1])
            obd_mat = np.matrix(obd_vec)
            plt.plot(conc_vec, obd_mat, '.-', figure=fig)
            legend += ['%s, pH = %g' % (l, pH) for l in labels]
    finally:
        if csv_file is not None:
            csv_file.close()

    plt.title("ODB vs. [%s] (I = %gM, T = %gK)" %
              (compound_name, thermo.I, thermo.T),
              figure=fig)
    plt.xscale('log')
    plt.xlabel('Concentration of %s [M]' % thermo.kegg.cid2name(cid),
               figure=fig)
    plt.ylabel('Optimized Distributed Bottleneck [kJ/mol]', figure=fig)
    plt.legend(legend)
    # The closing tag now matches the opening <h2>.
    html_writer.write('<h2 id="figure_%s">Summary figure</h2>\n' % prefix)
    html_writer.embed_matplotlib_figure(fig, name=prefix)

    html_writer.close()
Beispiel #8
0
def AnalyzePareto(pathway_file, output_prefix, thermo, pH=None):
    """Run the OBD analysis on all pathways and report OBD vs. average dG.

    Writes <output_prefix>.xls with one sheet per pathway (reaction,
    formula, flux, delta_r G' and shadow price per row) and
    <output_prefix>.html containing the GetAllOBDs output followed by a
    scatter plot of OBD against minus the average reaction dG.

    pathway_file  -- file parsed by KeggFile2PathwayList.
    output_prefix -- path prefix for the .html and .xls output files.
    thermo        -- thermodynamics object passed to GetAllOBDs.
    pH            -- pH passed to GetAllOBDs (None is passed through as-is).
    """
    pathway_list = KeggFile2PathwayList(pathway_file)
    pathway_names = [entry for (entry, _) in pathway_list]
    html_writer = HtmlWriter('%s.html' % output_prefix)
    xls_workbook = Workbook()

    logging.info("running OBD analysis for all pathways")
    data = GetAllOBDs(pathway_list,
                      html_writer,
                      thermo,
                      pH=pH,
                      section_prefix="pareto",
                      balance_water=True,
                      override_bounds={})

    column_titles = ["reaction", "formula", "flux", "delta_r G'",
                     "shadow price"]
    for d in data:
        sheet = xls_workbook.add_sheet(d['entry'])
        for col, title in enumerate(column_titles):
            sheet.write(0, col, title)
        for r, rid in enumerate(d['rids']):
            sheet.write(r + 1, 0, rid)
            sheet.write(r + 1, 1, d['formulas'][r])
            sheet.write(r + 1, 2, d['fluxes'][0, r])
            sheet.write(r + 1, 3, d['dG_r_prime'][0, r])
            sheet.write(r + 1, 4, d['reaction prices'][r, 0])

    xls_workbook.save('%s.xls' % output_prefix)

    obds = []
    minus_avg_tg = []
    for d in data:
        obds.append(d['OBD'])
        if d['sum of fluxes']:
            minus_avg_tg.append(-d['max total dG'] / d['sum of fluxes'])
        else:
            # Avoid dividing by a zero total flux.
            minus_avg_tg.append(0)

    fig = plt.figure(figsize=(6, 6), dpi=90)
    plt.plot(minus_avg_tg, obds, 'o', figure=fig)
    # Dashed diagonal (y = x) as a visual reference.
    diag_max = max(minus_avg_tg)
    plt.plot([0, diag_max], [0, diag_max], '--g')
    for i, name in enumerate(pathway_names):
        plt.text(minus_avg_tg[i], obds[i], name)
    # Raw string: "\D" is not a valid escape sequence in a normal literal.
    plt.title(r'OBD vs. Average $\Delta_r G$')
    plt.ylim(ymin=0)
    plt.xlim(xmin=0)
    plt.xlabel(r'- Average $\Delta_r G$ [kJ/mol]')
    plt.ylabel(r'Optimized Distributed Bottleneck [kJ/mol]')
    # The closing tag now matches the opening <h2>.
    html_writer.write('<h2>Pareto figure</h2>\n')
    html_writer.embed_matplotlib_figure(fig)
    html_writer.close()
Beispiel #9
0
def AnalyzePHGradient(pathway_file, output_prefix, thermo, conc_range):
    """Compute the OBD of every pathway over a range of pH values.

    Output files:
      <output_prefix>.html        -- initial test run plus a summary figure
      <output_prefix>.csv         -- one row per pH with every pathway's OBD
      <output_prefix>/<entry>.csv -- per pathway, one row per pH holding the
                                     flattened 'reaction prices'

    pathway_file  -- file parsed by KeggFile2PathwayList.
    output_prefix -- path prefix for the outputs listed above.
    thermo        -- thermodynamics object passed to GetAllOBDs.
    conc_range    -- despite its name, this is parsed with
                     ParseConcentrationRange into the pH values to scan.
    """
    pathway_list = KeggFile2PathwayList(pathway_file)
    pathway_names = [entry for (entry, _) in pathway_list]
    html_writer = HtmlWriter('%s.html' % output_prefix)

    # run once just to make sure that the pathways are all working:
    logging.info("testing all pathways with default pH")
    data = GetAllOBDs(pathway_list, html_writer, thermo,
                      pH=None, section_prefix="test", balance_water=True,
                      override_bounds={})

    # Every opened CSV file is tracked so that it is closed (and flushed)
    # even if the scan fails part-way.
    open_files = []
    try:
        summary_file = open('%s.csv' % output_prefix, 'w')
        open_files.append(summary_file)
        csv_output = csv.writer(summary_file)
        csv_output.writerow(['pH'] + pathway_names)

        util._mkdir(output_prefix)
        shadow_csvs = {}
        for d in data:
            path = '%s/%s.csv' % (output_prefix, d['entry'])
            shadow_file = open(path, 'w')
            open_files.append(shadow_file)
            shadow_csvs[d['entry']] = csv.writer(shadow_file)
            shadow_csvs[d['entry']].writerow(['pH'] + d['rids'])

        pH_vec = ParseConcentrationRange(conc_range)
        obd_mat = []
        for pH in pH_vec.flat:
            logging.info("pH = %.1f" % (pH))
            data = GetAllOBDs(pathway_list, html_writer=None, thermo=thermo,
                              pH=pH, section_prefix="", balance_water=True,
                              override_bounds={})
            obds = [d['OBD'] for d in data]
            obd_mat.append(obds)
            csv_output.writerow([data[0]['pH']] + obds)

            for d in data:
                # A plain float here has no .flat to iterate, so such
                # pathways get no shadow-price row for this pH.
                if type(d['reaction prices']) != types.FloatType:
                    prices = list(d['reaction prices'].flat)
                    shadow_csvs[d['entry']].writerow([pH] + prices)
    finally:
        for csv_file in open_files:
            csv_file.close()

    obd_mat = np.matrix(obd_mat)  # rows are pH values, columns are pathways

    fig = plt.figure(figsize=(6, 6), dpi=90)
    colormap = color.ColorMap(pathway_names)
    for i, name in enumerate(pathway_names):
        plt.plot(pH_vec, obd_mat[:, i], '-', color=colormap[name],
                 figure=fig)
    plt.title("OBD vs. pH", figure=fig)
    plt.ylim(0, np.max(obd_mat.flat))
    plt.xlabel('pH', figure=fig)
    plt.ylabel('Optimized Distributed Bottleneck [kJ/mol]', figure=fig)
    plt.legend(pathway_names)
    # The closing tag now matches the opening <h2>.
    html_writer.write('<h2>Summary figure</h2>\n')
    html_writer.embed_matplotlib_figure(fig)

    html_writer.close()
Beispiel #10
0
def example_reductive(thermo):
    """Run the "reductive" Pathologic search for "3 C00011 => C00022".

    Cofactor and redox reactions are added before the search. The
    HtmlWriter for ../res/pathologic.html is handed to Pathologic.

    thermo -- thermodynamics object passed to Pathologic.
    """
    gibbs_db = SqliteDatabase('../res/gibbs.sqlite', 'r')
    public_db = SqliteDatabase('../data/public_data.sqlite')
    report = HtmlWriter('../res/pathologic.html')
    pathologic = Pathologic(
        db=gibbs_db,
        public_db=public_db,
        html_writer=report,
        thermo=thermo,
        max_solutions=None,
        max_reactions=15,
        maximal_dG=0.0,
        thermodynamic_method=OptimizationMethods.GLOBAL,
        update_file=None)
    add_cofactor_reactions(pathologic)
    add_redox_reactions(pathologic)
    # The target reaction is used as written (Balance() is not called).
    target = Reaction.FromFormula("3 C00011 => C00022")
    pathologic.find_path("reductive", target)
Beispiel #11
0
def example_oxidative(thermo):
    """Run the "oxidative" Pathologic search for "C00022 => 3 C00011".

    Uses the MAX_TOTAL optimization method with at most 10 reactions.
    Cofactor reactions and redox reactions (with NAD_only=False) are added
    before the search.

    thermo -- thermodynamics object passed to Pathologic.
    """
    gibbs_db = SqliteDatabase('../res/gibbs.sqlite', 'r')
    public_db = SqliteDatabase('../data/public_data.sqlite')
    report = HtmlWriter('../res/pathologic.html')
    pathologic = Pathologic(
        db=gibbs_db,
        public_db=public_db,
        html_writer=report,
        thermo=thermo,
        max_solutions=None,
        max_reactions=10,
        maximal_dG=0,
        thermodynamic_method=OptimizationMethods.MAX_TOTAL,
        update_file=None)
    add_cofactor_reactions(pathologic)
    add_redox_reactions(pathologic, NAD_only=False)
    # The target reaction is used as written (Balance() is not called).
    target = Reaction.FromFormula("C00022 => 3 C00011")
    pathologic.find_path("oxidative", target)
Beispiel #12
0
def runBeta2Alpha(thermo, reactionList):
    """Run the "Beta2Alpha" Pathologic search for "C00099 => C01401".

    thermo       -- thermodynamics object passed to Pathologic.
    reactionList -- iterable of reaction formula strings; each is parsed
                    and added to the search under a name derived from the
                    string's hash.
    """
    gibbs_db = SqliteDatabase('../res/gibbs.sqlite', 'r')
    public_db = SqliteDatabase('../data/public_data.sqlite')
    report = HtmlWriter('../res/Beta2Alpha.html')
    pathologic = Pathologic(
        db=gibbs_db,
        public_db=public_db,
        html_writer=report,
        thermo=thermo,
        max_solutions=None,
        max_reactions=15,
        maximal_dG=0.0,
        thermodynamic_method=OptimizationMethods.GLOBAL,
        update_file=None)
    add_cofactor_reactions(pathologic)
    add_redox_reactions(pathologic)
    for formula in reactionList:
        reaction_name = "Auto generate #%s" % hash(formula)
        extra_reaction = Reaction.FromFormula(formula, reaction_name)
        pathologic.add_reaction(extra_reaction)
    target = Reaction.FromFormula("C00099 => C01401")
    pathologic.find_path("Beta2Alpha", target)
Beispiel #13
0
def AnalyzePareto(pathway_file, output_prefix, thermo, pH=None):
    """Analyze all pathways with GetAllOBDs and plot OBD vs. average dG.

    Two files are produced: <output_prefix>.xls, holding one sheet per
    pathway with a row per reaction, and <output_prefix>.html, holding the
    GetAllOBDs output and a scatter plot of each pathway's OBD against
    minus its average reaction dG.

    pathway_file  -- file parsed by KeggFile2PathwayList.
    output_prefix -- path prefix for the .html and .xls output files.
    thermo        -- thermodynamics object passed to GetAllOBDs.
    pH            -- pH passed to GetAllOBDs (None is passed through as-is).
    """
    pathway_list = KeggFile2PathwayList(pathway_file)
    pathway_names = [entry for (entry, _) in pathway_list]
    html_writer = HtmlWriter('%s.html' % output_prefix)
    xls_workbook = Workbook()

    logging.info("running OBD analysis for all pathways")
    data = GetAllOBDs(pathway_list, html_writer, thermo,
                      pH=pH, section_prefix="pareto", balance_water=True,
                      override_bounds={})

    header = ("reaction", "formula", "flux", "delta_r G'", "shadow price")
    for d in data:
        sheet = xls_workbook.add_sheet(d['entry'])
        for col, title in enumerate(header):
            sheet.write(0, col, title)
        for r, rid in enumerate(d['rids']):
            row = r + 1  # row 0 holds the header
            sheet.write(row, 0, rid)
            sheet.write(row, 1, d['formulas'][r])
            sheet.write(row, 2, d['fluxes'][0, r])
            sheet.write(row, 3, d['dG_r_prime'][0, r])
            sheet.write(row, 4, d['reaction prices'][r, 0])

    xls_workbook.save('%s.xls' % output_prefix)

    obds = []
    minus_avg_tg = []
    for d in data:
        obds.append(d['OBD'])
        if d['sum of fluxes']:
            minus_avg_tg.append(-d['max total dG'] / d['sum of fluxes'])
        else:
            # Avoid dividing by a zero total flux.
            minus_avg_tg.append(0)

    fig = plt.figure(figsize=(6, 6), dpi=90)
    plt.plot(minus_avg_tg, obds, 'o', figure=fig)
    # Dashed diagonal (y = x) as a visual reference.
    diag_max = max(minus_avg_tg)
    plt.plot([0, diag_max], [0, diag_max], '--g')
    for i, name in enumerate(pathway_names):
        plt.text(minus_avg_tg[i], obds[i], name)
    # Raw string: "\D" is not a valid escape sequence in a normal literal.
    plt.title(r'OBD vs. Average $\Delta_r G$')
    plt.ylim(ymin=0)
    plt.xlim(xmin=0)
    plt.xlabel(r'- Average $\Delta_r G$ [kJ/mol]')
    plt.ylabel(r'Optimized Distributed Bottleneck [kJ/mol]')
    # The closing tag now matches the opening <h2>.
    html_writer.write('<h2>Pareto figure</h2>\n')
    html_writer.embed_matplotlib_figure(fig)
    html_writer.close()
Beispiel #14
0
def example_lower_glycolysis(thermo):
    """Run the "GAP => PYR" Pathologic search for "C00118 => C00022".

    At most 8 reactions are allowed. Cofactor and redox reactions are
    added before the search.

    thermo -- thermodynamics object passed to Pathologic.
    """
    gibbs_db = SqliteDatabase('../res/gibbs.sqlite', 'r')
    public_db = SqliteDatabase('../data/public_data.sqlite')
    report = HtmlWriter('../res/pathologic.html')
    pathologic = Pathologic(
        db=gibbs_db,
        public_db=public_db,
        html_writer=report,
        thermo=thermo,
        max_solutions=None,
        max_reactions=8,
        maximal_dG=0.0,
        thermodynamic_method=OptimizationMethods.GLOBAL,
        update_file=None)
    add_cofactor_reactions(pathologic)
    add_redox_reactions(pathologic)
    # Disabled alternative target with explicit cofactors:
    # "C00003 + C00118 + C00001 => C00022 + C00004 + C00009"
    target = Reaction.FromFormula("C00118 => C00022")
    # The target reaction is used as written (Balance() is not called).
    pathologic.find_path("GAP => PYR", target)
Beispiel #15
0
def example_rpi_bypass(thermo):
    """Run the "rpi_bypass" Pathologic search for "C00117 => C01182".

    Reactions 1056 and 1081 are deleted before the search. Only cofactor
    reactions are added; the add_redox_reactions call is disabled.

    thermo -- thermodynamics object passed to Pathologic.
    """
    gibbs_db = SqliteDatabase('../res/gibbs.sqlite', 'r')
    public_db = SqliteDatabase('../data/public_data.sqlite')
    report = HtmlWriter('../res/pathologic.html')
    pathologic = Pathologic(
        db=gibbs_db,
        public_db=public_db,
        html_writer=report,
        thermo=thermo,
        max_solutions=None,
        max_reactions=10,
        maximal_dG=0.0,
        thermodynamic_method=OptimizationMethods.GLOBAL,
        update_file=None)
    add_cofactor_reactions(pathologic)

    # Reactions removed from the search:
    #   1056 -- ribose-phosphate isomerase
    #   1081 -- ribose isomerase
    for removed_rid in (1056, 1081):
        pathologic.delete_reaction(removed_rid)

    # The target reaction is used as written (Balance() is not called).
    target = Reaction.FromFormula("C00117 => C01182")
    pathologic.find_path("rpi_bypass", target)
Beispiel #16
0
def example_three_acetate(thermo):
    """Run the "three_acetate" Pathologic search for "C00031 => 3 C00033".

    Reactions 761 and 1621 are deleted before the search. Only cofactor
    reactions are added; the add_redox_reactions call is disabled.

    thermo -- thermodynamics object passed to Pathologic.
    """
    gibbs_db = SqliteDatabase('../res/gibbs.sqlite', 'r')
    public_db = SqliteDatabase('../data/public_data.sqlite')
    report = HtmlWriter('../res/pathologic.html')
    pathologic = Pathologic(
        db=gibbs_db,
        public_db=public_db,
        html_writer=report,
        thermo=thermo,
        max_solutions=None,
        max_reactions=20,
        maximal_dG=0.0,
        thermodynamic_method=OptimizationMethods.GLOBAL,
        update_file=None)
    add_cofactor_reactions(pathologic)

    # Reactions removed from the search:
    #   761  -- F6P + Pi = E4P + acetyl-P
    #   1621 -- X5P + Pi = GA3P + acetyl-P
    for removed_rid in (761, 1621):
        pathologic.delete_reaction(removed_rid)

    # The target reaction is used as written (Balance() is not called).
    target = Reaction.FromFormula("C00031 => 3 C00033")
    pathologic.find_path("three_acetate", target)
Beispiel #17
0
def compare_charges():
    """Produce ../res/groups_report.html comparing per-estimator charges.

    The 'hatzi' and 'milo' estimators are each asked for the most abundant
    pseudoisomer of every compound either of them knows (at default pH and
    T, with I = 0 and pMg = 14). One table row is built per compound; the
    written table shows the compound link, name, both charges and the
    error value read from the "gc_errors" table.
    """
    #db_public = SqliteDatabase('../data/public_data.sqlite')
    gibbs_db = SqliteDatabase('../res/gibbs.sqlite')
    print("Writing Compare Charges report to ../res/groups_report.html")
    writer = HtmlWriter("../res/groups_report.html")
    kegg = Kegg.getInstance()

    #pH, I, pMg, T = default_pH, default_I, default_pMg, default_T
    pH, I, pMg, T = default_pH, 0, 14, default_T

    # compound ID -> error value
    cid2error = {}
    for record in gibbs_db.DictReader("gc_errors"):
        cid2error[int(record['cid'])] = record['error']

    estimators = {}
    estimators['hatzi'] = Hatzi(use_pKa=False)
    estimators['milo'] = PsuedoisomerTableThermodynamics.FromDatabase(
        gibbs_db, 'gc_pseudoisomers', name='Milo Group Contribution')

    all_cids = set(lsum([e.get_all_cids() for e in estimators.values()]))
    rows = []
    for cid in all_cids:
        try:
            name = kegg.cid2name(cid)
            link = kegg.cid2compound(cid).get_link()
        except KeyError:
            name = "unknown"
            link = ""
        anchor = '<a href="%s">C%05d</a>' % (link, cid)
        row = {'cid': anchor, 'name': name, 'error': cid2error.get(cid)}
        for key, est in estimators.iteritems():
            try:
                pmap = est.cid2PseudoisomerMap(cid)
                most_abundant = pmap.GetMostAbundantPseudoisomer(
                    pH, I, pMg, T)
            except MissingCompoundFormationEnergy:
                # No formation energy: every column stays blank.
                most_abundant = ("", "", "", "", "")
            dG0, dG0_tag, nH, z, nMg = most_abundant
            row['nH_' + key] = nH
            row['charge_' + key] = z
            row['nMg_' + key] = nMg
            row['dG0_' + key] = dG0
            row['dG0_tag_' + key] = dG0_tag
        rows.append(row)

    writer.write_table(
        rows,
        headers=['cid', 'name', 'charge_hatzi', 'charge_milo', 'error'])
    writer.close()
Beispiel #18
0
def example_glycolysis(thermo):
    """Run a Pathologic search for the overall reaction
    "C00031 + 3 C00008 => 2 C00186 + 3 C00002".

    Cofactor reactions are added with free_ATP_hydrolysis=False and
    ban_toxic_compounds is applied before the search.

    thermo -- thermodynamics object passed to Pathologic.
    """
    gibbs_db = SqliteDatabase('../res/gibbs.sqlite', 'r')
    public_db = SqliteDatabase('../data/public_data.sqlite')
    report = HtmlWriter('../res/pathologic.html')
    pathologic = Pathologic(
        db=gibbs_db,
        public_db=public_db,
        html_writer=report,
        thermo=thermo,
        max_solutions=None,
        max_reactions=15,
        maximal_dG=0.0,
        thermodynamic_method=OptimizationMethods.GLOBAL,
        update_file=None)
    add_cofactor_reactions(pathologic, free_ATP_hydrolysis=False)
    ban_toxic_compounds(pathologic)
    # Disabled: add_carbon_counts(pl) and the alternative target
    # "C00031 => 6 C06265".
    target = Reaction.FromFormula(
        "C00031 + 3 C00008 => 2 C00186 + 3 C00002")
    # The target reaction is used as written (Balance() is not called).
    pathologic.find_path("GLC => 2 LAC, 3 ATP, No methylglyoxal", target)
Beispiel #19
0
def main():
    """Analyze every KEGG module and write a table sorted by OBD.

    The thermodynamics source, configuration file and output prefix come
    from the command line. Conditions and bounds are read from the
    "CONFIGURATION" entry of the configuration file, or from its first
    entry when that one is absent. Modules for which AnalyzeKeggModule
    raises KeyError are left out of the table.

    Raises ValueError when the configuration file has no entries.
    """
    estimators = LoadAllEstimators()
    args = MakeArgParser(estimators).parse_args()

    thermo = estimators[args.thermodynamics_source]

    config = ParsedKeggFile.FromKeggFile(args.config_fname)
    entry_names = config.entries()
    if len(entry_names) == 0:
        raise ValueError('No entries in configuration file')
    if 'CONFIGURATION' in entry_names:
        entry = 'CONFIGURATION'
    else:
        logging.warning(
            'Configuration file does not contain the entry "CONFIGURATION". '
            'Using the first entry by default: %s' % entry_names[0])
        entry = entry_names[0]
    pathway_data = PathwayData.FromFieldMap(config[entry])
    thermo.SetConditions(pH=pathway_data.pH,
                         I=pathway_data.I,
                         T=pathway_data.T,
                         pMg=pathway_data.pMg)
    thermo.c_range = pathway_data.c_range
    bounds = pathway_data.GetBounds()

    report = HtmlWriter(args.output_prefix + ".html")

    table_rows = []
    columns = ['Module', 'Name', 'OBD [kJ/mol]', 'Length']
    kegg = Kegg.getInstance()
    for mid in kegg.get_all_mids():
        # The heading doubles as the anchor the table links to.
        heading = '<h2 id=M%05d>M%05d: %s</h2>' % (
            mid, mid, kegg.get_module_name(mid))
        report.write(heading)
        try:
            module_row = AnalyzeKeggModule(thermo, mid, bounds, report)
        except KeyError:
            continue
        module_row['Module'] = '<a href="#M%05d">M%05d</a>' % (mid, mid)
        module_row['Name'] = kegg.get_module_name(mid)
        table_rows.append(module_row)

    table_rows.sort(key=lambda r: r['OBD [kJ/mol]'])
    report.write_table(table_rows, columns, decimal=1)
    report.close()
Beispiel #20
0
def example_more_than_two_pyruvate(thermo):
    """Run the "more_than_two_pyr" Pathologic search.

    The target reaction is
    "3 C00031 + 3 C00011 + C00003 => 7 C00022 + 3 C00001 + C00004",
    on which Balance() is called before the search. No cofactor, XTP or
    redox reactions are added and no reactions are deleted (all of those
    calls are disabled).

    thermo -- thermodynamics object passed to Pathologic.
    """
    gibbs_db = SqliteDatabase('../res/gibbs.sqlite', 'r')
    public_db = SqliteDatabase('../data/public_data.sqlite')
    report = HtmlWriter('../res/pathologic.html')
    pathologic = Pathologic(
        db=gibbs_db,
        public_db=public_db,
        html_writer=report,
        thermo=thermo,
        max_solutions=None,
        max_reactions=20,
        maximal_dG=0.0,
        thermodynamic_method=OptimizationMethods.GLOBAL,
        update_file=None)

    target = Reaction.FromFormula(
        "3 C00031 + 3 C00011 + C00003 => 7 C00022 + 3 C00001 + C00004")
    target.Balance()
    pathologic.find_path("more_than_two_pyr", target)
Beispiel #21
0
def example_glucose_to_ethanol_and_formate(thermo):
    """Run the "glucose_to_ethanol_and_formate" Pathologic search.

    The target reaction is "2 C00031 + 3 C00001 => 6 C00058 + 3 C00469",
    on which Balance() is called before the search. No cofactor, XTP or
    redox reactions are added and no reactions are deleted (all of those
    calls are disabled).

    thermo -- thermodynamics object passed to Pathologic.
    """
    gibbs_db = SqliteDatabase('../res/gibbs.sqlite', 'r')
    public_db = SqliteDatabase('../data/public_data.sqlite')
    report = HtmlWriter('../res/pathologic.html')
    pathologic = Pathologic(
        db=gibbs_db,
        public_db=public_db,
        html_writer=report,
        thermo=thermo,
        max_solutions=None,
        max_reactions=15,
        maximal_dG=0.0,
        thermodynamic_method=OptimizationMethods.GLOBAL,
        update_file=None)

    target = Reaction.FromFormula(
        "2 C00031 + 3 C00001 => 6 C00058 + 3 C00469")
    target.Balance()
    pathologic.find_path("glucose_to_ethanol_and_formate", target)
Beispiel #22
0
def AnalyzeConcentrationGradient(pathway_file, output_prefix, thermo, conc_range, cids=[], pH=None):
    """Compute every pathway's OBD over a range of compound concentrations.

    For each concentration, all compounds in `cids` are pinned to that
    value through `override_bounds` and GetAllOBDs is run on all pathways.
    Results go to <output_prefix>.csv (one row per concentration) and to
    <output_prefix>.html (an initial test run plus a summary figure).

    pathway_file  -- file parsed by KeggFile2PathwayList.
    output_prefix -- path prefix for the .html and .csv output files.
    thermo        -- thermodynamics object; its kegg is used for names.
    conc_range    -- parsed with ParseConcentrationRange; the parsed values
                     are used as -log10 of the concentration.
    cids          -- compound IDs whose concentration is scanned (the
                     default list is only read, never modified).
    pH            -- pH passed to GetAllOBDs (None is passed through as-is).
    """
    compound_names = ','.join([thermo.kegg.cid2name(cid) for cid in cids])
    pathway_list = KeggFile2PathwayList(pathway_file)
    pathway_names = [entry for (entry, _) in pathway_list]
    html_writer = HtmlWriter('%s.html' % output_prefix)

    # run once just to make sure that the pathways are all working:
    logging.info("testing all pathways with default concentrations")
    data = GetAllOBDs(pathway_list, html_writer, thermo,
                      pH=pH, section_prefix="test", balance_water=True,
                      override_bounds={})

    # The file handle is kept so the CSV is closed (and flushed) even if
    # the scan fails part-way.
    csv_file = open('%s.csv' % output_prefix, 'w')
    try:
        csv_output = csv.writer(csv_file)
        csv_output.writerow(['pH', '[' + compound_names + ']'] + pathway_names)

        # conc_range holds -log10 of the concentrations to scan
        conc_vec = 10**(-ParseConcentrationRange(conc_range))
        override_bounds = {}

        obd_mat = []
        for conc in conc_vec.flat:
            for cid in cids:
                override_bounds[cid] = (conc, conc)
            logging.info("[%s] = %.1e M" % (compound_names, conc))
            data = GetAllOBDs(pathway_list, html_writer=None, thermo=thermo,
                              pH=pH, section_prefix="", balance_water=True,
                              override_bounds=override_bounds)
            obds = [d['OBD'] for d in data]
            obd_mat.append(obds)
            csv_output.writerow([data[0]['pH'], conc] + obds)
    finally:
        csv_file.close()
    # rows are concentrations, columns are pathways
    obd_mat = np.matrix(obd_mat)

    fig = plt.figure(figsize=(6, 6), dpi=90)
    colormap = color.ColorMap(pathway_names)
    for i, name in enumerate(pathway_names):
        plt.plot(conc_vec, obd_mat[:, i], '-', color=colormap[name],
                 figure=fig)
    plt.title("OBD vs. [%s]" % (compound_names), figure=fig)
    plt.xscale('log')
    plt.ylim(ymin=0)
    plt.xlabel('[%s] (in M)' % compound_names, figure=fig)
    plt.ylabel('Optimized Distributed Bottleneck [kJ/mol]', figure=fig)
    plt.legend(pathway_names)
    # The closing tag now matches the opening <h2>.
    html_writer.write('<h2>Summary figure</h2>\n')
    html_writer.embed_matplotlib_figure(fig)
    html_writer.close()
Beispiel #23
0
def calc_cons_rxns_corr(thermo, name):
    """Plot the reversibility values of consecutive reaction pairs.

    The pairs come from get_reversibility_consecutive_pairs and are drawn
    twice with cons_pairs_dot_plot: once with xlim=50 and once with
    xlim=10. Both figures are embedded in
    ../res/<name>_rev_pair_corr.html and saved as PNG files.

    thermo -- thermodynamics object passed to the pair calculation.
    name   -- string used in the output file names, the page heading and
              as the id passed to the pair calculation.
    """
    res_prefix = '../res/' + name
    html_fname = res_prefix + '_rev_pair_corr.html'
    logging.info('Writing HTML output to %s', html_fname)
    report = HtmlWriter(html_fname)
    mid_concentration = 1e-4
    concentration_map = GetConcentrationMap()
    pH = 7.0
    pMg = 3.0
    I = 0.25
    T = 298.15

    first, second = get_reversibility_consecutive_pairs(
        thermo, mid_concentration, pH, pMg, I, T,
        cmap=concentration_map, id=name)
    report.write('<h1>' + name + ': Constrained co-factors</h1><br>')

    # Full-range plot.
    wide_fig = cons_pairs_dot_plot(first, second, xlim=50)
    report.embed_matplotlib_figure(wide_fig, width=640, height=480)
    pylab.savefig(res_prefix + '_rev_pairs_corr.png',
                  figure=wide_fig, format='png')

    # Zoomed-in plot of the same data.
    zoom_fig = cons_pairs_dot_plot(first, second, xlim=10)
    report.embed_matplotlib_figure(zoom_fig, width=640, height=480)
    pylab.savefig(res_prefix + '_rev_pairs_corr_zoom.png',
                  figure=zoom_fig, format='png')
Beispiel #24
0
def example_formate(thermo, product_cid=22, co2_conc=1e-5):
    """Run a Pathologic search from formate to the given product.

    The bounds of compounds 11 and 288 in `thermo.bounds` are first fixed:
    compound 11 to `co2_conc`, and compound 288 to the concentration
    derived from it through the transformed energy of
    "C00011 + C00001 => C00288" at default_T.

    thermo      -- thermodynamics object; its `bounds` are modified.
    product_cid -- compound ID of the product of the target reaction.
    co2_conc    -- concentration assigned to compound 11.
    """
    co2_hydration = Reaction.FromFormula("C00011 + C00001 => C00288")
    hydration_energies = thermo.GetTransfromedKeggReactionEnergies(
        [co2_hydration])
    co2_hydration_dG0_prime = float(hydration_energies)
    equilibrium_ratio = np.exp(-co2_hydration_dG0_prime / (R*default_T))
    carbonate_conc = co2_conc * equilibrium_ratio
    thermo.bounds[11] = (co2_conc, co2_conc)
    thermo.bounds[288] = (carbonate_conc, carbonate_conc)

    gibbs_db = SqliteDatabase('../res/gibbs.sqlite', 'r')
    public_db = SqliteDatabase('../data/public_data.sqlite')
    report = HtmlWriter('../res/pathologic.html')
    pathologic = Pathologic(
        db=gibbs_db,
        public_db=public_db,
        html_writer=report,
        thermo=thermo,
        max_solutions=None,
        max_reactions=20,
        maximal_dG=0.0,
        thermodynamic_method=OptimizationMethods.GLOBAL,
        update_file=None)
    add_cofactor_reactions(pathologic, free_ATP_hydrolysis=True)
    add_redox_reactions(pathologic, NAD_only=False)

    # Reactions removed from the search, in this order:
    removed_reactions = (
        134,   # formate:NADP+ oxidoreductase
        519,   # Formate:NAD+ oxidoreductase
        24,    # Rubisco
        581,   # L-serine:NAD+ oxidoreductase (deaminating)
        220,   # L-serine ammonia-lyase
        13,    # glyoxylate carboxy-lyase (dimerizing; tartronate-semialdehyde-forming)
        585,   # L-Serine:pyruvate aminotransferase
        1440,  # D-Xylulose-5-phosphate:formaldehyde glycolaldehydetransferase
        5338,  # 3-hexulose-6-phosphate synthase
    )
    for rid in removed_reactions:
        pathologic.delete_reaction(rid)

    # Uptake reactions added to the search.
    uptake_reactions = (
        ("C06265 => C00011", "CO2 uptake"),
        ("C06265 => C00288", "carbonate uptake"),
        ("C06265 => C00058", "formate uptake"),
    )
    for formula, uptake_name in uptake_reactions:
        pathologic.add_reaction(
            Reaction.FromFormula(formula, name=uptake_name))

    # at least one formate to product; Balance() is not called
    target = Reaction.FromFormula(
        "5 C06265 + C00058 => C%05d" % product_cid)

    kegg = Kegg.getInstance()
    pathologic.find_path("formate to %s" % kegg.cid2name(product_cid), target)
Beispiel #25
0
def AnalyzeConcentrationGradient(prefix, thermo, csv_output_fname, cid=13): # default compound is PPi
    """Scan the concentration of one compound and plot the pareto results.

    The compound `cid` is pinned to each concentration in turn through
    `override_bounds`; `pareto` is then run on the pathways in
    ../data/thermodynamics/<prefix>.txt and column 1 of its data is kept.
    The values are plotted against concentration and embedded in
    ../res/<prefix>.html.

    prefix           -- base name of the input .txt and the output .html.
    thermo           -- thermodynamics object; its kegg, I and T are read.
    csv_output_fname -- path of a CSV file to write one row per
                        concentration to; a false value disables CSV output.
    cid              -- compound ID whose concentration is scanned.
    """
    compound_name = thermo.kegg.cid2name(cid)
    kegg_file = ParsedKeggFile.FromKeggFile('../data/thermodynamics/%s.txt' % prefix)
    html_writer = HtmlWriter('../res/%s.html' % prefix)
    null_html_writer = NullHtmlWriter()

    # The CSV output is optional; keep the file handle so it can be closed.
    csv_file = None
    csv_output = None
    if csv_output_fname:
        csv_file = open(csv_output_fname, 'w')
        csv_output = csv.writer(csv_file)
        csv_output.writerow(['pH', 'I', 'T', '[C%05d]' % cid] + kegg_file.entries())

    pH_vec = np.array([7]) # this needs to be fixed so that the txt file will set the pH
    conc_vec = 10**(-np.arange(2, 6.0001, 0.25)) # logarithmic scale between 10 mM and 1 uM
    override_bounds = {}

    fig = plt.figure(figsize=(6, 6), dpi=90)
    legend = []
    try:
        for pH in pH_vec.flat:
            obd_vec = []
            for conc in conc_vec.flat:
                override_bounds[cid] = (conc, conc)
                logging.info("pH = %g, [%s] = %.1e M" % (pH, compound_name, conc))
                data, labels = pareto(kegg_file, null_html_writer, thermo,
                    pH=pH, section_prefix="", balance_water=True,
                    override_bounds=override_bounds)
                obd_vec.append(data[:, 1])
                # Without this guard a missing CSV file name crashed here.
                if csv_output is not None:
                    csv_output.writerow([pH, thermo.I, thermo.T, conc] + list(data[:, 1].flat))
            obd_mat = np.matrix(obd_vec) # one row per concentration
            plt.plot(conc_vec, obd_mat, '.-', figure=fig)
            legend += ['%s, pH = %g' % (l, pH) for l in labels]
    finally:
        if csv_file is not None:
            csv_file.close()

    plt.title("ODB vs. [%s] (I = %gM, T = %gK)" % (compound_name, thermo.I, thermo.T), figure=fig)
    plt.xscale('log')
    plt.xlabel('Concentration of %s [M]' % thermo.kegg.cid2name(cid), figure=fig)
    plt.ylabel('Optimized Distributed Bottleneck [kJ/mol]', figure=fig)
    plt.legend(legend)
    # The closing tag now matches the opening <h2>.
    html_writer.write('<h2 id="figure_%s">Summary figure</h2>\n' % prefix)
    html_writer.embed_matplotlib_figure(fig, name=prefix)

    html_writer.close()
Beispiel #26
0
def main():
    """Analyze every KEGG module and write an HTML report with a summary table.

    The conditions (pH, I, T, pMg), the concentration range and the bounds are
    taken from the 'CONFIGURATION' entry of the configuration file, or from its
    first entry when no such entry exists. Modules for which AnalyzeKeggModule
    raises KeyError are left out of the table, which is sorted in ascending
    order of 'OBD [kJ/mol]'.

    Raises:
        ValueError: if the configuration file has no entries.
    """
    estimators = LoadAllEstimators()
    args = MakeArgParser(estimators).parse_args()
    thermo = estimators[args.thermodynamics_source]

    kegg_file = ParsedKeggFile.FromKeggFile(args.config_fname)
    entries = kegg_file.entries()
    if len(entries) == 0:
        raise ValueError('No entries in configuration file')

    if 'CONFIGURATION' in entries:
        entry = 'CONFIGURATION'
    else:
        logging.warning('Configuration file does not contain the entry "CONFIGURATION". '
                        'Using the first entry by default: %s' % entries[0])
        entry = entries[0]

    p_data = PathwayData.FromFieldMap(kegg_file[entry])
    thermo.SetConditions(pH=p_data.pH, I=p_data.I, T=p_data.T, pMg=p_data.pMg)
    thermo.c_range = p_data.c_range
    bounds = p_data.GetBounds()

    html_writer = HtmlWriter(args.output_prefix + ".html")

    table_headers = ['Module', 'Name', 'OBD [kJ/mol]', 'Length']
    table_rows = []
    kegg = Kegg.getInstance()
    for mid in kegg.get_all_mids():
        # every module gets a section header, even if its analysis fails below
        section_title = '<h2 id=M%05d>M%05d: %s</h2>' % (
            mid, mid, kegg.get_module_name(mid))
        html_writer.write(section_title)
        try:
            row = AnalyzeKeggModule(thermo, mid, bounds, html_writer)
        except KeyError:
            continue
        row['Module'] = '<a href="#M%05d">M%05d</a>' % (mid, mid)
        row['Name'] = kegg.get_module_name(mid)
        table_rows.append(row)

    table_rows.sort(key=lambda row: row['OBD [kJ/mol]'])
    html_writer.write_table(table_rows, table_headers, decimal=1)
    html_writer.close()
Beispiel #27
0
def runPathologic(thermo, reactionList):
    """Set up a Pathologic instance and search for paths for 2 C00288 => C00048.

    Args:
        thermo: thermodynamics object handed to Pathologic.
        reactionList: reaction formula strings that are added to the
            search space on top of the cofactor and redox reactions.
    """
    gibbs_db = SqliteDatabase('../res/gibbs.sqlite', 'r')
    public_db = SqliteDatabase('../data/public_data.sqlite')
    report = HtmlWriter('../res/mog_finder.html')
    pl = Pathologic(db=gibbs_db,
                    public_db=public_db,
                    html_writer=report,
                    thermo=thermo,
                    max_solutions=None,
                    max_reactions=15,
                    maximal_dG=-3.0,
                    thermodynamic_method=OptimizationMethods.GLOBAL,
                    update_file=None)
    add_cofactor_reactions(pl)
    add_redox_reactions(pl)

    for formula in reactionList:
        description = "Auto generate #%s" % hash(formula)
        pl.add_reaction(Reaction.FromFormula(formula, description))

    # reactions removed from the search space
    for rid in (134, 344, 575, 212):
        pl.delete_reaction(rid)

    #pl.add_reaction(Reaction.FromFormula('C00149 + C00006 <=> C00036 + C00005 + C00080',
    #                                     'malate + NADP+ = oxaloacetate + NADPH',343))
    #pl.add_reaction(Reaction.FromFormula('C00222 + C00010 + C00006 <=> C00083 + C00005',
    #                                     'malonate-semialdehyde + CoA + NADP+ = malonyl-CoA + NADPH',740))
    net_reaction = Reaction.FromFormula("2 C00288 => C00048")
    pl.find_path("MOG_finder", net_reaction)
Beispiel #28
0
    def find_path(self, experiment_name, net_reaction):
        """Find a pathway from the source to the target.

        A minimal-total-flux LP is solved first. If it has a solution, a MILP
        with the thermodynamic constraints is solved repeatedly, banning each
        solution that was found, until no solution is left or
        self.max_solutions solutions have been written. The report is written
        to '../res/pathologic/<experiment_name>.html' and the LP files and
        'kegg_pathway.txt' to '../res/pathologic/<experiment_name>/'.

        The experiment HTML writer and the KEGG output file are closed on every
        exit path, including the early return when there is no solution.

        Args:
            experiment_name: a name given to this experiment.
            net_reaction: a Reaction describing the net reaction for the desired paths
        """
        dirname = os.path.join('../res/pathologic/', experiment_name)
        logging.info('Writing output to: %s' % dirname)
        util._mkdir(dirname)

        self.html_writer.write('<a href="pathologic/' + experiment_name + '.html">' + experiment_name + '</a><br>\n')
        exp_html = HtmlWriter('../res/pathologic/' + experiment_name + '.html')
        try:
            exp_html.write("<p><h1>%s</h1>\n" % experiment_name)

            exp_html.insert_toggle(div_id="__parameters__", start_here=True,
                                   label='Show Parameters')

            f, S, compounds, reactions = self.kegg_pathologic.get_unique_cids_and_reactions()

            exp_html.write('<h2>Conditions:</h2>\n')
            exp_html.write_ul(['Optimization method: %s' % self.thermodynamic_method,
                               'Concentration range: %g M < C < %g M' % (self.thermo.c_range[0], self.thermo.c_range[1]),
                               "Max &Delta;<sub>r</sub>G' = %.1f" % self.maximal_dG,
                               'pH = %g' % self.thermo.pH,
                               'I = %g' % self.thermo.I,
                               'T = %g' % self.thermo.T,
                               # None (= unlimited) is reported as -1
                               'Max no. reactions: %d' % (self.max_reactions or -1),
                               'Max no. solutions: %d' % (self.max_solutions or -1),
                               'Overall Reaction: %s' % net_reaction.to_hypertext(),
                               '%d reactions' % len(reactions),
                               '%d unique compounds' % len(compounds)])

            exp_html.div_end()
            exp_html.write('</br>\n')

            logging.debug("All compounds:")
            for i, compound in enumerate(compounds):
                logging.debug("%05d) C%05d = %s" % (i, compound.cid, compound.name))
            logging.debug("All reactions:")
            for i, reaction in enumerate(reactions):
                logging.debug("%05d) R%05d = %s" % (i, reaction.rid, str(reaction)))

            output_kegg_file = open(dirname + '/kegg_pathway.txt', 'w')
            try:
                exp_html.write('<a href="%s/kegg_pathway.txt">All solutions in KEGG format</a></br>\n'
                               % experiment_name)

                # Find a solution with a minimal total flux
                logging.info("Preparing LP solver for the minimal total flux problem")
                exp_html.write('<b>Minimum flux</b>')
                slip = Stoichiometric_LP("Pathologic")
                slip.add_stoichiometric_constraints(f, S, compounds, reactions, net_reaction)
                slip.export("../res/pathologic/%s/%03d_lp.txt" % (experiment_name, 0))
                exp_html.write(' (<a href="%s/%03d_lp.txt">LP file</a>): ' % (experiment_name, 0))
                logging.info("Solving")
                if not slip.solve():
                    exp_html.write("<b>There are no solutions!</b>")
                    logging.warning("There are no solutions. Quitting!")
                    return
                logging.info("writing solution")
                self.write_current_solution(exp_html, slip, experiment_name)

                logging.info("Preparing MILP solver")
                milp = Stoichiometric_LP("Pathologic")
                milp.solution_index = 1
                milp.add_stoichiometric_constraints(f, S, compounds, reactions, net_reaction)
                milp.add_milp_variables()
                if self.max_reactions is not None:
                    milp.add_reaction_num_constraint(self.max_reactions)

                if self.thermodynamic_method == OptimizationMethods.LOCALIZED:
                    milp.add_localized_dGf_constraints(self.thermo)
                else:
                    milp.add_dGr_constraints(self.thermo,
                                             optimization=self.thermodynamic_method,
                                             maximal_dG=self.maximal_dG)

                index = 0
                while (self.max_solutions is None) or (index < self.max_solutions):
                    index += 1
                    # create the MILP problem to constrain the previous solutions not to reappear again.
                    logging.info("Round %03d, solving using MILP" % (milp.solution_index))
                    milp.export("../res/pathologic/%s/%03d_lp.txt" % (experiment_name, milp.solution_index))
                    # link to the file that was just exported: use the same
                    # counter as the export above so the href always matches
                    exp_html.write('<b>Solution #%d</b> (<a href="%s/%03d_lp.txt">LP file</a>): '  % (index, experiment_name, milp.solution_index))
                    if not milp.solve():
                        exp_html.write("<b>No solution found</b>")
                        logging.info("No more solutions. Quitting!")
                        break
                    logging.info("writing solution")
                    self.write_current_solution(exp_html, milp, experiment_name,
                                                output_kegg_file)
                    milp.ban_current_solution()
            finally:
                output_kegg_file.close()
        finally:
            exp_html.close()
def main():
    """Write '../res/nist/report.html' comparing several estimators to NIST data.

    Only the first branch (`if True:`) of the chain below is executed: it loads
    the NIST data using Alberty's table and calls verify_results for Alberty,
    Hatzimanikatis and the group contribution object ("Milo"). All the
    `elif False:` branches are disabled experiments that are never run.
    """
    db = database.SqliteDatabase('../res/gibbs.sqlite')
    html_writer = HtmlWriter("../res/nist/report.html")
    gc = GroupContribution(db)
    gc.override_gc_with_measurements = True
    gc.init()
    grad = GradientAscent(gc)
    nist = Nist(db, html_writer, gc.kegg())
    nist.FromDatabase()
    alberty = Alberty()
    hatzi = Hatzi()

    if True:
        grad.load_nist_data(nist,
                            alberty,
                            skip_missing_reactions=False,
                            T_range=(298, 314))
        grad.verify_results("Alberty", alberty, html_writer)

        #grad.write_pseudoisomers("../res/nist/nist_dG0_f.csv")

        #html_writer.write("<h2>Using Group Contribution (Hatzimanikatis' implementation)</h2>")
        #html_writer.write("<h3>Correlation with the reduced NIST database (containing only compounds that appear in Alberty's list)</h3>")
        #logging.info("calculate the correlation between Hatzimanikatis' predictions and the reduced NIST database")
        #grad.verify_results("Hatzimanikatis_Reduced", hatzi, html_writer)

        # NOTE(review): the reloads below are commented out, so the two
        # verify_results calls that follow presumably run on the data that was
        # loaded with Alberty's table above -- confirm this is intended.
        #grad.load_nist_data(nist, hatzi, skip_missing_reactions=True, T_range=(298, 314))
        grad.verify_results("Hatzimanikatis", hatzi, html_writer)

        #grad.load_nist_data(nist, gc, skip_missing_reactions=True, T_range=(298, 314))
        grad.verify_results("Milo", gc, html_writer)
    elif False:
        # Run the gradient ascent algorithm, where the starting point is the same file used for training the GC algorithm
        grad.load_dG0_data("../data/thermodynamics/dG0.csv")
        # load the data for the anchors (i.e. compounds whose dG0 should not be changed - usually their value will be 0).
        grad.anchors = grad.load_dG0_data(
            "../data/thermodynamics/nist_anchors.csv")
        grad.load_nist_data(nist, grad, skip_missing_reactions=True)
        print "Training %d compounds using %d reactions: " % (len(
            grad.cid2pmap_dict.keys()), len(grad.data))
        grad.hill_climb(max_i=20000)
        grad.save_energies(grad.gc.comm, "gradient_cid2prm")
        grad.verify_results("gradient1")

    elif False:
        # Run the gradient ascent algorithm, where the starting point is Alberty's table from (Mathematica 2006)
        grad.load_nist_data(nist, alberty, skip_missing_reactions=True)
        print "Training %d compounds using %d reactions: " % (len(
            grad.cid2pmap_dict.keys()), len(grad.data))
        grad.cid2pmap_dict = alberty.cid2pmap_dict
        grad.hill_climb(max_i=20000)
        grad.save_energies(grad.gc.comm, "gradient_cid2prm")
        grad.verify_results("gradient2")

    elif False:
        # Run the gradient ascent algorithm, where the starting point is Alberty's table from (Mathematica 2006)
        # Use DETERMINISTIC gradient ascent
        grad.load_nist_data(nist,
                            alberty,
                            skip_missing_reactions=True,
                            T_range=(24 + 273.15, 40 + 273.15))
        print "Training %d compounds using %d reactions: " % (len(
            grad.cid2pmap_dict.keys()), len(grad.data))
        grad.cid2pmap_dict = alberty.cid2pmap_dict
        grad.deterministic_hill_climb(max_i=200)
        grad.save_energies(grad.gc.comm, "gradient_cid2prm")
        grad.verify_results("gradient_deterministic")

    elif False:
        # Run the gradient ascent algorithm, where the starting point arbitrary (predict all of the NIST compounds)
        grad = GradientAscent(gc)
        grad.load_nist_data(nist, skip_missing_reactions=False)
        print "Training %d compounds using %d reactions: " % (len(
            grad.cid2pmap_dict.keys()), len(grad.data))
        grad.hill_climb(max_i=20000)
        grad.save_energies(grad.gc.comm, "gradient_cid2prm")
        grad.verify_results("gradient3")

    elif False:  # Use Alberty's table from (Mathematica 2006) to calculate the dG0 of all possible reactions in KEGG
        grad = GradientAscent(gc)
        grad.cid2pmap_dict = alberty.cid2pmap_dict
        (pH, I, T) = (7, 0, 300)
        counter = 0
        for rid in grad.kegg.get_all_rids():
            sparse_reaction = grad.kegg.rid2sparse_reaction(rid)
            try:
                dG0 = grad.reaction_to_dG0(sparse_reaction, pH, I, T)
                print "R%05d: dG0_r = %.2f [kJ/mol]" % (rid, dG0)
                counter += 1
            except MissingCompoundFormationEnergy as e:
                # reactions with a missing formation energy are skipped
                # and not counted
                #print "R%05d: missing formation energy of C%05d" % (rid, e.cid)
                pass
        print "Managed to calculate the dG0 of %d reactions" % counter

    elif False:
        # Write a table of the pseudoisomers (nH, z) of every compound that
        # appears in the NIST data, both to the HTML report and to a CSV file.
        util._mkdir("../res/nist/fig")
        # NOTE(review): the file object handed to csv.writer is never closed.
        csv_writer = csv.writer(open("../res/nist/pseudoisomers.csv", "w"))

        cid_set = set()
        for row in nist.data:
            sparce_reaction = row['sparse']
            cid_set.update(sparce_reaction.keys())

        html_writer.write("<table border=1>\n")
        for cid in sorted(list(cid_set)):
            html_writer.write("  <tr><td>C%05d</td><td>%s</td><td>" %
                              (cid, grad.kegg.cid2name(cid)))
            try:
                mol = grad.kegg.cid2mol(cid)
                img_fname = '../res/nist/fig/C%05d.png' % cid
                # NOTE(review): the image is embedded before mol.draw() is
                # called with the same filename -- confirm the order.
                html_writer.embed_img(img_fname, "C%05d" % cid)
                mol.draw(show=False, filename=img_fname)
            except AssertionError as e:
                html_writer.write("WARNING: cannot draw C%05d - %s" %
                                  (cid, str(e)))
            except KeggParseException as e:
                html_writer.write("WARNING: cannot draw C%05d - %s" %
                                  (cid, str(e)))
            html_writer.write("</td><td>")
            if (cid in alberty.cid2pmap_dict):
                for (nH, z) in alberty.cid2pmap_dict[cid].keys():
                    html_writer.write("(nH=%d, z=%d)<br>" % (nH, z))
                    csv_writer.writerow((cid, nH, z))
            else:
                # not in Alberty's table: fall back to the values from KEGG
                nH = grad.kegg.cid2num_hydrogens(cid)
                z = grad.kegg.cid2charge(cid)
                html_writer.write("unknown pseudoisomers<br>")
                html_writer.write("(nH=%d, z=%d)" % (nH, z))
                csv_writer.writerow((cid, nH, z))

            html_writer.write("</td></tr>\n")
        html_writer.write("</table>\n")
    html_writer.close()
    args, _ = MakeOpts(estimators).parse_args(sys.argv)
    input_filename = os.path.abspath(args.input_filename)
    output_filename = os.path.abspath(args.output_filename)
    if not os.path.exists(input_filename):
        logging.fatal('Input filename %s doesn\'t exist' % input_filename)
        
    print 'Will read pathway definitions from %s' % input_filename
    print 'Will write output to %s' % output_filename
    
    db_loc = args.db_filename
    print 'Reading from DB %s' % db_loc
    db = SqliteDatabase(db_loc)

    thermo = estimators[args.thermodynamics_source]
    print "Using the thermodynamic estimations of: " + thermo.name
    
    kegg = Kegg.getInstance()
    thermo.bounds = deepcopy(kegg.cid2bounds)
    
    dirname = os.path.dirname(output_filename)
    if not os.path.exists(dirname):
        print 'Making output directory %s' % dirname
        _mkdir(dirname)
    
    print 'Executing thermodynamic pathway analysis'
    html_writer = HtmlWriter(output_filename)
    thermo_analyze = ThermodynamicAnalysis(db, html_writer, thermodynamics=thermo)
    thermo_analyze.analyze_pathway(input_filename)

    
Beispiel #31
0
                continue
            if self.override_pMg or self.override_I or self.override_T:
                nist_row_copy = nist_row_data.Clone()
                if self.override_pMg:
                    nist_row_copy.pMg = self.override_pMg
                if self.override_I:
                    nist_row_copy.I = self.override_I
                if self.override_T:
                    nist_row_copy.T = self.override_T
                rows.append(nist_row_copy)
            else:
                rows.append(nist_row_data)
        return rows
    
    def GetUniqueReactionSet(self):
        """Return the set of distinct `reaction` values over all rows of self.data."""
        unique_reactions = set()
        for row in self.data:
            unique_reactions.add(row.reaction)
        return unique_reactions


if __name__ == '__main__':
    # Script entry point: write the KEGG compound IDs returned by
    # Nist.GetAllCids() to a text file (one 'C%05d' ID per line), then run
    # AnalyzeStats and AnalyzeConnectivity with an HTML writer.
    #logging.getLogger('').setLevel(logging.DEBUG)
    _mkdir("../res/nist")
    html_writer = HtmlWriter("../res/nist/statistics.html")
    nist = Nist()
    # 'with' closes the file even if GetAllCids() or a write raises
    with open('../res/nist_kegg_ids.txt', 'w') as fp:
        for cid in nist.GetAllCids():
            fp.write("C%05d\n" % cid)
    nist.AnalyzeStats(html_writer)
    nist.AnalyzeConnectivity(html_writer)
    html_writer.close()
Beispiel #32
0
def main():
    """Write the NIST report: RMSE and reaction coverage of each estimator.

    For each of the estimators listed below, nist.verify_results() is run and
    the number of estimations and the RMSE are written to
    '../res/nist/report.html'. A summary table also gives the coverage of
    each estimator over three reaction lists (KEGG, FEIST and NIST).
    """
    html_writer = HtmlWriter("../res/nist/report.html")
    estimators = LoadAllEstimators()
    nist = Nist()
    nist.T_range = (273.15 + 24, 273.15 + 40)
    #nist.override_I = 0.25
    #nist.override_pMg = 14.0
    #nist.override_T = 298.15
    
    html_writer.write('<p>\n')
    html_writer.write("Total number of reaction in NIST: %d</br>\n" % len(nist.data))
    html_writer.write("Total number of reaction in range %.1fK < T < %.1fK: %d</br>\n" % \
                      (nist.T_range[0], nist.T_range[1], len(nist.SelectRowsFromNist())))
    html_writer.write('</p>\n')

    reactions = {}
    reactions['KEGG'] = []
    # keep only the KEGG reactions that can be balanced
    for reaction in Kegg.getInstance().AllReactions():
        try:
            reaction.Balance(balance_water=True, exception_if_unknown=True)
            reactions['KEGG'].append(reaction)
        except (KeggReactionNotBalancedException, KeggParseException, OpenBabelError):
            pass
        
    reactions['FEIST'] = Feist.FromFiles().reactions
    reactions['NIST'] = nist.GetUniqueReactionSet()
    
    # pairwise comparisons are currently disabled (the list is empty)
    pairs = []
    #pairs += [('hatzi_gc', 'UGC')], ('PGC', 'PRC'), ('alberty', 'PRC')]
    for t1, t2 in pairs:
        logging.info('Writing the NIST report for %s vs. %s' % 
                     (estimators[t1].name, estimators[t2].name))
        html_writer.write('<p><b>%s vs. %s</b> ' % 
                     (estimators[t1].name, estimators[t2].name))
        html_writer.insert_toggle(start_here=True)
        two_way_comparison(html_writer=html_writer, 
                           thermo1=estimators[t1],
                           thermo2=estimators[t2],
                           reaction_list=reactions['FEIST'],
                           name='%s_vs_%s' % (t1, t2))
        html_writer.div_end()
        html_writer.write('</p>')
    
    if False:
        estimators['alberty'].CompareOverKegg(html_writer, 
                                              other=estimators['PRC'],
                                              fig_name='kegg_compare_alberty_vs_nist')
    
    # first table row: the total size of each reaction list
    rowdicts = []
    rowdict = {'Method': 'Total'}
    for db_name, reaction_list in reactions.iteritems():
        rowdict[db_name + ' coverage'] = len(reaction_list)
    rowdicts.append(rowdict)
    
    for name in ['UGC', 'PGC', 'PRC', 'alberty', 'merged', 'hatzi_gc']:
        thermo = estimators[name]
        logging.info('Writing the NIST report for %s' % thermo.name)
        html_writer.write('<p><b>%s</b> ' % thermo.name)
        html_writer.insert_toggle(start_here=True)
        num_estimations, rmse = nist.verify_results(html_writer=html_writer, 
                                                    thermodynamics=thermo,
                                                    name=name)
        html_writer.div_end()
        html_writer.write('N = %d, RMSE = %.1f</p>\n' % (num_estimations, rmse))
        logging.info('N = %d, RMSE = %.1f' % (num_estimations, rmse))
        
        rowdict = {'Method':thermo.name,
            'RMSE (kJ/mol)':"%.1f (N=%d)" % (rmse, num_estimations)}
        for db_name, reaction_list in reactions.iteritems():
            n_covered = thermo.CalculateCoverage(reaction_list)
            n_reactions = len(reaction_list)
            # an empty reaction list is reported as 0% instead of raising
            # ZeroDivisionError
            percent = n_covered * 100.0 / n_reactions if n_reactions else 0.0
            rowdict[db_name + " coverage"] = "%.1f%% (%d)" % (percent, n_covered)
            logging.info(db_name + " coverage = %.1f%%" % percent)
        rowdicts.append(rowdict)
    
    headers = ['Method', 'RMSE (kJ/mol)'] + \
        [db_name + ' coverage' for db_name in reactions.keys()]
    html_writer.write_table(rowdicts, headers=headers)
    # close the report, like the other entry points that use an HtmlWriter
    html_writer.close()
Beispiel #33
0
def analyse_reversibility(thermo, name):
    """Plot reversibility histograms with and without co-factor constraints.

    For each of the two scenarios (concentration map from
    GetConcentrationMap(), then an empty map) the histograms are computed, a
    header is written to '../res/<name>_reversibility.html' and two figures
    (absolute and normed per module) are embedded and saved as PNG files.

    Args:
        thermo: thermodynamics object passed to calculate_reversibility_histogram.
        name: identifier used in the headers and in all output file names.
    """
    out_prefix = '../res/' + name
    html_fname = out_prefix + '_reversibility.html'
    logging.info('Writing HTML output to %s', html_fname)
    html_writer = HtmlWriter(html_fname)

    # (concentration map, header text, title and xlim of the absolute plot,
    #  title of the normed plot, number used in the PNG file names)
    scenarios = (
        (GetConcentrationMap(),
         ': Constrained co-factors</h1>Percentage of modules where first reaction is the maximal: %f<br>',
         'With constraints on co-factors', 10,
         'Normed per module with constraints on co-factors', '1'),
        ({},
         ': Non constrained co-factors</h1>Percentage of modules where first reaction is the maximal: %f<br>',
         'No constraints on co-factors', 20,
         'Normed per module, no constraints on co-factors', '2'),
    )

    for cmap, header, abs_title, abs_xlim, rel_title, number in scenarios:
        histogram, rel_histogram, perc_first_max = calculate_reversibility_histogram(
            thermo, cmap=cmap, id=name)

        html_writer.write('<h1>' + name + (header % perc_first_max))

        abs_fig = plot_histogram(histogram, html_writer, title=abs_title, xlim=abs_xlim)
        html_writer.embed_matplotlib_figure(abs_fig, width=640, height=480)
        pylab.savefig(out_prefix + '_kegg_reversibility' + number + '.png',
                      figure=abs_fig, format='png')

        rel_fig = plot_histogram(rel_histogram, html_writer, title=rel_title, xlim=5)
        html_writer.embed_matplotlib_figure(rel_fig, width=640, height=480)
        pylab.savefig(out_prefix + '_kegg_reversibility' + number + '_rel.png',
                      figure=rel_fig, format='png')
Beispiel #34
0
def metacyc_data(org, id, thermo, max_pathway_length_for_fig=8):
    """Plot MetaCyc reversibility histograms with and without co-factor constraints.

    The report is written to '../res/<org>_<id>_reversibility.html' and every
    figure is also saved as a PNG file under '../res/'. The first pass uses
    the concentration map from GetConcentrationMap() and also plots the
    position of regulated reactions; the second pass uses an empty map.

    Args:
        org: organism name, passed to MetaCyc and used in titles and file names.
        id: identifier appended to `org` in the output file names.
        thermo: thermodynamics object passed to
            calculate_metacyc_reversibility_histogram.
        max_pathway_length_for_fig: passed as `max_pathway_length` to the
            two bar plots of regulated reactions.
    """
    db = SqliteDatabase('../res/gibbs.sqlite')
    html_writer = HtmlWriter('../res/' + org + '_' + id + '_reversibility.html')
    metacyc_inst = MetaCyc(org, db)
    cmap = GetConcentrationMap()
    
    # --- first pass: co-factor concentrations constrained by cmap ---
    (histogram,rel_histogram,perc_first_max, reg_hist) = calculate_metacyc_reversibility_histogram(thermo, metacyc_inst,
                                                  cmap=cmap, id=(org + '_' + id))
    
    html_writer.write('<h1>Constrained co-factors</h1>Percentage of modules where first reaction is the maximal: %f<br>' % perc_first_max)
    # deltaG plot fig1 = plot_histogram(histogram, html_writer, title=('%s pathways: With constraints on co-factors' % org), legend_loc='lower right' , xlim=80)
    fig1 = plot_histogram(histogram, html_writer, title=('%s pathways: With constraints on co-factors' % org), xlim=10)
    html_writer.embed_matplotlib_figure(fig1, width=640, height=480)
    pylab.savefig('../res/' + org + '_' + id +  '_reversibility1.png', figure=fig1, format='png')

    fig1_reg = plot_bars(reg_hist, title=('%s pathways: Position of regulated reactions' % org), max_pathway_length=max_pathway_length_for_fig)
    html_writer.embed_matplotlib_figure(fig1_reg, width=640, height=480)
    pylab.savefig('../res/' + org + '_' + id +  '_reg_rxns.png', figure=fig1_reg, format='png')
    
    fig1_reg_stacked = plot_stacked_bars(reg_hist, title=('%s pathways: Position of regulated reactions' % org), max_pathway_length=max_pathway_length_for_fig)
    html_writer.embed_matplotlib_figure(fig1_reg_stacked, width=640, height=480)
    pylab.savefig('../res/' + org + '_' + id +  '_reg_stacked_rxns.png', figure=fig1_reg_stacked, format='png')

    #fig1_bs = plot_bootstrap_stats(histogram, title=('%s pathways: With constraints on co-factors' % org))
    #html_writer.embed_matplotlib_figure(fig1_bs, width=640, height=480)
    #pylab.savefig('../res/' + org + '_' + id +  '_reversibility1_bs.png', figure=fig1_bs, format='png')    
    

    fig1_rel = plot_histogram(rel_histogram, html_writer, title=('%s pathways: Normed per pathway with constraints on co-factors' % org), xlim=5)
    html_writer.embed_matplotlib_figure(fig1_rel, width=640, height=480)
    pylab.savefig('../res/' + org + '_' + id + '_reversibility1_rel.png', figure=fig1_rel, format='png')
    
    # --- second pass: no constraints on co-factors (empty map) ---
    (histogram,rel_histogram,perc_first_max, reg_hist) = calculate_metacyc_reversibility_histogram(thermo, metacyc_inst,
                                                  cmap={}, id=(org + '_' + id))
    
    html_writer.write('<h1>Non constrained co-factors</h1>Percentage of modules where first reaction is the maximal: %f<br>' % perc_first_max)
    fig2 = plot_histogram(histogram, html_writer, title=('%s pathways: No constraints on co-factors' % org ), xlim=20)
    html_writer.embed_matplotlib_figure(fig2, width=640, height=480)
    # pass fig2 here: this file holds the unconstrained histogram
    pylab.savefig('../res/' + org + '_' + id + '_reversibility2.png', figure=fig2, format='png')

    fig2_rel = plot_histogram(rel_histogram, html_writer, title=('%s pathways: Normed per pathway no constraints on co-factors' % org), xlim=5)
    html_writer.embed_matplotlib_figure(fig2_rel, width=640, height=480)
    pylab.savefig('../res/' + org + '_' + id + '_reversibility2_rel.png', figure=fig2_rel, format='png')
Beispiel #35
0
                continue
            if self.override_pMg or self.override_I or self.override_T:
                nist_row_copy = nist_row_data.Clone()
                if self.override_pMg:
                    nist_row_copy.pMg = self.override_pMg
                if self.override_I:
                    nist_row_copy.I = self.override_I
                if self.override_T:
                    nist_row_copy.T = self.override_T
                rows.append(nist_row_copy)
            else:
                rows.append(nist_row_data)
        return rows

    def GetUniqueReactionSet(self):
        """Return the set of distinct `reaction` values over all rows of self.data."""
        unique_reactions = set()
        for row in self.data:
            unique_reactions.add(row.reaction)
        return unique_reactions


if __name__ == '__main__':
    # Script entry point: write the KEGG compound IDs returned by
    # Nist.GetAllCids() to a text file (one 'C%05d' ID per line), then run
    # AnalyzeStats and AnalyzeConnectivity with an HTML writer.
    #logging.getLogger('').setLevel(logging.DEBUG)
    _mkdir("../res/nist")
    html_writer = HtmlWriter("../res/nist/statistics.html")
    nist = Nist()
    # 'with' closes the file even if GetAllCids() or a write raises
    with open('../res/nist_kegg_ids.txt', 'w') as fp:
        for cid in nist.GetAllCids():
            fp.write("C%05d\n" % cid)
    nist.AnalyzeStats(html_writer)
    nist.AnalyzeConnectivity(html_writer)
    html_writer.close()
                        '--leave_one_out',
                        action='store_true',
                        default=False,
                        help='A flag for running the Leave One Out analysis')
    return parser


if __name__ == "__main__":
    # Script entry point: build a UnifiedGroupContribution object, load each of
    # its inputs either from the database or by recalculating it (according to
    # the command-line flags), then optionally dump the data or train.
    logger = logging.getLogger('')
    logger.setLevel(logging.DEBUG)

    parser = MakeOpts()
    args = parser.parse_args()
    util._mkdir('../res')
    db = SqliteDatabase('../res/gibbs.sqlite', 'w')
    html_writer = HtmlWriter('../res/ugc.html')

    ugc = UnifiedGroupContribution(
        db, html_writer, anchor_all=args.anchor_all_formations)

    # each input is read from the database unless its recalc flag is set
    use_db_for_groups = not args.recalc_groups
    ugc.LoadGroups(FromDatabase=use_db_for_groups)
    use_db_for_observations = not args.recalc_observations
    ugc.LoadObservations(FromDatabase=use_db_for_observations)
    use_db_for_groupvectors = not args.recalc_groupvectors
    ugc.LoadGroupVectors(FromDatabase=use_db_for_groupvectors)
    use_db_for_matrices = not args.recalc_matrices
    ugc.LoadData(FromDatabase=use_db_for_matrices)

    # dumping takes precedence over training; both end the script
    if args.dump:
        ugc.SaveDataToMatfile()
        sys.exit(0)
    elif args.train:
        ugc.EstimateKeggCids()
        sys.exit(0)
Beispiel #37
0
def main():
    """Train the NIST regression and write its tables and results as HTML.

    The output file name comes from the command-line options. When
    `use_prior` is set, Alberty's pseudoisomer table is loaded from the
    public database and passed to Train() as the prior.
    """
    options, _ = MakeOpts().parse_args(sys.argv)

    db = SqliteDatabase("../res/gibbs.sqlite")
    public_db = SqliteDatabase("../data/public_data.sqlite")
    output_filename = os.path.abspath(options.output_filename)
    logging.info('Will write output to %s' % output_filename)

    html_writer = HtmlWriter(output_filename)

    def begin_section(title):
        # bold title followed by a toggle that opens a collapsible section
        html_writer.write('</br><b>%s</b>\n' % title)
        html_writer.insert_toggle(start_here=True)

    nist = Nist(T_range=None)
    nist_regression = NistRegression(db, html_writer=html_writer, nist=nist)
    # the threshold over which to print an analysis of a reaction
    nist_regression.std_diff_threshold = 5
    #nist_regression.nist.T_range = None(273.15 + 24, 273.15 + 40)
    #nist_regression.nist.override_I = 0.25
    #nist_regression.nist.override_pMg = 14.0

    html_writer.write("<h2>NIST regression:</h2>")
    prior_thermo = None
    if options.use_prior:
        logging.info('Using the data from Alberty as fixed prior')
        prior_thermo = PsuedoisomerTableThermodynamics.FromDatabase(
            public_db, 'alberty_pseudoisomers', name="Alberty")

    begin_section('Regression Tables')
    nist_regression.Train(options.from_database, prior_thermo)
    html_writer.div_end()

    begin_section('PRC results')
    nist_regression.WriteDataToHtml(html_writer)
    html_writer.div_end()

    begin_section('Transformed reaction energies - PRC vs. Observed')
    N, rmse = nist_regression.VerifyResults()
    html_writer.div_end()

    logging.info("Regression results for transformed data:")
    logging.info("N = %d, RMSE = %.1f" % (N, rmse))

    html_writer.close()
Beispiel #38
0
    return parser


if __name__ == '__main__':
    # Script entry point: set up a GroupContribution object whose HTML report
    # name depends on the transformed / test_only / train_only flags.
    parser = MakeOpts()
    args = parser.parse_args()
    util._mkdir('../res')
    db = SqliteDatabase('../res/gibbs.sqlite', 'w')

    prefix = 'bgc' if args.transformed else 'pgc'

    # test_only takes precedence over train_only when choosing the file name
    if args.test_only:
        html_fname = '../res/%s_test.html' % prefix
    elif args.train_only:
        html_fname = '../res/%s_train.html' % prefix
    else:
        html_fname = '../res/%s.html' % prefix
    html_writer = HtmlWriter(html_fname)

    G = GroupContribution(db=db, html_writer=html_writer,
                          transformed=args.transformed)

    G.LoadGroups(FromDatabase=args.from_database, FromFile=args.groups_species)
    G.LoadObservations(args.from_database)
    G.LoadGroupVectors(args.from_database)

    if args.test_only:
        G.LoadContributionsFromDB()
Beispiel #39
0
    #m = Molecule.FromInChI('InChI=1/CO2/c2-1-3'); m.SetTitle('CO2')
    #m = Molecule.FromInChI('InChI=1/CO/c1-2'); m.SetTitle('CO')
    #m = Molecule.FromInChI('InChI=1/C10H16N5O13P3/c11-8-5-9(13-2-12-8)15(3-14-5)10-7(17)6(16)4(26-10)1-25-30(21,22)28-31(23,24)27-29(18,19)20/h2-4,6-7,10,16-17H,1H2,(H,21,22)(H,23,24)(H2,11,12,13)(H2,18,19,20)/t4-,6-,7-,10-/m1/s1'); m.SetTitle('ATP')
    #m = Molecule.FromSmiles("P(=O)(O)(O)O")
    
    #print m.ToFormat('mol')
    #print m.ToFormat('mol2')
    #print m.ToFormat('smi')
    #print m.ToFormat('inchi')
    #print m.ToFormat('sdf')

    diss_table = Molecule._GetDissociationTable('C(=O)(O)CN', fmt='smiles',
                 mid_pH=default_pH, min_pKa=0, max_pKa=14, T=default_T)
    print "glycine\n", diss_table
    
    html_writer = HtmlWriter('../res/molecule.html')
    from pygibbs.kegg import Kegg
    kegg = Kegg.getInstance()
    html_writer.write('<h1>pKa estimation using ChemAxon</h1>\n')
    for cid in [41]:
        m = kegg.cid2mol(cid)
        html_writer.write("<h2>C%05d : %s</h2>\n" % (cid, str(m)))
        diss_table = m.GetDissociationTable()
        pmap = diss_table.GetPseudoisomerMap()
        diss_table.WriteToHTML(html_writer)
        pmap.WriteToHTML(html_writer)
        html_writer.write("</p>\n")
        #print m.GetDissociationConstants()
        #print m.GetMacrospecies()

    #obmol = m.ToOBMol()
Beispiel #40
0
def AnalyzeConcentrationGradient(pathway_file,
                                 output_prefix,
                                 thermo,
                                 conc_range,
                                 cids=None,
                                 pH=None):
    """Scan the concentration of some compounds and record each pathway's OBD.

    All pathways read from pathway_file are first evaluated once with no
    overridden bounds (that run is written to the HTML report). Then, for
    every concentration in the scanned range, the bounds of all compounds in
    cids are pinned to that concentration and the OBD of every pathway is
    recomputed. Results are written to '<output_prefix>.csv' (one row per
    concentration) and a summary figure is embedded in '<output_prefix>.html'.

    Args:
        pathway_file: handed to KeggFile2PathwayList to obtain the pathways.
        output_prefix: path prefix of the generated .html and .csv files.
        thermo: thermodynamic data source forwarded to GetAllOBDs; its
            kegg.cid2name() supplies the compound names used in labels.
        conc_range: handed to ParseConcentrationRange; the parsed values are
            negated and used as base-10 exponents of the concentrations (M).
        cids: KEGG compound IDs whose concentration is scanned
            (default: no compounds).
        pH: forwarded to GetAllOBDs.
    """
    if cids is None:
        cids = []
    compound_names = ','.join([thermo.kegg.cid2name(cid) for cid in cids])
    pathway_list = KeggFile2PathwayList(pathway_file)
    pathway_names = [entry for (entry, _) in pathway_list]
    html_writer = HtmlWriter('%s.html' % output_prefix)

    # run once just to make sure that the pathways are all working:
    logging.info("testing all pathways with default concentrations")
    GetAllOBDs(pathway_list,
               html_writer,
               thermo,
               pH=pH,
               section_prefix="test",
               balance_water=True,
               override_bounds={})

    obd_mat = []
    # the 'with' block guarantees the CSV is flushed and closed, even on error
    with open('%s.csv' % output_prefix, 'w') as csv_file:
        csv_output = csv.writer(csv_file)
        csv_output.writerow(['pH', '[' + compound_names + ']'] + pathway_names)

        # the parsed range holds -log10 of the concentrations (in M)
        conc_vec = 10**(-ParseConcentrationRange(conc_range))
        override_bounds = {}

        for conc in conc_vec.flat:
            # pin every scanned compound to exactly this concentration
            for cid in cids:
                override_bounds[cid] = (conc, conc)
            logging.info("[%s] = %.1e M" % (compound_names, conc))
            data = GetAllOBDs(pathway_list,
                              html_writer=None,
                              thermo=thermo,
                              pH=pH,
                              section_prefix="",
                              balance_water=True,
                              override_bounds=override_bounds)
            obds = [d['OBD'] for d in data]
            obd_mat.append(obds)
            csv_output.writerow([data[0]['pH'], conc] + obds)

    # rows are concentrations and columns are pathways
    obd_mat = np.matrix(obd_mat)

    fig = plt.figure(figsize=(6, 6), dpi=90)
    colormap = color.ColorMap(pathway_names)
    for i, name in enumerate(pathway_names):
        plt.plot(conc_vec,
                 obd_mat[:, i],
                 '-',
                 color=colormap[name],
                 figure=fig)
    plt.title("OBD vs. [%s]" % (compound_names), figure=fig)
    plt.xscale('log')
    plt.ylim(ymin=0)
    plt.xlabel('[%s] (in M)' % compound_names, figure=fig)
    plt.ylabel('Optimized Distributed Bottleneck [kJ/mol]', figure=fig)
    plt.legend(pathway_names)
    html_writer.write('<h2>Summary figure</h2>\n')
    html_writer.embed_matplotlib_figure(fig)
    html_writer.close()
Beispiel #41
0
class KeggGenes(object):
    
    def __init__(self, html_fname):
        """Open the channeling database and the HTML report; name the tables.

        html_fname is handed unchanged to HtmlWriter.
        """
        self.serv = None
        self.db = SqliteDatabase('channeling/channeling.sqlite', 'w')
        self.html_writer = HtmlWriter(html_fname)

        # tables filled from the KEGG REST listings
        self.COMPOUND_TABLE_NAME = 'kegg_compounds'
        self.GENE_TABLE_NAME = 'kegg_genes'
        self.GENE_REACTION_TABLE_NAME = 'kegg_genes_to_reactions'
        self.EQUATION_TABLE_NAME = 'kegg_equations'

        # tables computed from the ones above
        self.STOICHIOMETRY_TABLE_NAME = 'kegg_stoichiometry'
        self.GIBBS_ENERGY_TABLE_NAME = 'kegg_gibbs_energies'
        self.GENE_ENERGY_TABLE_NAME = 'kegg_gene_energies'
        self.GENE_PAIRS_TABLE_NAME = 'kegg_gene_pairs'

        # tables loaded from local files
        self.COFACTOR_TABLE_NAME = 'kegg_cofactors'
        self.FUNCTIONAL_INTERATCTIONS_TABLE = 'parkinson_functional_interactions'

        # not referenced by any method visible in this class
        self.REACTION_TABLE_NAME = 'kegg_reactions'

    def GetAllCompounds(self):
        """Rebuild the compound table from the KEGG REST compound listing.

        The table and its unique index on 'compound' are dropped and
        recreated. Each non-empty line of the listing becomes one row holding
        the compound identifier, the first of its ';'-separated names and the
        complete name string.

        Raises:
            ValueError: if a non-empty line contains no tab separator.
        """
        self.db.CreateTable(self.COMPOUND_TABLE_NAME, "compound INT, name TEXT, all_names TEXT", drop_if_exists=True)
        self.db.CreateIndex('compound_idx', self.COMPOUND_TABLE_NAME, 'compound', unique=True, drop_if_exists=True)

        f = urllib.urlopen('http://rest.kegg.jp/list/cpd/')
        try:
            listing = f.read()
        finally:
            # release the HTTP connection even if the read fails
            f.close()

        for row in listing.split('\n'):
            if row.strip() == '':
                continue
            if '\t' not in row:
                raise ValueError('Bad compound name: ' + row)
            compound, all_names = row.split('\t', 1)
            name = all_names.split(';')[0]
            self.db.Insert(self.COMPOUND_TABLE_NAME, [compound, name, all_names])
        self.db.Commit()
    
    def GetAllGenes(self, organism='eco'):
        """Refresh the gene rows of one organism from the KEGG REST listing.

        The gene table and its index are created with drop_if_exists=False,
        the rows already stored for this organism are deleted, and one row
        (organism, gene, description) is inserted per non-empty listing line.

        Args:
            organism: KEGG organism code used both in the request URL and as
                the value of the 'organism' column.

        Raises:
            ValueError: if a non-empty line does not split into exactly two
                tab-separated fields.
        """
        self.db.CreateTable(self.GENE_TABLE_NAME, ['organism', 'gene', 'desc'], drop_if_exists=False)
        self.db.CreateIndex('gene_idx', self.GENE_TABLE_NAME, 'gene', unique=False, drop_if_exists=False)

        # NOTE(review): organism is interpolated straight into the SQL text,
        # so it must come from a trusted source (a plain KEGG organism code).
        self.db.Execute("DELETE FROM %s WHERE organism = '%s'" %
                        (self.GENE_TABLE_NAME, organism))

        f = urllib.urlopen('http://rest.kegg.jp/list/%s/' % organism)
        try:
            listing = f.read()
        finally:
            # release the HTTP connection even if the read fails
            f.close()

        for row in listing.split('\n'):
            if row.strip() == '':
                continue
            gene, desc = row.split('\t')
            self.db.Insert(self.GENE_TABLE_NAME, [organism, gene, desc])
        self.db.Commit()
    
    def GetAllReactions(self, organism='eco'):
        """Refresh the gene-to-reaction links of one organism from KEGG.

        The link table and its two indices are created with
        drop_if_exists=False, the rows already stored for this organism are
        deleted, and one row (organism, gene, reaction) is inserted per
        non-empty line returned by the KEGG REST 'link/rn' request.

        Args:
            organism: KEGG organism code used both in the request URL and as
                the value of the 'organism' column.

        Raises:
            ValueError: if a non-empty line does not split into exactly two
                tab-separated fields.
        """
        self.db.CreateTable(self.GENE_REACTION_TABLE_NAME, ['organism', 'gene', 'reaction'], drop_if_exists=False)
        self.db.CreateIndex('reaction_gene_idx', self.GENE_REACTION_TABLE_NAME, 'gene', unique=False, drop_if_exists=False)
        self.db.CreateIndex('reaction_idx', self.GENE_REACTION_TABLE_NAME, 'reaction', unique=False, drop_if_exists=False)

        # NOTE(review): organism is interpolated straight into the SQL text,
        # so it must come from a trusted source (a plain KEGG organism code).
        self.db.Execute("DELETE FROM %s WHERE organism = '%s'" %
                        (self.GENE_REACTION_TABLE_NAME, organism))

        f = urllib.urlopen('http://rest.kegg.jp/link/rn/%s' % organism)
        try:
            listing = f.read()
        finally:
            # release the HTTP connection even if the read fails
            f.close()

        for row in listing.split('\n'):
            if row.strip() == '':
                continue
            gene, reaction = row.split('\t')
            self.db.Insert(self.GENE_REACTION_TABLE_NAME, [organism, gene, reaction])
        self.db.Commit()
                
    def GetAllEquations(self):
        """Rebuild the equation table by fetching every known reaction.

        The table and its indices are dropped and recreated. For each distinct
        reaction in the gene-to-reaction table, the reaction entry is fetched
        from the KEGG REST API and one (reaction, equation) row is stored for
        every equation found in the reply. Progress is reported on stderr.
        """
        self.db.CreateTable(self.EQUATION_TABLE_NAME, ['reaction', 'equation'], drop_if_exists=True)
        self.db.CreateIndex('equation_reaction_idx', self.EQUATION_TABLE_NAME, 'reaction', unique=False, drop_if_exists=True)
        self.db.CreateIndex('equation_idx', self.EQUATION_TABLE_NAME, 'equation', unique=False, drop_if_exists=True)

        # read the whole list first, before any row is inserted
        all_reactions = [str(row[0]) for row in
                         self.db.Execute("SELECT distinct(reaction) FROM %s" %
                                         (self.GENE_REACTION_TABLE_NAME))]

        for reaction in all_reactions:
            f = urllib.urlopen('http://rest.kegg.jp/get/%s' % reaction)
            try:
                entry_text = f.read()
            finally:
                # one request per reaction: close each handle right away
                f.close()
            for equation in self._ReadReactionEntries(entry_text):
                self.db.Insert(self.EQUATION_TABLE_NAME,
                    [reaction, equation])
                sys.stderr.write('Equation for reaction %s: %s\n' % (reaction, equation))
        self.db.Commit()

    def _ReadReactionEntries(self, s):
        """Return the EQUATION field of every entry parsed from s.

        s is parsed with kegg_parser.ParsedKeggFile.FromKeggAPI; entries are
        visited in sorted key order and those without an EQUATION field are
        left out.
        """
        parsed = kegg_parser.ParsedKeggFile.FromKeggAPI(s)
        fields_in_order = [parsed[entry] for entry in sorted(parsed.keys())]
        return [fields["EQUATION"] for fields in fields_in_order
                if "EQUATION" in fields]
    
    def GetStoichiometries(self):
        """Rebuild the stoichiometry table from the stored equations.

        Every distinct equation is parsed with Reaction.FromFormula; those
        that raise KeggParseException or KeggNonCompoundException are left
        out. For each parsed equation, one row is stored per compound:
        (equation, 'cpd:C<5-digit id>', coefficient).
        """
        self.db.CreateTable(self.STOICHIOMETRY_TABLE_NAME, "equation TEXT, compound TEXT, coefficient REAL", drop_if_exists=True)
        self.db.CreateIndex('stoichiometry_equation_idx', self.STOICHIOMETRY_TABLE_NAME, 'equation', unique=False, drop_if_exists=True)
        self.db.CreateIndex('stoichiometry_compound_idx', self.STOICHIOMETRY_TABLE_NAME, 'compound', unique=False, drop_if_exists=True)

        # first pass: parse everything, before any row is inserted
        parsed_equations = []
        select_equations = ("SELECT distinct(equation) FROM %s" %
                            (self.EQUATION_TABLE_NAME))
        for row in self.db.Execute(select_equations):
            try:
                parsed_reaction = Reaction.FromFormula(str(row[0]))
            except (KeggParseException, KeggNonCompoundException):
                continue
            parsed_equations.append((str(row[0]), parsed_reaction))

        # second pass: one row per (equation, compound)
        for equation, parsed_reaction in parsed_equations:
            for compound, coefficient in parsed_reaction.iteritems():
                self.db.Insert(self.STOICHIOMETRY_TABLE_NAME,
                               [equation, "cpd:C%05d" % compound, coefficient])

        self.db.Commit()

    def GetForamtionEnergies(self, thermo):
        """Rebuild the Gibbs energy table, one row per distinct equation.

        For each equation the reaction is parsed, checked to contain only
        compound IDs known to Kegg, balanced (including water), and its
        transformed energy is evaluated twice with thermo: at conc=1 (stored
        as dG0) and at conc=1e-3 (stored as dGc). Equations that fail to
        parse, reference unknown compounds or cannot be balanced are stored
        with NULL energies.

        Args:
            thermo: object providing GetTransfromedKeggReactionEnergies().
        """
        self.db.CreateTable(self.GIBBS_ENERGY_TABLE_NAME, "equation TEXT, dG0 REAL, dGc REAL", drop_if_exists=True)
        self.db.CreateIndex('gibbs_equation_idx', self.GIBBS_ENERGY_TABLE_NAME, 'equation', unique=True, drop_if_exists=True)

        select_equations = ("SELECT distinct(equation) FROM %s" %
                            (self.EQUATION_TABLE_NAME))
        all_equations = set()
        for row in self.db.Execute(select_equations):
            all_equations.add(str(row[0]))

        from pygibbs.kegg import Kegg
        known_cids = set(Kegg.getInstance().get_all_cids())
        unusable = (KeggParseException, KeggNonCompoundException,
                    KeggReactionNotBalancedException)
        for equation in all_equations:
            try:
                rxn = Reaction.FromFormula(equation)
                if not rxn.get_cids().issubset(known_cids):
                    raise KeggNonCompoundException
                rxn.Balance(balance_water=True, exception_if_unknown=True)
                energies = [
                    thermo.GetTransfromedKeggReactionEnergies([rxn], conc=1)[0, 0],
                    thermo.GetTransfromedKeggReactionEnergies([rxn], conc=1e-3)[0, 0],
                ]
                self.db.Insert(self.GIBBS_ENERGY_TABLE_NAME, [equation] + energies)
            except unusable:
                # keep the equation in the table, but without energies
                self.db.Insert(self.GIBBS_ENERGY_TABLE_NAME, [equation, None, None])

        self.db.Commit()
    
    def LoadCofactors(self):
        """Rebuild the cofactor table from 'channeling/cofactors.csv'.

        The table and its unique index on 'compound' are dropped and
        recreated. Each CSV row (columns cid, name, c_min, c_max, ref) becomes
        one table row keyed by 'cpd:C<5-digit cid>'; an empty c_min or c_max
        is stored as NaN.
        """
        self.db.CreateTable(self.COFACTOR_TABLE_NAME,
                            'compound TEXT, name TEXT, c_min REAL, c_max REAL, ref TEXT',
                            drop_if_exists=True)
        self.db.CreateIndex('cofactor_idx', self.COFACTOR_TABLE_NAME,
                            'compound', unique=True, drop_if_exists=True)

        # the 'with' block closes the CSV file even if a row fails to convert
        with open('channeling/cofactors.csv', 'r') as csv_file:
            for rowdict in csv.DictReader(csv_file):
                self.db.Insert(self.COFACTOR_TABLE_NAME,
                               ["cpd:C%05d" % int(rowdict['cid']), rowdict['name'],
                                float(rowdict['c_min'] or np.nan), float(rowdict['c_max'] or np.nan),
                                rowdict['ref']])
        self.db.Commit()
        
    def CreateGeneEnergyTable(self):
        """Rebuild the gene energy table by joining the KEGG tables.

        For every 'eco' gene, each of its reactions with a non-NULL dG0 and
        each compound of that reaction yields two rows: one with dGc and the
        coefficient as stored, and one with both negated.
        """
        self.db.CreateTable(self.GENE_ENERGY_TABLE_NAME,
                            "gene TEXT, reaction TEXT, dGc REAL, compound INT, coefficient REAL",
                            drop_if_exists=True)
        self.db.CreateIndex('gene_energy_compound_idx',
                            self.GENE_ENERGY_TABLE_NAME, 'compound', unique=False)
        self.db.CreateIndex('gene_energy_gene_idx',
                            self.GENE_ENERGY_TABLE_NAME, 'gene', unique=False)

        # placeholders: target table, sign of dGc, sign of the coefficient
        insert_template = """
            INSERT INTO %s (gene, reaction, dGc, compound, coefficient)
                SELECT  gen.gene, rxn.reaction, %seng.dGc, sto.compound, %ssto.coefficient
                FROM    kegg_genes gen, kegg_genes_to_reactions rxn,
                        kegg_equations eqn, kegg_gibbs_energies eng,
                        kegg_stoichiometry sto
                WHERE   gen.organism = 'eco'
                AND     gen.gene = rxn.gene
                AND     rxn.reaction = eqn.reaction
                AND     eqn.equation = eng.equation
                AND     eng.dG0 IS NOT NULL
                AND     eqn.equation = sto.equation
        """
        # first the rows as stored, then the same rows with flipped signs
        for sign in ('', '-'):
            self.db.Execute(insert_template %
                            (self.GENE_ENERGY_TABLE_NAME, sign, sign))
        self.db.Commit()
        
    def CreateGenePairsTable(self):
        """Rebuild the table of gene pairs that share a non-cofactor compound.

        A pair is formed from two gene-energy rows of different genes and
        different reactions that refer to the same compound, where the first
        has a positive and the second a negative coefficient, and the compound
        is not listed in the cofactor table. The functional-interaction score
        is attached through a left outer join that matches the two genes in
        either order, so it is NULL when no interaction is recorded.
        """
        columns = ("gene1 TEXT, gene2 TEXT, reaction1 TEXT, reaction2 TEXT, "
                   "compound TEXT, coeff1 REAL, coeff2 REAL, dGc1 REAL, "
                   "dGc2 REAL, score REAL")
        self.db.CreateTable(self.GENE_PAIRS_TABLE_NAME, columns,
                            drop_if_exists=True)
        self.db.CreateIndex('gene_pairs_gene_idx',
                            self.GENE_PAIRS_TABLE_NAME,
                            'gene1, gene2', unique=False)

        # in placeholder order: target table, cofactor table, interaction table
        table_names = (self.GENE_PAIRS_TABLE_NAME,
                       self.COFACTOR_TABLE_NAME,
                       self.FUNCTIONAL_INTERATCTIONS_TABLE)
        insert_template = """
            INSERT INTO %s (gene1, gene2, reaction1, reaction2, compound, coeff1, coeff2, dGc1, dGc2, score)
                SELECT p.*, pfi.score FROM
                (
                    SELECT  kge1.gene gene1,
                            kge2.gene gene2, 
                            kge1.reaction reaction1,
                            kge2.reaction reaction2,
                            kge1.compound compound, 
                            kge1.coefficient coeff1,
                            kge2.coefficient coeff2,
                            cast(kge1.dGc as real) dGc1, 
                            cast(kge2.dGc as real) dGc2
                    FROM    kegg_gene_energies kge1, kegg_gene_energies kge2
                    WHERE   kge1.compound = kge2.compound
                    AND     kge1.compound NOT IN (SELECT compound FROM %s)
                    AND     kge1.gene != kge2.gene
                    AND     kge1.reaction != kge2.reaction
                    AND     kge1.coefficient > 0
                    AND     kge2.coefficient < 0
                ) p
                LEFT OUTER JOIN %s pfi
                ON      (pfi.gene1 = p.gene1 AND pfi.gene2 = p.gene2
                         OR
                         pfi.gene1 = p.gene2 AND pfi.gene2 = p.gene1)
        """
        self.db.Execute(insert_template % table_names)
        self.db.Commit()
        
    def Correlate(self, dGc1_lower, dGc2_upper, reverse=False):
        if reverse:
            cond = "kgp.dGc1 < %d AND kgp.dGc2 > %d" % (dGc1_lower, dGc2_upper)
        else:
            cond = "kgp.dGc1 > %d AND kgp.dGc2 < %d" % (dGc1_lower, dGc2_upper)
        
        query = """
            SELECT  kgp.gene1, kgp.gene2, sum(%s) nqual, count(*) ntot, max(score)
            FROM %s kgp
            GROUP BY kgp.gene1, kgp.gene2
        """ % (cond, self.GENE_PAIRS_TABLE_NAME)
        
        counters = np.zeros((2, 2))
        
        for row in self.db.Execute(query):
            _gene1, _gene2, nqual, _ntot, score = row
            i = int(score is not None) # is there an PP-interaction
            j = int(nqual > 0) # is this a qualifying pair (thermodynamically)
            counters[i, j] += 1.0

        _inter0 = np.sum(counters[0, :])
        inter1 = np.sum(counters[1, :])
        qual0 = np.sum(counters[:, 0])
        qual1 = np.sum(counters[:, 1])
        total = np.sum(counters.flat)
        
        print "-" * 50
        if reverse:
            print "Checking criterion: first < %d and second > %d" % (dGc1_lower, dGc2_upper)
        else:
            print "Checking criterion: first > %d and second < %d" % (dGc1_lower, dGc2_upper)
        print "Total no. of pairs = %d" % total
        
        print "interaction rate among all pairs (%d out of %d) = %.2f%%" % (inter1, total, 100*(inter1 / total))
        print "qualification rate among all pairs (%d out of %d) = %.2f%%" % (qual1, total, 100*(qual1 / total))
        print "interactions between unqualifying pairs (%d out of %d) = %.2f%%" % (counters[1,0], qual0, 100*(counters[1,0] / qual0))
        print "interactions between qualifying pairs (%d out of %d) = %.2f%%" % (counters[1,1], qual1, 100*(counters[1,1] / qual1))
        
        return counters[1,0] / qual0, counters[1,1] / qual1


    def LoadFunctionalInteractions(self,
            fname='../data/proteomics/coli/functional_interactions.txt'):
        """Rebuild the functional-interaction table from a tab-separated file.

        The table is dropped and recreated. Blank lines and lines whose first
        field starts with '#' are skipped; every other line provides
        (gene1, gene2, score). Gene names are lower-cased and prefixed with
        'eco:' before they are stored.

        Args:
            fname: path of the tab-separated input file.
        """
        self.db.CreateTable(self.FUNCTIONAL_INTERATCTIONS_TABLE,
                            ['gene1', 'gene2', 'score'],
                            drop_if_exists=True)
        self.db.CreateIndex('interaction_gene_idx',
                            self.FUNCTIONAL_INTERATCTIONS_TABLE,
                            'gene1, gene2', unique=False)

        # the 'with' block closes the input file even if a row is malformed
        with open(fname, 'r') as tsv_file:
            for row in csv.reader(tsv_file, delimiter='\t'):
                if not row:
                    # csv.reader yields an empty list for a blank line
                    continue
                if row[0].startswith('#'):
                    continue
                gene1 = 'eco:' + row[0].lower()
                gene2 = 'eco:' + row[1].lower()
                score = float(row[2])
                self.db.Insert(self.FUNCTIONAL_INTERATCTIONS_TABLE,
                               [gene1, gene2, score])

        self.db.Commit()

    def PlotScatter(self):
        """Show a scatter plot of dGc2 against dGc1 for all gene pairs.

        Pairs with a positive interaction score are drawn in green on top of
        the pairs without one (score NULL or 0), which are drawn in red.
        """
        query = """
                SELECT  p.g1, p.g2, pfi.score
                FROM (
                      SELECT  kgp.gene1 gene1, kgp.gene2 gene2, cast(kgp.dGc1 as real) g1, cast(kgp.dGc2 as real) g2
                      FROM    %s kgp
                     ) p
                LEFT OUTER JOIN %s pfi
                ON      (pfi.gene1 = p.gene1 AND pfi.gene2 = p.gene2
                         OR
                         pfi.gene1 = p.gene2 AND pfi.gene2 = p.gene1)
            """ % (self.GENE_PAIRS_TABLE_NAME, self.FUNCTIONAL_INTERATCTIONS_TABLE)

        # columns: dGc1, dGc2, score (a missing score counts as 0)
        data = np.matrix([[float(g1), float(g2), float(score or 0)]
                          for g1, g2, score in self.db.Execute(query)])

        with_score = list(np.where(data[:, 2] > 0)[0].flat)
        without_score = list(np.where(data[:, 2] == 0)[0].flat)
        fig = plt.figure(figsize=(6,6), dpi=90)
        for indices, style in ((without_score, 'r.'), (with_score, 'g.')):
            plt.plot(data[indices, 0], data[indices, 1], style,
                     markersize=5, figure=fig)
        plt.show()
        
    def PlotCDF(self):
        """Embed CDFs of min(dGc2 - dGc1) for interacting vs. other pairs.

        Only rows with dGc1 + dGc2 < 0 and dGc1 > 10 are considered, grouped
        by gene pair. Pairs with a positive score are plotted in green, those
        without one in red. Two hand-picked pairs are marked with a dashed
        vertical line and a label. The figure is embedded in the HTML report
        under the name 'channeling_cdf'.
        """
        special_pairs = {
            # malate dehydrogenase -> oxaloacetate -> citrate synthase
            ('eco:b3236', 'eco:b0720'): "mdh:gltA",
            # trpD -> chorismate -> trpE (two components of anthranilate synthase)
            ('eco:b1263', 'eco:b1264'): "trpD:trpE",
        }

        query = """
                SELECT gene1, gene2, min(dGc2 - dGc1), max(score)
                FROM %s
                WHERE dGc1 + dGc2 < 0
                AND dGc1 > 10
                GROUP BY gene1, gene2
                """ % (self.GENE_PAIRS_TABLE_NAME)

        rows = []
        markers = []
        for gene1, gene2, ddG, score in self.db.Execute(query):
            if (gene1, gene2) in special_pairs:
                markers.append((special_pairs[(gene1, gene2)], ddG))
            # a missing score counts as 0, i.e. as non-interacting
            rows.append([ddG, float(score or 0)])
        data = np.matrix(rows)

        interacting = list(np.where(data[:, 1] > 0)[0].flat)
        non_interacting = list(np.where(data[:, 1] == 0)[0].flat)

        fig = plt.figure(figsize=(6,6), dpi=90)
        cdf((data[non_interacting, 0]).flat, label="non-interacting (N = %d)" % len(non_interacting), style='r', figure=fig)
        cdf((data[interacting, 0]).flat, label="interacting (N = %d)" % len(interacting), style='g', figure=fig)
        for label, ddG in markers:
            plt.plot([ddG, ddG], [0, 1], 'b--', figure=fig)
            plt.text(ddG, 0.1, label)
        plt.xlim(-500, 500)
        plt.xlabel(r"$\Delta G'^c$ (2nd) - $\Delta G'^c$ (1st) [kJ/mol]")
        plt.ylabel(r"Cumulative Distribution Function")
        plt.legend(loc="upper left")

        self.html_writer.embed_matplotlib_figure(fig, width=400, height=400, name='channeling_cdf')

    def PrintEnergies(self):
        """Write the reaction energy table to the HTML report and to a CSV.

        The same query (reaction, equation, dG0, dGc) is rendered twice: as an
        HTML table wrapped in a small font, and as
        '../res/channeling_energy_tabel.csv'.
        """
        column_names = ['KEGG Reaction', 'Formula', 'dG0', 'dGc']
        query = """
                SELECT e.reaction, e.equation, g.dG0, g.dGc 
                FROM   kegg_equations e, kegg_gibbs_energies g 
                WHERE  e.equation = g.equation
                """

        writer = self.html_writer
        writer.write('<font size="1">\n')
        self.db.Query2HTML(writer, query, column_names)
        self.db.Query2CSV('../res/channeling_energy_tabel.csv', query, column_names)
        writer.write('</font>\n')

    def PrintPairs(self):
        """Write the per-compound summary of gene pairs to HTML and to a CSV.

        Gene pairs are grouped by (gene1, gene2, compound) and ordered by the
        smallest dGc2 - dGc1 of the group; energies are cast to integers and
        the gene descriptions and compound name are joined in. The same query
        is rendered as an HTML table wrapped in a small font and as
        '../res/channeling_pairs_table.csv'.
        """
        column_names = ['Gene 1', 'Gene 2', 'Common Compound',
                        'Reaction 1', 'Reaction 2',
                        'dGc1', 'dGc2', 'dG2-dG1', 'Desc 1', 'Desc 2',
                        'Score']
        query = """
                SELECT g.gene1, g.gene2, c.name, g.reaction1, g.reaction2, 
                       cast(g.dG1 as int), cast(g.dG2 as int), cast(g.ddG as int),
                       kg1.desc, kg2.desc, g.score FROM
                (SELECT gene1, gene2, reaction1, reaction2, compound, max(dGc1) dG1, min(dGc2) dG2, min(dGc2 - dGc1) ddG, max(score) score
                FROM kegg_gene_pairs
                WHERE dGc1 + dGc2 < 1000000
                AND   dGc1 > -1000000
                GROUP BY gene1, gene2, compound
                ORDER BY ddG) g, kegg_genes kg1, kegg_genes kg2, kegg_compounds c
                WHERE g.gene1 = kg1.gene AND g.gene2 = kg2.gene AND c.compound = g.compound
                """

        writer = self.html_writer
        writer.write('<font size="1">\n')
        self.db.Query2HTML(writer, query, column_names)
        self.db.Query2CSV('../res/channeling_pairs_table.csv', query, column_names)
        writer.write('</font>\n')

    def PrintAllPairs(self):
        """Write every gene pair, grouped by compound, to HTML and to a CSV.

        Gene pairs are grouped by (gene1, gene2, compound) with the largest
        dGc1 and the smallest dGc2 of each group, and the compound name is
        joined in. The same query is rendered as an HTML table wrapped in a
        small font and as '../res/channeling_all_pairs_table.csv'.
        """
        column_names = ['Gene 1', 'Gene 2', 'Common Compound',
                        'Reaction 1', 'Reaction 2',
                        'dGc1', 'dGc2']
        query = """
                SELECT g.gene1, g.gene2, c.name, g.reaction1, g.reaction2, 
                       g.dG1, g.dG2 FROM
                (SELECT gene1, gene2, reaction1, reaction2, compound, max(dGc1) dG1, min(dGc2) dG2
                FROM kegg_gene_pairs
                GROUP BY gene1, gene2, compound
                ORDER BY gene1, gene2, reaction1, reaction2, compound) g, kegg_genes kg1, kegg_genes kg2, kegg_compounds c
                WHERE g.gene1 = kg1.gene AND g.gene2 = kg2.gene AND c.compound = g.compound
                """

        writer = self.html_writer
        writer.write('<font size="1">\n')
        self.db.Query2HTML(writer, query, column_names)
        self.db.Query2CSV('../res/channeling_all_pairs_table.csv', query, column_names)
        writer.write('</font>\n')
Beispiel #42
0
if __name__ == "__main__":
    # Breadth-first walk over the KEGG compound graph, starting from one
    # compound and ignoring a fixed set of cofactors. The compounds reached
    # at each of the three levels are written to an HTML page.
    kegg = Kegg.getInstance()

    # graph[cid] = all compounds that appear on the opposite side of cid
    # (coefficients of opposite sign) in at least one reaction
    graph = {}
    for rid in kegg.get_all_rids():
        sparse = kegg.rid2reaction(rid).sparse
        for cid1 in sparse.keys():
            for cid2 in sparse.keys():
                if sparse[cid1] * sparse[cid2] < 0:
                    graph.setdefault(cid1, set()).add(cid2)

    queue = [355]
    cofactors = set([1,2,3,4,5,6,7,8,9,10,11,13,14,20,28,30])
    html_writer = HtmlWriter('../res/kegg_bfs.html')

    for _ in xrange(3):
        next_queue = set()
        # compounds already visited are excluded like the cofactors
        cofactors.update(queue)
        for cid in queue:
            # a compound with no opposite-side partner has no graph entry
            next_queue.update(graph.get(cid, ()))
        queue = list(next_queue.difference(cofactors))

        for cid in queue:
            try:
                html_writer.write(kegg.cid2mol(cid).ToSVG())
            except (KeggParseException, OpenBabelError):
                # no drawable structure: only the name is written
                pass
            html_writer.write(kegg.cid2name(cid))

    html_writer.close()
Beispiel #43
0
    def WriteUniqueReactionReport(self, unique_sparse_reactions,
                                  unique_nist_row_representatives,
                                  unique_data_mat, full_data_mat,
                                  cid2nH_nMg=None):
        """Report, per unique reaction, the spread of its measured energies.

        Embeds a scatter plot of the two standard deviations in the HTML
        report, writes a table of all unique reactions sorted by decreasing
        "diff" (row 2 minus row 3 of unique_data_mat), and writes the same
        data to '../res/nist_regression_unique.csv'. Reactions whose "diff"
        exceeds self.std_diff_threshold get their own page under
        '../res/prc_reactions/', linked from the table.

        Args:
            unique_sparse_reactions: the unique reactions, one per column of
                unique_data_mat.
            unique_nist_row_representatives: one object per reaction,
                providing the ref_id and ec attributes.
            unique_data_mat: matrix whose rows 0-4 are used as E(dG0),
                E(dG0'), std(dG0), std(dG0') and the number of observations.
            full_data_mat: matrix whose rows 2 and 3 give the values for the
                total standard deviations shown in the plot title.
            cid2nH_nMg: forwarded to ReverseTransformReaction.
        """
        total_std = full_data_mat[2:4, :].std(1)

        fig = plt.figure()
        plt.plot(unique_data_mat[2, :].T, unique_data_mat[3, :].T, '.')
        # raw strings: the LaTeX backslashes must reach matplotlib untouched
        plt.xlabel(r"$\sigma(\Delta_r G^\circ)$")
        plt.ylabel(r"$\sigma(\Delta_r G^{'\circ})$")
        plt.title(r'$\sigma_{total}(\Delta_r G^\circ) = %.1f$ kJ/mol, '
                  r"$\sigma_{total}(\Delta_r G^{'\circ}) = %.1f$ kJ/mol" %
                  (total_std[0, 0], total_std[1, 0]))
        self.html_writer.embed_matplotlib_figure(fig, width=640, height=480)
        logging.info('std(dG0_r) = %.1f' % total_std[0, 0])
        logging.info("std(dG'0_r) = %.1f" % total_std[1, 0])

        rowdicts = []
        for i, reaction in enumerate(unique_sparse_reactions):
            logging.debug('Analyzing unique reaction: ' + str(reaction))
            ddG0 = self.GetDissociation().ReverseTransformReaction(reaction,
                pH=7, I=0.1, pMg=10, T=298.15, cid2nH_nMg=cid2nH_nMg)

            d = {}
            d["_reaction"] = reaction.to_hypertext(show_cids=False)
            d["reaction"] = reaction.FullReactionString(show_cids=False) # no hypertext for the CSV output
            d["Reference ID"] = unique_nist_row_representatives[i].ref_id
            d["EC"] = unique_nist_row_representatives[i].ec
            d["E(" + symbol_dr_G0 + ")"] = unique_data_mat[0, i]
            d["E(" + symbol_dr_G0_prime + ")"] = unique_data_mat[1, i]
            d["E(" + symbol_dr_G0 + ")'"] = unique_data_mat[0, i] + ddG0
            d["std(" + symbol_dr_G0 + ")"] = unique_data_mat[2, i]
            d["std(" + symbol_dr_G0_prime + ")"] = unique_data_mat[3, i]
            d["diff"] = unique_data_mat[2, i] - unique_data_mat[3, i]
            d["#observations"] = "%d" % unique_data_mat[4, i]

            # Flag reactions that convert one unit of a nicotinamide cofactor:
            # +/-1 for the compound pair 3/4 (NAD/NADH), +/-2 for the pair
            # 6/5 (NADP/NADPH); the sign follows the coefficient of the first.
            flag = 0
            c_nad = reaction.sparse.get(3, 0)
            c_nadh = reaction.sparse.get(4, 0)
            c_nadp = reaction.sparse.get(6, 0)
            c_nadph = reaction.sparse.get(5, 0)
            if c_nad == 1 and c_nadh == -1:
                flag = 1
            elif c_nad == -1 and c_nadh == 1:
                flag = -1
            elif c_nadp == 1 and c_nadph == -1:
                flag = 2
            elif c_nadp == -1 and c_nadph == 1:
                flag = -2
            d["Arren Flag"] = flag

            if d["diff"] > self.std_diff_threshold:
                # this reaction gets its own analysis page
                _mkdir('../res/prc_reactions')
                link = "prc_reactions/%s.html" % reaction.name
                d["analysis"] = '<a href="%s">link</a>' % link
                reaction_html_writer = HtmlWriter(os.path.join('../res', link))
                self.AnalyzeSingleReaction(reaction,
                                           html_writer=reaction_html_writer)
            rowdicts.append(d)

        result_headers = ["E(" + symbol_dr_G0 + ")",
                          "E(" + symbol_dr_G0_prime + ")",
                          "E(" + symbol_dr_G0 + ")'",
                          "std(" + symbol_dr_G0 + ")",
                          "std(" + symbol_dr_G0_prime + ")"]
        rowdicts.sort(key=lambda x:x["diff"], reverse=True)
        self.html_writer.write_table(rowdicts, ["reaction", "Reference ID"] +
                                     result_headers + ["EC", "#observations", "analysis"],
                                     decimal=1)
        # the 'with' block guarantees the CSV is flushed and closed
        with open('../res/nist_regression_unique.csv', 'w') as csv_file:
            csv_writer = csv.DictWriter(csv_file,
                                        ["_reaction", "Reference ID", "EC", "#observations"]
                                        + result_headers + ['Arren Flag'],
                                        extrasaction='ignore')
            csv_writer.writeheader()
            csv_writer.writerows(rowdicts)
Beispiel #44
0
    #print m.ToFormat('mol')
    #print m.ToFormat('mol2')
    #print m.ToFormat('smi')
    #print m.ToFormat('inchi')
    #print m.ToFormat('sdf')

    diss_table = Molecule._GetDissociationTable('C(=O)(O)CN',
                                                fmt='smiles',
                                                mid_pH=default_pH,
                                                min_pKa=0,
                                                max_pKa=14,
                                                T=default_T)
    print "glycine\n", diss_table

    html_writer = HtmlWriter('../res/molecule.html')
    from pygibbs.kegg import Kegg
    kegg = Kegg.getInstance()
    html_writer.write('<h1>pKa estimation using ChemAxon</h1>\n')
    for cid in [41]:
        m = kegg.cid2mol(cid)
        html_writer.write("<h2>C%05d : %s</h2>\n" % (cid, str(m)))
        diss_table = m.GetDissociationTable()
        pmap = diss_table.GetPseudoisomerMap()
        diss_table.WriteToHTML(html_writer)
        pmap.WriteToHTML(html_writer)
        html_writer.write("</p>\n")
        #print m.GetDissociationConstants()
        #print m.GetMacrospecies()

    #obmol = m.ToOBMol()
def main():
    """Fit formation energies by Pseudoisomeric Reactant Contribution (PRC).

    The stoichiometric matrix S, the reaction energies dG0 and the compound
    list come from NistRegression.ReverseTransform().  They are cached in
    '../res/prc_CID.txt', '../res/prc_S.txt' and '../res/prc_dG0.txt'; when
    the cached S file already exists all three files are loaded instead of
    being recomputed.

    A small set of compounds (the "anchors" in fixed_cids) keep a fixed
    formation energy; the other values are solved for with
    LinearRegression.LeastSquaresWithFixedPoints().

    Outputs:
      * '../res/regression_fast.html' -- the HTML report
      * '../res/prc_results.csv'      -- per-compound results
      * '../res/prc_latex.txt'        -- the same table formatted by
                                         latex.table2LaTeX()

    Raises:
        ValueError: if one of the anchored compounds is missing from the
            compound list (raised by list.index()).
    """
    kegg = Kegg.getInstance()
    prefix = "../res/prc_"

    fixed_cids = {}  # a dictionary from CID to pairs of (nH, dG0)

    # Alberty formation energies directly measured, linearly independent:
    fixed_cids[1] = (2, -237.19)  # H2O
    fixed_cids[9] = (1, -1096.1)  # HPO3(-2)
    fixed_cids[14] = (4, -79.31)  # NH4(+1)
    fixed_cids[59] = (0, -744.53)  # SO4(-2)
    fixed_cids[288] = (1, -586.77)  # HCO3(-1)

    # Alberty zeros:
    fixed_cids[3] = (26, 0.0)  # NAD(ox)
    fixed_cids[10] = (32, 0.0)  # CoA
    fixed_cids[127] = (30, 0.0)  # glutathione(ox)
    fixed_cids[376] = (28, 0.0)  # retinal(ox)

    # Directly measured values
    fixed_cids[4] = (27, 22.65)  # NAD(red) -- relative to NAD(ox)
    fixed_cids[212] = (13, -194.5)  # adenosine
    # fixed_cids[294] = (12, -409.2) # inosine - linearly dependent on other 'anchors'

    # Alberty zeros which are not in NIST:
    # fixed_cids[524] = ( 0, 0.0) # cytochrome c(ox)
    # fixed_cids[16]  = (31, 0.0) # FAD(ox)
    # fixed_cids[139] = ( 0, 0.0) # ferredoxin(ox)
    # fixed_cids[61]  = (19, 0.0) # FMN(ox)
    # fixed_cids[343] = ( 0, 0.0) # thioredoxin(ox)
    # fixed_cids[399] = (90, 0.0) # ubiquinone(ox)

    # Reference values: the most abundant pseudoisomer of every compound in
    # Alberty's table at the default conditions.
    public_db = SqliteDatabase("../data/public_data.sqlite")
    alberty = PsuedoisomerTableThermodynamics.FromDatabase(
        public_db, "alberty_pseudoisomers", label=None, name="Alberty"
    )
    alberty_cid2dG0 = {}
    alberty_cid2nH = {}
    for cid in alberty.get_all_cids():
        pmap = alberty.cid2PseudoisomerMap(cid)
        dG0, _dG0_tag, nH, _z, _nMg = pmap.GetMostAbundantPseudoisomer(
            pH=default_pH, I=default_I, pMg=default_pMg, T=default_T
        )
        alberty_cid2nH[cid] = nH
        alberty_cid2dG0[cid] = dG0

    if not os.path.exists(prefix + "S.txt"):
        db = SqliteDatabase("../res/gibbs.sqlite")
        nist_regression = NistRegression(db)

        # Choose nH per compound: anchors first, then Alberty, then the
        # dissociation table, and 0 as the last resort.
        cid2nH = {}
        for cid in nist_regression.nist.GetAllCids():
            if cid in fixed_cids:
                cid2nH[cid] = fixed_cids[cid][0]
            elif cid in alberty_cid2nH:
                cid2nH[cid] = alberty_cid2nH[cid]
            else:
                tmp = nist_regression.dissociation.GetMostAbundantPseudoisomer(
                    cid, pH=default_pH, I=default_I, pMg=default_pMg, T=default_T
                )
                if tmp is not None:
                    cid2nH[cid] = tmp[0]
                else:
                    logging.warning(
                        "The most abundant pseudoisomer of %s (C%05d) "
                        "cannot be resolved. Using nH = 0." % (kegg.cid2name(cid), cid)
                    )
                    cid2nH[cid] = 0

        # nist_regression.std_diff_threshold = 2.0 # the threshold over which to print an analysis of a reaction
        # nist_regression.nist.T_range = None#(273.15 + 24, 273.15 + 40)
        S, dG0, cids = nist_regression.ReverseTransform(cid2nH=cid2nH)

        # export the raw data matrices to text files
        C = np.array([[cid, cid2nH.get(cid, 0)] for cid in cids])
        np.savetxt(prefix + "CID.txt", C, fmt="%d", delimiter=",")
        np.savetxt(prefix + "S.txt", S, fmt="%g", delimiter=",")
        np.savetxt(prefix + "dG0.txt", dG0, fmt="%.2f", delimiter=",")
    else:
        # Re-use the matrices exported by a previous run.
        C = np.loadtxt(prefix + "CID.txt", delimiter=",")
        cids = [int(cid) for cid in C[:, 0]]
        cid2nH = {}
        for i, cid in enumerate(cids):
            cid2nH[cid] = int(C[i, 1])
        S = np.loadtxt(prefix + "S.txt", delimiter=",")
        dG0 = np.loadtxt(prefix + "dG0.txt", delimiter=",")
        dG0 = np.reshape(dG0, (dG0.shape[0], 1))  # column vector

    html_writer = HtmlWriter("../res/regression_fast.html")
    html_writer.write("<h1>Pseudoisomeric Reactant Contributions</h1>\n")
    html_writer.write("<p>The stoichiometric matrix (S):")
    html_writer.insert_toggle(start_here=True)
    stoichiometric_matrix2html(html_writer, S, cids)
    html_writer.div_end()
    html_writer.write("</p>")

    # Map the column index of every anchored compound to its fixed dG0.
    index2value = {}
    for cid, (_nH, dG0_fixed) in fixed_cids.items():
        index2value[cids.index(cid)] = dG0_fixed

    dG0_f, _K = LinearRegression.LeastSquaresWithFixedPoints(S, dG0, index2value)
    cid2dG0 = {}
    for i, cid in enumerate(cids):
        cid2dG0[cid] = dG0_f[i]

    # Calculate the Kernel of the reduced stoichiometric matrix (after removing
    # the columns of the fixed compounds).
    cids_red = [cid for cid in cids if cid not in fixed_cids]
    index_red = [i for i in xrange(len(cids)) if i not in index2value]
    S_red = S[:, index_red]
    K_red = LinearRegression.Kernel(S_red)

    # print "Reduced Stoichiometric Matrix:"
    # print matrix2string(S_red, cids_red, kegg)
    # print '-'*80

    # Find all CIDs that are completely determined and do not depend on any
    # free variable. In other words, all zero-columns in the reduced kernel.
    determined_indices = np.where(np.sum(abs(K_red), 0) < 1e-10)[0]
    determined_cids = set(cids_red[i] for i in determined_indices)

    dict_list = []
    plot_data = []  # (Alberty dG0, PRC dG0, compound name) for non-anchors
    for cid in cids:
        d = {
            "CID": "C%05d" % cid,
            "Compound": kegg.cid2name(cid),
            "nH": "%d" % cid2nH[cid],
            "dG0 (PRC)": "%.1f" % cid2dG0[cid],
        }
        if cid in alberty_cid2dG0:
            d["dG0 (Alberty)"] = "%.1f" % alberty_cid2dG0[cid]
            if cid not in fixed_cids:
                plot_data.append((alberty_cid2dG0[cid], cid2dG0[cid], kegg.cid2name(cid)))
        else:
            d["dG0 (Alberty)"] = ""

        if cid in fixed_cids:
            d["Depends on"] = "anchored"
        elif cid in determined_cids:
            d["Depends on"] = "fixed compounds"
        else:
            d["Depends on"] = "kernel dimensions"

        dict_list.append(d)

    dict_list.sort(key=lambda row: (row["Depends on"], row["CID"]))
    html_writer.write("<p>Formation energies determined by the linear constraints:")
    html_writer.insert_toggle(start_here=True)
    html_writer.write('<font size="1">')
    html_writer.write_table(
        dict_list, headers=["#", "Compound", "CID", "nH", "dG0 (PRC)", "dG0 (Alberty)", "Depends on"]
    )
    html_writer.write("</font>")
    html_writer.div_end()
    html_writer.write("</p>")

    # Plot a comparison between PRC and Alberty formation energies
    fig = plt.figure(figsize=(8, 8), dpi=80)
    plt.plot([p[0] for p in plot_data], [p[1] for p in plot_data], "b.", figure=fig)
    for alberty_value, prc_value, name in plot_data:
        plt.text(alberty_value, prc_value, name, fontsize=6)
    # Raw strings: the backslashes belong to the TeX markup.
    plt.xlabel(r"Alberty $\Delta_f G^\circ$")
    plt.ylabel(r"PRC $\Delta_f G^\circ$")
    html_writer.write("<p>Plot comparing PRC and Alberty results:")
    html_writer.insert_toggle(start_here=True)
    html_writer.embed_matplotlib_figure(fig)
    html_writer.div_end()
    html_writer.write("</p>")

    K_sparse = SparseKernel(S_red).Solve()
    html_writer.write("<p>The sparse null-space of the reduced stoichiometric matrix:")
    html_writer.insert_toggle(start_here=True)
    stoichiometric_matrix2html(html_writer, K_sparse, cids_red)
    html_writer.div_end()
    html_writer.write("</p>")

    # Express every non-anchored formation energy as its fitted value plus a
    # combination of the kernel vectors V_i it still depends on.
    dict_list = []
    index2string_html = dict((i, "V<sub>%02d</sub>" % i) for i in xrange(K_sparse.shape[0]))
    index2string = dict((i, "V%d" % i) for i in xrange(K_sparse.shape[0]))
    for i, cid in enumerate(cids_red):
        d = {}
        d["KEGG ID"] = '<a href="%s">C%05d</a>' % (kegg.cid2link(cid), cid)
        d["KEGG ID plain"] = "C%05d" % cid
        d["Compound"] = kegg.cid2name(cid)
        d["nH"] = "%d" % cid2nH[cid]

        if cid in alberty_cid2dG0:
            d["dG0 (Alberty)"] = "%.1f" % alberty_cid2dG0[cid]
        else:
            d["dG0 (Alberty)"] = ""

        d["dG0 (PRC)"] = "%.1f" % cid2dG0[cid]
        d["dG0 (PRC) plain"] = "%.1f" % cid2dG0[cid]

        # Sort key: which kernel vectors touch this compound, last one first.
        indic = np.where(abs(K_sparse[:, i]) > 1e-10, 1, 0).tolist()
        indic.reverse()
        d["order_key"] = indic
        if mlab.rms_flat(K_sparse[:, i]) > 1e-10:
            d["dG0 (PRC)"] += " + (" + vector2string(K_sparse[:, i], index2string_html) + ")"
            d["dG0 (PRC) plain"] += " + (" + vector2string(K_sparse[:, i], index2string) + ")"
        dict_list.append(d)

    dict_list.sort(key=lambda row: (row["order_key"], row["KEGG ID plain"]))

    # Export the results to CSV; the 'with' block guarantees the file is
    # flushed and closed even if a row fails to serialize.
    with open("../res/prc_results.csv", "w") as csv_file:
        csv_writer = csv.writer(csv_file)
        csv_writer.writerow(["KEGG ID", "Compound", "nH", "dG0 (PRC)", "dG0 (Alberty)"])
        for d in dict_list:
            csv_writer.writerow(
                [d["KEGG ID plain"], d["Compound"], d["nH"], d["dG0 (PRC) plain"], d["dG0 (Alberty)"]]
            )

    html_writer.write("<p>All formation energies as a function of the free variables:")
    html_writer.insert_toggle(start_here=True)
    html_writer.write('<font size="1">')
    html_writer.write_table(dict_list, headers=["#", "KEGG ID", "Compound", "nH", "dG0 (PRC)", "dG0 (Alberty)"])
    html_writer.write("</font>")
    html_writer.div_end()
    html_writer.write("</p>")

    with open("../res/prc_latex.txt", "w") as fp:
        fp.write(
            latex.table2LaTeX(
                dict_list, headers=["#", "KEGG ID plain", "Compound", "nH", "dG0 (PRC) plain", "dG0 (Alberty)"]
            )
        )

    # Finalize the report explicitly instead of relying on garbage collection.
    html_writer.close()
def main():
    """Compare formation-energy estimators against the NIST reaction data.

    The report is written to '../res/nist/report.html'.  The body is a
    manually switched chain of experiments: only the first branch
    (``if True``) runs, every ``elif False`` branch is a disabled alternative
    that is kept so it can be re-enabled by editing the condition.

    Active branch: load the NIST data using Alberty's table and verify the
    Alberty, Hatzimanikatis and group-contribution ("Milo") estimators.
    """
    db = database.SqliteDatabase('../res/gibbs.sqlite')
    html_writer = HtmlWriter("../res/nist/report.html")
    gc = GroupContribution(db)
    gc.override_gc_with_measurements = True
    gc.init()
    grad = GradientAscent(gc)
    nist = Nist(db, html_writer, gc.kegg())
    nist.FromDatabase()
    alberty = Alberty()
    hatzi = Hatzi()

    if True:
        grad.load_nist_data(nist, alberty, skip_missing_reactions=False, T_range=(298, 314))
        grad.verify_results("Alberty", alberty, html_writer)

        #grad.write_pseudoisomers("../res/nist/nist_dG0_f.csv")

        #html_writer.write("<h2>Using Group Contribution (Hatzimanikatis' implementation)</h2>")
        #html_writer.write("<h3>Correlation with the reduced NIST database (containing only compounds that appear in Alberty's list)</h3>")
        #logging.info("calculate the correlation between Hatzimanikatis' predictions and the reduced NIST database")
        #grad.verify_results("Hatzimanikatis_Reduced", hatzi, html_writer)

        #grad.load_nist_data(nist, hatzi, skip_missing_reactions=True, T_range=(298, 314))
        grad.verify_results("Hatzimanikatis", hatzi, html_writer)

        #grad.load_nist_data(nist, gc, skip_missing_reactions=True, T_range=(298, 314))
        grad.verify_results("Milo", gc, html_writer)
    elif False:
        # Run the gradient ascent algorithm, where the starting point is the same file used for training the GC algorithm
        grad.load_dG0_data("../data/thermodynamics/dG0.csv")
        # load the data for the anchors (i.e. compounds whose dG0 should not be changed - usually their value will be 0).
        grad.anchors = grad.load_dG0_data("../data/thermodynamics/nist_anchors.csv")
        grad.load_nist_data(nist, grad, skip_missing_reactions=True)
        print("Training %d compounds using %d reactions: " % (len(grad.cid2pmap_dict), len(grad.data)))
        grad.hill_climb(max_i=20000)
        grad.save_energies(grad.gc.comm, "gradient_cid2prm")
        grad.verify_results("gradient1")

    elif False:
        # Run the gradient ascent algorithm, where the starting point is Alberty's table from (Mathematica 2006)
        grad.load_nist_data(nist, alberty, skip_missing_reactions=True)
        print("Training %d compounds using %d reactions: " % (len(grad.cid2pmap_dict), len(grad.data)))
        grad.cid2pmap_dict = alberty.cid2pmap_dict
        grad.hill_climb(max_i=20000)
        grad.save_energies(grad.gc.comm, "gradient_cid2prm")
        grad.verify_results("gradient2")

    elif False:
        # Run the gradient ascent algorithm, where the starting point is Alberty's table from (Mathematica 2006)
        # Use DETERMINISTIC gradient ascent
        grad.load_nist_data(nist, alberty, skip_missing_reactions=True, T_range=(24 + 273.15, 40 + 273.15))
        print("Training %d compounds using %d reactions: " % (len(grad.cid2pmap_dict), len(grad.data)))
        grad.cid2pmap_dict = alberty.cid2pmap_dict
        grad.deterministic_hill_climb(max_i=200)
        grad.save_energies(grad.gc.comm, "gradient_cid2prm")
        grad.verify_results("gradient_deterministic")

    elif False:
        # Run the gradient ascent algorithm, where the starting point arbitrary (predict all of the NIST compounds)
        grad = GradientAscent(gc)
        grad.load_nist_data(nist, skip_missing_reactions=False)
        print("Training %d compounds using %d reactions: " % (len(grad.cid2pmap_dict), len(grad.data)))
        grad.hill_climb(max_i=20000)
        grad.save_energies(grad.gc.comm, "gradient_cid2prm")
        grad.verify_results("gradient3")

    elif False: # Use Alberty's table from (Mathematica 2006) to calculate the dG0 of all possible reactions in KEGG
        grad = GradientAscent(gc)
        grad.cid2pmap_dict = alberty.cid2pmap_dict
        (pH, I, T) = (7, 0, 300)
        counter = 0
        for rid in grad.kegg.get_all_rids():
            sparse_reaction = grad.kegg.rid2sparse_reaction(rid)
            try:
                dG0 = grad.reaction_to_dG0(sparse_reaction, pH, I, T)
                print("R%05d: dG0_r = %.2f [kJ/mol]" % (rid, dG0))
                counter += 1
            except MissingCompoundFormationEnergy:
                # Deliberately skipped: reactions with a compound that has no
                # formation energy simply do not count.
                #print "R%05d: missing formation energy of C%05d" % (rid, e.cid)
                pass
        print("Managed to calculate the dG0 of %d reactions" % counter)

    elif False:
        # Dump the pseudoisomers of every compound that appears in NIST, both
        # as an HTML table (with structure drawings) and as a CSV file.
        util._mkdir("../res/nist/fig")
        # The 'with' block closes (and flushes) the CSV file when done.
        with open("../res/nist/pseudoisomers.csv", "w") as csv_file:
            csv_writer = csv.writer(csv_file)

            cid_set = set()
            for row in nist.data:
                sparse_reaction = row['sparse']
                cid_set.update(sparse_reaction.keys())

            html_writer.write("<table border=1>\n")
            for cid in sorted(cid_set):
                html_writer.write("  <tr><td>C%05d</td><td>%s</td><td>" % (cid, grad.kegg.cid2name(cid)))
                try:
                    mol = grad.kegg.cid2mol(cid)
                    img_fname = '../res/nist/fig/C%05d.png' % cid
                    # NOTE(review): the image is referenced before it is
                    # drawn; presumably embed_img only writes a link -- confirm.
                    html_writer.embed_img(img_fname, "C%05d" % cid)
                    mol.draw(show=False, filename=img_fname)
                except (AssertionError, KeggParseException) as e:
                    html_writer.write("WARNING: cannot draw C%05d - %s" % (cid, str(e)))
                html_writer.write("</td><td>")
                if cid in alberty.cid2pmap_dict:
                    for (nH, z) in alberty.cid2pmap_dict[cid].keys():
                        html_writer.write("(nH=%d, z=%d)<br>" % (nH, z))
                        csv_writer.writerow((cid, nH, z))
                else:
                    # Not in Alberty's table: fall back to the KEGG values.
                    nH = grad.kegg.cid2num_hydrogens(cid)
                    z = grad.kegg.cid2charge(cid)
                    html_writer.write("unknown pseudoisomers<br>")
                    html_writer.write("(nH=%d, z=%d)" % (nH, z))
                    csv_writer.writerow((cid, nH, z))

                html_writer.write("</td></tr>\n")
            html_writer.write("</table>\n")
    html_writer.close()
Beispiel #47
0
def main():
    """Write the NIST validation report to '../res/nist/report.html'.

    For each estimator in a fixed list the function calls
    Nist.verify_results() (which returns the number of estimations and the
    RMSE) and computes how many reactions of three reaction sets -- all
    balanced KEGG reactions, the Feist reactions and the unique NIST
    reactions -- the estimator covers.  The results are collected into one
    summary table at the end of the report.
    """
    html_writer = HtmlWriter("../res/nist/report.html")
    estimators = LoadAllEstimators()
    nist = Nist()
    nist.T_range = (273.15 + 24, 273.15 + 40)
    #nist.override_I = 0.25
    #nist.override_pMg = 14.0
    #nist.override_T = 298.15

    html_writer.write('<p>\n')
    html_writer.write("Total number of reaction in NIST: %d</br>\n" %
                      len(nist.data))
    html_writer.write("Total number of reaction in range %.1fK < T < %.1fK: %d</br>\n" % \
                      (nist.T_range[0], nist.T_range[1], len(nist.SelectRowsFromNist())))
    html_writer.write('</p>\n')

    # Reaction sets used for the coverage statistics.
    reactions = {}
    reactions['KEGG'] = []
    for reaction in Kegg.getInstance().AllReactions():
        try:
            reaction.Balance(balance_water=True, exception_if_unknown=True)
            reactions['KEGG'].append(reaction)
        except (KeggReactionNotBalancedException, KeggParseException,
                OpenBabelError):
            # Reactions that cannot be balanced are left out on purpose.
            pass

    reactions['FEIST'] = Feist.FromFiles().reactions
    reactions['NIST'] = nist.GetUniqueReactionSet()

    # Optional pairwise comparisons; the list is empty unless edited.
    pairs = []
    #pairs += [('hatzi_gc', 'UGC')], ('PGC', 'PRC'), ('alberty', 'PRC')]
    for t1, t2 in pairs:
        logging.info('Writing the NIST report for %s vs. %s' %
                     (estimators[t1].name, estimators[t2].name))
        html_writer.write('<p><b>%s vs. %s</b> ' %
                          (estimators[t1].name, estimators[t2].name))
        html_writer.insert_toggle(start_here=True)
        two_way_comparison(html_writer=html_writer,
                           thermo1=estimators[t1],
                           thermo2=estimators[t2],
                           reaction_list=reactions['FEIST'],
                           name='%s_vs_%s' % (t1, t2))
        html_writer.div_end()
        html_writer.write('</p>')

    if False:
        estimators['alberty'].CompareOverKegg(
            html_writer,
            other=estimators['PRC'],
            fig_name='kegg_compare_alberty_vs_nist')

    # First table row: the size of every reaction set.
    rowdicts = []
    rowdict = {'Method': 'Total'}
    for db_name, reaction_list in reactions.iteritems():
        rowdict[db_name + ' coverage'] = len(reaction_list)
    rowdicts.append(rowdict)

    for name in ['UGC', 'PGC', 'PRC', 'alberty', 'merged', 'hatzi_gc']:
        thermo = estimators[name]
        logging.info('Writing the NIST report for %s' % thermo.name)
        html_writer.write('<p><b>%s</b> ' % thermo.name)
        html_writer.insert_toggle(start_here=True)
        num_estimations, rmse = nist.verify_results(html_writer=html_writer,
                                                    thermodynamics=thermo,
                                                    name=name)
        html_writer.div_end()
        html_writer.write('N = %d, RMSE = %.1f</p>\n' %
                          (num_estimations, rmse))
        logging.info('N = %d, RMSE = %.1f' % (num_estimations, rmse))

        rowdict = {
            'Method': thermo.name,
            'RMSE (kJ/mol)': "%.1f (N=%d)" % (rmse, num_estimations)
        }
        for db_name, reaction_list in reactions.iteritems():
            n_covered = thermo.CalculateCoverage(reaction_list)
            n_total = len(reaction_list)
            # An empty reaction set (e.g. nothing could be balanced) is
            # reported as 0% instead of raising ZeroDivisionError.
            percent = n_covered * 100.0 / n_total if n_total else 0.0
            rowdict[db_name +
                    " coverage"] = "%.1f%% (%d)" % (percent, n_covered)
            logging.info(db_name + " coverage = %.1f%%" % percent)
        rowdicts.append(rowdict)

    headers = ['Method', 'RMSE (kJ/mol)'] + \
        [db_name + ' coverage' for db_name in reactions]
    html_writer.write_table(rowdicts, headers=headers)

    # Finalize the report explicitly instead of relying on garbage collection.
    html_writer.close()
Beispiel #48
0
def AnalyzePHGradient(pathway_file, output_prefix, thermo, conc_range):
    """Compute the OBD of every pathway over a range of pH values.

    Outputs:
      * '<output_prefix>.html' -- report of the initial test run plus a
        summary figure of OBD vs. pH
      * '<output_prefix>.csv'  -- one row per pH, one column per pathway
      * '<output_prefix>/<entry>.csv' -- per pathway, the reaction prices
        at every pH (rows are skipped when the prices are a plain float)

    Args:
        pathway_file: path handed to KeggFile2PathwayList().
        output_prefix: common prefix of all output files.
        thermo: thermodynamic estimator handed to GetAllOBDs().
        conc_range: range specification parsed by ParseConcentrationRange();
            its values are used as the pH values to scan.
    """
    pathway_list = KeggFile2PathwayList(pathway_file)
    pathway_names = [entry for (entry, _) in pathway_list]
    html_writer = HtmlWriter('%s.html' % output_prefix)

    # run once just to make sure that the pathways are all working:
    logging.info("testing all pathways with default pH")
    data = GetAllOBDs(pathway_list,
                      html_writer,
                      thermo,
                      pH=None,
                      section_prefix="test",
                      balance_water=True,
                      override_bounds={})

    # Every CSV file opened below is tracked here so that all of them are
    # flushed and closed even when one of the computations fails.
    open_files = []
    try:
        summary_file = open('%s.csv' % output_prefix, 'w')
        open_files.append(summary_file)
        csv_output = csv.writer(summary_file)
        csv_output.writerow(['pH'] + pathway_names)

        util._mkdir(output_prefix)
        shadow_csvs = {}
        for d in data:
            path = '%s/%s.csv' % (output_prefix, d['entry'])
            shadow_file = open(path, 'w')
            open_files.append(shadow_file)
            shadow_csvs[d['entry']] = csv.writer(shadow_file)
            shadow_csvs[d['entry']].writerow(['pH'] + d['rids'])

        pH_vec = ParseConcentrationRange(conc_range)
        obd_mat = []
        for pH in pH_vec.flat:
            logging.info("pH = %.1f" % (pH))
            data = GetAllOBDs(pathway_list,
                              html_writer=None,
                              thermo=thermo,
                              pH=pH,
                              section_prefix="",
                              balance_water=True,
                              override_bounds={})
            obds = [d['OBD'] for d in data]
            obd_mat.append(obds)
            csv_output.writerow([data[0]['pH']] + obds)

            for d in data:
                # Exact type check kept on purpose: only a plain float is
                # skipped, anything else is expected to offer '.flat'.
                if type(d['reaction prices']) != types.FloatType:
                    prices = list(d['reaction prices'].flat)
                    shadow_csvs[d['entry']].writerow([pH] + prices)
    finally:
        for csv_file in open_files:
            csv_file.close()

    # one row per pH value, one column per pathway
    obd_mat = np.matrix(obd_mat)

    fig = plt.figure(figsize=(6, 6), dpi=90)
    colormap = color.ColorMap(pathway_names)
    for i, name in enumerate(pathway_names):
        plt.plot(pH_vec, obd_mat[:, i], '-', color=colormap[name], figure=fig)
    plt.title("OBD vs. pH", figure=fig)
    plt.ylim(0, np.max(obd_mat.flat))
    plt.xlabel('pH', figure=fig)
    plt.ylabel('Optimized Distributed Bottleneck [kJ/mol]', figure=fig)
    plt.legend(pathway_names)
    # The closing tag now matches the opening <h2>.
    html_writer.write('<h2>Summary figure</h2>\n')
    html_writer.embed_matplotlib_figure(fig)

    html_writer.close()
Beispiel #49
0
def analyze(prefix, thermo):
    """Run the Pareto analysis of a pathway file over several pH values.

    Reads '../data/thermodynamics/<prefix>.txt' and writes the report to
    '../res/<prefix>.html'.  For every (pH, [CO2]) combination the CO2 and
    carbonate concentrations (compounds 11 and 288) are pinned via
    override_bounds and pareto() is called; the collected results are shown
    in a summary figure that is embedded twice: once with all values and
    once with the axes restricted to x <= 0 and y >= 0.

    Args:
        prefix: base name of the input file and of the outputs.
        thermo: thermodynamic estimator; must provide
            GetTransfromedKeggReactionEnergies() and is passed on to pareto().
    """
    kegg_file = ParsedKeggFile.FromKeggFile('../data/thermodynamics/%s.txt' % prefix)
    html_writer = HtmlWriter('../res/%s.html' % prefix)

    co2_hydration = Reaction.FromFormula("C00011 + C00001 => C00288")

    #pH_vec = np.arange(5, 9.001, 0.5)
    #pH_vec = np.array([6, 7, 8])
    pH_vec = np.array([6, 7, 8]) # this needs to be fixed so that the txt file will set the pH
    #co2_conc_vec = np.array([1e-5, 1e-3])
    co2_conc_vec = np.array([1e-5])
    data_mat = []
    override_bounds = {}

    for pH in pH_vec.flat:
        co2_hydration_dG0_prime = float(thermo.GetTransfromedKeggReactionEnergies([co2_hydration], pH=pH))
        for co2_conc in co2_conc_vec.flat:
            # Carbonate concentration derived from [CO2] and the transformed
            # energy of the hydration reaction at this pH.
            carbonate_conc = co2_conc * np.exp(-co2_hydration_dG0_prime / (R*default_T))
            #print "[CO2] = %g, [carbonate] = %g, pH = %.1f, I = %.2fM" % (co2_conc, carbonate_conc, pH, I)
            override_bounds[11] = (co2_conc, co2_conc)
            override_bounds[288] = (carbonate_conc, carbonate_conc)

            section_prefix = 'pH_%g_CO2_%g' % (pH, co2_conc*1000)
            section_title = 'pH = %g, [CO2] = %g mM' % (pH, co2_conc*1000)
            html_writer.write('<h1 id="%s_title">%s</h1>\n' %
                              (section_prefix, section_title))
            html_writer.write_ul(['<a href="#%s_tables">Individual result tables</a>' % section_prefix,
                                  '<a href="#%s_summary">Summary table</a>' % section_prefix,
                                  '<a href="#%s_figure">Summary figure</a>' % section_prefix])

            data, labels = pareto(kegg_file, html_writer, thermo,
                pH=pH, section_prefix=section_prefix, balance_water=True,
                override_bounds=override_bounds)
            data_mat.append(data)

    # axis 0: (pH, [CO2]) condition, axis 1: pathway,
    # axis 2: (x value, y value) as plotted below
    data_mat = np.array(data_mat)
    if data_mat.shape[0] == 1:
        # A single condition: scatter plot with one label per pathway.
        pareto_fig = plt.figure(figsize=(6, 6), dpi=90)
        plt.plot(data_mat[0, :, 0], data_mat[0, :, 1], '.', figure=pareto_fig)
        for i in xrange(data_mat.shape[1]):
            # Read from data_mat rather than from the variable left over by
            # the loop above; points with a negative y value are greyed out.
            if data_mat[0, i, 1] < 0:
                text_color = 'grey'
            else:
                text_color = 'black'
            plt.text(data_mat[0, i, 0], data_mat[0, i, 1], labels[i],
                     ha='left', va='bottom',
                     fontsize=8, color=text_color, figure=pareto_fig)
        plt.title(section_title, figure=pareto_fig)
    else:
        # Several conditions: one trajectory per pathway, with the first and
        # the last pH value printed at its two ends.
        pareto_fig = plt.figure(figsize=(10, 10), dpi=90)
        for i in xrange(data_mat.shape[1]):
            plt.plot(data_mat[:, i, 0], data_mat[:, i, 1], '-', figure=pareto_fig)
            plt.text(data_mat[0, i, 0], data_mat[0, i, 1], '%g' % pH_vec[0],
                     ha='center', fontsize=6, color='black', figure=pareto_fig)
            plt.text(data_mat[-1, i, 0], data_mat[-1, i, 1], '%g' % pH_vec[-1],
                     ha='center', fontsize=6, color='black', figure=pareto_fig)
        plt.legend(labels, loc='upper right')
        plt.title('Pareto', figure=pareto_fig)

    plt.xlabel('Optimal Energetic Efficiency [kJ/mol]', figure=pareto_fig)
    plt.ylabel('Optimized Distributed Bottleneck [kJ/mol]', figure=pareto_fig)
    # The anchor uses the prefix of the last section processed above; the
    # closing tag now matches the opening <h2>.
    html_writer.write('<h2 id="%s_figure">Summary figure</h2>\n' % section_prefix)

    # plot the Pareto figure showing all values (including infeasible)
    html_writer.embed_matplotlib_figure(pareto_fig, name=prefix + '_0')

    # set axes to hide infeasible pathways and focus on feasible ones
    pareto_fig.axes[0].set_xlim(None, 0)
    pareto_fig.axes[0].set_ylim(0, None)
    html_writer.embed_matplotlib_figure(pareto_fig, name=prefix + '_1')

    html_writer.close()