Beispiel #1
0
def AnalyzeConcentrationGradient(prefix,
                                 thermo,
                                 csv_output_fname,
                                 cid=13):  # default compound is PPi
    """Scan the concentration of one compound and plot the resulting OBDs.

    The pathways are read from '../data/thermodynamics/<prefix>.txt'. The
    concentration of compound `cid` is fixed to each value of a logarithmic
    grid, `pareto` is run for every value, and a summary figure is written
    to '../res/<prefix>.html'.

    Args:
        prefix: base name of the input KEGG file and of the HTML report.
        thermo: thermodynamics object; `thermo.kegg`, `thermo.I` and
            `thermo.T` are read here and the object is passed to `pareto`.
        csv_output_fname: path of a CSV file that receives one row per
            concentration, or a false value to skip the CSV output.
        cid: KEGG compound ID whose concentration is scanned.
    """
    compound_name = thermo.kegg.cid2name(cid)
    kegg_file = ParsedKeggFile.FromKeggFile('../data/thermodynamics/%s.txt' %
                                            prefix)
    html_writer = HtmlWriter('../res/%s.html' % prefix)
    null_html_writer = NullHtmlWriter()

    # The CSV report is optional; keep the handle so that it can be closed.
    csv_file = open(csv_output_fname, 'w') if csv_output_fname else None
    try:
        csv_output = None
        if csv_file is not None:
            csv_output = csv.writer(csv_file)
            csv_output.writerow(['pH', 'I', 'T', '[C%05d]' % cid] +
                                kegg_file.entries())

        pH_vec = np.array(
            [7])  # this needs to be fixed so that the txt file will set the pH
        # logarithmic scale from 10^-2 M (10 mM) down to 10^-6 M (1 uM)
        conc_vec = 10**(-np.arange(2, 6.0001, 0.25))
        override_bounds = {}

        fig = plt.figure(figsize=(6, 6), dpi=90)
        legend = []
        for pH in pH_vec.flat:
            obd_vec = []
            for conc in conc_vec.flat:
                # pin the compound to a single concentration (lower == upper)
                override_bounds[cid] = (conc, conc)
                logging.info("pH = %g, [%s] = %.1e M" %
                             (pH, compound_name, conc))
                data, labels = pareto(kegg_file,
                                      null_html_writer,
                                      thermo,
                                      pH=pH,
                                      section_prefix="",
                                      balance_water=True,
                                      override_bounds=override_bounds)
                obd_vec.append(data[:, 1])
                # only write a row when a CSV file was requested
                if csv_output is not None:
                    csv_output.writerow([pH, thermo.I, thermo.T, conc] +
                                        list(data[:, 1].flat))
            # one row per scanned concentration; the columns follow
            # data[:, 1] (presumably one per pathway - TODO confirm)
            obd_mat = np.matrix(obd_vec)
            plt.plot(conc_vec, obd_mat, '.-', figure=fig)
            legend += ['%s, pH = %g' % (l, pH) for l in labels]
    finally:
        if csv_file is not None:
            csv_file.close()

    plt.title("ODB vs. [%s] (I = %gM, T = %gK)" %
              (compound_name, thermo.I, thermo.T),
              figure=fig)
    plt.xscale('log')
    plt.xlabel('Concentration of %s [M]' % thermo.kegg.cid2name(cid),
               figure=fig)
    plt.ylabel('Optimized Distributed Bottleneck [kJ/mol]', figure=fig)
    plt.legend(legend)
    # the heading is an <h2>, so it must be closed with </h2>
    html_writer.write('<h2 id="figure_%s">Summary figure</h2>\n' % prefix)
    html_writer.embed_matplotlib_figure(fig, name=prefix)

    html_writer.close()
Beispiel #2
0
def main():
    """Verify the formation energies of the selected estimators against NIST.

    The report is written to '../res/formation_resolve.html'.
    """
    report = HtmlWriter("../res/formation_resolve.html")
    all_estimators = LoadAllEstimators()
    selected_names = ['alberty']
    for estimator_name in selected_names:
        estimator = all_estimators[estimator_name]
        # a fresh Nist object is created for every estimator
        Nist().verify_formation(html_writer=report,
                                thermodynamics=estimator,
                                name=estimator_name)
    report.close()
Beispiel #3
0
def AnalyzePareto(pathway_file, output_prefix, thermo, pH=None):
    """Run the OBD analysis on all pathways and plot OBD vs. average dG.

    Outputs:
      * '<output_prefix>.html' - analysis report ending with the figure,
      * '<output_prefix>.xls' - one sheet per pathway listing, for every
        reaction, its formula, flux, delta_r G' and shadow price.

    Args:
        pathway_file: input passed to KeggFile2PathwayList.
        output_prefix: common prefix of the two output files.
        thermo: thermodynamics object handed to GetAllOBDs.
        pH: pH handed to GetAllOBDs (None is passed through unchanged).
    """
    pathway_list = KeggFile2PathwayList(pathway_file)
    pathway_names = [entry for (entry, _) in pathway_list]
    html_writer = HtmlWriter('%s.html' % output_prefix)
    xls_workbook = Workbook()

    logging.info("running OBD analysis for all pathways")
    data = GetAllOBDs(pathway_list,
                      html_writer,
                      thermo,
                      pH=pH,
                      section_prefix="pareto",
                      balance_water=True,
                      override_bounds={})

    column_titles = ["reaction", "formula", "flux", "delta_r G'",
                     "shadow price"]
    for d in data:
        sheet = xls_workbook.add_sheet(d['entry'])
        for col, title in enumerate(column_titles):
            sheet.write(0, col, title)
        # data rows start at row 1, below the header
        for r, rid in enumerate(d['rids']):
            sheet.write(r + 1, 0, rid)
            sheet.write(r + 1, 1, d['formulas'][r])
            sheet.write(r + 1, 2, d['fluxes'][0, r])
            sheet.write(r + 1, 3, d['dG_r_prime'][0, r])
            sheet.write(r + 1, 4, d['reaction prices'][r, 0])

    xls_workbook.save('%s.xls' % output_prefix)

    obds = []
    minus_avg_tg = []
    for d in data:
        obds.append(d['OBD'])
        if d['sum of fluxes']:
            minus_avg_tg.append(-d['max total dG'] / d['sum of fluxes'])
        else:
            # avoid dividing by a zero total flux
            minus_avg_tg.append(0)

    fig = plt.figure(figsize=(6, 6), dpi=90)
    plt.plot(minus_avg_tg, obds, 'o', figure=fig)
    # dashed diagonal y = x as a visual reference
    diagonal_end = max(minus_avg_tg)
    plt.plot([0, diagonal_end], [0, diagonal_end], '--g')
    for i, name in enumerate(pathway_names):
        plt.text(minus_avg_tg[i], obds[i], name)
    # raw string: '\D' is not a valid escape sequence in a normal literal
    plt.title(r'OBD vs. Average $\Delta_r G$')
    plt.ylim(ymin=0)
    plt.xlim(xmin=0)
    plt.xlabel(r'- Average $\Delta_r G$ [kJ/mol]')
    plt.ylabel(r'Optimized Distributed Bottleneck [kJ/mol]')
    # the heading is an <h2>, so it must be closed with </h2>
    html_writer.write('<h2>Pareto figure</h2>\n')
    html_writer.embed_matplotlib_figure(fig)
    html_writer.close()
Beispiel #4
0
def AnalyzePHGradient(pathway_file, output_prefix, thermo, conc_range):
    """Compute the OBD of every pathway over a range of pH values.

    Outputs, all derived from `output_prefix`:
      * '<output_prefix>.html' - report of the initial test run followed by
        a summary figure of OBD vs. pH,
      * '<output_prefix>.csv' - one row per pH with the OBD of each pathway,
      * '<output_prefix>/<entry>.csv' - one file per pathway with its
        reaction prices at each pH.

    Args:
        pathway_file: input passed to KeggFile2PathwayList.
        output_prefix: common prefix of all output paths (see above).
        thermo: thermodynamics object handed to GetAllOBDs.
        conc_range: range specification parsed by ParseConcentrationRange;
            despite its name, the parsed values are used as pH values.
    """
    pathway_list = KeggFile2PathwayList(pathway_file)
    pathway_names = [entry for (entry, _) in pathway_list]
    html_writer = HtmlWriter('%s.html' % output_prefix)

    # run once just to make sure that the pathways are all working:
    logging.info("testing all pathways with default pH")
    data = GetAllOBDs(pathway_list, html_writer, thermo,
                      pH=None, section_prefix="test", balance_water=True,
                      override_bounds={})

    # every file opened below is tracked here so it is closed on any exit
    open_files = []
    try:
        summary_file = open('%s.csv' % output_prefix, 'w')
        open_files.append(summary_file)
        csv_output = csv.writer(summary_file)
        csv_output.writerow(['pH'] + pathway_names)

        util._mkdir(output_prefix)
        shadow_csvs = {}
        for d in data:
            path = '%s/%s.csv' % (output_prefix, d['entry'])
            shadow_file = open(path, 'w')
            open_files.append(shadow_file)
            shadow_csvs[d['entry']] = csv.writer(shadow_file)
            shadow_csvs[d['entry']].writerow(['pH'] + d['rids'])

        pH_vec = ParseConcentrationRange(conc_range)
        obd_mat = []
        for pH in pH_vec.flat:
            logging.info("pH = %.1f" % (pH))
            data = GetAllOBDs(pathway_list, html_writer=None, thermo=thermo,
                              pH=pH, section_prefix="", balance_water=True,
                              override_bounds={})
            obds = [d['OBD'] for d in data]
            obd_mat.append(obds)
            csv_output.writerow([data[0]['pH']] + obds)

            for d in data:
                # a plain float here carries no per-reaction prices; the
                # builtin `float` is what types.FloatType refers to
                if type(d['reaction prices']) is not float:
                    prices = list(d['reaction prices'].flat)
                    shadow_csvs[d['entry']].writerow([pH] + prices)
    finally:
        for f in open_files:
            f.close()

    # one row per pH value, one column per pathway
    obd_mat = np.matrix(obd_mat)

    fig = plt.figure(figsize=(6, 6), dpi=90)
    colormap = color.ColorMap(pathway_names)
    for i, name in enumerate(pathway_names):
        plt.plot(pH_vec, obd_mat[:, i], '-', color=colormap[name],
                 figure=fig)
    plt.title("OBD vs. pH", figure=fig)
    plt.ylim(0, np.max(obd_mat.flat))
    plt.xlabel('pH', figure=fig)
    plt.ylabel('Optimized Distributed Bottleneck [kJ/mol]', figure=fig)
    plt.legend(pathway_names)
    # the heading is an <h2>, so it must be closed with </h2>
    html_writer.write('<h2>Summary figure</h2>\n')
    html_writer.embed_matplotlib_figure(fig)

    html_writer.close()
Beispiel #5
0
def compare_charges():
    """Write an HTML table comparing the charges predicted by two estimators.

    For every compound known to either estimator, the most abundant
    pseudoisomer at (default_pH, I=0, pMg=14, default_T) is looked up and
    the table is written to '../res/groups_report.html' together with the
    error stored in the 'gc_errors' table of '../res/gibbs.sqlite'.
    """
    #db_public = SqliteDatabase('../data/public_data.sqlite')
    gibbs_db = SqliteDatabase('../res/gibbs.sqlite')
    print("Writing Compare Charges report to ../res/groups_report.html")
    report = HtmlWriter("../res/groups_report.html")
    kegg = Kegg.getInstance()

    #pH, I, pMg, T = default_pH, default_I, default_pMg, default_T
    pH, I, pMg, T = default_pH, 0, 14, default_T

    # compound ID -> error, as stored in the database
    cid2error = {}
    for db_row in gibbs_db.DictReader("gc_errors"):
        cid2error[int(db_row['cid'])] = db_row['error']

    estimators = {}
    estimators['hatzi'] = Hatzi(use_pKa=False)
    estimators['milo'] = PsuedoisomerTableThermodynamics.FromDatabase(
        db_gibbs=None or gibbs_db, table_name=None or 'gc_pseudoisomers',
        name='Milo Group Contribution') if False else \
        PsuedoisomerTableThermodynamics.FromDatabase(
            gibbs_db, 'gc_pseudoisomers', name='Milo Group Contribution')

    cids_per_estimator = [e.get_all_cids() for e in estimators.values()]
    table_rows = []
    for cid in set(lsum(cids_per_estimator)):
        try:
            name = kegg.cid2name(cid)
            link = kegg.cid2compound(cid).get_link()
        except KeyError:
            name, link = "unknown", ""

        row = {}
        row['cid'] = '<a href="%s">C%05d</a>' % (link, cid)
        row['name'] = name
        row['error'] = cid2error.get(cid)

        for key, est in estimators.iteritems():
            try:
                pmap = est.cid2PseudoisomerMap(cid)
                species = pmap.GetMostAbundantPseudoisomer(pH, I, pMg, T)
            except MissingCompoundFormationEnergy:
                # leave the columns of this estimator empty
                species = ("", "", "", "", "")
            dG0, dG0_tag, nH, z, nMg = species
            for column, value in (('nH_', nH), ('charge_', z),
                                  ('nMg_', nMg), ('dG0_', dG0),
                                  ('dG0_tag_', dG0_tag)):
                row[column + key] = value
        table_rows.append(row)

    shown_columns = ['cid', 'name', 'charge_hatzi', 'charge_milo', 'error']
    report.write_table(table_rows, headers=shown_columns)
    report.close()
Beispiel #6
0
def AnalyzePareto(pathway_file, output_prefix, thermo, pH=None):
    """Run the OBD analysis on all pathways and plot OBD vs. average dG.

    Outputs:
      * '<output_prefix>.html' - analysis report ending with the figure,
      * '<output_prefix>.xls' - one sheet per pathway listing, for every
        reaction, its formula, flux, delta_r G' and shadow price.

    Args:
        pathway_file: input passed to KeggFile2PathwayList.
        output_prefix: common prefix of the two output files.
        thermo: thermodynamics object handed to GetAllOBDs.
        pH: pH handed to GetAllOBDs (None is passed through unchanged).
    """
    pathway_list = KeggFile2PathwayList(pathway_file)
    pathway_names = [entry for (entry, _) in pathway_list]
    html_writer = HtmlWriter('%s.html' % output_prefix)
    xls_workbook = Workbook()

    logging.info("running OBD analysis for all pathways")
    data = GetAllOBDs(pathway_list, html_writer, thermo,
                      pH=pH, section_prefix="pareto", balance_water=True,
                      override_bounds={})

    column_titles = ["reaction", "formula", "flux", "delta_r G'", "shadow price"]
    for d in data:
        sheet = xls_workbook.add_sheet(d['entry'])
        for col, title in enumerate(column_titles):
            sheet.write(0, col, title)
        # data rows start at row 1, below the header
        for r, rid in enumerate(d['rids']):
            sheet.write(r+1, 0, rid)
            sheet.write(r+1, 1, d['formulas'][r])
            sheet.write(r+1, 2, d['fluxes'][0, r])
            sheet.write(r+1, 3, d['dG_r_prime'][0, r])
            sheet.write(r+1, 4, d['reaction prices'][r, 0])

    xls_workbook.save('%s.xls' % output_prefix)

    obds = []
    minus_avg_tg = []
    for d in data:
        obds.append(d['OBD'])
        if d['sum of fluxes']:
            minus_avg_tg.append(-d['max total dG']/d['sum of fluxes'])
        else:
            # avoid dividing by a zero total flux
            minus_avg_tg.append(0)

    fig = plt.figure(figsize=(6, 6), dpi=90)
    plt.plot(minus_avg_tg, obds, 'o', figure=fig)
    # dashed diagonal y = x as a visual reference
    diagonal_end = max(minus_avg_tg)
    plt.plot([0, diagonal_end], [0, diagonal_end], '--g')
    for i, name in enumerate(pathway_names):
        plt.text(minus_avg_tg[i], obds[i], name)
    # raw string: '\D' is not a valid escape sequence in a normal literal
    plt.title(r'OBD vs. Average $\Delta_r G$')
    plt.ylim(ymin=0)
    plt.xlim(xmin=0)
    plt.xlabel(r'- Average $\Delta_r G$ [kJ/mol]')
    plt.ylabel(r'Optimized Distributed Bottleneck [kJ/mol]')
    # the heading is an <h2>, so it must be closed with </h2>
    html_writer.write('<h2>Pareto figure</h2>\n')
    html_writer.embed_matplotlib_figure(fig)
    html_writer.close()
Beispiel #7
0
def compare_charges():
    """Compare the charges predicted by the 'hatzi' and 'milo' estimators.

    The most abundant pseudoisomer of every compound known to either
    estimator is evaluated at (default_pH, I=0, pMg=14, default_T); the
    resulting table is written to '../res/groups_report.html'.
    """
    #db_public = SqliteDatabase('../data/public_data.sqlite')
    gibbs_db = SqliteDatabase('../res/gibbs.sqlite')
    print("Writing Compare Charges report to ../res/groups_report.html")
    report = HtmlWriter("../res/groups_report.html")
    kegg = Kegg.getInstance()

    #pH, I, pMg, T = default_pH, default_I, default_pMg, default_T
    pH, I, pMg, T = default_pH, 0, 14, default_T

    # compound ID -> error, read from the 'gc_errors' table
    cid2error = {}
    for error_row in gibbs_db.DictReader("gc_errors"):
        cid2error[int(error_row['cid'])] = error_row['error']

    estimators = {}
    estimators['hatzi'] = Hatzi(use_pKa=False)
    estimators['milo'] = PsuedoisomerTableThermodynamics.FromDatabase(
        gibbs_db, 'gc_pseudoisomers', name='Milo Group Contribution')

    empty_species = ("", "", "", "", "")
    all_cids = set(lsum([e.get_all_cids() for e in estimators.values()]))
    rows = []
    for cid in all_cids:
        try:
            name = kegg.cid2name(cid)
            link = kegg.cid2compound(cid).get_link()
        except KeyError:
            name, link = "unknown", ""

        row = {'cid': '<a href="%s">C%05d</a>' % (link, cid),
               'name': name,
               'error': cid2error.get(cid)}
        for key, est in estimators.iteritems():
            try:
                pmap = est.cid2PseudoisomerMap(cid)
                species = pmap.GetMostAbundantPseudoisomer(pH, I, pMg, T)
            except MissingCompoundFormationEnergy:
                # no formation energy: leave this estimator's columns empty
                species = empty_species
            dG0, dG0_tag, nH, z, nMg = species
            row['nH_' + key] = nH
            row['charge_' + key] = z
            row['nMg_' + key] = nMg
            row['dG0_' + key] = dG0
            row['dG0_tag_' + key] = dG0_tag
        rows.append(row)

    report.write_table(
        rows,
        headers=['cid', 'name', 'charge_hatzi', 'charge_milo', 'error'])
    report.close()
Beispiel #8
0
def AnalyzeConcentrationGradient(pathway_file, output_prefix, thermo, conc_range, cids=None, pH=None):
    """Compute the OBD of every pathway over a range of concentrations.

    The compounds in `cids` are all fixed to the same concentration, which
    is scanned over the values derived from `conc_range`.

    Outputs:
      * '<output_prefix>.html' - report of the initial test run followed by
        a summary figure of OBD vs. concentration,
      * '<output_prefix>.csv' - one row per concentration with the OBD of
        each pathway.

    Args:
        pathway_file: input passed to KeggFile2PathwayList.
        output_prefix: common prefix of the two output files.
        thermo: thermodynamics object; `thermo.kegg` is used for compound
            names and the object is handed to GetAllOBDs.
        conc_range: range specification parsed by ParseConcentrationRange;
            each parsed value x is used as a concentration of 10**(-x) M.
        cids: KEGG compound IDs whose concentration is scanned
            (None means no compounds).
        pH: pH handed to GetAllOBDs (None is passed through unchanged).
    """
    # None replaces the former mutable default argument ([])
    if cids is None:
        cids = []
    compound_names = ','.join([thermo.kegg.cid2name(cid) for cid in cids])
    pathway_list = KeggFile2PathwayList(pathway_file)
    pathway_names = [entry for (entry, _) in pathway_list]
    html_writer = HtmlWriter('%s.html' % output_prefix)

    # run once just to make sure that the pathways are all working:
    logging.info("testing all pathways with default concentrations")
    data = GetAllOBDs(pathway_list, html_writer, thermo,
                      pH=pH, section_prefix="test", balance_water=True,
                      override_bounds={})

    # the `with` block guarantees that the CSV file is flushed and closed
    with open('%s.csv' % output_prefix, 'w') as csv_file:
        csv_output = csv.writer(csv_file)
        csv_output.writerow(['pH', '[' + compound_names + ']'] + pathway_names)

        # the parsed range holds -log10 of the concentration (in M)
        conc_vec = 10**(-ParseConcentrationRange(conc_range))
        override_bounds = {}

        obd_mat = []
        for conc in conc_vec.flat:
            for cid in cids:
                # pin the compound to a single concentration (lower == upper)
                override_bounds[cid] = (conc, conc)
            logging.info("[%s] = %.1e M" % (compound_names, conc))
            data = GetAllOBDs(pathway_list, html_writer=None, thermo=thermo,
                              pH=pH, section_prefix="", balance_water=True,
                              override_bounds=override_bounds)
            obds = [d['OBD'] for d in data]
            obd_mat.append(obds)
            csv_output.writerow([data[0]['pH'], conc] + obds)

    # one row per concentration, one column per pathway
    obd_mat = np.matrix(obd_mat)

    fig = plt.figure(figsize=(6, 6), dpi=90)
    colormap = color.ColorMap(pathway_names)
    for i, name in enumerate(pathway_names):
        plt.plot(conc_vec, obd_mat[:, i], '-', color=colormap[name],
                 figure=fig)
    plt.title("OBD vs. [%s]" % (compound_names), figure=fig)
    plt.xscale('log')
    plt.ylim(ymin=0)
    plt.xlabel('[%s] (in M)' % compound_names, figure=fig)
    plt.ylabel('Optimized Distributed Bottleneck [kJ/mol]', figure=fig)
    plt.legend(pathway_names)
    # the heading is an <h2>, so it must be closed with </h2>
    html_writer.write('<h2>Summary figure</h2>\n')
    html_writer.embed_matplotlib_figure(fig)
    html_writer.close()
Beispiel #9
0
def main():
    """Train the NIST regression and write its results to an HTML report.

    The output path comes from the command-line options. When
    `options.use_prior` is set, the 'alberty_pseudoisomers' table of the
    public database is passed to the training step as a prior.
    """
    opts, _ = MakeOpts().parse_args(sys.argv)

    gibbs_db = SqliteDatabase("../res/gibbs.sqlite")
    public_db = SqliteDatabase("../data/public_data.sqlite")
    report_path = os.path.abspath(opts.output_filename)
    logging.info('Will write output to %s' % report_path)

    report = HtmlWriter(report_path)
    regression = NistRegression(gibbs_db,
                                html_writer=report,
                                nist=Nist(T_range=None))
    # the threshold over which to print an analysis of a reaction
    regression.std_diff_threshold = 5
    # Optional overrides, currently disabled:
    #regression.nist.override_I = 0.25
    #regression.nist.override_pMg = 14.0

    report.write("<h2>NIST regression:</h2>")
    prior_thermo = None
    if opts.use_prior:
        logging.info('Using the data from Alberty as fixed prior')
        prior_thermo = PsuedoisomerTableThermodynamics.FromDatabase(
            public_db, 'alberty_pseudoisomers', name="Alberty")

    # Section 1: training tables
    report.write('</br><b>Regression Tables</b>\n')
    report.insert_toggle(start_here=True)
    regression.Train(opts.from_database, prior_thermo)
    report.div_end()

    # Section 2: regression results
    report.write('</br><b>PRC results</b>\n')
    report.insert_toggle(start_here=True)
    regression.WriteDataToHtml(report)
    report.div_end()

    # Section 3: verification against the observed data
    report.write('</br><b>Transformed reaction energies - PRC vs. Observed</b>\n')
    report.insert_toggle(start_here=True)
    sample_count, rmse = regression.VerifyResults()
    report.div_end()

    logging.info("Regression results for transformed data:")
    logging.info("N = %d, RMSE = %.1f" % (sample_count, rmse))

    report.close()
Beispiel #10
0
def main():
    """Analyze every KEGG module and write a table of the results.

    The conditions (pH, I, T, pMg), the concentration range and the bounds
    are read from the 'CONFIGURATION' entry of the configuration file (or
    from its first entry when that one is missing). The report is written
    to '<output_prefix>.html'; the table is sorted by 'OBD [kJ/mol]'.

    Raises:
        ValueError: if the configuration file has no entries.
    """
    estimators = LoadAllEstimators()
    parser = MakeArgParser(estimators)
    args = parser.parse_args()

    thermo = estimators[args.thermodynamics_source]

    kegg_file = ParsedKeggFile.FromKeggFile(args.config_fname)
    entries = kegg_file.entries()
    if len(entries) == 0:
        raise ValueError('No entries in configuration file')
    entry = 'CONFIGURATION'
    if entry not in entries:
        logging.warning(
            'Configuration file does not contain the entry "CONFIGURATION". '
            'Using the first entry by default: %s' % entries[0])
        entry = entries[0]
    p_data = PathwayData.FromFieldMap(kegg_file[entry])
    thermo.SetConditions(pH=p_data.pH, I=p_data.I, T=p_data.T, pMg=p_data.pMg)
    thermo.c_range = p_data.c_range
    bounds = p_data.GetBounds()

    html_writer = HtmlWriter(args.output_prefix + ".html")

    rowdicts = []
    headers = ['Module', 'Name', 'OBD [kJ/mol]', 'Length']
    kegg = Kegg.getInstance()
    for mid in kegg.get_all_mids():
        # look the name up once; it is used for the heading and the table
        module_name = kegg.get_module_name(mid)
        html_writer.write('<h2 id=M%05d>M%05d: %s</h2>' %
                          (mid, mid, module_name))
        try:
            d = AnalyzeKeggModule(thermo, mid, bounds, html_writer)
        except KeyError as e:
            # Best effort: a module that cannot be analyzed is left out of
            # the table, but the reason is logged instead of being dropped.
            logging.debug('Skipping module M%05d (KeyError: %s)', mid, e)
            continue
        d['Module'] = '<a href="#M%05d">M%05d</a>' % (mid, mid)
        d['Name'] = module_name
        rowdicts.append(d)

    rowdicts.sort(key=lambda x: x['OBD [kJ/mol]'])
    html_writer.write_table(rowdicts, headers, decimal=1)
    html_writer.close()
Beispiel #11
0
def main():
    """Analyze every KEGG module and write a table of the results.

    The conditions (pH, I, T, pMg), the concentration range and the bounds
    are read from the 'CONFIGURATION' entry of the configuration file (or
    from its first entry when that one is missing). The report is written
    to '<output_prefix>.html'; the table is sorted by 'OBD [kJ/mol]'.

    Raises:
        ValueError: if the configuration file has no entries.
    """
    estimators = LoadAllEstimators()
    parser = MakeArgParser(estimators)
    args = parser.parse_args()

    thermo = estimators[args.thermodynamics_source]

    kegg_file = ParsedKeggFile.FromKeggFile(args.config_fname)
    entries = kegg_file.entries()
    if len(entries) == 0:
        raise ValueError('No entries in configuration file')
    entry = 'CONFIGURATION'
    if entry not in entries:
        logging.warning('Configuration file does not contain the entry "CONFIGURATION". '
                        'Using the first entry by default: %s' % entries[0])
        entry = entries[0]
    p_data = PathwayData.FromFieldMap(kegg_file[entry])
    thermo.SetConditions(pH=p_data.pH, I=p_data.I, T=p_data.T, pMg=p_data.pMg)
    thermo.c_range = p_data.c_range
    bounds = p_data.GetBounds()

    html_writer = HtmlWriter(args.output_prefix + ".html")

    rowdicts = []
    headers = ['Module', 'Name', 'OBD [kJ/mol]', 'Length']
    kegg = Kegg.getInstance()
    for mid in kegg.get_all_mids():
        # look the name up once; it is used for the heading and the table
        module_name = kegg.get_module_name(mid)
        html_writer.write('<h2 id=M%05d>M%05d: %s</h2>' %
                          (mid, mid, module_name))
        try:
            d = AnalyzeKeggModule(thermo, mid, bounds, html_writer)
        except KeyError as e:
            # Best effort: a module that cannot be analyzed is left out of
            # the table, but the reason is logged instead of being dropped.
            logging.debug('Skipping module M%05d (KeyError: %s)', mid, e)
            continue
        d['Module'] = '<a href="#M%05d">M%05d</a>' % (mid, mid)
        d['Name'] = module_name
        rowdicts.append(d)

    rowdicts.sort(key=lambda x:x['OBD [kJ/mol]'])
    html_writer.write_table(rowdicts, headers, decimal=1)
    html_writer.close()
Beispiel #12
0
def AnalyzeConcentrationGradient(prefix, thermo, csv_output_fname, cid=13): # default compound is PPi
    """Scan the concentration of one compound and plot the resulting OBDs.

    The pathways are read from '../data/thermodynamics/<prefix>.txt'. The
    concentration of compound `cid` is fixed to each value of a logarithmic
    grid, `pareto` is run for every value, and a summary figure is written
    to '../res/<prefix>.html'.

    Args:
        prefix: base name of the input KEGG file and of the HTML report.
        thermo: thermodynamics object; `thermo.kegg`, `thermo.I` and
            `thermo.T` are read here and the object is passed to `pareto`.
        csv_output_fname: path of a CSV file that receives one row per
            concentration, or a false value to skip the CSV output.
        cid: KEGG compound ID whose concentration is scanned.
    """
    compound_name = thermo.kegg.cid2name(cid)
    kegg_file = ParsedKeggFile.FromKeggFile('../data/thermodynamics/%s.txt' % prefix)
    html_writer = HtmlWriter('../res/%s.html' % prefix)
    null_html_writer = NullHtmlWriter()

    # The CSV report is optional; keep the handle so that it can be closed.
    csv_file = open(csv_output_fname, 'w') if csv_output_fname else None
    try:
        csv_output = None
        if csv_file is not None:
            csv_output = csv.writer(csv_file)
            csv_output.writerow(['pH', 'I', 'T', '[C%05d]' % cid] + kegg_file.entries())

        pH_vec = np.array([7]) # this needs to be fixed so that the txt file will set the pH
        conc_vec = 10**(-np.arange(2, 6.0001, 0.25)) # logarithmic scale from 10 mM (10^-2 M) down to 1 uM (10^-6 M)
        override_bounds = {}

        fig = plt.figure(figsize=(6, 6), dpi=90)
        legend = []
        for pH in pH_vec.flat:
            obd_vec = []
            for conc in conc_vec.flat:
                # pin the compound to a single concentration (lower == upper)
                override_bounds[cid] = (conc, conc)
                logging.info("pH = %g, [%s] = %.1e M" % (pH, compound_name, conc))
                data, labels = pareto(kegg_file, null_html_writer, thermo,
                    pH=pH, section_prefix="", balance_water=True,
                    override_bounds=override_bounds)
                obd_vec.append(data[:, 1])
                # only write a row when a CSV file was requested
                if csv_output is not None:
                    csv_output.writerow([pH, thermo.I, thermo.T, conc] + list(data[:, 1].flat))
            # one row per scanned concentration; the columns follow
            # data[:, 1] (presumably one per pathway - TODO confirm)
            obd_mat = np.matrix(obd_vec)
            plt.plot(conc_vec, obd_mat, '.-', figure=fig)
            legend += ['%s, pH = %g' % (l, pH) for l in labels]
    finally:
        if csv_file is not None:
            csv_file.close()

    plt.title("ODB vs. [%s] (I = %gM, T = %gK)" % (compound_name, thermo.I, thermo.T), figure=fig)
    plt.xscale('log')
    plt.xlabel('Concentration of %s [M]' % thermo.kegg.cid2name(cid), figure=fig)
    plt.ylabel('Optimized Distributed Bottleneck [kJ/mol]', figure=fig)
    plt.legend(legend)
    # the heading is an <h2>, so it must be closed with </h2>
    html_writer.write('<h2 id="figure_%s">Summary figure</h2>\n' % prefix)
    html_writer.embed_matplotlib_figure(fig, name=prefix)

    html_writer.close()
def main():
    """Compare several dG0 estimators against the NIST data and write a report.

    The report goes to '../res/nist/report.html'. Only the first branch
    (`if True:`) below is ever executed: it loads the NIST data once and
    calls `verify_results` for the Alberty, Hatzimanikatis and Milo (group
    contribution) estimators. Every `elif False:` branch is unreachable and
    kept only as a record of earlier experiments.

    NOTE: the `print` statements in the disabled branches use Python 2
    syntax.
    """
    db = database.SqliteDatabase('../res/gibbs.sqlite')
    html_writer = HtmlWriter("../res/nist/report.html")
    gc = GroupContribution(db)
    gc.override_gc_with_measurements = True
    gc.init()
    grad = GradientAscent(gc)
    nist = Nist(db, html_writer, gc.kegg())
    nist.FromDatabase()
    alberty = Alberty()
    hatzi = Hatzi()
    
    if True:
        # The only live branch: the NIST data is loaded once (with Alberty,
        # T_range=(298, 314)) and then verified against three estimators.
        grad.load_nist_data(nist, alberty, skip_missing_reactions=False, T_range=(298, 314))
        grad.verify_results("Alberty", alberty, html_writer)
        
        #grad.write_pseudoisomers("../res/nist/nist_dG0_f.csv")

        #html_writer.write("<h2>Using Group Contribution (Hatzimanikatis' implementation)</h2>")
        #html_writer.write("<h3>Correlation with the reduced NIST database (containing only compounds that appear in Alberty's list)</h3>")
        #logging.info("calculate the correlation between Hatzimanikatis' predictions and the reduced NIST database")
        #grad.verify_results("Hatzimanikatis_Reduced", hatzi, html_writer)

        #grad.load_nist_data(nist, hatzi, skip_missing_reactions=True, T_range=(298, 314))
        grad.verify_results("Hatzimanikatis", hatzi, html_writer)

        #grad.load_nist_data(nist, gc, skip_missing_reactions=True, T_range=(298, 314))
        grad.verify_results("Milo", gc, html_writer)
    elif False:
        # DISABLED (unreachable).
        # Run the gradient ascent algorithm, where the starting point is the same file used for training the GC algorithm
        grad.load_dG0_data("../data/thermodynamics/dG0.csv")
        # load the data for the anchors (i.e. compounds whose dG0 should not be changed - usually their value will be 0). 
        grad.anchors = grad.load_dG0_data("../data/thermodynamics/nist_anchors.csv")
        grad.load_nist_data(nist, grad, skip_missing_reactions=True)
        print "Training %d compounds using %d reactions: " % (len(grad.cid2pmap_dict.keys()), len(grad.data))
        grad.hill_climb(max_i=20000)
        grad.save_energies(grad.gc.comm, "gradient_cid2prm")
        # NOTE(review): called with one argument here but with three in the
        # live branch - confirm the signature before re-enabling.
        grad.verify_results("gradient1")
        
    elif False:
        # DISABLED (unreachable).
        # Run the gradient ascent algorithm, where the starting point is Alberty's table from (Mathematica 2006)
        grad.load_nist_data(nist, alberty, skip_missing_reactions=True)
        print "Training %d compounds using %d reactions: " % (len(grad.cid2pmap_dict.keys()), len(grad.data))
        grad.cid2pmap_dict = alberty.cid2pmap_dict
        grad.hill_climb(max_i=20000)
        grad.save_energies(grad.gc.comm, "gradient_cid2prm")
        grad.verify_results("gradient2")
    
    elif False:
        # DISABLED (unreachable).
        # Run the gradient ascent algorithm, where the starting point is Alberty's table from (Mathematica 2006)
        # Use DETERMINISTIC gradient ascent
        grad.load_nist_data(nist, alberty, skip_missing_reactions=True, T_range=(24 + 273.15, 40 + 273.15))
        print "Training %d compounds using %d reactions: " % (len(grad.cid2pmap_dict.keys()), len(grad.data))
        grad.cid2pmap_dict = alberty.cid2pmap_dict
        grad.deterministic_hill_climb(max_i=200)
        grad.save_energies(grad.gc.comm, "gradient_cid2prm")
        grad.verify_results("gradient_deterministic")
        
    elif False:
        # DISABLED (unreachable).
        # Run the gradient ascent algorithm, where the starting point arbitrary (predict all of the NIST compounds)
        grad = GradientAscent(gc)
        grad.load_nist_data(nist, skip_missing_reactions=False)
        print "Training %d compounds using %d reactions: " % (len(grad.cid2pmap_dict.keys()), len(grad.data))
        grad.hill_climb(max_i=20000)
        grad.save_energies(grad.gc.comm, "gradient_cid2prm")
        grad.verify_results("gradient3")
    
    elif False: # Use Alberty's table from (Mathematica 2006) to calculate the dG0 of all possible reactions in KEGG
        # DISABLED (unreachable).
        grad = GradientAscent(gc)
        grad.cid2pmap_dict = alberty.cid2pmap_dict
        (pH, I, T) = (7, 0, 300)
        counter = 0  # number of reactions whose dG0 could be calculated
        for rid in grad.kegg.get_all_rids():
            sparse_reaction = grad.kegg.rid2sparse_reaction(rid)
            try:
                dG0 = grad.reaction_to_dG0(sparse_reaction, pH, I, T)
                print "R%05d: dG0_r = %.2f [kJ/mol]" % (rid, dG0)
                counter += 1
            except MissingCompoundFormationEnergy as e:
                # reactions with a missing formation energy are skipped silently
                #print "R%05d: missing formation energy of C%05d" % (rid, e.cid)
                pass
        print "Managed to calculate the dG0 of %d reactions" % counter
        
    elif False:
        # DISABLED (unreachable).
        # Write an HTML table and a CSV file listing (nH, z) for every
        # compound that appears in the NIST reactions.
        util._mkdir("../res/nist/fig")
        # NOTE(review): this file handle is never closed explicitly.
        csv_writer = csv.writer(open("../res/nist/pseudoisomers.csv", "w"))
                
        # collect the compound IDs of all reactions in the NIST data
        cid_set = set()
        for row in nist.data:
            sparce_reaction = row['sparse']
            cid_set.update(sparce_reaction.keys())
        
        html_writer.write("<table border=1>\n")
        for cid in sorted(list(cid_set)):
            html_writer.write("  <tr><td>C%05d</td><td>%s</td><td>" % (cid, grad.kegg.cid2name(cid)))
            try:
                mol = grad.kegg.cid2mol(cid)
                img_fname = '../res/nist/fig/C%05d.png' % cid
                # NOTE(review): the image is embedded before mol.draw() is
                # called with the same filename - confirm that embed_img
                # only needs the path and not the file itself.
                html_writer.embed_img(img_fname, "C%05d" % cid)
                mol.draw(show=False, filename=img_fname)
            except AssertionError as e:
                html_writer.write("WARNING: cannot draw C%05d - %s" % (cid, str(e)))
            except KeggParseException as e:
                html_writer.write("WARNING: cannot draw C%05d - %s" % (cid, str(e)))
            html_writer.write("</td><td>")
            if (cid in alberty.cid2pmap_dict):
                # compounds in Alberty's table: list each of its (nH, z) keys
                for (nH, z) in alberty.cid2pmap_dict[cid].keys():
                    html_writer.write("(nH=%d, z=%d)<br>" % (nH, z))
                    csv_writer.writerow((cid, nH, z))
            else:
                # otherwise fall back to the single (nH, z) reported by KEGG
                nH = grad.kegg.cid2num_hydrogens(cid)
                z = grad.kegg.cid2charge(cid)
                html_writer.write("unknown pseudoisomers<br>")
                html_writer.write("(nH=%d, z=%d)" % (nH, z))
                csv_writer.writerow((cid, nH, z))
            
            html_writer.write("</td></tr>\n")
        html_writer.write("</table>\n")
    html_writer.close()
def main():
    """Driver script: load the NIST data and write a report on dG0 predictors.

    Opens the '../res/gibbs.sqlite' database, creates the HTML report
    '../res/nist/report.html', initialises a GroupContribution object (with
    override_gc_with_measurements enabled) and wraps it in a GradientAscent
    object, then loads the NIST data from the database.

    Only the first branch (`if True`) is executed. All the `elif False`
    branches are disabled alternative experiments that are kept in the source
    and can be activated by editing the conditions by hand.
    """
    db = database.SqliteDatabase('../res/gibbs.sqlite')
    html_writer = HtmlWriter("../res/nist/report.html")
    gc = GroupContribution(db)
    gc.override_gc_with_measurements = True
    gc.init()
    grad = GradientAscent(gc)
    nist = Nist(db, html_writer, gc.kegg())
    nist.FromDatabase()
    alberty = Alberty()
    hatzi = Hatzi()

    # Active experiment: load the NIST rows once (using Alberty's table and a
    # 298-314 temperature window) and call verify_results for three
    # predictors in turn: Alberty, Hatzimanikatis and the group contribution
    # object itself ("Milo").
    if True:
        grad.load_nist_data(nist,
                            alberty,
                            skip_missing_reactions=False,
                            T_range=(298, 314))
        grad.verify_results("Alberty", alberty, html_writer)

        #grad.write_pseudoisomers("../res/nist/nist_dG0_f.csv")

        #html_writer.write("<h2>Using Group Contribution (Hatzimanikatis' implementation)</h2>")
        #html_writer.write("<h3>Correlation with the reduced NIST database (containing only compounds that appear in Alberty's list)</h3>")
        #logging.info("calculate the correlation between Hatzimanikatis' predictions and the reduced NIST database")
        #grad.verify_results("Hatzimanikatis_Reduced", hatzi, html_writer)

        # NOTE(review): the reload below is commented out, so this call reuses
        # the data loaded above with Alberty's table - confirm this is intended.
        #grad.load_nist_data(nist, hatzi, skip_missing_reactions=True, T_range=(298, 314))
        grad.verify_results("Hatzimanikatis", hatzi, html_writer)

        #grad.load_nist_data(nist, gc, skip_missing_reactions=True, T_range=(298, 314))
        grad.verify_results("Milo", gc, html_writer)
    elif False:
        # Run the gradient ascent algorithm, where the starting point is the same file used for training the GC algorithm
        grad.load_dG0_data("../data/thermodynamics/dG0.csv")
        # load the data for the anchors (i.e. compounds whose dG0 should not be changed - usually their value will be 0).
        grad.anchors = grad.load_dG0_data(
            "../data/thermodynamics/nist_anchors.csv")
        grad.load_nist_data(nist, grad, skip_missing_reactions=True)
        print "Training %d compounds using %d reactions: " % (len(
            grad.cid2pmap_dict.keys()), len(grad.data))
        grad.hill_climb(max_i=20000)
        grad.save_energies(grad.gc.comm, "gradient_cid2prm")
        # NOTE(review): verify_results is called with a single argument in the
        # disabled branches but with three arguments in the active branch -
        # check the current signature before re-enabling.
        grad.verify_results("gradient1")

    elif False:
        # Run the gradient ascent algorithm, where the starting point is Alberty's table from (Mathematica 2006)
        grad.load_nist_data(nist, alberty, skip_missing_reactions=True)
        print "Training %d compounds using %d reactions: " % (len(
            grad.cid2pmap_dict.keys()), len(grad.data))
        grad.cid2pmap_dict = alberty.cid2pmap_dict
        grad.hill_climb(max_i=20000)
        grad.save_energies(grad.gc.comm, "gradient_cid2prm")
        grad.verify_results("gradient2")

    elif False:
        # Run the gradient ascent algorithm, where the starting point is Alberty's table from (Mathematica 2006)
        # Use DETERMINISTIC gradient ascent
        grad.load_nist_data(nist,
                            alberty,
                            skip_missing_reactions=True,
                            T_range=(24 + 273.15, 40 + 273.15))
        print "Training %d compounds using %d reactions: " % (len(
            grad.cid2pmap_dict.keys()), len(grad.data))
        grad.cid2pmap_dict = alberty.cid2pmap_dict
        grad.deterministic_hill_climb(max_i=200)
        grad.save_energies(grad.gc.comm, "gradient_cid2prm")
        grad.verify_results("gradient_deterministic")

    elif False:
        # Run the gradient ascent algorithm, where the starting point arbitrary (predict all of the NIST compounds)
        grad = GradientAscent(gc)
        grad.load_nist_data(nist, skip_missing_reactions=False)
        print "Training %d compounds using %d reactions: " % (len(
            grad.cid2pmap_dict.keys()), len(grad.data))
        grad.hill_climb(max_i=20000)
        grad.save_energies(grad.gc.comm, "gradient_cid2prm")
        grad.verify_results("gradient3")

    elif False:  # Use Alberty's table from (Mathematica 2006) to calculate the dG0 of all possible reactions in KEGG
        grad = GradientAscent(gc)
        grad.cid2pmap_dict = alberty.cid2pmap_dict
        (pH, I, T) = (7, 0, 300)
        counter = 0
        for rid in grad.kegg.get_all_rids():
            sparse_reaction = grad.kegg.rid2sparse_reaction(rid)
            try:
                dG0 = grad.reaction_to_dG0(sparse_reaction, pH, I, T)
                print "R%05d: dG0_r = %.2f [kJ/mol]" % (rid, dG0)
                counter += 1
            except MissingCompoundFormationEnergy as e:
                # Reactions with a missing formation energy are skipped
                # silently and are not counted.
                #print "R%05d: missing formation energy of C%05d" % (rid, e.cid)
                pass
        print "Managed to calculate the dG0 of %d reactions" % counter

    elif False:
        # Write a table (HTML and CSV) of the (nH, z) pairs of every compound
        # that appears in the NIST reactions.
        util._mkdir("../res/nist/fig")
        # NOTE(review): this file object is never closed explicitly.
        csv_writer = csv.writer(open("../res/nist/pseudoisomers.csv", "w"))

        # Collect the keys of the 'sparse' entry of every NIST row.
        cid_set = set()
        for row in nist.data:
            sparce_reaction = row['sparse']
            cid_set.update(sparce_reaction.keys())

        html_writer.write("<table border=1>\n")
        for cid in sorted(list(cid_set)):
            html_writer.write("  <tr><td>C%05d</td><td>%s</td><td>" %
                              (cid, grad.kegg.cid2name(cid)))
            try:
                mol = grad.kegg.cid2mol(cid)
                img_fname = '../res/nist/fig/C%05d.png' % cid
                html_writer.embed_img(img_fname, "C%05d" % cid)
                mol.draw(show=False, filename=img_fname)
            except AssertionError as e:
                html_writer.write("WARNING: cannot draw C%05d - %s" %
                                  (cid, str(e)))
            except KeggParseException as e:
                html_writer.write("WARNING: cannot draw C%05d - %s" %
                                  (cid, str(e)))
            html_writer.write("</td><td>")
            if (cid in alberty.cid2pmap_dict):
                # One HTML line and one CSV row per key of Alberty's table.
                for (nH, z) in alberty.cid2pmap_dict[cid].keys():
                    html_writer.write("(nH=%d, z=%d)<br>" % (nH, z))
                    csv_writer.writerow((cid, nH, z))
            else:
                # Not in Alberty's table: fall back to the single (nH, z)
                # pair reported by the KEGG object.
                nH = grad.kegg.cid2num_hydrogens(cid)
                z = grad.kegg.cid2charge(cid)
                html_writer.write("unknown pseudoisomers<br>")
                html_writer.write("(nH=%d, z=%d)" % (nH, z))
                csv_writer.writerow((cid, nH, z))

            html_writer.write("</td></tr>\n")
        html_writer.write("</table>\n")
    html_writer.close()
Beispiel #15
0
                continue
            if self.override_pMg or self.override_I or self.override_T:
                nist_row_copy = nist_row_data.Clone()
                if self.override_pMg:
                    nist_row_copy.pMg = self.override_pMg
                if self.override_I:
                    nist_row_copy.I = self.override_I
                if self.override_T:
                    nist_row_copy.T = self.override_T
                rows.append(nist_row_copy)
            else:
                rows.append(nist_row_data)
        return rows

    def GetUniqueReactionSet(self):
        """Return a set holding the distinct `reaction` values of self.data."""
        unique_reactions = set()
        for row in self.data:
            unique_reactions.add(row.reaction)
        return unique_reactions


if __name__ == '__main__':
    # Script entry point: write one "C%05d" line per value returned by
    # Nist.GetAllCids() to a text file, then run AnalyzeStats and
    # AnalyzeConnectivity against the statistics HTML report.
    #logging.getLogger('').setLevel(logging.DEBUG)
    _mkdir("../res/nist")
    html_writer = HtmlWriter("../res/nist/statistics.html")
    nist = Nist()
    # The 'with' block guarantees the ID file is flushed and closed even if
    # GetAllCids() or one of the writes raises part-way through.
    with open('../res/nist_kegg_ids.txt', 'w') as fp:
        for cid in nist.GetAllCids():
            fp.write("C%05d\n" % cid)
    nist.AnalyzeStats(html_writer)
    nist.AnalyzeConnectivity(html_writer)
    html_writer.close()
Beispiel #16
0
                continue
            if self.override_pMg or self.override_I or self.override_T:
                nist_row_copy = nist_row_data.Clone()
                if self.override_pMg:
                    nist_row_copy.pMg = self.override_pMg
                if self.override_I:
                    nist_row_copy.I = self.override_I
                if self.override_T:
                    nist_row_copy.T = self.override_T
                rows.append(nist_row_copy)
            else:
                rows.append(nist_row_data)
        return rows
    
    def GetUniqueReactionSet(self):
        """Return a set holding the distinct `reaction` values of self.data."""
        unique_reactions = set()
        for row in self.data:
            unique_reactions.add(row.reaction)
        return unique_reactions


if __name__ == '__main__':
    # Script entry point: write one "C%05d" line per value returned by
    # Nist.GetAllCids() to a text file, then run AnalyzeStats and
    # AnalyzeConnectivity against the statistics HTML report.
    #logging.getLogger('').setLevel(logging.DEBUG)
    _mkdir("../res/nist")
    html_writer = HtmlWriter("../res/nist/statistics.html")
    nist = Nist()
    # The 'with' block guarantees the ID file is flushed and closed even if
    # GetAllCids() or one of the writes raises part-way through.
    with open('../res/nist_kegg_ids.txt', 'w') as fp:
        for cid in nist.GetAllCids():
            fp.write("C%05d\n" % cid)
    nist.AnalyzeStats(html_writer)
    nist.AnalyzeConnectivity(html_writer)
    html_writer.close()
Beispiel #17
0
    def find_path(self, experiment_name, net_reaction):
        """Find a pathway from the source to the target.

        First solves a plain LP for a minimal-total-flux solution; if none
        exists the search stops. Otherwise a MILP is solved repeatedly, banning
        each solution after it is written, until the solver fails or
        self.max_solutions rounds were run (no limit when it is None).

        Output goes to '../res/pathologic/<experiment_name>.html', to
        '../res/pathologic/<experiment_name>/kegg_pathway.txt' and to one
        exported LP file per round in the same directory. A link to the
        experiment page is appended to self.html_writer.

        Both the experiment HTML writer and the KEGG output file are closed
        on every exit path, including the early "no solutions" return and
        any exception.

        Args:
            experiment_name: a name given to this experiment.
            net_reaction: a Reaction describing the net reaction for the desired paths
        """
        dirname = os.path.join('../res/pathologic/', experiment_name)
        logging.info('Writing output to: %s' % dirname)
        util._mkdir(dirname)

        self.html_writer.write('<a href="pathologic/' + experiment_name + '.html">' + experiment_name + '</a><br>\n')
        exp_html = HtmlWriter('../res/pathologic/' + experiment_name + '.html')
        try:
            exp_html.write("<p><h1>%s</h1>\n" % experiment_name)

            exp_html.insert_toggle(div_id="__parameters__", start_here=True,
                                   label='Show Parameters')

            f, S, compounds, reactions = self.kegg_pathologic.get_unique_cids_and_reactions()

            exp_html.write('<h2>Conditions:</h2>\n')
            exp_html.write_ul(['Optimization method: %s' % self.thermodynamic_method,
                               'Concentration range: %g M < C < %g M' % (self.thermo.c_range[0], self.thermo.c_range[1]),
                               "Max &Delta;<sub>r</sub>G' = %.1f" % self.maximal_dG,
                               'pH = %g' % self.thermo.pH,
                               'I = %g' % self.thermo.I,
                               'T = %g' % self.thermo.T,
                               # -1 is printed when no limit is configured
                               'Max no. reactions: %d' % (self.max_reactions or -1),
                               'Max no. solutions: %d' % (self.max_solutions or -1),
                               'Overall Reaction: %s' % net_reaction.to_hypertext(),
                               '%d reactions' % len(reactions),
                               '%d unique compounds' % len(compounds)])

            exp_html.div_end()
            exp_html.write('</br>\n')

            logging.debug("All compounds:")
            for i, compound in enumerate(compounds):
                logging.debug("%05d) C%05d = %s" % (i, compound.cid, compound.name))
            logging.debug("All reactions:")
            for i, reaction in enumerate(reactions):
                logging.debug("%05d) R%05d = %s" % (i, reaction.rid, str(reaction)))

            with open(dirname + '/kegg_pathway.txt', 'w') as output_kegg_file:
                exp_html.write('<a href="%s/kegg_pathway.txt">All solutions in KEGG format</a></br>\n'
                               % experiment_name)

                # Find a solution with a minimal total flux
                logging.info("Preparing LP solver for the minimal total flux problem")
                exp_html.write('<b>Minimum flux</b>')
                slip = Stoichiometric_LP("Pathologic")
                slip.add_stoichiometric_constraints(f, S, compounds, reactions, net_reaction)
                slip.export("../res/pathologic/%s/%03d_lp.txt" % (experiment_name, 0))
                exp_html.write(' (<a href="%s/%03d_lp.txt">LP file</a>): ' % (experiment_name, 0))
                logging.info("Solving")
                if not slip.solve():
                    exp_html.write("<b>There are no solutions!</b>")
                    logging.warning("There are no solutions. Quitting!")
                    # The enclosing 'with' and 'finally' close both outputs.
                    return
                logging.info("writing solution")
                self.write_current_solution(exp_html, slip, experiment_name)

                logging.info("Preparing MILP solver")
                milp = Stoichiometric_LP("Pathologic")
                milp.solution_index = 1
                milp.add_stoichiometric_constraints(f, S, compounds, reactions, net_reaction)
                milp.add_milp_variables()
                if self.max_reactions is not None:
                    milp.add_reaction_num_constraint(self.max_reactions)

                if self.thermodynamic_method == OptimizationMethods.LOCALIZED:
                    milp.add_localized_dGf_constraints(self.thermo)
                else:
                    milp.add_dGr_constraints(self.thermo,
                                             optimization=self.thermodynamic_method,
                                             maximal_dG=self.maximal_dG)

                index = 0
                while (self.max_solutions is None) or (index < self.max_solutions):
                    index += 1
                    # create the MILP problem to constrain the previous solutions not to reappear again.
                    logging.info("Round %03d, solving using MILP" % (milp.solution_index))
                    milp.export("../res/pathologic/%s/%03d_lp.txt" % (experiment_name, milp.solution_index))
                    # NOTE(review): the link below is built from `index` while the
                    # exported file name uses milp.solution_index; they match only
                    # if solution_index advances exactly once per round - confirm.
                    exp_html.write('<b>Solution #%d</b> (<a href="%s/%03d_lp.txt">LP file</a>): '  % (index, experiment_name, index))
                    if not milp.solve():
                        exp_html.write("<b>No solution found</b>")
                        logging.info("No more solutions. Quitting!")
                        break
                    logging.info("writing solution")
                    self.write_current_solution(exp_html, milp, experiment_name,
                                                output_kegg_file)
                    milp.ban_current_solution()
        finally:
            exp_html.close()
Beispiel #18
0
def AnalyzeConcentrationGradient(pathway_file,
                                 output_prefix,
                                 thermo,
                                 conc_range,
                                 cids=(),
                                 pH=None):
    """Scan the concentration of some compounds and plot each pathway's OBD.

    All pathways are first evaluated once with no overridden bounds (this run
    is written to the HTML report under the "test" section prefix). Then, for
    every concentration in the scan, the bounds of all compounds in `cids`
    are pinned to that concentration and GetAllOBDs is run again. One CSV row
    is written per concentration, and a summary figure of OBD versus
    concentration is embedded in the HTML report.

    Args:
        pathway_file: passed to KeggFile2PathwayList to obtain the pathways.
        output_prefix: path prefix; '<prefix>.html' and '<prefix>.csv' are
            written.
        thermo: thermodynamics object, passed through to GetAllOBDs and used
            for compound names (thermo.kegg.cid2name).
        conc_range: passed to ParseConcentrationRange; the scanned
            concentrations are 10 ** (-value) for each parsed value.
        cids: iterable of compound IDs whose concentration is scanned.
        pH: passed through to GetAllOBDs.
    """
    compound_names = ','.join([thermo.kegg.cid2name(cid) for cid in cids])
    pathway_list = KeggFile2PathwayList(pathway_file)
    pathway_names = [entry for (entry, _) in pathway_list]
    html_writer = HtmlWriter('%s.html' % output_prefix)

    # run once just to make sure that the pathways are all working
    # (the returned data is not needed, only the report and any failure):
    logging.info("testing all pathways with default concentrations")
    GetAllOBDs(pathway_list,
               html_writer,
               thermo,
               pH=pH,
               section_prefix="test",
               balance_water=True,
               override_bounds={})

    obd_mat = []
    # The 'with' block makes sure the CSV is flushed and closed, also when
    # one of the optimizations raises in the middle of the scan.
    with open('%s.csv' % output_prefix, 'w') as csv_file:
        csv_output = csv.writer(csv_file)
        csv_output.writerow(['pH', '[' + compound_names + ']'] + pathway_names)

        # the parsed range holds -log10 values of the concentrations
        conc_vec = 10**(-ParseConcentrationRange(conc_range))
        override_bounds = {}

        for conc in conc_vec.flat:
            # lower bound == upper bound pins the compound's concentration
            for cid in cids:
                override_bounds[cid] = (conc, conc)
            logging.info("[%s] = %.1e M" % (compound_names, conc))
            data = GetAllOBDs(pathway_list,
                              html_writer=None,
                              thermo=thermo,
                              pH=pH,
                              section_prefix="",
                              balance_water=True,
                              override_bounds=override_bounds)
            obds = [d['OBD'] for d in data]
            obd_mat.append(obds)
            csv_output.writerow([data[0]['pH'], conc] + obds)

    # rows are concentrations and columns are pathways
    obd_mat = np.matrix(obd_mat)

    fig = plt.figure(figsize=(6, 6), dpi=90)
    colormap = color.ColorMap(pathway_names)
    for i, name in enumerate(pathway_names):
        plt.plot(conc_vec,
                 obd_mat[:, i],
                 '-',
                 color=colormap[name],
                 figure=fig)
    plt.title("OBD vs. [%s]" % (compound_names), figure=fig)
    plt.xscale('log')
    plt.ylim(ymin=0)
    plt.xlabel('[%s] (in M)' % compound_names, figure=fig)
    plt.ylabel('Optimized Distributed Bottleneck [kJ/mol]', figure=fig)
    plt.legend(pathway_names)
    # the closing tag used to be </h1>, which did not match the <h2> opener
    html_writer.write('<h2>Summary figure</h2>\n')
    html_writer.embed_matplotlib_figure(fig)
    html_writer.close()
Beispiel #19
0
def analyze(prefix, thermo):
    """Run the pareto analysis of one pathway file over pH and CO2 levels.

    Reads '../data/thermodynamics/<prefix>.txt' and writes the report to
    '../res/<prefix>.html'. For every pH / CO2 concentration pair, the bounds
    of compound 11 (CO2) are pinned to the CO2 concentration and the bounds of
    compound 288 (called carbonate in this code) are pinned to the value
    derived from the transformed energy of the reaction
    C00011 + C00001 => C00288. `pareto` is then run and its data collected.
    Finally a summary figure is embedded twice: once with default axes and
    once with the x axis capped at 0 and the y axis starting at 0.

    Args:
        prefix: base name of the input pathway file and of the HTML report.
        thermo: thermodynamics object, passed through to `pareto` and used to
            compute the transformed reaction energy.
    """
    kegg_file = ParsedKeggFile.FromKeggFile('../data/thermodynamics/%s.txt' % prefix)
    html_writer = HtmlWriter('../res/%s.html' % prefix)

    co2_hydration = Reaction.FromFormula("C00011 + C00001 => C00288")

    #pH_vec = np.arange(5, 9.001, 0.5)
    #pH_vec = np.array([6, 7, 8])
    pH_vec = np.array([6, 7, 8]) # this needs to be fixed so that the txt file will set the pH
    #co2_conc_vec = np.array([1e-5, 1e-3])
    co2_conc_vec = np.array([1e-5])
    data_mat = []
    override_bounds = {}

    for pH in pH_vec.flat:
        co2_hydration_dG0_prime = float(thermo.GetTransfromedKeggReactionEnergies([co2_hydration], pH=pH))
        for co2_conc in co2_conc_vec.flat:
            carbonate_conc = co2_conc * np.exp(-co2_hydration_dG0_prime / (R*default_T))
            #print "[CO2] = %g, [carbonate] = %g, pH = %.1f, I = %.2fM" % (co2_conc, carbonate_conc, pH, I)
            # lower bound == upper bound pins the concentration
            override_bounds[11] = (co2_conc, co2_conc)
            override_bounds[288] = (carbonate_conc, carbonate_conc)

            section_prefix = 'pH_%g_CO2_%g' % (pH, co2_conc*1000)
            section_title = 'pH = %g, [CO2] = %g mM' % (pH, co2_conc*1000)
            html_writer.write('<h1 id="%s_title">%s</h1>\n' %
                              (section_prefix, section_title))
            html_writer.write_ul(['<a href="#%s_tables">Individual result tables</a>' % section_prefix,
                                  '<a href="#%s_summary">Summary table</a>' % section_prefix,
                                  '<a href="#%s_figure">Summary figure</a>' % section_prefix])

            data, labels = pareto(kegg_file, html_writer, thermo,
                pH=pH, section_prefix=section_prefix, balance_water=True,
                override_bounds=override_bounds)
            data_mat.append(data)

    # axis 0: condition (pH / CO2 pair), axis 1: pathway, axis 2: the two
    # values plotted below as x and y
    data_mat = np.array(data_mat)
    if data_mat.shape[0] == 1:
        # a single condition: scatter plot with one text label per pathway
        pareto_fig = plt.figure(figsize=(6, 6), dpi=90)
        plt.plot(data_mat[0, :, 0], data_mat[0, :, 1], '.', figure=pareto_fig)
        for i in range(data_mat.shape[1]):
            # read from data_mat rather than from the loop variable `data`
            # left over from the scan above; grey marks a negative y value
            if data_mat[0, i, 1] < 0:
                text_color = 'grey'
            else:
                text_color = 'black'
            plt.text(data_mat[0, i, 0], data_mat[0, i, 1], labels[i],
                     ha='left', va='bottom',
                     fontsize=8, color=text_color, figure=pareto_fig)
        plt.title(section_title, figure=pareto_fig)
    else:
        # several conditions: one line per pathway, with the first and last
        # pH values written at the two ends of the line
        pareto_fig = plt.figure(figsize=(10, 10), dpi=90)
        for i in range(data_mat.shape[1]):
            plt.plot(data_mat[:, i, 0], data_mat[:, i, 1], '-', figure=pareto_fig)
            plt.text(data_mat[0, i, 0], data_mat[0, i, 1], '%g' % pH_vec[0],
                     ha='center', fontsize=6, color='black', figure=pareto_fig)
            plt.text(data_mat[-1, i, 0], data_mat[-1, i, 1], '%g' % pH_vec[-1],
                     ha='center', fontsize=6, color='black', figure=pareto_fig)
        plt.legend(labels, loc='upper right')
        plt.title('Pareto', figure=pareto_fig)

    plt.xlabel('Optimal Energetic Efficiency [kJ/mol]', figure=pareto_fig)
    plt.ylabel('Optimized Distributed Bottleneck [kJ/mol]', figure=pareto_fig)
    # NOTE: section_prefix is the value from the last scanned condition.
    # The closing tag used to be </h1>, which did not match the <h2> opener.
    html_writer.write('<h2 id="%s_figure">Summary figure</h2>\n' % section_prefix)

    # plot the Pareto figure showing all values (including infeasible)
    html_writer.embed_matplotlib_figure(pareto_fig, name=prefix + '_0')

    # set axes to hide infeasible pathways and focus on feasible ones
    pareto_fig.axes[0].set_xlim(None, 0)
    pareto_fig.axes[0].set_ylim(0, None)
    html_writer.embed_matplotlib_figure(pareto_fig, name=prefix + '_1')

    html_writer.close()
Beispiel #20
0
def AnalyzePHGradient(pathway_file, output_prefix, thermo, conc_range):
    """Scan the pH and plot each pathway's OBD as a function of it.

    All pathways are first evaluated once with pH=None (this run is written
    to the HTML report under the "test" section prefix). Then GetAllOBDs is
    run for every pH in the scan. The OBDs go to '<output_prefix>.csv' (one
    row per pH), the reaction prices of each pathway go to
    '<output_prefix>/<entry>.csv', and a summary figure is embedded in
    '<output_prefix>.html'.

    Args:
        pathway_file: passed to KeggFile2PathwayList to obtain the pathways.
        output_prefix: path prefix for the HTML/CSV files and the name of the
            directory holding the per-pathway CSV files.
        thermo: thermodynamics object, passed through to GetAllOBDs.
        conc_range: despite its name, this is parsed with
            ParseConcentrationRange and used directly as the vector of pH
            values to scan.
    """
    pathway_list = KeggFile2PathwayList(pathway_file)
    pathway_names = [entry for (entry, _) in pathway_list]
    html_writer = HtmlWriter('%s.html' % output_prefix)

    # run once just to make sure that the pathways are all working:
    logging.info("testing all pathways with default pH")
    data = GetAllOBDs(pathway_list,
                      html_writer,
                      thermo,
                      pH=None,
                      section_prefix="test",
                      balance_water=True,
                      override_bounds={})

    # Every CSV file opened below is tracked here so that all of them are
    # flushed and closed when the scan ends, whether it succeeds or raises.
    open_files = []
    try:
        csv_file = open('%s.csv' % output_prefix, 'w')
        open_files.append(csv_file)
        csv_output = csv.writer(csv_file)
        csv_output.writerow(['pH'] + pathway_names)

        # one extra CSV per pathway, with one column per reaction ID
        util._mkdir(output_prefix)
        shadow_csvs = {}
        for d in data:
            path = '%s/%s.csv' % (output_prefix, d['entry'])
            shadow_file = open(path, 'w')
            open_files.append(shadow_file)
            shadow_csvs[d['entry']] = csv.writer(shadow_file)
            shadow_csvs[d['entry']].writerow(['pH'] + d['rids'])

        pH_vec = ParseConcentrationRange(conc_range)
        obd_mat = []
        for pH in pH_vec.flat:
            logging.info("pH = %.1f" % (pH))
            data = GetAllOBDs(pathway_list,
                              html_writer=None,
                              thermo=thermo,
                              pH=pH,
                              section_prefix="",
                              balance_water=True,
                              override_bounds={})
            obds = [d['OBD'] for d in data]
            obd_mat.append(obds)
            csv_output.writerow([data[0]['pH']] + obds)

            for d in data:
                # Rows whose 'reaction prices' is a plain float are skipped.
                # NOTE(review): the exact-type test is kept on purpose, since
                # isinstance() would also match float subclasses and could
                # change which rows are skipped - confirm before replacing.
                if type(d['reaction prices']) != types.FloatType:
                    prices = list(d['reaction prices'].flat)
                    shadow_csvs[d['entry']].writerow([pH] + prices)
    finally:
        for handle in open_files:
            handle.close()

    # rows are pH values and columns are pathways
    obd_mat = np.matrix(obd_mat)

    fig = plt.figure(figsize=(6, 6), dpi=90)
    colormap = color.ColorMap(pathway_names)
    for i, name in enumerate(pathway_names):
        plt.plot(pH_vec, obd_mat[:, i], '-', color=colormap[name], figure=fig)
    plt.title("OBD vs. pH", figure=fig)
    plt.ylim(0, np.max(obd_mat.flat))
    plt.xlabel('pH', figure=fig)
    plt.ylabel('Optimized Distributed Bottleneck [kJ/mol]', figure=fig)
    plt.legend(pathway_names)
    # the closing tag used to be </h1>, which did not match the <h2> opener
    html_writer.write('<h2>Summary figure</h2>\n')
    html_writer.embed_matplotlib_figure(fig)

    html_writer.close()
Beispiel #21
0
def analyze(prefix, thermo):
    """Run the pareto analysis of one pathway file over pH and CO2 levels.

    Reads '../data/thermodynamics/<prefix>.txt' and writes the report to
    '../res/<prefix>.html'. For every pH / CO2 concentration pair, the bounds
    of compound 11 (CO2) are pinned to the CO2 concentration and the bounds of
    compound 288 (called carbonate in this code) are pinned to the value
    derived from the transformed energy of the reaction
    C00011 + C00001 => C00288. `pareto` is then run and its data collected.
    Finally a summary figure is embedded twice: once with default axes and
    once with the x axis capped at 0 and the y axis starting at 0.

    Args:
        prefix: base name of the input pathway file and of the HTML report.
        thermo: thermodynamics object, passed through to `pareto` and used to
            compute the transformed reaction energy.
    """
    kegg_file = ParsedKeggFile.FromKeggFile('../data/thermodynamics/%s.txt' %
                                            prefix)
    html_writer = HtmlWriter('../res/%s.html' % prefix)

    co2_hydration = Reaction.FromFormula("C00011 + C00001 => C00288")

    #pH_vec = np.arange(5, 9.001, 0.5)
    #pH_vec = np.array([6, 7, 8])
    # this needs to be fixed so that the txt file will set the pH
    pH_vec = np.array([6, 7, 8])
    #co2_conc_vec = np.array([1e-5, 1e-3])
    co2_conc_vec = np.array([1e-5])
    data_mat = []
    override_bounds = {}

    for pH in pH_vec.flat:
        co2_hydration_dG0_prime = float(
            thermo.GetTransfromedKeggReactionEnergies([co2_hydration], pH=pH))
        for co2_conc in co2_conc_vec.flat:
            carbonate_conc = co2_conc * np.exp(-co2_hydration_dG0_prime /
                                               (R * default_T))
            #print "[CO2] = %g, [carbonate] = %g, pH = %.1f, I = %.2fM" % (co2_conc, carbonate_conc, pH, I)
            # lower bound == upper bound pins the concentration
            override_bounds[11] = (co2_conc, co2_conc)
            override_bounds[288] = (carbonate_conc, carbonate_conc)

            section_prefix = 'pH_%g_CO2_%g' % (pH, co2_conc * 1000)
            section_title = 'pH = %g, [CO2] = %g mM' % (pH, co2_conc * 1000)
            html_writer.write('<h1 id="%s_title">%s</h1>\n' %
                              (section_prefix, section_title))
            html_writer.write_ul([
                '<a href="#%s_tables">Individual result tables</a>' %
                section_prefix,
                '<a href="#%s_summary">Summary table</a>' % section_prefix,
                '<a href="#%s_figure">Summary figure</a>' % section_prefix
            ])

            data, labels = pareto(kegg_file,
                                  html_writer,
                                  thermo,
                                  pH=pH,
                                  section_prefix=section_prefix,
                                  balance_water=True,
                                  override_bounds=override_bounds)
            data_mat.append(data)

    # axis 0: condition (pH / CO2 pair), axis 1: pathway, axis 2: the two
    # values plotted below as x and y
    data_mat = np.array(data_mat)
    if data_mat.shape[0] == 1:
        # a single condition: scatter plot with one text label per pathway
        pareto_fig = plt.figure(figsize=(6, 6), dpi=90)
        plt.plot(data_mat[0, :, 0], data_mat[0, :, 1], '.', figure=pareto_fig)
        for i in range(data_mat.shape[1]):
            # read from data_mat rather than from the loop variable `data`
            # left over from the scan above; grey marks a negative y value
            if data_mat[0, i, 1] < 0:
                text_color = 'grey'
            else:
                text_color = 'black'
            plt.text(data_mat[0, i, 0],
                     data_mat[0, i, 1],
                     labels[i],
                     ha='left',
                     va='bottom',
                     fontsize=8,
                     color=text_color,
                     figure=pareto_fig)
        plt.title(section_title, figure=pareto_fig)
    else:
        # several conditions: one line per pathway, with the first and last
        # pH values written at the two ends of the line
        pareto_fig = plt.figure(figsize=(10, 10), dpi=90)
        for i in range(data_mat.shape[1]):
            plt.plot(data_mat[:, i, 0],
                     data_mat[:, i, 1],
                     '-',
                     figure=pareto_fig)
            plt.text(data_mat[0, i, 0],
                     data_mat[0, i, 1],
                     '%g' % pH_vec[0],
                     ha='center',
                     fontsize=6,
                     color='black',
                     figure=pareto_fig)
            plt.text(data_mat[-1, i, 0],
                     data_mat[-1, i, 1],
                     '%g' % pH_vec[-1],
                     ha='center',
                     fontsize=6,
                     color='black',
                     figure=pareto_fig)
        plt.legend(labels, loc='upper right')
        plt.title('Pareto', figure=pareto_fig)

    plt.xlabel('Optimal Energetic Efficiency [kJ/mol]', figure=pareto_fig)
    plt.ylabel('Optimized Distributed Bottleneck [kJ/mol]', figure=pareto_fig)
    # NOTE: section_prefix is the value from the last scanned condition.
    # The closing tag used to be </h1>, which did not match the <h2> opener.
    html_writer.write('<h2 id="%s_figure">Summary figure</h2>\n' %
                      section_prefix)

    # plot the Pareto figure showing all values (including infeasible)
    html_writer.embed_matplotlib_figure(pareto_fig, name=prefix + '_0')

    # set axes to hide infeasible pathways and focus on feasible ones
    pareto_fig.axes[0].set_xlim(None, 0)
    pareto_fig.axes[0].set_ylim(0, None)
    html_writer.embed_matplotlib_figure(pareto_fig, name=prefix + '_1')

    html_writer.close()