Python HtmlWriter.write_table Exemples, toolbox.html_writer.HtmlWriter.write_table Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : groups_report.py Projet : titus0810/milo-lab

def compare_charges():
    """Write an HTML table comparing the charges given by two estimators.

    For every compound ID known to either the 'hatzi' or the 'milo'
    estimator, the most abundant pseudoisomer is looked up at
    (default_pH, I=0, pMg=14, default_T) and one table row is produced.
    The table is written to ../res/groups_report.html together with the
    per-compound error read from the "gc_errors" table of
    ../res/gibbs.sqlite.
    """
    # db_public = SqliteDatabase('../data/public_data.sqlite')
    gibbs_db = SqliteDatabase('../res/gibbs.sqlite')
    print("Writing Compare Charges report to ../res/groups_report.html")
    report = HtmlWriter("../res/groups_report.html")
    kegg = Kegg.getInstance()

    # pH, I, pMg, T = default_pH, default_I, default_pMg, default_T
    pH, I, pMg, T = default_pH, 0, 14, default_T

    # Map each compound ID to the error stored in the "gc_errors" table.
    cid2error = dict((int(rec['cid']), rec['error'])
                     for rec in gibbs_db.DictReader("gc_errors"))

    estimators = {}
    estimators['hatzi'] = Hatzi(use_pKa=False)
    estimators['milo'] = PsuedoisomerTableThermodynamics.FromDatabase(
        db_gibbs=None or gibbs_db, table_name='gc_pseudoisomers',
        name='Milo Group Contribution') if False else \
        PsuedoisomerTableThermodynamics.FromDatabase(
            gibbs_db, 'gc_pseudoisomers', name='Milo Group Contribution')

    all_cids = set(lsum([est.get_all_cids() for est in estimators.values()]))

    table_rows = []
    for cid in all_cids:
        # Compounds missing from KEGG still get a row, with placeholders.
        try:
            name = kegg.cid2name(cid)
            link = kegg.cid2compound(cid).get_link()
        except KeyError:
            name, link = "unknown", ""

        row = {}
        row['cid'] = '<a href="%s">C%05d</a>' % (link, cid)
        row['name'] = name
        row['error'] = cid2error.get(cid)

        for key, est in estimators.items():
            try:
                pmap = est.cid2PseudoisomerMap(cid)
                most_abundant = pmap.GetMostAbundantPseudoisomer(
                    pH, I, pMg, T)
                dG0, dG0_tag, nH, z, nMg = most_abundant
            except MissingCompoundFormationEnergy:
                # This estimator has no value: leave its cells empty.
                dG0 = dG0_tag = nH = z = nMg = ""
            for prefix, value in (('nH_', nH), ('charge_', z),
                                  ('nMg_', nMg), ('dG0_', dG0),
                                  ('dG0_tag_', dG0_tag)):
                row[prefix + key] = value
        table_rows.append(row)

    column_names = ['cid', 'name', 'charge_hatzi', 'charge_milo', 'error']
    report.write_table(table_rows, headers=column_names)
    report.close()

Exemple #2

0

Afficher le fichier

Fichier : groups_report.py Projet : issfangks/milo-lab

def compare_charges():
    """Generate ../res/groups_report.html, a per-compound charge comparison.

    Each row holds the KEGG link and name of a compound, the charge of its
    most abundant pseudoisomer according to the 'hatzi' and 'milo'
    estimators (evaluated at default_pH, I=0, pMg=14, default_T), and the
    error recorded for it in the "gc_errors" table of ../res/gibbs.sqlite.
    """
    # db_public = SqliteDatabase('../data/public_data.sqlite')
    db = SqliteDatabase('../res/gibbs.sqlite')
    print("Writing Compare Charges report to ../res/groups_report.html")
    writer = HtmlWriter("../res/groups_report.html")
    kegg = Kegg.getInstance()

    # pH, I, pMg, T = default_pH, default_I, default_pMg, default_T
    pH, I, pMg, T = default_pH, 0, 14, default_T

    # Error per compound ID, as stored in the database.
    cid2error = {}
    for record in db.DictReader("gc_errors"):
        cid2error[int(record['cid'])] = record['error']

    estimators = {}
    estimators['hatzi'] = Hatzi(use_pKa=False)
    estimators['milo'] = PsuedoisomerTableThermodynamics.FromDatabase(
        db, 'gc_pseudoisomers', name='Milo Group Contribution')

    cid_lists = [estimator.get_all_cids()
                 for estimator in estimators.values()]
    all_cids = set(lsum(cid_lists))

    rows = []
    for cid in all_cids:
        try:
            name = kegg.cid2name(cid)
            link = kegg.cid2compound(cid).get_link()
        except KeyError:
            # Not found in KEGG: keep the row but use placeholder values.
            name = "unknown"
            link = ""

        row = {
            'cid': '<a href="%s">C%05d</a>' % (link, cid),
            'name': name,
            'error': cid2error.get(cid),
        }

        for key, estimator in estimators.items():
            try:
                pseudoisomers = estimator.cid2PseudoisomerMap(cid)
                dG0, dG0_tag, nH, z, nMg = \
                    pseudoisomers.GetMostAbundantPseudoisomer(pH, I, pMg, T)
            except MissingCompoundFormationEnergy:
                # No formation energy from this estimator: blank cells.
                dG0, dG0_tag, nH, z, nMg = ("",) * 5
            row['nH_' + key] = nH
            row['charge_' + key] = z
            row['nMg_' + key] = nMg
            row['dG0_' + key] = dG0
            row['dG0_tag_' + key] = dG0_tag

        rows.append(row)

    writer.write_table(
        rows,
        headers=['cid', 'name', 'charge_hatzi', 'charge_milo', 'error'])
    writer.close()

Exemple #3

0

Afficher le fichier

Fichier : obd_full_kegg.py Projet : titus0810/milo-lab

def main():
    """Analyze every KEGG module and write a summary table sorted by OBD.

    The thermodynamic estimator is chosen by the command-line arguments and
    the conditions (pH, I, T, pMg, concentration range) come from the
    configuration file: the 'CONFIGURATION' entry if present, otherwise the
    first entry. One heading per module is written to
    <output_prefix>.html, followed by whatever AnalyzeKeggModule writes,
    and finally a table of all modules that could be analyzed.

    Raises:
        ValueError: if the configuration file has no entries.
    """
    estimators = LoadAllEstimators()
    args = MakeArgParser(estimators).parse_args()

    thermo = estimators[args.thermodynamics_source]

    kegg_file = ParsedKeggFile.FromKeggFile(args.config_fname)
    entries = kegg_file.entries()
    if not len(entries):
        raise ValueError('No entries in configuration file')

    if 'CONFIGURATION' in entries:
        entry = 'CONFIGURATION'
    else:
        logging.warning(
            'Configuration file does not contain the entry "CONFIGURATION". '
            'Using the first entry by default: %s' % entries[0])
        entry = entries[0]

    p_data = PathwayData.FromFieldMap(kegg_file[entry])
    thermo.SetConditions(pH=p_data.pH, I=p_data.I, T=p_data.T, pMg=p_data.pMg)
    thermo.c_range = p_data.c_range
    bounds = p_data.GetBounds()

    html_writer = HtmlWriter(args.output_prefix + ".html")

    headers = ['Module', 'Name', 'OBD [kJ/mol]', 'Length']
    summary_rows = []
    kegg = Kegg.getInstance()
    for mid in kegg.get_all_mids():
        heading = '<h2 id=M%05d>M%05d: %s</h2>' % (
            mid, mid, kegg.get_module_name(mid))
        html_writer.write(heading)
        try:
            module_row = AnalyzeKeggModule(thermo, mid, bounds, html_writer)
        except KeyError:
            # The module could not be analyzed; leave it out of the table.
            pass
        else:
            module_row['Module'] = '<a href="#M%05d">M%05d</a>' % (mid, mid)
            module_row['Name'] = kegg.get_module_name(mid)
            summary_rows.append(module_row)

    def obd_of(row):
        return row['OBD [kJ/mol]']

    summary_rows.sort(key=obd_of)
    html_writer.write_table(summary_rows, headers, decimal=1)
    html_writer.close()

Exemple #4

0

Afficher le fichier

Fichier : obd_full_kegg.py Projet : issfangks/milo-lab

def main():
    """Run the module analysis over all KEGG modules and tabulate the result.

    Reads the estimator name, configuration file name and output prefix
    from the command line, applies the conditions found in the
    configuration entry ('CONFIGURATION', or the first entry as a
    fallback) to the chosen estimator, and writes <output_prefix>.html:
    one section per module plus a final table ordered by 'OBD [kJ/mol]'.

    Raises:
        ValueError: if the configuration file has no entries.
    """
    estimators = LoadAllEstimators()
    parser = MakeArgParser(estimators)
    args = parser.parse_args()
    thermo = estimators[args.thermodynamics_source]

    kegg_file = ParsedKeggFile.FromKeggFile(args.config_fname)
    entries = kegg_file.entries()
    if len(entries) < 1:
        raise ValueError('No entries in configuration file')

    entry = 'CONFIGURATION'
    if entry not in entries:
        first_entry = entries[0]
        logging.warning('Configuration file does not contain the entry "CONFIGURATION". '
                        'Using the first entry by default: %s' % first_entry)
        entry = first_entry

    p_data = PathwayData.FromFieldMap(kegg_file[entry])
    thermo.SetConditions(pH=p_data.pH, I=p_data.I, T=p_data.T, pMg=p_data.pMg)
    thermo.c_range = p_data.c_range
    bounds = p_data.GetBounds()

    html_writer = HtmlWriter(args.output_prefix + ".html")
    kegg = Kegg.getInstance()

    table = []
    for mid in kegg.get_all_mids():
        html_writer.write('<h2 id=M%05d>M%05d: %s</h2>'
                          % (mid, mid, kegg.get_module_name(mid)))
        try:
            result = AnalyzeKeggModule(thermo, mid, bounds, html_writer)
        except KeyError:
            # Skip modules for which the analysis fails with a KeyError.
            continue
        result['Module'] = '<a href="#M%05d">M%05d</a>' % (mid, mid)
        result['Name'] = kegg.get_module_name(mid)
        table.append(result)

    table.sort(key=lambda row: row['OBD [kJ/mol]'])
    html_writer.write_table(
        table, ['Module', 'Name', 'OBD [kJ/mol]', 'Length'], decimal=1)
    html_writer.close()

Exemple #5

0

Afficher le fichier

Fichier : nist_verify.py Projet : titus0810/milo-lab

def main():
    """Write the NIST verification report to ../res/nist/report.html.

    The report contains:
      * the number of NIST reactions, in total and within the selected
        temperature range;
      * one collapsible section per estimator with the output of
        ``nist.verify_results`` and its N / RMSE summary;
      * a final table with, for every estimator, the RMSE and its coverage
        of the KEGG, FEIST and NIST reaction sets.

    The writer is closed at the end, and an empty reaction set yields a
    coverage of 0.0% instead of a ZeroDivisionError.
    """
    html_writer = HtmlWriter("../res/nist/report.html")
    estimators = LoadAllEstimators()
    nist = Nist()
    nist.T_range = (273.15 + 24, 273.15 + 40)
    # nist.override_I = 0.25
    # nist.override_pMg = 14.0
    # nist.override_T = 298.15

    html_writer.write('<p>\n')
    html_writer.write("Total number of reaction in NIST: %d</br>\n" %
                      len(nist.data))
    html_writer.write(
        "Total number of reaction in range %.1fK < T < %.1fK: %d</br>\n" %
        (nist.T_range[0], nist.T_range[1], len(nist.SelectRowsFromNist())))
    html_writer.write('</p>\n')

    # Collect the reaction sets over which coverage is reported.
    reactions = {}
    reactions['KEGG'] = []
    for reaction in Kegg.getInstance().AllReactions():
        try:
            reaction.Balance(balance_water=True, exception_if_unknown=True)
        except (KeggReactionNotBalancedException, KeggParseException,
                OpenBabelError):
            # Reactions that cannot be balanced are left out of the KEGG set.
            continue
        reactions['KEGG'].append(reaction)

    reactions['FEIST'] = Feist.FromFiles().reactions
    reactions['NIST'] = nist.GetUniqueReactionSet()

    # Pairwise comparisons are currently disabled (empty list), e.g.:
    # pairs += [('hatzi_gc', 'UGC'), ('PGC', 'PRC'), ('alberty', 'PRC')]
    pairs = []
    for t1, t2 in pairs:
        logging.info('Writing the NIST report for %s vs. %s' %
                     (estimators[t1].name, estimators[t2].name))
        html_writer.write('<p><b>%s vs. %s</b> ' %
                          (estimators[t1].name, estimators[t2].name))
        html_writer.insert_toggle(start_here=True)
        two_way_comparison(html_writer=html_writer,
                           thermo1=estimators[t1],
                           thermo2=estimators[t2],
                           reaction_list=reactions['FEIST'],
                           name='%s_vs_%s' % (t1, t2))
        html_writer.div_end()
        html_writer.write('</p>')

    # Disabled on purpose; flip the condition to compare over all of KEGG.
    if False:
        estimators['alberty'].CompareOverKegg(
            html_writer,
            other=estimators['PRC'],
            fig_name='kegg_compare_alberty_vs_nist')

    # First table row: the size of each reaction set.
    rowdicts = []
    rowdict = {'Method': 'Total'}
    for db_name, reaction_list in reactions.items():
        rowdict[db_name + ' coverage'] = len(reaction_list)
    rowdicts.append(rowdict)

    for name in ['UGC', 'PGC', 'PRC', 'alberty', 'merged', 'hatzi_gc']:
        thermo = estimators[name]
        logging.info('Writing the NIST report for %s' % thermo.name)
        html_writer.write('<p><b>%s</b> ' % thermo.name)
        html_writer.insert_toggle(start_here=True)
        num_estimations, rmse = nist.verify_results(html_writer=html_writer,
                                                    thermodynamics=thermo,
                                                    name=name)
        html_writer.div_end()
        html_writer.write('N = %d, RMSE = %.1f</p>\n' %
                          (num_estimations, rmse))
        logging.info('N = %d, RMSE = %.1f' % (num_estimations, rmse))

        rowdict = {
            'Method': thermo.name,
            'RMSE (kJ/mol)': "%.1f (N=%d)" % (rmse, num_estimations),
        }
        for db_name, reaction_list in reactions.items():
            n_covered = thermo.CalculateCoverage(reaction_list)
            n_total = len(reaction_list)
            # Guard against an empty reaction set (would divide by zero).
            percent = n_covered * 100.0 / n_total if n_total else 0.0
            rowdict[db_name + " coverage"] = \
                "%.1f%% (%d)" % (percent, n_covered)
            logging.info(db_name + " coverage = %.1f%%" % percent)
        rowdicts.append(rowdict)

    headers = ['Method', 'RMSE (kJ/mol)'] + \
        [db_name + ' coverage' for db_name in reactions]
    html_writer.write_table(rowdicts, headers=headers)
    # Close the report explicitly, like the other report scripts do.
    html_writer.close()

Exemple #6

0

Afficher le fichier

Fichier : nist_regression_temp.py Projet : issfangks/milo-lab

def main():
    """Run the PRC regression on the NIST data and export the results.

    Steps, in order:
      1. Define the anchored compounds (``fixed_cids``: CID -> (nH, dG0))
         and read the Alberty pseudoisomer table from
         ../data/public_data.sqlite.
      2. Obtain the stoichiometric matrix S, the dG0 vector and the CID
         list: computed through NistRegression and cached in
         ../res/prc_{CID,S,dG0}.txt when ../res/prc_S.txt does not exist,
         otherwise loaded from those files.
      3. Solve the least-squares problem with the anchored values fixed and
         compute the kernel of S restricted to the non-anchored compounds.
      4. Write the HTML report (../res/regression_fast.html), a CSV file
         (../res/prc_results.csv) and a LaTeX table (../res/prc_latex.txt).

    Raises:
        ValueError: from ``cids.index`` if an anchored compound is not
            among the compounds of the regression.
    """
    kegg = Kegg.getInstance()
    prefix = "../res/prc_"

    fixed_cids = {}  # a dictionary from CID to pairs of (nH, dG0)

    # Alberty formation energies directly measured, linearly independent:
    fixed_cids[1] = (2, -237.19)  # H2O
    fixed_cids[9] = (1, -1096.1)  # HPO3(-2)
    fixed_cids[14] = (4, -79.31)  # NH4(+1)
    fixed_cids[59] = (0, -744.53)  # SO4(-2)
    fixed_cids[288] = (1, -586.77)  # HCO3(-1)

    # Alberty zeros:
    fixed_cids[3] = (26, 0.0)  # NAD(ox)
    fixed_cids[10] = (32, 0.0)  # CoA
    fixed_cids[127] = (30, 0.0)  # glutathione(ox)
    fixed_cids[376] = (28, 0.0)  # retinal(ox)

    # Directly measured values
    fixed_cids[4] = (27, 22.65)  # NAD(red) -- relative to NAD(ox)
    fixed_cids[212] = (13, -194.5)  # adenosine
    # fixed_cids[294] = (12, -409.2) # inosine - linearly dependent on other 'anchors'

    # Alberty zeros which are not in NIST:
    # fixed_cids[524] = ( 0, 0.0) # cytochrome c(ox)
    # fixed_cids[16]  = (31, 0.0) # FAD(ox)
    # fixed_cids[139] = ( 0, 0.0) # ferredoxin(ox)
    # fixed_cids[61]  = (19, 0.0) # FMN(ox)
    # fixed_cids[343] = ( 0, 0.0) # thioredoxin(ox)
    # fixed_cids[399] = (90, 0.0) # ubiquinone(ox)

    # Most abundant Alberty pseudoisomer (nH and dG0) of every compound.
    public_db = SqliteDatabase("../data/public_data.sqlite")
    alberty = PsuedoisomerTableThermodynamics.FromDatabase(
        public_db, "alberty_pseudoisomers", label=None, name="Alberty"
    )
    alberty_cid2dG0 = {}
    alberty_cid2nH = {}
    for cid in alberty.get_all_cids():
        pmap = alberty.cid2PseudoisomerMap(cid)
        alberty_dG0, _dG0_tag, alberty_nH, _z, _nMg = pmap.GetMostAbundantPseudoisomer(
            pH=default_pH, I=default_I, pMg=default_pMg, T=default_T
        )
        alberty_cid2nH[cid] = alberty_nH
        alberty_cid2dG0[cid] = alberty_dG0

    if not os.path.exists(prefix + "S.txt"):
        db = SqliteDatabase("../res/gibbs.sqlite")
        nist_regression = NistRegression(db)

        # Choose nH per compound: anchored value first, then Alberty, then
        # the dissociation table, and finally 0 as a last resort.
        cid2nH = {}
        for cid in nist_regression.nist.GetAllCids():
            if cid in fixed_cids:
                cid2nH[cid] = fixed_cids[cid][0]
            elif cid in alberty_cid2nH:
                cid2nH[cid] = alberty_cid2nH[cid]
            else:
                tmp = nist_regression.dissociation.GetMostAbundantPseudoisomer(
                    cid, pH=default_pH, I=default_I, pMg=default_pMg, T=default_T
                )
                if tmp is not None:
                    cid2nH[cid] = tmp[0]
                else:
                    logging.warning(
                        "The most abundant pseudoisomer of %s (C%05d) "
                        "cannot be resolved. Using nH = 0." % (kegg.cid2name(cid), cid)
                    )
                    cid2nH[cid] = 0

        # nist_regression.std_diff_threshold = 2.0 # the threshold over which to print an analysis of a reaction
        # nist_regression.nist.T_range = None#(273.15 + 24, 273.15 + 40)
        S, dG0, cids = nist_regression.ReverseTransform(cid2nH=cid2nH)

        # export the raw data matrices to text files
        C = np.array([[cid, cid2nH.get(cid, 0)] for cid in cids])
        np.savetxt(prefix + "CID.txt", C, fmt="%d", delimiter=",")
        np.savetxt(prefix + "S.txt", S, fmt="%g", delimiter=",")
        np.savetxt(prefix + "dG0.txt", dG0, fmt="%.2f", delimiter=",")
    else:
        # Reuse the matrices cached by a previous run.
        C = np.loadtxt(prefix + "CID.txt", delimiter=",")
        cids = [int(cid) for cid in C[:, 0]]
        cid2nH = {}
        for i, cid in enumerate(cids):
            cid2nH[cid] = int(C[i, 1])
        S = np.loadtxt(prefix + "S.txt", delimiter=",")
        dG0 = np.loadtxt(prefix + "dG0.txt", delimiter=",")
        dG0 = np.reshape(dG0, (dG0.shape[0], 1))

    html_writer = HtmlWriter("../res/regression_fast.html")
    html_writer.write("<h1>Pseudoisomeric Reactant Contributions</h1>\n")
    html_writer.write("<p>The stoichiometric matrix (S):")
    html_writer.insert_toggle(start_here=True)
    stoichiometric_matrix2html(html_writer, S, cids)
    html_writer.div_end()
    html_writer.write("</p>")

    # Map the column index of every anchored compound to its fixed dG0.
    index2value = {}
    for cid, (_nH, dG0_fixed) in fixed_cids.items():
        index2value[cids.index(cid)] = dG0_fixed

    solution, _K = LinearRegression.LeastSquaresWithFixedPoints(S, dG0, index2value)
    cid2dG0 = {}
    for i, cid in enumerate(cids):
        cid2dG0[cid] = solution[i]

    # Calculate the Kernel of the reduced stoichiometric matrix (after removing
    # the columns of the fixed compounds).
    cids_red = [cid for cid in cids if cid not in fixed_cids]
    index_red = [i for i, _cid in enumerate(cids) if i not in index2value]
    S_red = S[:, index_red]
    K_red = LinearRegression.Kernel(S_red)

    # print "Reduced Stoichiometric Matrix:"
    # print matrix2string(S_red, cids_red, kegg)
    # print '-'*80

    # Find all CIDs that are completely determined and do not depend on any
    # free variable. In other words, all zero-columns of the reduced kernel.
    determined_indices = np.where(np.sum(abs(K_red), 0) < 1e-10)[0]
    determined_cids = set(cids_red[i] for i in determined_indices)

    dict_list = []
    plot_data = []
    for cid in cids:
        compound_name = kegg.cid2name(cid)
        d = {
            "CID": "C%05d" % cid,
            "Compound": compound_name,
            "nH": "%d" % cid2nH[cid],
            "dG0 (PRC)": "%.1f" % cid2dG0[cid],
        }
        if cid in alberty_cid2dG0:
            d["dG0 (Alberty)"] = "%.1f" % alberty_cid2dG0[cid]
            # Anchored compounds are excluded from the comparison plot.
            if cid not in fixed_cids:
                plot_data.append((alberty_cid2dG0[cid], cid2dG0[cid], compound_name))
        else:
            d["dG0 (Alberty)"] = ""

        if cid in fixed_cids:
            d["Depends on"] = "anchored"
        elif cid in determined_cids:
            d["Depends on"] = "fixed compounds"
        else:
            d["Depends on"] = "kernel dimensions"

        dict_list.append(d)

    dict_list.sort(key=lambda row: (row["Depends on"], row["CID"]))
    html_writer.write("<p>Formation energies determined by the linear constraints:")
    html_writer.insert_toggle(start_here=True)
    html_writer.write('<font size="1">')
    html_writer.write_table(
        dict_list, headers=["#", "Compound", "CID", "nH", "dG0 (PRC)", "dG0 (Alberty)", "Depends on"]
    )
    html_writer.write("</font>")
    html_writer.div_end()
    html_writer.write("</p>")

    # Plot a comparison between PRC and Alberty formation energies
    fig = plt.figure(figsize=(8, 8), dpi=80)
    plt.plot([p[0] for p in plot_data], [p[1] for p in plot_data], "b.", figure=fig)
    for x_val, y_val, label in plot_data:
        plt.text(x_val, y_val, label, fontsize=6)
    # Raw strings: "\D" and "\c" are not valid escapes in a normal literal.
    plt.xlabel(r"Alberty $\Delta_f G^\circ$")
    plt.ylabel(r"PRC $\Delta_f G^\circ$")
    html_writer.write("<p>Plot comparing PRC and Alberty results:")
    html_writer.insert_toggle(start_here=True)
    html_writer.embed_matplotlib_figure(fig)
    html_writer.div_end()
    html_writer.write("</p>")

    K_sparse = SparseKernel(S_red).Solve()
    html_writer.write("<p>The sparse null-space of the reduced stoichiometric matrix:")
    html_writer.insert_toggle(start_here=True)
    stoichiometric_matrix2html(html_writer, K_sparse, cids_red)
    html_writer.div_end()
    html_writer.write("</p>")

    # Express every non-anchored compound as its fitted value plus a
    # combination of the free variables V_i (one per row of K_sparse).
    dict_list = []
    n_free = K_sparse.shape[0]
    index2string_html = dict((i, "V<sub>%02d</sub>" % i) for i in range(n_free))
    index2string = dict((i, "V%d" % i) for i in range(n_free))
    for i, cid in enumerate(cids_red):
        column = K_sparse[:, i]
        d = {}
        d["KEGG ID"] = '<a href="%s">C%05d</a>' % (kegg.cid2link(cid), cid)
        d["KEGG ID plain"] = "C%05d" % cid
        d["Compound"] = kegg.cid2name(cid)
        d["nH"] = "%d" % cid2nH[cid]

        if cid in alberty_cid2dG0:
            d["dG0 (Alberty)"] = "%.1f" % alberty_cid2dG0[cid]
        else:
            d["dG0 (Alberty)"] = ""

        d["dG0 (PRC)"] = "%.1f" % cid2dG0[cid]
        d["dG0 (PRC) plain"] = "%.1f" % cid2dG0[cid]

        # Sort key: the 0/1 pattern of free variables used, last one first.
        indic = np.where(abs(column) > 1e-10, 1, 0).tolist()
        indic.reverse()
        d["order_key"] = indic
        if mlab.rms_flat(column) > 1e-10:
            d["dG0 (PRC)"] += " + (" + vector2string(column, index2string_html) + ")"
            d["dG0 (PRC) plain"] += " + (" + vector2string(column, index2string) + ")"
        dict_list.append(d)

    dict_list.sort(key=lambda row: (row["order_key"], row["KEGG ID plain"]))

    # Export the results to CSV; the with-block guarantees the file is
    # flushed and closed.
    with open("../res/prc_results.csv", "w") as csv_file:
        csv_writer = csv.writer(csv_file)
        csv_writer.writerow(["KEGG ID", "Compound", "nH", "dG0 (PRC)", "dG0 (Alberty)"])
        for d in dict_list:
            csv_writer.writerow(
                [d["KEGG ID plain"], d["Compound"], d["nH"], d["dG0 (PRC) plain"], d["dG0 (Alberty)"]]
            )

    html_writer.write("<p>All formation energies as a function of the free variables:")
    html_writer.insert_toggle(start_here=True)
    html_writer.write('<font size="1">')
    html_writer.write_table(dict_list, headers=["#", "KEGG ID", "Compound", "nH", "dG0 (PRC)", "dG0 (Alberty)"])
    html_writer.write("</font>")
    html_writer.div_end()
    html_writer.write("</p>")

    with open("../res/prc_latex.txt", "w") as fp:
        fp.write(
            latex.table2LaTeX(
                dict_list, headers=["#", "KEGG ID plain", "Compound", "nH", "dG0 (PRC) plain", "dG0 (Alberty)"]
            )
        )

    # Close the report explicitly, like the other report scripts do.
    html_writer.close()

Exemple #7

0

Afficher le fichier

Fichier : nist_verify.py Projet : issfangks/milo-lab

def main():
    """Generate the NIST verification report in ../res/nist/report.html.

    Writes the NIST reaction counts, one collapsible section per estimator
    (filled by ``nist.verify_results`` and followed by its N / RMSE line),
    and a closing table listing each estimator's RMSE and its coverage of
    the KEGG, FEIST and NIST reaction sets.

    The writer is closed at the end, and an empty reaction set is reported
    as 0.0% coverage rather than raising ZeroDivisionError.
    """
    html_writer = HtmlWriter("../res/nist/report.html")
    estimators = LoadAllEstimators()
    nist = Nist()
    nist.T_range = (273.15 + 24, 273.15 + 40)
    # nist.override_I = 0.25
    # nist.override_pMg = 14.0
    # nist.override_T = 298.15

    html_writer.write('<p>\n')
    html_writer.write("Total number of reaction in NIST: %d</br>\n" % len(nist.data))
    html_writer.write("Total number of reaction in range %.1fK < T < %.1fK: %d</br>\n" %
                      (nist.T_range[0], nist.T_range[1], len(nist.SelectRowsFromNist())))
    html_writer.write('</p>\n')

    # Reaction sets used for the coverage columns.
    reactions = {}
    reactions['KEGG'] = []
    for reaction in Kegg.getInstance().AllReactions():
        try:
            reaction.Balance(balance_water=True, exception_if_unknown=True)
        except (KeggReactionNotBalancedException, KeggParseException, OpenBabelError):
            # Unbalanceable reactions are not counted in the KEGG set.
            continue
        reactions['KEGG'].append(reaction)

    reactions['FEIST'] = Feist.FromFiles().reactions
    reactions['NIST'] = nist.GetUniqueReactionSet()

    # Pairwise comparisons are currently disabled (empty list), e.g.:
    # pairs += [('hatzi_gc', 'UGC'), ('PGC', 'PRC'), ('alberty', 'PRC')]
    pairs = []
    for t1, t2 in pairs:
        logging.info('Writing the NIST report for %s vs. %s' %
                     (estimators[t1].name, estimators[t2].name))
        html_writer.write('<p><b>%s vs. %s</b> ' %
                          (estimators[t1].name, estimators[t2].name))
        html_writer.insert_toggle(start_here=True)
        two_way_comparison(html_writer=html_writer,
                           thermo1=estimators[t1],
                           thermo2=estimators[t2],
                           reaction_list=reactions['FEIST'],
                           name='%s_vs_%s' % (t1, t2))
        html_writer.div_end()
        html_writer.write('</p>')

    # Disabled on purpose; flip the condition to compare over all of KEGG.
    if False:
        estimators['alberty'].CompareOverKegg(html_writer,
                                              other=estimators['PRC'],
                                              fig_name='kegg_compare_alberty_vs_nist')

    # The first row of the table holds the size of each reaction set.
    rowdicts = []
    rowdict = {'Method': 'Total'}
    for db_name, reaction_list in reactions.items():
        rowdict[db_name + ' coverage'] = len(reaction_list)
    rowdicts.append(rowdict)

    for name in ['UGC', 'PGC', 'PRC', 'alberty', 'merged', 'hatzi_gc']:
        thermo = estimators[name]
        logging.info('Writing the NIST report for %s' % thermo.name)
        html_writer.write('<p><b>%s</b> ' % thermo.name)
        html_writer.insert_toggle(start_here=True)
        num_estimations, rmse = nist.verify_results(html_writer=html_writer,
                                                    thermodynamics=thermo,
                                                    name=name)
        html_writer.div_end()
        html_writer.write('N = %d, RMSE = %.1f</p>\n' % (num_estimations, rmse))
        logging.info('N = %d, RMSE = %.1f' % (num_estimations, rmse))

        rowdict = {'Method': thermo.name,
                   'RMSE (kJ/mol)': "%.1f (N=%d)" % (rmse, num_estimations)}
        for db_name, reaction_list in reactions.items():
            n_covered = thermo.CalculateCoverage(reaction_list)
            n_total = len(reaction_list)
            # Guard against an empty reaction set (would divide by zero).
            percent = n_covered * 100.0 / n_total if n_total else 0.0
            rowdict[db_name + " coverage"] = "%.1f%% (%d)" % (percent, n_covered)
            logging.info(db_name + " coverage = %.1f%%" % percent)
        rowdicts.append(rowdict)

    headers = ['Method', 'RMSE (kJ/mol)'] + \
        [db_name + ' coverage' for db_name in reactions]
    html_writer.write_table(rowdicts, headers=headers)
    # Close the report explicitly, like the other report scripts do.
    html_writer.close()

Exemple #8

0

Afficher le fichier

def main():
    """Fit PRC formation energies to the NIST data and export the results.

    Steps, in order:
      1. Define the anchored compounds (``fixed_cids``: CID -> (nH, dG0))
         and read the Alberty pseudoisomer table from
         ../data/public_data.sqlite.
      2. Obtain the stoichiometric matrix S, the dG0 vector and the CID
         list: computed through NistRegression and cached in
         ../res/prc_{CID,S,dG0}.txt when ../res/prc_S.txt does not exist,
         otherwise loaded from those files.
      3. Solve the least-squares problem with the anchored values fixed and
         compute the kernel of S restricted to the non-anchored compounds.
      4. Write the HTML report (../res/regression_fast.html), a CSV file
         (../res/prc_results.csv) and a LaTeX table (../res/prc_latex.txt).

    Raises:
        ValueError: from ``cids.index`` if an anchored compound is not
            among the compounds of the regression.
    """
    kegg = Kegg.getInstance()
    prefix = '../res/prc_'

    fixed_cids = {}  # a dictionary from CID to pairs of (nH, dG0)

    # Alberty formation energies directly measured, linearly independent:
    fixed_cids[1] = (2, -237.19)  # H2O
    fixed_cids[9] = (1, -1096.1)  # HPO3(-2)
    fixed_cids[14] = (4, -79.31)  # NH4(+1)
    fixed_cids[59] = (0, -744.53)  # SO4(-2)
    fixed_cids[288] = (1, -586.77)  # HCO3(-1)

    # Alberty zeros:
    fixed_cids[3] = (26, 0.0)  # NAD(ox)
    fixed_cids[10] = (32, 0.0)  # CoA
    fixed_cids[127] = (30, 0.0)  # glutathione(ox)
    fixed_cids[376] = (28, 0.0)  # retinal(ox)

    # Directly measured values
    fixed_cids[4] = (27, 22.65)  # NAD(red) -- relative to NAD(ox)
    fixed_cids[212] = (13, -194.5)  # adenosine
    #fixed_cids[294] = (12, -409.2) # inosine - linearly dependent on other 'anchors'

    # Alberty zeros which are not in NIST:
    #fixed_cids[524] = ( 0, 0.0) # cytochrome c(ox)
    #fixed_cids[16]  = (31, 0.0) # FAD(ox)
    #fixed_cids[139] = ( 0, 0.0) # ferredoxin(ox)
    #fixed_cids[61]  = (19, 0.0) # FMN(ox)
    #fixed_cids[343] = ( 0, 0.0) # thioredoxin(ox)
    #fixed_cids[399] = (90, 0.0) # ubiquinone(ox)

    # Most abundant Alberty pseudoisomer (nH and dG0) of every compound.
    public_db = SqliteDatabase("../data/public_data.sqlite")
    alberty = PsuedoisomerTableThermodynamics.FromDatabase(
        public_db, 'alberty_pseudoisomers', label=None, name='Alberty')
    alberty_cid2dG0 = {}
    alberty_cid2nH = {}
    for cid in alberty.get_all_cids():
        pmap = alberty.cid2PseudoisomerMap(cid)
        alberty_dG0, _dG0_tag, alberty_nH, _z, _nMg = \
            pmap.GetMostAbundantPseudoisomer(
                pH=default_pH, I=default_I, pMg=default_pMg, T=default_T)
        alberty_cid2nH[cid] = alberty_nH
        alberty_cid2dG0[cid] = alberty_dG0

    if not os.path.exists(prefix + 'S.txt'):
        db = SqliteDatabase("../res/gibbs.sqlite")
        nist_regression = NistRegression(db)

        # Choose nH per compound: anchored value first, then Alberty, then
        # the dissociation table, and finally 0 as a last resort.
        cid2nH = {}
        for cid in nist_regression.nist.GetAllCids():
            if cid in fixed_cids:
                cid2nH[cid] = fixed_cids[cid][0]
            elif cid in alberty_cid2nH:
                cid2nH[cid] = alberty_cid2nH[cid]
            else:
                tmp = nist_regression.dissociation.GetMostAbundantPseudoisomer(
                    cid,
                    pH=default_pH,
                    I=default_I,
                    pMg=default_pMg,
                    T=default_T)
                if tmp is not None:
                    cid2nH[cid] = tmp[0]
                else:
                    logging.warning(
                        'The most abundant pseudoisomer of %s (C%05d) '
                        'cannot be resolved. Using nH = 0.' %
                        (kegg.cid2name(cid), cid))
                    cid2nH[cid] = 0

        #nist_regression.std_diff_threshold = 2.0 # the threshold over which to print an analysis of a reaction
        #nist_regression.nist.T_range = None#(273.15 + 24, 273.15 + 40)
        S, dG0, cids = nist_regression.ReverseTransform(cid2nH=cid2nH)

        # export the raw data matrices to text files
        C = np.array([[cid, cid2nH.get(cid, 0)] for cid in cids])
        np.savetxt(prefix + 'CID.txt', C, fmt='%d', delimiter=',')
        np.savetxt(prefix + 'S.txt', S, fmt='%g', delimiter=',')
        np.savetxt(prefix + 'dG0.txt', dG0, fmt='%.2f', delimiter=',')
    else:
        # Reuse the matrices cached by a previous run.
        C = np.loadtxt(prefix + 'CID.txt', delimiter=',')
        cids = [int(cid) for cid in C[:, 0]]
        cid2nH = {}
        for i, cid in enumerate(cids):
            cid2nH[cid] = int(C[i, 1])
        S = np.loadtxt(prefix + 'S.txt', delimiter=',')
        dG0 = np.loadtxt(prefix + 'dG0.txt', delimiter=',')
        dG0 = np.reshape(dG0, (dG0.shape[0], 1))

    html_writer = HtmlWriter('../res/regression_fast.html')
    html_writer.write("<h1>Pseudoisomeric Reactant Contributions</h1>\n")
    html_writer.write("<p>The stoichiometric matrix (S):")
    html_writer.insert_toggle(start_here=True)
    stoichiometric_matrix2html(html_writer, S, cids)
    html_writer.div_end()
    html_writer.write('</p>')

    # Map the column index of every anchored compound to its fixed dG0.
    index2value = {}
    for cid, (_nH, dG0_fixed) in fixed_cids.items():
        index2value[cids.index(cid)] = dG0_fixed

    solution, _K = LinearRegression.LeastSquaresWithFixedPoints(
        S, dG0, index2value)
    cid2dG0 = {}
    for i, cid in enumerate(cids):
        cid2dG0[cid] = solution[i]

    # Calculate the Kernel of the reduced stoichiometric matrix (after removing
    # the columns of the fixed compounds).
    cids_red = [cid for cid in cids if cid not in fixed_cids]
    index_red = [i for i, _cid in enumerate(cids) if i not in index2value]
    S_red = S[:, index_red]
    K_red = LinearRegression.Kernel(S_red)

    #print "Reduced Stoichiometric Matrix:"
    #print matrix2string(S_red, cids_red, kegg)
    #print '-'*80

    # Find all CIDs that are completely determined and do not depend on any
    # free variable. In other words, all zero-columns of the reduced kernel.
    determined_indices = np.where(np.sum(abs(K_red), 0) < 1e-10)[0]
    determined_cids = set(cids_red[i] for i in determined_indices)

    dict_list = []
    plot_data = []
    for cid in cids:
        compound_name = kegg.cid2name(cid)
        d = {
            'CID': 'C%05d' % cid,
            'Compound': compound_name,
            'nH': '%d' % cid2nH[cid],
            'dG0 (PRC)': '%.1f' % cid2dG0[cid],
        }
        if cid in alberty_cid2dG0:
            d['dG0 (Alberty)'] = '%.1f' % alberty_cid2dG0[cid]
            # Anchored compounds are excluded from the comparison plot.
            if cid not in fixed_cids:
                plot_data.append(
                    (alberty_cid2dG0[cid], cid2dG0[cid], compound_name))
        else:
            d['dG0 (Alberty)'] = ''

        if cid in fixed_cids:
            d['Depends on'] = 'anchored'
        elif cid in determined_cids:
            d['Depends on'] = 'fixed compounds'
        else:
            d['Depends on'] = 'kernel dimensions'

        dict_list.append(d)

    dict_list.sort(key=lambda row: (row['Depends on'], row['CID']))
    html_writer.write(
        "<p>Formation energies determined by the linear constraints:")
    html_writer.insert_toggle(start_here=True)
    html_writer.write('<font size="1">')
    html_writer.write_table(dict_list,
                            headers=[
                                '#', 'Compound', 'CID', 'nH', 'dG0 (PRC)',
                                'dG0 (Alberty)', 'Depends on'
                            ])
    html_writer.write('</font>')
    html_writer.div_end()
    html_writer.write('</p>')

    # Plot a comparison between PRC and Alberty formation energies
    fig = plt.figure(figsize=(8, 8), dpi=80)
    plt.plot([p[0] for p in plot_data], [p[1] for p in plot_data],
             'b.',
             figure=fig)
    for x_val, y_val, label in plot_data:
        plt.text(x_val, y_val, label, fontsize=6)
    # Raw strings: "\D" and "\c" are not valid escapes in a normal literal.
    plt.xlabel(r'Alberty $\Delta_f G^\circ$')
    plt.ylabel(r'PRC $\Delta_f G^\circ$')
    html_writer.write("<p>Plot comparing PRC and Alberty results:")
    html_writer.insert_toggle(start_here=True)
    html_writer.embed_matplotlib_figure(fig)
    html_writer.div_end()
    html_writer.write("</p>")

    K_sparse = SparseKernel(S_red).Solve()
    html_writer.write(
        "<p>The sparse null-space of the reduced stoichiometric matrix:")
    html_writer.insert_toggle(start_here=True)
    stoichiometric_matrix2html(html_writer, K_sparse, cids_red)
    html_writer.div_end()
    html_writer.write("</p>")

    # Express every non-anchored compound as its fitted value plus a
    # combination of the free variables V_i (one per row of K_sparse).
    dict_list = []
    n_free = K_sparse.shape[0]
    index2string_html = dict(
        (i, "V<sub>%02d</sub>" % i) for i in range(n_free))
    index2string = dict((i, "V%d" % i) for i in range(n_free))
    for i, cid in enumerate(cids_red):
        column = K_sparse[:, i]
        d = {}
        d['KEGG ID'] = '<a href="%s">C%05d</a>' % (kegg.cid2link(cid), cid)
        d['KEGG ID plain'] = 'C%05d' % cid
        d['Compound'] = kegg.cid2name(cid)
        d['nH'] = '%d' % cid2nH[cid]

        if cid in alberty_cid2dG0:
            d['dG0 (Alberty)'] = '%.1f' % alberty_cid2dG0[cid]
        else:
            d['dG0 (Alberty)'] = ''

        d['dG0 (PRC)'] = '%.1f' % cid2dG0[cid]
        d['dG0 (PRC) plain'] = '%.1f' % cid2dG0[cid]

        # Sort key: the 0/1 pattern of free variables used, last one first.
        indic = np.where(abs(column) > 1e-10, 1, 0).tolist()
        indic.reverse()
        d['order_key'] = indic
        if mlab.rms_flat(column) > 1e-10:
            d['dG0 (PRC)'] += " + (" + vector2string(
                column, index2string_html) + ")"
            d['dG0 (PRC) plain'] += " + (" + vector2string(
                column, index2string) + ")"
        dict_list.append(d)

    dict_list.sort(key=lambda row: (row['order_key'], row['KEGG ID plain']))

    # Export the results to CSV; the with-block guarantees the file is
    # flushed and closed.
    with open('../res/prc_results.csv', 'w') as csv_file:
        csv_writer = csv.writer(csv_file)
        csv_writer.writerow(
            ['KEGG ID', 'Compound', 'nH', 'dG0 (PRC)', 'dG0 (Alberty)'])
        for d in dict_list:
            csv_writer.writerow([
                d['KEGG ID plain'], d['Compound'], d['nH'],
                d['dG0 (PRC) plain'], d['dG0 (Alberty)']
            ])

    html_writer.write(
        "<p>All formation energies as a function of the free variables:")
    html_writer.insert_toggle(start_here=True)
    html_writer.write('<font size="1">')
    html_writer.write_table(dict_list,
                            headers=[
                                '#', 'KEGG ID', 'Compound', 'nH', 'dG0 (PRC)',
                                'dG0 (Alberty)'
                            ])
    html_writer.write('</font>')
    html_writer.div_end()
    html_writer.write('</p>')

    with open('../res/prc_latex.txt', 'w') as fp:
        fp.write(
            latex.table2LaTeX(dict_list,
                              headers=[
                                  '#', 'KEGG ID plain', 'Compound', 'nH',
                                  'dG0 (PRC) plain', 'dG0 (Alberty)'
                              ]))

    # Close the report explicitly, like the other report scripts do.
    html_writer.close()