Esempio n. 1
0
def ExportJSONFiles():
    """Dump one table of a SQLite database into a CSV file.

    Despite its name, this function writes CSV, not JSON.  The database
    path, the table name and the output path all come from the command-line
    options parsed by MakeOpts().  The text columns are placed in the output
    row unchanged; the numeric columns are passed through
    reformat_number_string() with a fixed format per column.
    """
    options, _ = MakeOpts().parse_args(sys.argv)
    print("Using the database file: " + options.public_db)
    print("Using the NIST table: " + options.nist_table)
    print("Saving the data to the CSV file: " + options.output_csv)

    # Columns copied as-is; the database column name doubles as CSV header.
    text_columns = [
        'url', 'reference_id', 'method', 'evaluation', 'ec', 'enzyme',
        'kegg_reaction', 'reaction'
    ]
    # (database column, CSV header, number format) of the reformatted columns.
    numeric_columns = [
        ('K', 'K', '%.3e'),
        ('K_tag', 'K_tag', '%.3e'),
        ('T', 'T (K)', '%.2f'),
        ('I', 'I (M)', '%.2f'),
        ('pH', 'pH', '%.2f'),
        ('pMg', 'pMg', '%.2f'),
    ]
    header = text_columns + [title for (_col, title, _fmt) in numeric_columns]

    db = SqliteDatabase(options.public_db)
    # The context manager closes (and therefore flushes) the output file even
    # when a row fails half-way through; the original never closed it.
    with open(options.output_csv, 'w') as csv_file:
        csv_writer = csv.writer(csv_file)
        csv_writer.writerow(header)
        for row in db.DictReader(options.nist_table):
            csvrow = [row[column] for column in text_columns]
            csvrow += [
                reformat_number_string(row[column], number_format)
                for (column, _title, number_format) in numeric_columns
            ]
            csv_writer.writerow(csvrow)
    def setUp(self):
        """Build two pseudoisomer tables for the tests.

        self.fake_thermo_csv is read from the in-memory CSV_DATA text and
        self.fake_thermo_db from the 'fake_pseudoisomers' table of the
        database at PUBLIC_DB_FNAME.  Both are loaded with warnings about
        conflicting references switched off.
        """
        def load_table(dict_reader):
            # Fill a fresh, empty table from the rows the reader yields.
            empty_table = PsuedoisomerTableThermodynamics()
            return PsuedoisomerTableThermodynamics._FromDictReader(
                dict_reader, empty_table, warn_for_conflicting_refs=False)

        csv_rows = csv.DictReader(StringIO(CSV_DATA))
        self.fake_thermo_csv = load_table(csv_rows)

        public_db = SqliteDatabase(PUBLIC_DB_FNAME)
        self.fake_thermo_db = load_table(
            public_db.DictReader('fake_pseudoisomers'))
Esempio n. 3
0
def compare_charges():
    """Write an HTML table comparing the charges given by two estimators.

    For every compound known to either the 'hatzi' or the 'milo' estimator,
    the most abundant pseudoisomer at pH = default_pH, I = 0, pMg = 14 and
    T = default_T is looked up in both estimators.  The report, written to
    ../res/groups_report.html, shows the compound, its name, both charges
    and the value stored for it in the 'gc_errors' table of
    ../res/gibbs.sqlite (None when there is no such entry).
    """
    db_gibbs = SqliteDatabase('../res/gibbs.sqlite')
    print("Writing Compare Charges report to ../res/groups_report.html")
    html_writer = HtmlWriter("../res/groups_report.html")
    # try/finally: the writer is closed even if building the table fails.
    try:
        kegg = Kegg.getInstance()

        # Note that I and pMg are fixed here rather than taken from
        # default_I / default_pMg.
        pH, I, pMg, T = default_pH, 0, 14, default_T

        cid2error = dict((int(row_dict['cid']), row_dict['error'])
                         for row_dict in db_gibbs.DictReader("gc_errors"))

        estimators = {}
        estimators['hatzi'] = Hatzi(use_pKa=False)
        estimators['milo'] = PsuedoisomerTableThermodynamics.FromDatabase(
            db_gibbs, 'gc_pseudoisomers', name='Milo Group Contribution')

        all_cids = set(lsum([e.get_all_cids() for e in estimators.values()]))
        dict_list = []
        for cid in all_cids:
            try:
                name = kegg.cid2name(cid)
                link = kegg.cid2compound(cid).get_link()
            except KeyError:
                # The compound is not in KEGG: show it without name or link.
                name = "unknown"
                link = ""
            row_dict = {
                'cid': '<a href="%s">C%05d</a>' % (link, cid),
                'name': name,
                'error': cid2error.get(cid)
            }
            for key, est in estimators.items():
                try:
                    pmap = est.cid2PseudoisomerMap(cid)
                    (dG0, dG0_tag, nH, z,
                     nMg) = pmap.GetMostAbundantPseudoisomer(pH, I, pMg, T)
                except MissingCompoundFormationEnergy:
                    # This estimator has no data for the compound: leave its
                    # cells empty.
                    dG0, dG0_tag, nH, z, nMg = "", "", "", "", ""
                row_dict['nH_' + key] = nH
                row_dict['charge_' + key] = z
                row_dict['nMg_' + key] = nMg
                row_dict['dG0_' + key] = dG0
                row_dict['dG0_tag_' + key] = dG0_tag
            dict_list.append(row_dict)

        # Only these columns are listed as headers; the other per-estimator
        # keys are stored in the rows but not requested here.
        html_writer.write_table(
            dict_list,
            headers=['cid', 'name', 'charge_hatzi', 'charge_milo', 'error'])
    finally:
        html_writer.close()
Esempio n. 4
0
def ExportJSONFiles():
    """Export the nullspace vectors and the KEGG tables as JSON files.

    Two thermodynamic sources are registered with KEGG in priority order:
    the estimator selected on the command line, then the table read from
    the CSV file given on the command line.  Afterwards the rows of the
    'ugc_conservations' table of ../res/gibbs.sqlite and the KEGG
    compounds, reactions and enzymes are each written with WriteJSONFile()
    to the file named by the corresponding option.
    """
    estimators = LoadAllEstimators()
    options, _ = MakeOpts(estimators).parse_args(sys.argv)

    thermo_list = [
        estimators[options.thermodynamics_source],
        PsuedoisomerTableThermodynamics.FromCsvFile(
            options.thermodynamics_csv),
    ]

    # Make sure we have all the data; priorities are 1-based.
    kegg = Kegg.getInstance()
    for priority, thermo in enumerate(thermo_list, 1):
        print("Priority %d - formation energies of: %s" %
              (priority, thermo.name))
        kegg.AddThermodynamicData(thermo, priority=priority)

    db = SqliteDatabase('../res/gibbs.sqlite')

    print('Exporting Group Contribution Nullspace matrix as JSON.')
    nullspace_vectors = []
    for row in db.DictReader('ugc_conservations'):
        vector = {'msg': row['msg']}
        # The 'json' column maps compound id -> coefficient; each pair is
        # exported as [coefficient, "Cxxxxx"].
        coefficients = json.loads(row['json'])
        vector['reaction'] = [[coeff, "C%05d" % int(cid)]
                              for cid, coeff in coefficients.items()]
        nullspace_vectors.append(vector)
    WriteJSONFile(nullspace_vectors, options.nullspace_out_filename)

    exports = [
        ('Exporting KEGG compounds as JSON.', kegg.AllCompounds,
         options.compounds_out_filename),
        ('Exporting KEGG reactions as JSON.', kegg.AllReactions,
         options.reactions_out_filename),
        ('Exporting KEGG enzymes as JSON.', kegg.AllEnzymes,
         options.enzymes_out_filename),
    ]
    for message, fetch_all, out_filename in exports:
        print(message)
        WriteJSONFile(fetch_all(), out_filename)
Esempio n. 5
0
class Nist(object):
    def __init__(self, T_range=(298, 314)):
        """Open the public database, load all rows and balance the reactions.

        Args:
            T_range: (low, high) pair stored as the default temperature
                filter of SelectRowsFromNist(), which keeps rows whose T is
                strictly between the two bounds.
        """
        # Filters and overrides; no pH filter and no overrides by default.
        self.T_range = T_range
        self.pH_range = None
        self.override_I = self.override_pMg = self.override_T = None

        self.db = SqliteDatabase('../data/public_data.sqlite')
        self.kegg = Kegg.getInstance()

        # Populate self.data / self.cid2count, then balance every reaction.
        self.FromDatabase()
        self.BalanceReactions()

    def FromDatabase(self):
        self.data = []
        self.cid2count = {}
        logging.info('Reading NIST reaction data from database ...')
        for i, row_dict in enumerate(self.db.DictReader('nist_equilibrium')):
            nist_row_data = NistRowData()
            try:
                nist_row_data.ReadFromDatabase('nist%05d' % i, row_dict)
                self.data.append(nist_row_data)
                for cid in nist_row_data.GetAllCids():
                    self.cid2count[cid] = self.cid2count.setdefault(cid, 0) + 1
            except NistMissingCrucialDataException as e:
                logging.debug(str(e))
        logging.info('Total of %d rows read from the NIST database' %
                     len(self.data))

    def BalanceReactions(self, balance_water=True):
        for row in self.data:
            try:
                row.reaction.Balance(balance_water)
            except KeggReactionNotBalancedException as e:
                raise Exception(
                    str(e) + '\n' + str(row.reaction) + '\n' + row.url)

    def GetAllCids(self):
        return sorted(self.cid2count.keys())

    def AnalyzeStats(self, html_writer):
        """
            Produces a set of plots that show some statistics about the NIST database
        """
        logging.info('Calculating statistics for NIST database (%d rows)' %
                     len(self.data))

        if not self.data:
            raise Exception("The database has no rows in it")

        T_list = []
        I_list = []
        pH_list = []
        pMg_list = []
        year_list = []
        for nist_row_data in self.data:
            pH_list.append(nist_row_data.pH)
            T_list.append(nist_row_data.T - 273.15)

            if nist_row_data.I:
                I_list.append(nist_row_data.I)
            if nist_row_data.pMg:
                pMg_list.append(nist_row_data.pMg)

            year = nist_row_data.GetYear()
            if year:
                year_list.append(year)

        html_writer.write("<p><h2>NIST database statistics</h2>\n")
        fig = plt.figure()
        plt.title("Temperature histogram")
        plt.hist(T_list, np.arange(int(min(T_list)), int(max(T_list) + 1),
                                   2.5))
        plt.xlabel("Temperature (C)")
        plt.ylabel("No. of measurements")
        html_writer.embed_matplotlib_figure(fig,
                                            width=320,
                                            height=240,
                                            name='hist_T')

        fig = plt.figure()
        plt.hist(pMg_list, np.arange(0, 10.1, 0.1))
        plt.title("pMg histogram")
        plt.xlabel("pMg")
        plt.ylabel("No. of measurements")
        html_writer.embed_matplotlib_figure(fig,
                                            width=320,
                                            height=240,
                                            name='hist_pMg')

        fig = plt.figure()
        plt.hist(pH_list, np.arange(4, 11, 0.1))
        plt.title("pH histogram")
        plt.xlabel("pH")
        plt.ylabel("No. of measurements")
        html_writer.embed_matplotlib_figure(fig,
                                            width=320,
                                            height=240,
                                            name='hist_pH')

        fig = plt.figure()
        plt.hist(I_list, np.arange(0, 1, 0.025))
        plt.title("Ionic Strength histogram")
        plt.xlabel("Ionic Strength [M]")
        plt.ylabel("No. of measurements")
        html_writer.embed_matplotlib_figure(fig,
                                            width=320,
                                            height=240,
                                            name='hist_I')

        # histogram of publication years
        fig = plt.figure()
        plt.hist(year_list, np.arange(1930, 2010, 5))
        plt.title("Year of publication histogram")
        plt.xlabel("Year of publication")
        plt.ylabel("No. of measurements")
        html_writer.embed_matplotlib_figure(fig,
                                            width=320,
                                            height=240,
                                            name='hist_year')

        db_public = SqliteDatabase('../data/public_data.sqlite')
        alberty = PsuedoisomerTableThermodynamics.FromDatabase(
            db_public, 'alberty_pseudoisomers')
        alberty_cids = set(alberty.get_all_cids())
        nist_cids = set(self.GetAllCids())

        count_list = [
            "Alberty #compounds = %d" % len(alberty_cids),
            "NIST #compounds = %d" % len(nist_cids),
            "intersection #compounds = %d" %
            len(alberty_cids.intersection(nist_cids))
        ]
        html_writer.write_ul(count_list)

        N = 60  # cutoff for the number of counts in the histogram
        hist_a = np.zeros(N)
        hist_b = np.zeros(N)
        for cid, cnt in self.cid2count.iteritems():
            if cnt >= N:
                cnt = N - 1
            if cid in alberty_cids:
                hist_a[cnt] += 1
            else:
                hist_b[cnt] += 1
        hist_a[0] = len(alberty_cids.difference(self.cid2count.keys()))

        fig = plt.figure()
        plt.rc('font', size=10)
        plt.hold(True)
        p1 = plt.bar(range(N), hist_a, color='b')
        p2 = plt.bar(range(N), hist_b, color='r', bottom=hist_a[0:N])
        plt.text(N - 1,
                 hist_a[N - 1] + hist_b[N - 1],
                 '> %d' % (N - 1),
                 fontsize=10,
                 horizontalalignment='right',
                 verticalalignment='baseline')
        plt.title("Overlap with Alberty's database")
        plt.xlabel("N reactions")
        plt.ylabel("no. of compounds measured in N reactions")
        plt.legend((p1[0], p2[0]),
                   ("Exist in Alberty's database", "New compounds"))

        html_writer.embed_matplotlib_figure(fig,
                                            width=320,
                                            height=240,
                                            name='connectivity')

    def AnalyzeConnectivity(self, html_writer):
        """Embed a graph of the compounds closest to the seed training data.

        The seed compounds are read from ../data/thermodynamics/dG0_seed.csv.
        A compound becomes a node when it is the only compound of some loaded
        row that is missing from the seed set; two compounds are joined by an
        edge when they are the only two missing from some row.

        Args:
            html_writer: report writer; its write() and embed_dot_inline()
                methods are used.
        """
        def load_cid_set(train_csv_fname):
            """
                Read the training data from a CSV file
            """
            cid_set = set()
            # 'with' closes the file once all rows have been read.
            with open(train_csv_fname) as csv_file:
                for row in csv.DictReader(csv_file):
                    if row['use for'] == 'skip':
                        continue
                    cid = int(row['cid'])
                    # Non-positive ids are not added to the set.
                    if cid > 0:
                        cid_set.add(cid)
            return cid_set

        known_cids = load_cid_set('../data/thermodynamics/dG0_seed.csv')
        one_step_cids = set()
        coupled_cids = set()
        Gdot = pydot.Dot()

        # Iterate over this instance's rows (the original read them from a
        # name 'nist' that is not defined inside the method).
        for nist_row_data in self.data:
            # set() makes this work whatever collection GetAllCids() returns.
            unknown_cids = list(
                set(nist_row_data.GetAllCids()).difference(known_cids))
            if len(unknown_cids) == 1:
                one_step_cids.add(unknown_cids[0])
            elif len(unknown_cids) == 2:
                # Store the pair in a fixed order so (a, b) and (b, a) are
                # the same edge.
                coupled_cids.add((min(unknown_cids), max(unknown_cids)))

        for cid in one_step_cids:
            Gdot.add_node(pydot.Node("C%05d" % cid, None))

        for (cid1, cid2) in coupled_cids:
            Gdot.add_edge(pydot.Edge("C%05d" % cid1, "C%05d" % cid2, None))

        html_writer.write("<p><h2>Connectivity</h2>\n")
        html_writer.embed_dot_inline(Gdot, width=640, height=480)
        html_writer.write("</p>\n")

    def verify_formation(self, html_writer, thermodynamics, name=None):
        cid2errors = defaultdict(list)
        cid2refs = defaultdict(set)
        reaction2errors = defaultdict(list)
        reaction2refs = defaultdict(set)
        for row_data in self.SelectRowsFromNist():
            dG0_est = row_data.PredictReactionEnergy(thermodynamics)
            if np.isnan(dG0_est):
                continue
            err = row_data.dG0_r - dG0_est
            for cid in row_data.GetAllCids():
                cid2errors[cid].append(err)
                cid2refs[cid].add((row_data.ref_id, row_data.url))
            reaction2errors[row_data.reaction].append(err)
            reaction2refs[row_data.reaction].add(
                (row_data.ref_id, row_data.url))

        rowdicts = []
        for cid, err_list in cid2errors.iteritems():
            refs = cid2refs[cid]
            urls = ', '.join([
                '<a href="%s">%s</a>' % (url, ref_id) for ref_id, url in refs
            ])
            rowdict = {
                'cid': 'C%05d' % cid,
                'name': self.kegg.cid2name(cid),
                'RMSE': rms_flat(err_list),
                'E[err]': np.mean(err_list),
                '#err': len(err_list),
                'std[err]': np.std(err_list),
                'URLs': urls
            }
            rowdicts.append(rowdict)

        rowdicts.sort(key=lambda x: x['RMSE'], reverse=True)
        html_writer.write_table(
            rowdicts,
            ['#', 'cid', 'name', 'RMSE', '#err', 'E[err]', 'std[err]', 'URLs'],
            decimal=1)

        rowdicts = []
        for reaction, err_list in reaction2errors.iteritems():
            refs = reaction2refs[reaction]
            urls = ', '.join([
                '<a href="%s">%s</a>' % (url, ref_id) for ref_id, url in refs
            ])
            rowdict = {
                'reaction': reaction.to_hypertext(show_cids=False),
                'RMSE': rms_flat(err_list),
                'E[err]': np.mean(err_list),
                '#err': len(err_list),
                'std[err]': np.std(err_list),
                'URLs': urls
            }
            rowdicts.append(rowdict)

        rowdicts.sort(key=lambda x: x['RMSE'], reverse=True)
        html_writer.write_table(
            rowdicts,
            ['#', 'reaction', 'RMSE', '#err', 'E[err]', 'std[err]', 'URLs'],
            decimal=1)

    def verify_results(self, html_writer, thermodynamics, name=None):
        """Calculate all the dG0_r for the reaction from NIST and compare to
           the measured data.
        
        Write results to HTML.
        
        Args:
            thermodynamics: a Thermodynamics object that provides dG estimates.
            ignore_I: whether or not to ignore the ionic strength in NIST.
        """

        dG0_obs_vec = []
        dG0_est_vec = []

        # A mapping from each evaluation method (NIST calls separates them to
        # A, B, C and D) to the results of the relevant measurements
        evaluation_map = {}
        rowdicts = []
        finite_rowdicts = []

        eval_to_label = {
            'A': 'high quality',
            'B': 'low quality',
            'C': 'low quality',
            'D': 'low quality',
            'E': 'low quality'
        }

        for row_data in self.SelectRowsFromNist():
            rowdict = {}
            label = eval_to_label[row_data.evaluation]
            if label not in evaluation_map:
                evaluation_map[label] = ([], [])
            rowdict[symbol_dr_G0_prime + ' (obs)'] = np.round(
                row_data.dG0_r, 1)
            rowdict['_reaction'] = row_data.reaction
            rowdict['reaction'] = row_data.reaction.to_hypertext(
                show_cids=False)
            if row_data.reaction.rid is not None:
                rowdict['rid'] = '<a href="%s">R%05d</a>' % (
                    row_data.reaction.get_link(), row_data.reaction.rid)
            else:
                rowdict['rid'] = ''
            rowdict['pH'] = row_data.pH
            rowdict['pMg'] = row_data.pMg
            rowdict['I'] = row_data.I
            rowdict['T'] = row_data.T
            rowdict['eval.'] = row_data.evaluation
            rowdict['url'] = '<a href="%s">%s</a>' % (row_data.url,
                                                      row_data.ref_id)

            dG0_est = row_data.PredictReactionEnergy(thermodynamics)
            if np.isfinite(dG0_est):
                dG0_obs_vec.append(row_data.dG0_r)
                dG0_est_vec.append(dG0_est)
                evaluation_map[label][0].append(row_data.dG0_r)
                evaluation_map[label][1].append(dG0_est)
                rowdict[symbol_dr_G0_prime + ' (est)'] = np.round(dG0_est, 1)
                rowdict['residual'] = np.round(row_data.dG0_r - dG0_est, 3)
                rowdict['|error|'] = abs(rowdict['residual'])
                rowdict['sort_key'] = -rowdict['|error|']
                finite_rowdicts.append(rowdict)
            else:
                rowdict['sort_key'] = 1

            rowdicts.append(rowdict)

        rowdicts.sort(key=lambda x: x['sort_key'])

        if not dG0_obs_vec:
            return 0, 0

        unique_reaction_dict = defaultdict(list)
        for rowdict in finite_rowdicts:
            unique_reaction_dict[rowdict['_reaction']].append(
                rowdict['|error|'])
        unique_rmse_list = [
            rms_flat(error_list)
            for error_list in unique_reaction_dict.values()
        ]
        unique_rmse = rms_flat(unique_rmse_list)

        resid = np.array(dG0_obs_vec) - np.array(dG0_est_vec)
        rmse = rms_flat(resid.flat)

        # plot the profile graph
        plt.rcParams['text.usetex'] = False
        plt.rcParams['legend.fontsize'] = 10
        plt.rcParams['font.family'] = 'sans-serif'
        plt.rcParams['font.size'] = 12
        plt.rcParams['lines.linewidth'] = 1
        plt.rcParams['lines.markersize'] = 3

        fig1 = plt.figure(figsize=(6, 6), dpi=90)
        plt.hold(True)

        colors = ['purple', 'orange']
        for i, label in enumerate(sorted(evaluation_map.keys())):
            measured, predicted = evaluation_map[label]
            plt.plot(measured,
                     predicted,
                     marker='.',
                     linestyle='None',
                     markerfacecolor=colors[i],
                     markeredgecolor=colors[i],
                     markersize=5,
                     label=label,
                     figure=fig1)

        plt.legend(loc='lower right')

        plt.text(-50,
                 40,
                 r'RMSE = %.1f [kJ/mol]' % (unique_rmse),
                 fontsize=14,
                 figure=fig1)
        plt.xlabel(r'observed $\Delta_r G^{\'\circ}$ [kJ/mol]',
                   fontsize=14,
                   figure=fig1)
        plt.ylabel(r'estimated $\Delta_r G^{\'\circ}$ [kJ/mol]',
                   fontsize=14,
                   figure=fig1)
        #min_x = min(dG0_obs_vec)
        #max_x = max(dG0_obs_vec)
        plt.plot([-60, 60], [-60, 60], 'k--', figure=fig1)
        plt.axis([-60, 60, -60, 60])
        if name:
            html_writer.embed_matplotlib_figure(fig1, name=name + "_eval")
        else:
            html_writer.embed_matplotlib_figure(fig1)

        fig2 = plt.figure(figsize=(6, 6), dpi=90)
        binned_plot(x=[rowdict['pH'] for rowdict in finite_rowdicts],
                    y=[rowdict['|error|'] for rowdict in finite_rowdicts],
                    bins=[5, 6, 7, 8, 9],
                    y_type='rmse',
                    figure=fig2)
        plt.xlim((4, 11))
        plt.ylim((0, 12))
        plt.title(r'effect of pH', fontsize=14, figure=fig2)
        plt.xlabel('pH', fontsize=14, figure=fig2)
        plt.ylabel(r'RMSE ($\Delta_r G^{\'\circ}$) [kJ/mol]',
                   fontsize=14,
                   figure=fig2)
        if name:
            html_writer.embed_matplotlib_figure(fig2, name=name + "_pH")
        else:
            html_writer.embed_matplotlib_figure(fig2)

        fig3 = plt.figure(figsize=(6, 6), dpi=90)
        plt.hist([rowdict['residual'] for rowdict in finite_rowdicts],
                 bins=np.arange(-50, 50, 0.5))
        plt.title(r'RMSE = %.1f [kJ/mol]' % rmse, fontsize=14, figure=fig3)
        plt.xlabel(r'residual $\Delta_r G^{\'\circ}$ [kJ/mol]',
                   fontsize=14,
                   figure=fig3)
        plt.ylabel(r'no. of measurements', fontsize=14, figure=fig3)
        if name:
            html_writer.embed_matplotlib_figure(fig3, name=name + "_hist")
        else:
            html_writer.embed_matplotlib_figure(fig3)

        table_headers = [
            "#", "|error|", symbol_dr_G0_prime + " (obs)",
            symbol_dr_G0_prime + " (est)", "reaction", "rid", "pH", "pMg", "I",
            "T", "eval.", "url"
        ]
        html_writer.write_table(rowdicts, table_headers, decimal=1)

        return len(dG0_obs_vec), unique_rmse

    def two_way_comparison(self, html_writer, thermo1, thermo2, name=None):
        """
            Compare the estimation errors of two different evaluation methods.
        
        Write results to HTML.
        
        Args:
            thermo1: a Thermodynamics object that provides dG estimates.
            thermo2: a Thermodynamics object that provides dG estimates.
        """

        total_list = []

        for row_data in self.SelectRowsFromNist():
            try:
                dG0_pred1 = row_data.PredictReactionEnergy(thermo1)
                dG0_pred2 = row_data.PredictReactionEnergy(thermo2)
            except MissingReactionEnergy as e:
                logging.debug("the reaction in (%s) cannot be estimated: %s" %
                              (row_data.ref_id, str(e)))
                continue

            total_list.append([
                row_data.dG0_r, dG0_pred1, dG0_pred2, row_data.reaction,
                row_data.pH, row_data.pMg, row_data.I, row_data.T,
                row_data.evaluation, row_data.url
            ])

        if not total_list:
            return 0, 0

        # plot the profile graph
        plt.rcParams['text.usetex'] = False
        plt.rcParams['font.family'] = 'sans-serif'
        plt.rcParams['font.size'] = 8
        plt.rcParams['lines.linewidth'] = 2
        plt.rcParams['lines.markersize'] = 2
        plt.rcParams['figure.dpi'] = 100

        data_mat = np.array(total_list)
        fig1 = plt.figure(figsize=(4, 4))
        plt.hold(True)
        error1 = data_mat[:, 0] - data_mat[:, 1]
        error2 = data_mat[:, 0] - data_mat[:, 2]

        max_err = max(error1.max(), error2.max())
        min_err = min(error1.min(), error2.min())
        plt.plot([min_err, max_err], [min_err, max_err], 'k--', figure=fig1)
        plt.plot(error1, error2, '.', figure=fig1)
        plt.title("Error Comparison per Reaction (in kJ/mol)")
        plt.xlabel(thermo1.name, figure=fig1)
        plt.ylabel(thermo2.name, figure=fig1)
        html_writer.embed_matplotlib_figure(fig1, name=name + "_corr")

        fig2 = plt.figure(figsize=(7, 3))
        for i, thermo in enumerate([thermo1, thermo2]):
            fig2.add_subplot(1, 2, i + 1)
            plt.plot(data_mat[:, 0], data_mat[:, i + 1], 'b.')
            rmse = rms_flat((data_mat[:, 0] - data_mat[:, i + 1]).flat)
            plt.text(-50, 40, r'RMSE = %.1f [kJ/mol]' % (rmse))
            plt.xlabel(r'observed $\Delta G_r^\circ$ from NIST [kJ/mol]')
            plt.ylabel(r'estimated $\Delta G_r^\circ$ using %s [kJ/mol]' %
                       thermo.name)
            plt.plot([-60, 60], [-60, 60], 'k--')
            plt.axis([-60, 60, -60, 60])

        html_writer.embed_matplotlib_figure(fig2, name=name + "_eval")

        table_headers = [
            "dG'0 (obs)",
            "dG'0 (%s)" % thermo1.name,
            "dG'0 (%s)" % thermo2.name, "reaction", "rid", "pH", "pMg", "I",
            "T", "eval.", "url"
        ]
        dict_list = []
        for row in sorted(total_list,
                          key=lambda (x): abs(x[1] - x[2]),
                          reverse=True):
            d = {}
            d["dG'0 (obs)"] = '%.1f' % row[0]
            d["dG'0 (%s)" % thermo1.name] = '%.1f' % row[1]
            d["dG'0 (%s)" % thermo2.name] = '%.1f' % row[2]
            d['reaction'] = row[3].to_hypertext(show_cids=False)
            if row[3].rid is not None:
                d['rid'] = '<a href="%s">R%05d</a>' % (row[3].get_link(),
                                                       row[3].rid)
            else:
                d['rid'] = ''
            d['pH'] = '%.1f' % row[4]
            d['pMg'] = '%.1f' % row[5]
            d['I'] = '%.2f' % row[6]
            d['T'] = '%.1f' % row[7]
            d['eval.'] = row[8]
            if row[9]:
                d['url'] = '<a href="%s">link</a>' % row[9]
            else:
                d['url'] = ''
            dict_list.append(d)
        html_writer.write_table(dict_list, table_headers)

    def SelectRowsFromNist(self,
                           reaction=None,
                           check_reverse=True,
                           T_range=None,
                           pH_range=None):
        T_range = T_range or self.T_range
        pH_range = pH_range or self.pH_range
        rows = []
        checklist = []
        if reaction:
            checklist.append(reaction)
            if check_reverse:
                checklist.append(reaction.reverse())
        for nist_row_data in self.data:
            if T_range and not (T_range[0] < nist_row_data.T < T_range[1]):
                continue
            if pH_range and not (pH_range[0] < nist_row_data.pH < pH_range[1]):
                continue
            if checklist and nist_row_data.reaction not in checklist:
                continue
            if self.override_pMg or self.override_I or self.override_T:
                nist_row_copy = nist_row_data.Clone()
                if self.override_pMg:
                    nist_row_copy.pMg = self.override_pMg
                if self.override_I:
                    nist_row_copy.I = self.override_I
                if self.override_T:
                    nist_row_copy.T = self.override_T
                rows.append(nist_row_copy)
            else:
                rows.append(nist_row_data)
        return rows

    def GetUniqueReactionSet(self):
        return set([row.reaction for row in self.data])