def __init__(self,
                 db,
                 html_writer=None,
                 dissociation=None,
                 anchor_all=False):
        """Set up the "Unified Group Contribution" estimator.

        Args:
            db: database handle; stored on the instance unchanged.
            html_writer: report writer; a fresh NullHtmlWriter is used
                whenever a falsy value is passed.
            dissociation: dissociation data; stored on the instance unchanged.
            anchor_all: when true, FORMATION_ENERGY_FILENAME points at the
                "anchor_all" variant of the formation-energy CSV.
        """
        PsuedoisomerTableThermodynamics.__init__(
            self, name="Unified Group Contribution")

        # Collaborators and run-time flags.
        self.db = db
        if not html_writer:
            html_writer = NullHtmlWriter()
        self.html_writer = html_writer
        self.dissociation = dissociation
        self.transformed = False
        self.CollapseReactions = False
        self.epsilon = 1e-10
        self.kegg = Kegg.getInstance()

        # Names of the database tables used by this estimator (all share the
        # 'ugc_' prefix).
        self.STOICHIOMETRIC_TABLE_NAME = 'ugc_S'
        self.GROUP_TABLE_NAME = 'ugc_G'
        self.GIBBS_ENERGY_TABLE_NAME = 'ugc_b'
        self.ANCHORED_TABLE_NAME = 'ugc_anchored'
        self.COMPOUND_TABLE_NAME = 'ugc_compounds'
        self.OBSERVATION_TABLE_NAME = 'ugc_observations'
        self.GROUPVEC_TABLE_NAME = 'ugc_groupvectors'
        self.UNIQUE_OBSERVATION_TABLE_NAME = 'ugc_unique_observations'
        self.THERMODYNAMICS_TABLE_NAME = 'ugc_pseudoisomers'
        self.ERRORS_TABLE_NAME = 'ugc_errors'
        self.CONSERVATIONS_TABLE_NAME = 'ugc_conservations'

        # Formation-energy input file, chosen by the anchor_all flag.
        self.FORMATION_ENERGY_FILENAME = (
            '../data/thermodynamics/formation_energies_anchor_all.csv'
            if anchor_all else
            '../data/thermodynamics/formation_energies.csv')
Example #2
0
def AnalyzeConcentrationGradient(prefix,
                                 thermo,
                                 csv_output_fname,
                                 cid=13):  # default compound is PPi
    """Scan the concentration of one compound and plot the resulting OBDs.

    For every concentration in a fixed logarithmic grid the compound `cid`
    is pinned to that concentration (lower bound == upper bound) and
    `pareto` is run on the pathways parsed from
    '../data/thermodynamics/<prefix>.txt'.  Column 1 of the data returned by
    `pareto` is collected per concentration, plotted against the
    concentration, and the figure is embedded in '../res/<prefix>.html'.

    Args:
        prefix: base name of the input pathway file and the output HTML file.
        thermo: thermodynamics object; its `kegg`, `I` and `T` attributes are
            read here and it is passed on to `pareto`.
        csv_output_fname: path of a CSV file that receives one row per
            concentration; a falsy value disables CSV output.
        cid: KEGG compound ID whose concentration is scanned.
    """
    compound_name = thermo.kegg.cid2name(cid)
    kegg_file = ParsedKeggFile.FromKeggFile('../data/thermodynamics/%s.txt' %
                                            prefix)
    html_writer = HtmlWriter('../res/%s.html' % prefix)
    null_html_writer = NullHtmlWriter()
    csv_file = None
    csv_output = None

    try:
        if csv_output_fname:
            csv_file = open(csv_output_fname, 'w')
            csv_output = csv.writer(csv_file)
            csv_output.writerow(['pH', 'I', 'T', '[C%05d]' % cid] +
                                kegg_file.entries())

        pH_vec = np.array(
            [7])  # this needs to be fixed so that the txt file will set the pH
        conc_vec = 10**(-np.arange(2, 6.0001, 0.25)
                        )  # logarithmic scale between 10mM and 1uM
        override_bounds = {}

        fig = plt.figure(figsize=(6, 6), dpi=90)
        legend = []
        for pH in pH_vec.flat:
            obd_vec = []
            for conc in conc_vec.flat:
                # Pin the scanned compound to exactly this concentration.
                override_bounds[cid] = (conc, conc)
                logging.info("pH = %g, [%s] = %.1e M" %
                             (pH, compound_name, conc))
                data, labels = pareto(kegg_file,
                                      null_html_writer,
                                      thermo,
                                      pH=pH,
                                      section_prefix="",
                                      balance_water=True,
                                      override_bounds=override_bounds)
                obd_vec.append(data[:, 1])
                # CSV output is optional; only write when a file was opened.
                if csv_output is not None:
                    csv_output.writerow([pH, thermo.I, thermo.T, conc] +
                                        list(data[:, 1].flat))
            # One row is appended per concentration.
            obd_mat = np.matrix(obd_vec)
            plt.plot(conc_vec, obd_mat, '.-', figure=fig)
            legend += ['%s, pH = %g' % (l, pH) for l in labels]

        plt.title("ODB vs. [%s] (I = %gM, T = %gK)" %
                  (compound_name, thermo.I, thermo.T),
                  figure=fig)
        plt.xscale('log')
        plt.xlabel('Concentration of %s [M]' % compound_name, figure=fig)
        plt.ylabel('Optimized Distributed Bottleneck [kJ/mol]', figure=fig)
        plt.legend(legend)
        html_writer.write('<h2 id="figure_%s">Summary figure</h2>\n' % prefix)
        html_writer.embed_matplotlib_figure(fig, name=prefix)
    finally:
        # Release both output handles even if the analysis fails midway.
        if csv_file is not None:
            csv_file.close()
        html_writer.close()
Example #3
0
 def FromDatabase(db, table_name, transformed=False):
     """Build an observation collection from the rows of a database table.

     Args:
         db: database handle providing DictReader(table_name).
         table_name: name of the table to read.
         transformed: forwarded to the KeggObervationCollection constructor.

     Returns:
         A KeggObervationCollection (created with a NullHtmlWriter and no
         dissociation data) whose `observations` list holds one
         KeggObservation per row.
     """
     collection = KeggObervationCollection(NullHtmlWriter(), None,
                                           transformed)
     collection.observations = [
         KeggObservation.FromDatabaseRow(db_row)
         for db_row in db.DictReader(table_name)
     ]
     return collection
Example #4
0
 def __init__(self, db, dissociation=None,
              html_writer=None, nist=None):
     """Initialise the instance with its database and helper objects.

     Args:
         db: database handle; stored on the instance unchanged.
         dissociation: accepted but not stored by this constructor.
         html_writer: report writer; a NullHtmlWriter is used when falsy.
         nist: NIST data object; a new Nist() is created when falsy.
     """
     PsuedoisomerTableThermodynamics.__init__(self)
     self.db = db

     if not html_writer:
         html_writer = NullHtmlWriter()
     self.html_writer = html_writer

     if not nist:
         nist = Nist()
     self.nist = nist

     # NOTE(review): the `dissociation` argument is ignored and the attribute
     # always starts as None -- presumably it is filled in later; confirm
     # that this is intentional.
     self.dissociation = None

     self.cid2pmap_dict = {}

     self.std_diff_threshold = np.inf
     self.assume_no_pKa_by_default = False
Example #5
0
    def __init__(self, db, html_writer=None, transformed=False):
        """Construct a GroupContribution instance.

        Args:
            db: the database handle; stored on the instance unchanged.
            html_writer: the HtmlWriter to write to; a NullHtmlWriter is
                used when a falsy value is passed.
            transformed: stored on the instance and used to pick the table
                name prefix ('bgc' when true, 'pgc' otherwise).
        """
        PsuedoisomerTableThermodynamics.__init__(self,
                                                 name="Group Contribution")
        self.db = db
        if not html_writer:
            html_writer = NullHtmlWriter()
        self.html_writer = html_writer
        self.dissociation = None
        self.transformed = transformed
        self.epsilon = 1e-10

        # Use the shared Kegg instance, but keep a private copy of its
        # bounds so later edits do not touch the shared object.
        self.kegg = Kegg.getInstance()
        self.bounds = deepcopy(self.kegg.cid2bounds)

        # State that starts out empty.
        self.group_nullspace = None
        self.group_contributions = None
        self.obs_collection = None
        self.cid2error = {}
        self.cid2groupvec = None

        # Every table name below shares one prefix chosen by `transformed`.
        prefix = 'bgc' if transformed else 'pgc'

        self.OBSERVATION_TABLE_NAME = prefix + '_observations'
        self.GROUPVEC_TABLE_NAME = prefix + '_groupvector'
        self.NULLSPACE_TABLE_NAME = prefix + '_nullspace'
        self.CONTRIBUTION_TABLE_NAME = prefix + '_contribution'
        self.REGRESSION_TABLE_NAME = prefix + '_regression'

        self.THERMODYNAMICS_TABLE_NAME = prefix + '_pseudoisomers'
        self.STOICHIOMETRIC_MATRIX_TABLE_NAME = prefix + '_stoichiometry'
        # The "TALBE" spelling is part of the existing attribute names.
        self.ANCHORED_CONTRIBUTIONS_TALBE_NAME = prefix + '_anchored_g'
        self.ANCHORED_CIDS_TABLE_NAME = prefix + '_anchored_cids'
        self.ANCHORED_P_L_TALBE_NAME = prefix + '_anchored_P_L'
Example #6
0
def main():
    """Export group-contribution estimates for KEGG compounds and reactions.

    Reads the command-line options, initialises a GroupContribution object
    on '../res/gibbs.sqlite', and writes two CSV files:

    * args.compounds_out_filename: one row per pseudoisomer of every
      compound (or a single row carrying the error message when the
      formation energy is missing).
    * args.reactions_out_filename: one row per KEGG reaction with the
      predicted reaction energy at the requested conditions (or the error
      message when the prediction fails).
    """
    estimators = LoadAllEstimators()
    args, _ = MakeOpts(estimators).parse_args(sys.argv)

    # Make sure we have all the data.
    db = SqliteDatabase('../res/gibbs.sqlite')
    G = GroupContribution(db=db, html_writer=NullHtmlWriter(),
                          transformed=args.transformed)
    G.init()

    # The parenthesised single-argument print behaves the same whether or
    # not print is a statement.
    print('Exporting KEGG compounds to %s' % args.compounds_out_filename)
    # `with` guarantees the output is flushed and closed, even on error.
    with open(args.compounds_out_filename, 'w') as compounds_file:
        csv_writer = csv.writer(compounds_file)
        csv_writer.writerow(["KEGG ID", "nH", "CHARGE", "nMg", "dG0_f"])
        for cid in sorted(G.get_all_cids()):
            try:
                for nH, z, nMg, dG0 in G.cid2PseudoisomerMap(cid).ToMatrix():
                    csv_writer.writerow(
                        ["C%05d" % cid, nH, z, nMg, "%.1f" % dG0])
            except MissingCompoundFormationEnergy as e:
                csv_writer.writerow(["C%05d" % cid, None, None, None, str(e)])

    print('Exporting KEGG reactions to %s' % args.reactions_out_filename)
    with open(args.reactions_out_filename, 'w') as reactions_file:
        csv_writer = csv.writer(reactions_file)
        csv_writer.writerow(
            ["KEGG ID", "dG'0_r (pH=%.1f, I=%.2f, pMg=%.1f, T=%.1f)" %
             (args.ph, args.i_s, args.pmg, args.temp)])
        for rid in sorted(G.kegg.get_all_rids()):
            reaction = G.kegg.rid2reaction(rid)
            try:
                reaction.Balance(balance_water=True)
                dG0_r = reaction.PredictReactionEnergy(G, pH=args.ph,
                            pMg=args.pmg, I=args.i_s, T=args.temp)
                csv_writer.writerow(["R%05d" % rid, "%.1f" % dG0_r])
            except (KeggParseException,
                    MissingCompoundFormationEnergy,
                    KeggReactionNotBalancedException,
                    MissingReactionEnergy,
                    KeyError,
                    OpenBabelError) as e:
                # Record the failure reason instead of the energy.
                csv_writer.writerow(["R%05d" % rid, str(e)])
Example #7
0
def nist_dissociation_test():
    """
        Runs test_dissociation_table on the dissociation table of every
        compound listed in NIST, with missing SMILES strings not ignored.
        Compounds without an explicit formula are only logged and skipped.
    """
    db = SqliteDatabase('../res/gibbs.sqlite')
    nist_regression = NistRegression(db, html_writer=NullHtmlWriter())
    dissociation = nist_regression.dissociation
    group_decomposer = GroupDecomposer(GroupsData.FromDatabase(db))
    kegg = Kegg.getInstance()
    nist = nist_regression.nist

    for cid in nist.GetAllCids():
        # Human-readable label used in log messages and by the table test.
        label = "C%05d (%s)" % (cid, kegg.cid2name(cid))

        atom_bag = kegg.cid2compound(cid).get_atom_bag()
        if atom_bag is None:
            logging.debug('%s: has no explicit formula' % label)
            continue

        table = dissociation.GetDissociationTable(cid,
                                                  create_if_missing=False)
        test_dissociation_table(table,
                                group_decomposer,
                                label,
                                ignore_missing_smiles=False)
Example #8
0
def GetAllOBDs(pathway_list,
               html_writer,
               thermo,
               pH=None,
               plot_profile=False,
               section_prefix="",
               balance_water=True,
               override_bounds=None):
    """
        Computes the OBD of every pathway and writes an HTML report.

        Arguments:
            pathway_list    - iterable of (entry, p_data) pairs
            html_writer     - report writer; None means a NullHtmlWriter
            thermo          - thermodynamics object; its conditions, c_range
                              and bounds are overwritten for every pathway
            pH              - if given, overrides the pH of every pathway;
                              otherwise each pathway uses its own p_data.pH
            plot_profile    - currently unused
            section_prefix  - prefix of the HTML element ids
            balance_water   - passed to p_data.get_explicit_reactions
            override_bounds - optional dict mapping a compound id to a
                              (lower, upper) pair that replaces the
                              pathway's own bounds for that compound

        Return value is a list of dictionaries (one per pathway, missing
        keys default to 0.0) containing the following fields:

            entry         - the name of the pathway
            remark        - 'okay', 'skipping' or 'NaN reaction energy'
            OBD           - optimized distributed bottleneck (in kJ/mol)
            FFE           - flux-force efficiency, 100 * tanh(OBD / 2RT)
                            (in percent, between -100 and 100)
            min total dG  - in kJ/mol
            max total dG  - in kJ/mol
            sum of fluxes - the sum of all fluxes
    """
    if html_writer is None:
        html_writer = NullHtmlWriter()
    if override_bounds is None:
        override_bounds = {}

    html_writer.write('<h2 id="%s_tables">Individual result tables</h2>\n' %
                      section_prefix)
    rowdicts = []
    for entry, p_data in pathway_list:
        # Missing numeric fields read as 0.0 in the summary table.
        rowdict = defaultdict(float)
        rowdicts.append(rowdict)
        rowdict['entry'] = entry
        rowdict['remark'] = 'okay'

        if p_data.skip:
            logging.info("Skipping pathway: %s", rowdict['entry'])
            rowdict['remark'] = 'skipping'
            continue

        # Resolve the pH for this pathway only; the `pH` argument itself is
        # left untouched so that one pathway's pH never leaks into the next.
        pathway_pH = p_data.pH if pH is None else pH
        thermo.SetConditions(pH=pathway_pH,
                             I=p_data.I,
                             T=p_data.T,
                             pMg=p_data.pMg)
        thermo.c_range = p_data.c_range
        rowdict['pH'] = thermo.pH
        rowdict['I'] = thermo.I
        rowdict['T'] = thermo.T
        rowdict['pMg'] = thermo.pMg

        html_writer.write('<h3 id="%s_%s">%s</h3>\n' %
                          (section_prefix, rowdict['entry'], rowdict['entry']))

        S, rids, fluxes, cids = p_data.get_explicit_reactions(
            balance_water=balance_water)
        fluxes = np.matrix(fluxes)
        rowdict['rids'] = rids
        rowdict['cids'] = cids
        rowdict['fluxes'] = fluxes

        # Start from the pathway's own bounds, then apply caller overrides.
        thermo.bounds = p_data.GetBounds().GetOldStyleBounds(cids)
        for cid, (lb, ub) in override_bounds.items():
            thermo.bounds[cid] = (lb, ub)

        dG0_r_prime = thermo.GetTransfromedReactionEnergies(S, cids)
        rowdict['dG0_r_prime'] = dG0_r_prime

        keggpath = KeggPathway(S,
                               rids,
                               fluxes,
                               cids,
                               reaction_energies=dG0_r_prime,
                               cid2bounds=thermo.bounds,
                               c_range=thermo.c_range)
        rowdict['formulas'] = [
            keggpath.GetReactionString(r, show_cids=False)
            for r in range(len(rids))
        ]

        # Any NaN reaction energy: report what is known and skip the OBD.
        if np.any(np.isnan(dG0_r_prime)):
            html_writer.write('NaN reaction energy')
            keggpath.WriteProfileToHtmlTable(html_writer)
            keggpath.WriteConcentrationsToHtmlTable(html_writer)
            logging.info('%20s: OBD = NaN, maxTG = NaN' % (entry))
            rowdict['remark'] = 'NaN reaction energy'
            continue

        obd, params = keggpath.FindOBD()
        odfe = 100 * np.tanh(obd / (2 * R * thermo.T))

        rowdict['OBD'] = obd
        rowdict['FFE'] = odfe
        rowdict['sum of fluxes'] = np.sum(fluxes)
        rowdict['concentrations'] = params['concentrations']
        rowdict['reaction prices'] = params['reaction prices']
        rowdict['compound prices'] = params['compound prices']
        rowdict['min total dG'] = params['minimum total dG']
        rowdict['max total dG'] = params['maximum total dG']

        logging.info('%20s: OBD = %.1f [kJ/mol], maxTG = %.1f [kJ/mol]' %
                     (rowdict['entry'], obd, rowdict['max total dG']))
        html_writer.write_ul([
            "pH = %.1f, I = %.2fM, T = %.2f K" %
            (thermo.pH, thermo.I, thermo.T),
            "OBD = %.1f [kJ/mol]" % obd,
            "flux-force efficiency = %.1f%%" % odfe,
            "Min Total %s = %.1f [kJ/mol]" %
            (symbol_dr_G_prime, rowdict['min total dG']),
            "Max Total %s = %.1f [kJ/mol]" %
            (symbol_dr_G_prime, rowdict['max total dG'])
        ])
        rowdict['dG_r_prime'] = (
            keggpath.CalculateReactionEnergiesUsingConcentrations(
                rowdict['concentrations']))
        keggpath.WriteResultsToHtmlTables(html_writer,
                                          rowdict['concentrations'],
                                          rowdict['reaction prices'],
                                          rowdict['compound prices'])

    html_writer.write('<h2 id="%s_summary">Summary table</h2>\n' %
                      section_prefix)
    dict_list = [{
        'Name':
        '<a href="#%s_%s">%s</a>' % (section_prefix, d['entry'], d['entry']),
        'OBD [kJ/mol]':
        '%.1f' % d['OBD'],
        'flux-force eff.':
        '%.1f%%' % d['FFE'],
        'Total dG\' [kJ/mol]':
        '%6.1f - %6.1f' % (d['min total dG'], d['max total dG']),
        'sum(flux)':
        '%g' % d['sum of fluxes'],
        'remark':
        d['remark']
    } for d in rowdicts]
    html_writer.write_table(dict_list,
                            headers=[
                                'Name', 'OBD [kJ/mol]', 'flux-force eff.',
                                'Total dG\' [kJ/mol]', 'sum(flux)', 'remark'
                            ])

    return rowdicts
Example #9
0
def GetAllOBDs(pathway_list, html_writer, thermo, pH=None,
               plot_profile=False, section_prefix="", balance_water=True,
               override_bounds=None):
    """
        Computes the OBD of every pathway and writes an HTML report.

        Arguments:
            pathway_list    - iterable of (entry, p_data) pairs
            html_writer     - report writer; None means a NullHtmlWriter
            thermo          - thermodynamics object; its conditions, c_range
                              and bounds are overwritten for every pathway
            pH              - if given, overrides the pH of every pathway;
                              otherwise each pathway uses its own p_data.pH
            plot_profile    - currently unused
            section_prefix  - prefix of the HTML element ids
            balance_water   - passed to p_data.get_explicit_reactions
            override_bounds - optional dict mapping a compound id to a
                              (lower, upper) pair that replaces the
                              pathway's own bounds for that compound

        Return value is a list of dictionaries (one per pathway, missing
        keys default to 0.0) containing the following fields:

            entry         - the name of the pathway
            remark        - 'okay', 'skipping' or 'NaN reaction energy'
            OBD           - optimized distributed bottleneck (in kJ/mol)
            FFE           - flux-force efficiency, 100 * tanh(OBD / 2RT)
                            (in percent, between -100 and 100)
            min total dG  - in kJ/mol
            max total dG  - in kJ/mol
            sum of fluxes - the sum of all fluxes
    """
    if html_writer is None:
        html_writer = NullHtmlWriter()
    if override_bounds is None:
        override_bounds = {}

    html_writer.write('<h2 id="%s_tables">Individual result tables</h2>\n' % section_prefix)
    rowdicts = []
    for entry, p_data in pathway_list:
        # Missing numeric fields read as 0.0 in the summary table.
        rowdict = defaultdict(float)
        rowdicts.append(rowdict)
        rowdict['entry'] = entry
        rowdict['remark'] = 'okay'

        if p_data.skip:
            logging.info("Skipping pathway: %s", rowdict['entry'])
            rowdict['remark'] = 'skipping'
            continue

        # Resolve the pH for this pathway only; the `pH` argument itself is
        # left untouched so that one pathway's pH never leaks into the next.
        pathway_pH = p_data.pH if pH is None else pH
        thermo.SetConditions(pH=pathway_pH, I=p_data.I, T=p_data.T, pMg=p_data.pMg)
        thermo.c_range = p_data.c_range
        rowdict['pH'] = thermo.pH
        rowdict['I'] = thermo.I
        rowdict['T'] = thermo.T
        rowdict['pMg'] = thermo.pMg

        html_writer.write('<h3 id="%s_%s">%s</h3>\n' % (section_prefix, rowdict['entry'], rowdict['entry']))

        S, rids, fluxes, cids = p_data.get_explicit_reactions(balance_water=balance_water)
        fluxes = np.matrix(fluxes)
        rowdict['rids'] = rids
        rowdict['cids'] = cids
        rowdict['fluxes'] = fluxes

        # Start from the pathway's own bounds, then apply caller overrides.
        thermo.bounds = p_data.GetBounds().GetOldStyleBounds(cids)
        for cid, (lb, ub) in override_bounds.items():
            thermo.bounds[cid] = (lb, ub)

        dG0_r_prime = thermo.GetTransfromedReactionEnergies(S, cids)
        rowdict['dG0_r_prime'] = dG0_r_prime

        keggpath = KeggPathway(S, rids, fluxes, cids, reaction_energies=dG0_r_prime,
                               cid2bounds=thermo.bounds, c_range=thermo.c_range)
        rowdict['formulas'] = [keggpath.GetReactionString(r, show_cids=False)
                               for r in range(len(rids))]

        # Any NaN reaction energy: report what is known and skip the OBD.
        if np.any(np.isnan(dG0_r_prime)):
            html_writer.write('NaN reaction energy')
            keggpath.WriteProfileToHtmlTable(html_writer)
            keggpath.WriteConcentrationsToHtmlTable(html_writer)
            logging.info('%20s: OBD = NaN, maxTG = NaN' % (entry))
            rowdict['remark'] = 'NaN reaction energy'
            continue

        obd, params = keggpath.FindOBD()
        odfe = 100 * np.tanh(obd / (2*R*thermo.T))

        rowdict['OBD'] = obd
        rowdict['FFE'] = odfe
        rowdict['sum of fluxes'] = np.sum(fluxes)
        rowdict['concentrations'] = params['concentrations']
        rowdict['reaction prices'] = params['reaction prices']
        rowdict['compound prices'] = params['compound prices']
        rowdict['min total dG'] = params['minimum total dG']
        rowdict['max total dG'] = params['maximum total dG']

        logging.info('%20s: OBD = %.1f [kJ/mol], maxTG = %.1f [kJ/mol]' %
                     (rowdict['entry'], obd, rowdict['max total dG']))
        html_writer.write_ul(["pH = %.1f, I = %.2fM, T = %.2f K" % (thermo.pH, thermo.I, thermo.T),
                              "OBD = %.1f [kJ/mol]" % obd,
                              "flux-force efficiency = %.1f%%" % odfe,
                              "Min Total %s = %.1f [kJ/mol]" % (symbol_dr_G_prime, rowdict['min total dG']),
                              "Max Total %s = %.1f [kJ/mol]" % (symbol_dr_G_prime, rowdict['max total dG'])])
        rowdict['dG_r_prime'] = keggpath.CalculateReactionEnergiesUsingConcentrations(rowdict['concentrations'])
        keggpath.WriteResultsToHtmlTables(html_writer, rowdict['concentrations'],
            rowdict['reaction prices'], rowdict['compound prices'])

    html_writer.write('<h2 id="%s_summary">Summary table</h2>\n' % section_prefix)
    dict_list = [{'Name':'<a href="#%s_%s">%s</a>' % (section_prefix, d['entry'], d['entry']),
                  'OBD [kJ/mol]':'%.1f' % d['OBD'],
                  'flux-force eff.':'%.1f%%' % d['FFE'],
                  'Total dG\' [kJ/mol]':'%6.1f - %6.1f' % (d['min total dG'], d['max total dG']),
                  'sum(flux)':'%g' % d['sum of fluxes'],
                  'remark': d['remark']}
                 for d in rowdicts]
    html_writer.write_table(dict_list,
        headers=['Name', 'OBD [kJ/mol]', 'flux-force eff.', 'Total dG\' [kJ/mol]', 'sum(flux)', 'remark'])

    return rowdicts
Example #10
0
                        continue
                    elem_ne += count * Molecule.GetAtomicNum(elem)
                    elem_cid, elem_coeff = atom2cid[elem]
                    sparse.setdefault(elem_cid, 0)
                    sparse[elem_cid] += -count * elem_coeff

                # use the H element to balance the electrons in the formation
                # reactions (we don't need to balance protons since this is
                # a biochemical reaction, so H+ are 'free').
                H_cid, H_coeff = atom2cid['H']
                sparse[H_cid] = (elem_ne - ne) * H_coeff
                reaction = Reaction(
                    "formation of %s" % self.kegg.cid2name(cid), sparse)

                output_csv.writerow(
                    (ref, 'C%05d' % cid, 'formation', 'A', '',
                     'formation of %s' % self.kegg.cid2name(cid),
                     reaction.FullReactionString(),
                     reaction.FullReactionString(show_cids=False),
                     '%.2f' % dG0_prime, self.T, self.I, self.pH, self.pMg))


if __name__ == "__main__":
    # Script entry point: build an observation collection (transformed
    # mode, no HTML output) on top of the public dissociation constants and
    # write its formation reactions to a CSV file.
    public_dissociation = DissociationConstants.FromPublicDB()
    collection = KeggObervationCollection(NullHtmlWriter(),
                                          public_dissociation,
                                          transformed=True)
    collection.ConvertFormation2Reaction(
        output_fname='../res/formation_reactions.csv')