Esempio n. 1
0
 def __init__(self, S, reaction_ids, compound_ids,
              fluxes=None, name=None):
     """Initialize the stoichiometric model.

     Args:
         S: the stoichiometric matrix.
            Compounds are on the rows, reactions on the columns (this is
            the orientation enforced by the shape check below).
         reaction_ids: the ids/names of the reactions (columns of S).
         compound_ids: the ids/names of the compounds (rows of S).
         fluxes: the list of relative fluxes through all reactions.
                 If not supplied, assumed to be 1.0 for all reactions
                 (stored as a 1 x Nr array).
         name: a string name for this model.

     Raises:
         ValueError: if the shape of S does not agree with the number of
             compound ids (rows) and reaction ids (columns).
     """
     self.kegg = Kegg.getInstance()
     self.S = S
     self.reaction_ids = reaction_ids
     self.compound_ids = compound_ids
     self.Nr = len(self.reaction_ids)
     self.Nc = len(self.compound_ids)
     self.name = name
     self.slug_name = util.slugify(self.name)

     # Only wrap fluxes that were actually supplied; np.array(None) would
     # merely create a 0-d object array that is thrown away immediately.
     if fluxes is None:
         self.fluxes = np.ones((1, self.Nr))
     else:
         self.fluxes = np.array(fluxes)

     expected_Nc, expected_Nr = self.S.shape
     if self.Nr != expected_Nr:
         raise ValueError('Number of columns does not match number of reactions')
     if self.Nc != expected_Nc:
         raise ValueError('Number of rows does not match number of compounds')
Esempio n. 2
0
def main():
    """Compare reaction reversibility to dG0 and write the result as HTML.

    Uses the 'PGC' estimator at the module's default conditions over all
    KEGG reactions; the report goes to '../res/reversibility.html'.
    """
    output_path = '../res/reversibility.html'
    logging.info('Writing HTML output to %s', output_path)
    writer = HtmlWriter(output_path)

    # global matplotlib settings for the profile graph
    plot_settings = (
        ('text.usetex', False),
        ('legend.fontsize', 10),
        ('font.family', 'sans-serif'),
        ('font.size', 14),
        ('lines.linewidth', 2),
        ('lines.markersize', 6),
        ('figure.figsize', [6.0, 6.0]),
        ('figure.dpi', 90),
    )
    for setting, value in plot_settings:
        pylab.rcParams[setting] = value

    all_estimators = LoadAllEstimators()
    # Alternative analyses (currently disabled):
    #   analyse_reversibility(estimators['hatzi_gc'], 'HatziGC')
    #   analyse_reversibility(estimators['PGC'], 'MiloGC_zoom')

    # Alternative reaction source (currently disabled):
    #   reaction_list = Feist.FromFiles().reactions
    reactions = Kegg.getInstance().AllReactions()

    # evaluate the PGC estimator at the default conditions
    thermo = all_estimators['PGC']
    thermo.c_mid = DEFAULT_CMID
    thermo.T = DEFAULT_T
    thermo.pH = DEFAULT_PH
    thermo.I = DEFAULT_I
    thermo.pMg = DEFAULT_PMG

    compare_reversibility_to_dG0(reactions, thermo=thermo,
                                 html_writer=writer)
Esempio n. 3
0
    def GetJSONDictionary(self):
        """Collect the thermodynamic data of all compounds in a JSON-ready list.

        Returns:
            A list with one dictionary per CID from self.get_all_cids().
            Each dictionary has the keys 'cid', 'name', 'inchi',
            'num_electrons', 'source' and 'species'; 'species' is a list of
            {nH, z, nMg, dG0_f} dictionaries, one per row of the compound's
            pseudoisomer matrix. Values KEGG cannot supply are None.
        """
        kegg = Kegg.getInstance()
        result = []
        for cid in self.get_all_cids():
            entry = {'cid': cid}

            # Every KEGG lookup may fail on its own; a failed lookup only
            # leaves None in the corresponding field.
            try:
                entry['name'] = kegg.cid2name(cid)
            except KeyError:
                entry['name'] = None
            try:
                entry['inchi'] = kegg.cid2inchi(cid)
            except KeyError:
                entry['inchi'] = None
            try:
                entry['num_electrons'] = kegg.cid2num_electrons(cid)
            except KeggParseException:
                entry['num_electrons'] = None

            entry['source'] = self.cid2source_string.get(cid)

            species = []
            for nH, z, nMg, dG0 in self.cid2PseudoisomerMap(cid).ToMatrix():
                species.append({"nH": nH, "z": z, "nMg": nMg, "dG0_f": dG0})
            entry['species'] = species

            result.append(entry)

        return result
def GetFullOxidationReaction(cid):
    """Build the reaction converting a compound into five basic compounds.

    The coefficients of H2O, O2, Pi, CO2 and NH3 are chosen so that the
    counts of C, O, P, N and electrons match those of the given compound;
    they are found by inverting the 5x5 element matrix of the basic
    compounds and rounded to 3 decimals.

    Args:
        cid: the KEGG compound ID of the compound (it gets coefficient -1).

    Returns:
        A Reaction named "complete oxidation of <compound name>".
    """
    kegg = Kegg.getInstance()

    product_cids = [1, 7, 9, 11, 14]  # H2O, O2, Pi, CO2, NH3
    balanced = ["C", "O", "P", "N", "e-"]
    n_rows = len(balanced)
    n_cols = len(product_cids)

    # rows are the balanced quantities, columns are the basic compounds
    element_mat = np.matrix(np.zeros((n_rows, n_cols)))
    for col, basic_cid in enumerate(product_cids):
        bag = kegg.cid2atom_bag(basic_cid)
        bag["e-"] = kegg.cid2num_electrons(basic_cid)
        for row, element in enumerate(balanced):
            element_mat[row, col] = bag.get(element, 0)

    # the same quantities for the compound of interest
    target_vec = np.zeros((n_rows, 1))
    bag = kegg.cid2atom_bag(cid)
    bag["e-"] = kegg.cid2num_electrons(cid)
    for row, element in enumerate(balanced):
        target_vec[row, 0] = bag.get(element, 0)

    coeffs = np.linalg.inv(element_mat) * target_vec

    sparse = {}
    for row, basic_cid in enumerate(product_cids):
        sparse[basic_cid] = np.round(coeffs[row, 0], 3)
    sparse[cid] = -1

    return Reaction("complete oxidation of %s" % kegg.cid2name(cid), sparse)
Esempio n. 5
0
def GetMolInput(dissociation):
    mols = [] # a list of pairs of Molecule objects and stoichiometric coefficients 
    while mols == []:
        print 'KEGG ID or SMILES (or Enter to quit):',
        s_input = raw_input()
        if not s_input:
            return []
        elif re.findall('C\d\d\d\d\d', s_input) != []:
            try:
                cid = int(s_input[1:])
                mols = [(GetMostAbundantMol(cid, dissociation), 1)]
                print "Compound:", mols[0][0].ToInChI()
            except ValueError:
                print 'syntax error: KEGG compound ID is bad (%s), please try again' % s_input
        elif re.findall('R\d\d\d\d\d', s_input) != []:
            try:
                rid = int(s_input[1:])
                reaction = Kegg.getInstance().rid2reaction(rid)
                print "Reaction:", str(reaction)
                for cid, coeff in reaction.iteritems():
                    mols += [(GetMostAbundantMol(cid, dissociation), coeff)]
            except ValueError:
                print 'syntax error: KEGG reaction ID is bad (%s), please try again' % s_input
        else:
            try:
                mols = [(Molecule.FromSmiles(s_input), 1)]
                print "Compound:", mols[0][0].ToInChI()
            except Exception:
                print 'unable to parse SMILES string, please try again'
        
    return mols
Esempio n. 6
0
 def Populate(self, filename):
     """Populates the database from files.

     Re-initializes the tables via self._InitTables(), inserts one
     'organisms' row per line of the CSV file, and then one
     'organism_enzymes' row per (organism, EC) pair of the KEGG enzyme map.

     Args:
         filename: path of the CSV file holding the organism data.
     """
     self._InitTables()

     # The context manager closes the file even when parsing or an insert
     # raises, which an explicit close() after the loop does not guarantee.
     with open(filename) as f:
         for row in csv.DictReader(f):
             insert_row = []
             for table_header in self.ORG_TABLE_HEADERS:
                 if table_header not in self.CSV_HEADER_MAPPING:
                     # this table column has no CSV counterpart
                     insert_row.append(None)
                     continue

                 csv_header = self.CSV_HEADER_MAPPING[table_header]
                 val = row.get(csv_header, None)
                 # missing, empty and whitespace-only cells become None
                 if val and val.strip():
                     insert_row.append(val)
                 else:
                     insert_row.append(None)

             # the last column is overwritten with the value derived from
             # the oxygen requirement of the organism
             oxy_req = row.get(self.OXY_REQ, None)
             insert_row[-1] = self.GetBroadyOxyReq(oxy_req)

             self.db.Insert('organisms', insert_row)

     k = Kegg.getInstance(loadFromAPI=False)
     enzyme_map = k.ec2enzyme_map
     for ec, enzyme in enzyme_map.iteritems():
         for org in enzyme.genes.keys():
             self.db.Insert('organism_enzymes', [org.lower(), ec])
 def __init__(self, db, html_writer, thermodynamics,
              kegg=None):
     """Store the collaborators and start with an empty pathway map.

     Args:
         db: the database object.
         html_writer: the writer used for HTML output.
         thermodynamics: the thermodynamic data source (kept as self.thermo).
         kegg: a Kegg instance; the shared Kegg.getInstance() is used when
             this argument is omitted (or falsy).
     """
     self.db = db
     self.html_writer = html_writer
     self.thermo = thermodynamics
     if kegg:
         self.kegg = kegg
     else:
         self.kegg = Kegg.getInstance()
     self.pathways = {}
def main():
    """Transform the training formation energies and write them to a CSV file.

    Every compound of '../data/thermodynamics/formation_energies.csv' must
    have exactly one training species. Its formation energy and charge are
    stored in the compound's dissociation table, which is then transformed
    to pH 7.0, I 0.25, pMg 14.0 and T 298.15; the result is written to
    '../res/formation_energies_transformed.csv'.

    Raises:
        Exception: if a compound has more than one training species or no
            dissociation table.
    """
    pH, I, pMg, T = 7.0, 0.25, 14.0, 298.15

    dissociation = DissociationConstants.FromPublicDB()
    kegg = Kegg.getInstance()
    obs_fname = "../data/thermodynamics/formation_energies.csv"
    res_fname = "../res/formation_energies_transformed.csv"

    train_species = PsuedoisomerTableThermodynamics.FromCsvFile(obs_fname, label="testing")

    # The context manager flushes and closes the output file even when one
    # of the compounds below raises; a bare open() was never closed.
    with open(res_fname, "w") as res_file:
        csv_out = csv.writer(res_file)
        csv_out.writerow(["cid", "name", "dG'0", "pH", "I", "pMg", "T", "anchor", "compound_ref", "remark"])
        for cid in train_species.get_all_cids():
            pmap = train_species.cid2PseudoisomerMap(cid)
            source = train_species.cid2source_string[cid]
            pmatrix = pmap.ToMatrix()  # ToMatrix returns tuples of (nH, z, nMg, dG0)
            if len(pmatrix) != 1:
                raise Exception("multiple training species for C%05d" % cid)
            nH, charge, nMg, dG0 = pmatrix[0]
            compound_name = kegg.cid2name(cid)
            name = "%s (%d)" % (compound_name, nH)
            logging.info("Adding the formation energy of %s", name)
            diss_table = dissociation.GetDissociationTable(cid, create_if_missing=True)
            if diss_table is None:
                raise Exception("%s [C%05d, nH=%d, nMg=%d] does not have a " "dissociation table" % (name, cid, nH, nMg))

            diss_table.SetFormationEnergyByNumHydrogens(dG0, nH, nMg)
            diss_table.SetCharge(nH, charge, nMg)
            dG0_prime = diss_table.Transform(pH, I, pMg, T)
            csv_out.writerow([cid, compound_name, "%.1f" % dG0_prime, pH, I, pMg, T, True, source, None])
def main():
    opt_parser = flags.MakeOpts()
    options, _ = opt_parser.parse_args(sys.argv)
    estimators = LoadAllEstimators()
    
    print ('Parameters: T=%f K, pH=%.2g, pMg=%.2g, '
           'I=%.2gmM, Median concentration=%.2gM' % 
           (default_T, options.ph, options.pmg, options.i_s, options.c_mid))

    for thermo in estimators.values():
        thermo.c_mid = options.c_mid
        thermo.pH = options.ph
        thermo.pMg = options.pmg
        thermo.I = options.i_s
        thermo.T = default_T
    
    kegg = Kegg.getInstance()
    while True:
        cid = GetReactionIdInput()        
        compound = kegg.cid2compound(cid)
        print 'Compound Name: %s' % compound.name
        print '\tKegg ID: C%05d' % cid
        print '\tFormula: %s' % compound.formula
        print '\tInChI: %s' % compound.inchi
        for key, thermo in estimators.iteritems():
            print "\t<< %s >>" % key
            try:
                print thermo.cid2PseudoisomerMap(cid),
                print '--> dG0\'f = %.1f kJ/mol' % compound.PredictFormationEnergy(thermo)
            except Exception as e: 
                print '\t\tError: %s' % (str(e))
Esempio n. 10
0
    def run(self):
        """Compute the dissociation table of self.smiles and store it in the DB.

        The computation is throttled by self.semaphore and the database
        access is serialized by self.db_lock. Both are released in
        'finally' clauses so that a failure in one worker cannot block the
        others forever. If no dissociation table could be computed, a
        single row of None values is inserted for the compound.
        """
        from toolbox.molecule import Molecule

        self.semaphore.acquire()
        try:
            start_time = time.time()

            logging.debug("SMILES: " + self.smiles)
            diss_table = Molecule._GetDissociationTable(self.smiles, fmt='smiles',
                mid_pH=default_pH, min_pKa=0, max_pKa=14, T=default_T)
            # The table may be None (see the insert below), so its fields
            # are only logged when it exists.
            if diss_table is not None:
                logging.debug("Min charge: %d" % diss_table.min_charge)
                logging.debug("Min nH: %d" % diss_table.min_nH)

            elapsed_time = time.time() - start_time

            self.db_lock.acquire()
            try:
                db = SqliteDatabase(self.options.db_file)
                kegg = Kegg.getInstance()
                name = kegg.cid2name(self.cid)

                if diss_table is not None:
                    for row in diss_table.ToDatabaseRow():
                        db.Insert(self.options.table_name, [self.cid, name] + row)
                else:
                    db.Insert(self.options.table_name, [self.cid, name] + [None] * 10)
                del db
            finally:
                self.db_lock.release()

            logging.info("Completed C%05d, elapsed time = %.1f sec" %
                         (self.cid, elapsed_time))
        finally:
            self.semaphore.release()
 def __init__(self, db, html_writer=None, dissociation=None, anchor_all=False):
     """Set up the Unified Group Contribution estimator.

     Args:
         db: the database object used by this estimator.
         html_writer: writer for HTML output; a NullHtmlWriter is used when
             omitted.
         dissociation: the dissociation constants (may be None).
         anchor_all: selects 'formation_energies_anchor_all.csv' instead of
             'formation_energies.csv' as the formation energy file.
     """
     PsuedoisomerTableThermodynamics.__init__(self, name="Unified Group Contribution")
     self.db = db
     if html_writer:
         self.html_writer = html_writer
     else:
         self.html_writer = NullHtmlWriter()
     self.dissociation = dissociation
     self.transformed = False
     self.CollapseReactions = False
     self.epsilon = 1e-10
     self.kegg = Kegg.getInstance()

     # names of the database tables, stored as instance attributes
     table_names = (
         ('STOICHIOMETRIC_TABLE_NAME', 'ugc_S'),
         ('GROUP_TABLE_NAME', 'ugc_G'),
         ('GIBBS_ENERGY_TABLE_NAME', 'ugc_b'),
         ('ANCHORED_TABLE_NAME', 'ugc_anchored'),
         ('COMPOUND_TABLE_NAME', 'ugc_compounds'),
         ('OBSERVATION_TABLE_NAME', 'ugc_observations'),
         ('GROUPVEC_TABLE_NAME', 'ugc_groupvectors'),
         ('UNIQUE_OBSERVATION_TABLE_NAME', 'ugc_unique_observations'),
         ('THERMODYNAMICS_TABLE_NAME', 'ugc_pseudoisomers'),
         ('ERRORS_TABLE_NAME', 'ugc_errors'),
         ('CONSERVATIONS_TABLE_NAME', 'ugc_conservations'),
     )
     for attribute, table_name in table_names:
         setattr(self, attribute, table_name)

     self.FORMATION_ENERGY_FILENAME = (
         '../data/thermodynamics/formation_energies_anchor_all.csv'
         if anchor_all else
         '../data/thermodynamics/formation_energies.csv')
Esempio n. 12
0
    def FromChemAxon(cid2mol=None, html_writer=None):
        """Create DissociationConstants from the molecules' dissociation tables.

        Args:
            cid2mol: a dictionary from CID to Molecule (or None). When
                omitted, all KEGG compounds are used. For a CID mapped to
                None, the Molecule is looked up in KEGG.
            html_writer: optional writer; receives a header per compound
                and the dissociation table of each compound that has one.

        Returns:
            The DissociationConstants object holding one table per compound
            (compounds that KEGG cannot parse are skipped).
        """
        kegg = Kegg.getInstance()
        diss = DissociationConstants()
        if cid2mol is None:
            cid2mol = dict.fromkeys(kegg.get_all_cids())

        for cid, mol in sorted(cid2mol.iteritems()):
            label = (kegg.cid2name(cid), cid)
            logging.info("Using ChemAxon to find the pKa values for %s - C%05d" %
                         label)
            if html_writer:
                html_writer.write('<h2>%s - C%05d</h2>\n' % label)

            # a CID without an assigned Molecule gets one from the KEGG
            # database; compounds KEGG cannot parse are skipped
            if mol is None:
                try:
                    mol = kegg.cid2mol(cid)
                except KeggParseException:
                    continue

            table = mol.GetDissociationTable()
            diss.cid2DissociationTable[cid] = table
            if table and html_writer:
                table.WriteToHTML(html_writer)
                html_writer.write('</br>\n')
        return diss
Esempio n. 13
0
    def GetForamtionEnergies(self, thermo):
        """Fill the Gibbs energy table with the energy of every equation.

        The table is dropped and re-created. For each distinct equation in
        the equation table, the transformed reaction energy is stored for
        conc=1 (dG0) and conc=1e-3 (dGc). Equations that cannot be parsed,
        contain CIDs unknown to KEGG, or cannot be balanced get None for
        both values.

        Args:
            thermo: the estimator providing
                GetTransfromedKeggReactionEnergies().
        """
        table = self.GIBBS_ENERGY_TABLE_NAME
        self.db.CreateTable(table, "equation TEXT, dG0 REAL, dGc REAL", drop_if_exists=True)
        self.db.CreateIndex('gibbs_equation_idx', table, 'equation', unique=True, drop_if_exists=True)

        query = "SELECT distinct(equation) FROM %s" % (self.EQUATION_TABLE_NAME)
        all_equations = set(str(row[0]) for row in self.db.Execute(query))

        from pygibbs.kegg import Kegg
        known_cids = set(Kegg.getInstance().get_all_cids())
        for equation in all_equations:
            try:
                rxn = Reaction.FromFormula(equation)
                if not rxn.get_cids().issubset(known_cids):
                    raise KeggNonCompoundException
                rxn.Balance(balance_water=True, exception_if_unknown=True)
                energies = [
                    thermo.GetTransfromedKeggReactionEnergies([rxn], conc=c)[0, 0]
                    for c in (1, 1e-3)]
                self.db.Insert(table, [equation] + energies)

            except (KeggParseException, KeggNonCompoundException, KeggReactionNotBalancedException):
                self.db.Insert(table, [equation, None, None])

        self.db.Commit()
Esempio n. 14
0
    def Train(self, FromDatabase=True, prior_thermodynamics=None):
        """Estimate the formation energies by linear regression.

        The training data (S, dG0, cids and the nH/nMg of each compound) is
        read from the 'prc_*' tables when FromDatabase is set and 'prc_S'
        exists; otherwise it is computed with ReverseTransform and saved
        to those tables. The result is stored in 'prc_pseudoisomers'.

        Args:
            FromDatabase: whether cached training data may be used.
            prior_thermodynamics: passed on to LinearRegression.
        """
        use_cached = FromDatabase and self.db.DoesTableExist('prc_S')
        if not use_cached:
            cid2nH_nMg = self.GetDissociation().GetCid2nH_nMg(
                                            self.pH, self.I, self.pMg, self.T)
            S, dG0, cids = self.ReverseTransform(cid2nH_nMg=cid2nH_nMg)

            # cache the training data for later runs
            self.db.SaveSparseNumpyMatrix('prc_S', S)
            self.db.SaveNumpyMatrix('prc_b', dG0.T)
            self.db.CreateTable('prc_compounds',
                                'cid INT, name TEXT, nH INT, nMg INT')
            kegg = Kegg.getInstance()
            for cid in cids:
                nH, nMg = cid2nH_nMg[cid]
                self.db.Insert('prc_compounds',
                               [cid, kegg.cid2name(cid), nH, nMg])
            self.db.Commit()
        else:
            S = self.db.LoadSparseNumpyMatrix('prc_S')
            dG0 = self.db.LoadNumpyMatrix('prc_b').T
            cids = []
            cid2nH_nMg = {}
            for rowdict in self.db.DictReader('prc_compounds'):
                cid = int(rowdict['cid'])
                nH = int(rowdict['nH'])
                nMg = int(rowdict['nMg'])
                cids.append(cid)
                cid2nH_nMg[cid] = (nH, nMg)

        # Train the formation energies using linear regression
        self.LinearRegression(S, dG0, cids, cid2nH_nMg, prior_thermodynamics)
        self.ToDatabase(self.db, 'prc_pseudoisomers')
Esempio n. 15
0
def ExportJSONFiles():
    estimators = LoadAllEstimators()
    options, _ = MakeOpts(estimators).parse_args(sys.argv)
    
    thermo_list = []
    thermo_list.append(estimators[options.thermodynamics_source])
    thermo_list.append(PsuedoisomerTableThermodynamics.FromCsvFile(options.thermodynamics_csv))

    # Make sure we have all the data.
    kegg = Kegg.getInstance()
    for i, thermo in enumerate(thermo_list):
        print "Priority %d - formation energies of: %s" % (i+1, thermo.name)
        kegg.AddThermodynamicData(thermo, priority=(i+1))
    
    db = SqliteDatabase('../res/gibbs.sqlite')

    print 'Exporting Group Contribution Nullspace matrix as JSON.'
    nullspace_vectors = []
    for row in db.DictReader('ugc_conservations'):
        d = {'msg': row['msg']}
        sparse = json.loads(row['json'])
        d['reaction'] = []
        for cid, coeff in sparse.iteritems():
            d['reaction'].append([coeff, "C%05d" % int(cid)])
        nullspace_vectors.append(d)
    WriteJSONFile(nullspace_vectors, options.nullspace_out_filename)
        
    print 'Exporting KEGG compounds as JSON.'
    WriteJSONFile(kegg.AllCompounds(), options.compounds_out_filename)

    print 'Exporting KEGG reactions as JSON.'
    WriteJSONFile(kegg.AllReactions(), options.reactions_out_filename)
    
    print 'Exporting KEGG enzymes as JSON.'
    WriteJSONFile(kegg.AllEnzymes(), options.enzymes_out_filename)
Esempio n. 16
0
 def GetTransfromedKeggReactionEnergies(self, kegg_reactions,
                                        pH=None, I=None, pMg=None, T=None,
                                        conc=1):
     """Return the transformed energies of a list of KEGG reactions.

     The reactions are converted to a stoichiometric matrix and its list
     of CIDs, which are handed to GetTransfromedReactionEnergies together
     with the unchanged condition arguments.

     Args:
         kegg_reactions: the list of KEGG reactions.
         pH, I, pMg, T, conc: passed on as given.

     Returns:
         Whatever GetTransfromedReactionEnergies returns.
     """
     S, cids = Kegg.getInstance().reaction_list_to_S(kegg_reactions)
     conditions = dict(pH=pH, I=I, pMg=pMg, T=T, conc=conc)
     return self.GetTransfromedReactionEnergies(S, cids, **conditions)
def main():
    options, _ = flags.MakeOpts().parse_args(sys.argv)
    c_mid = options.c_mid
    pH = options.ph
    pMg = options.pmg
    I = options.i_s
    T = default_T

    db = SqliteDatabase("../res/gibbs.sqlite")
    kegg = Kegg.getInstance()
    G = GroupContribution(db)
    G.init()

    print ("Parameters: T=%f K, pH=%.2g, pMg=%.2g, " "I=%.2gM, Median concentration=%.2gM" % (T, pH, pMg, I, c_mid))

    cmap = {}
    if not options.ignore_cofactors:
        if options.full_metabolites:
            print "Fixing concentrations of all known metabolites"
            cmap = reversibility.GetFullConcentrationMap(G)
        else:
            print "Fixing concentrations of co-factors"
            cmap = reversibility.GetConcentrationMap(kegg)
    else:
        print "Not fixing concentrations of co-factors"

    if options.report_mode:
        print "Output used metabolites concentrations"

    while True:
        mid = GetModuleIdInput()

        rid_flux_list = kegg.mid2rid_map[mid]

        for rid, flux in rid_flux_list:
            try:
                reaction = kegg.rid2reaction(rid)
                print "Reaction Name", reaction.name
                print "\tKegg Id", reaction.rid
                print "\tEC", reaction.ec_list
                rev = reversibility.CalculateReversability(
                    reaction.sparse, G, pH=pH, I=I, pMg=pMg, T=T, concentration_map=cmap
                )
                if rev == None:
                    dG = G.estimate_dG_reaction(reaction.sparse, pH=pH, pMg=pMg, I=I, T=T, c0=c_mid, media="glucose")
                    print "\tReversibility: No free compounds, dG = %.2g" % dG
                else:
                    corrected_reversibility = flux * rev
                    print "\tReversibility %.2g" % corrected_reversibility

                if options.report_mode:
                    for cid, s in reaction.sparse.iteritems():
                        if cid in cmap:
                            print "(%d C%05d) %s\t: %.2g" % (s, cid, kegg.cid2name(cid), cmap[cid])
                        else:
                            print "(%d C%05d) %s\t: Free concentration" % (s, cid, kegg.cid2name(cid))
            except Exception:
                print "\tCouldn't calculate irreversibility"
Esempio n. 18
0
def main():
    """Start one DissociationThreads worker per compound that lacks pKa data.

    The compounds are either those of NIST, the redox carriers and the
    formation energy table (option 'nist'), or all KEGG compounds. CIDs
    already present in the database table are skipped, as are compounds
    without a usable SMILES string. The workers are started in order of
    increasing molecular mass.
    """
    options, _ = MakeOpts().parse_args(sys.argv)
    db = SqliteDatabase(options.db_file)
    kegg = Kegg.getInstance()

    if options.override_table:
        db.Execute("DROP TABLE IF EXISTS " + options.table_name)

    DissociationConstants._CreateDatabase(db, options.table_name, drop_if_exists=options.override_table)

    if options.nist:
        cids_to_calculate = set(Nist().GetAllCids())
        cids_to_calculate.update(RedoxCarriers().GetAllCids())

        ptable = PsuedoisomerTableThermodynamics.FromCsvFile("../data/thermodynamics/formation_energies.csv")
        cids_to_calculate.update(ptable.get_all_cids())
    else:
        cids_to_calculate = set(kegg.get_all_cids())

    # Do not recalculate pKas for CIDs that are already in the database
    for row in db.Execute("SELECT distinct(cid) FROM %s" % options.table_name):
        cids_to_calculate.discard(row[0])

    cid2smiles_and_mw = {}
    for cid in cids_to_calculate:
        # the compound CO is a special case where the conversion from InChI
        # to SMILES fails, so we add a specific override for it only
        if cid == 237:
            cid2smiles_and_mw[cid] = ("[C-]#[O+]", 28)
            continue

        try:
            mol = kegg.cid2compound(cid).GetMolecule()
            cid2smiles_and_mw[cid] = (mol.ToSmiles(), mol.GetExactMass())
        except KeggParseException:
            logging.debug("%s (C%05d) has no SMILES, skipping..." %
                          (kegg.cid2name(cid), cid))
        except OpenBabelError:
            logging.debug("%s (C%05d) cannot be converted to SMILES, skipping..." %
                          (kegg.cid2name(cid), cid))

    # only compounds with a SMILES entry remain; order them by mass, then CID
    cids_to_calculate = sorted(
        cid2smiles_and_mw,
        key=lambda cid: (cid2smiles_and_mw[cid][1], cid))

    db_lock = threading.Lock()
    semaphore = threading.Semaphore(options.n_threads)
    for cid in cids_to_calculate:
        smiles = cid2smiles_and_mw[cid][0]
        if not smiles:
            logging.info("The following compound is blacklisted: C%05d" % cid)
            continue

        worker = DissociationThreads(group=None, target=None, name=None,
                                     args=(cid, smiles, semaphore, db_lock, options), kwargs={})
        worker.start()
Esempio n. 19
0
 def CompareOverKegg(self, html_writer, other, fig_name=None):
     """
         Compare the estimations of two different evaluation methods
         by calculating all the KEGG reactions which both self and other
         can estimate, and comparing them using a XY plot.

         Both estimations use the conditions (pH, pMg, I, T) of self.
         The figure is written to the HTML output.

         Returns (0, 0) if there is no reaction that both can estimate.
         NOTE(review): nothing is returned explicitly after plotting -
         confirm whether callers rely on a return value.
     """
     kegg = Kegg.getInstance()

     # (rid, estimate of self, estimate of other) per comparable reaction
     points = []
     for rid in sorted(kegg.get_all_rids()):
         reaction = kegg.rid2reaction(rid)
         try:
             reaction.Balance()
             dG0_self = reaction.PredictReactionEnergy(
                 self, pH=self.pH, pMg=self.pMg, I=self.I, T=self.T)
             dG0_other = reaction.PredictReactionEnergy(
                 other, pH=self.pH, pMg=self.pMg, I=self.I, T=self.T)
         except (MissingCompoundFormationEnergy, MissingReactionEnergy,
                 KeggReactionNotBalancedException, KeyError):
             continue

         points.append((rid, dG0_self, dG0_other))

     if not points:
         return 0, 0

     # global matplotlib settings for the profile graph
     plot_settings = (
         ('text.usetex', False),
         ('legend.fontsize', 12),
         ('font.family', 'sans-serif'),
         ('font.size', 12),
         ('lines.linewidth', 2),
         ('lines.markersize', 6),
         ('figure.figsize', [6.0, 6.0]),
         ('figure.dpi', 100),
     )
     for setting, value in plot_settings:
         plt.rcParams[setting] = value

     vec_dG0_self = np.array([point[1] for point in points])
     vec_dG0_other = np.array([point[2] for point in points])
     vec_rid = [point[0] for point in points]

     fig = plt.figure()
     fig.hold(True)

     # dashed diagonal spanning the range of both estimations
     upper = max(vec_dG0_self.max(), vec_dG0_other.max())
     lower = min(vec_dG0_self.min(), vec_dG0_other.min())
     plt.plot([lower, upper], [lower, upper], 'k--', figure=fig)

     plt.plot(vec_dG0_self, vec_dG0_other, '.', figure=fig)
     for i, rid in enumerate(vec_rid):
         plt.text(vec_dG0_self[i], vec_dG0_other[i], '%d' % rid, fontsize=6)

     # NOTE(review): np.corrcoef yields the correlation coefficient r,
     # while the title labels the value r^2 - confirm which is intended.
     r2 = np.corrcoef(vec_dG0_self, vec_dG0_other)[1, 0]
     plt.title("$\Delta_r G^{'\circ}$ comparison per reaction, $r^2$ = %.2f" % r2)
     plt.xlabel(self.name + ' (in kJ/mol)', figure=fig)
     plt.ylabel(other.name + ' (in kJ/mol)', figure=fig)
     html_writer.embed_matplotlib_figure(fig, width=200, height=200, name=fig_name)
Esempio n. 20
0
def GetConcentrationMap():
    """Return a concentration map holding the KEGG bounds of the compounds.

    Starts from GetEmptyConcentrationMap() and, for every KEGG compound
    whose lower and upper bounds are both truthy, stores the lower bound.
    """
    kegg = Kegg.getInstance()
    cmap = GetEmptyConcentrationMap()
    for cid in kegg.get_all_cids():
        lower, upper = kegg.get_bounds(cid)
        if not (lower and upper):
            continue
        # In the file we got this data from lower = upper
        cmap[cid] = lower
    return cmap
Esempio n. 21
0
 def ReadKeggCompounds():
     """Map the normalized InChI of each KEGG compound to its CID.

     Returns:
         A dictionary from normalized InChI to the lowest CID having that
         InChI. The key None is mapped to 0.
     """
     kegg = Kegg.getInstance()
     inchi2KeggID = {None: 0}
     for cid in sorted(kegg.get_all_cids()):
         normalized = Feist.NormalizeInChI(kegg.cid2inchi(cid))
         # CIDs come in increasing order and setdefault never overwrites,
         # so the lowest CID with this InChI is the one that is kept
         inchi2KeggID.setdefault(normalized, cid)
     return inchi2KeggID
Esempio n. 22
0
 def __init__(self, T_range=(298, 314)):
     """Open the public database, then load and balance the reactions.

     Args:
         T_range: a pair of temperatures, stored as self.T_range.
             NOTE(review): presumably the accepted (min, max) range in
             Kelvin - confirm against FromDatabase.
     """
     self.db = SqliteDatabase('../data/public_data.sqlite')
     self.kegg = Kegg.getInstance()
     self.T_range = T_range

     # no pH filter and no overridden conditions by default
     self.pH_range = None
     self.override_I = self.override_pMg = self.override_T = None

     self.FromDatabase()
     self.BalanceReactions()
Esempio n. 23
0
 def GetDissociationTable(self, cid, create_if_missing=True):
     """Return the dissociation table of a compound.

     Args:
         cid: the KEGG compound ID.
         create_if_missing: whether a table for an unknown CID is created
             from its KEGG molecule and cached. If KEGG cannot parse the
             compound, None is cached instead.

     Returns:
         The cached table of the compound, or None if there is none.
     """
     known = self.cid2DissociationTable
     if create_if_missing and cid not in known:
         try:
             mol = Kegg.getInstance().cid2mol(cid)
             table = DissociationTable.FromMolecule(mol)
         except KeggParseException:
             table = None
         known[cid] = table

     return known.get(cid)
Esempio n. 24
0
 def WriteBiochemicalFormationEnergiesToCsv(self, csv_fname):
     """Write the transformed formation energy of every compound to a CSV file.

     One row per CID (in increasing order) is written with the columns
     name, cid, pH, I, pMg, T and dG0, using the conditions returned by
     self.GetConditions().

     Args:
         csv_fname: path of the CSV file to write.
     """
     kegg = Kegg.getInstance()
     pH, I, pMg, T = self.GetConditions()

     # compute everything first, so that a failure does not leave a
     # truncated file behind
     cids = sorted(self.get_all_cids())
     dG0_prime = self.GetTransformedFormationEnergies(cids)

     # the context manager flushes and closes the file; a bare open()
     # handed to csv.writer was never closed
     with open(csv_fname, 'w') as csv_file:
         writer = csv.writer(csv_file)
         writer.writerow(['name', 'cid', 'pH', 'I', 'pMg', 'T', 'dG0'])
         for i, cid in enumerate(cids):
             name = kegg.cid2name(cid)
             writer.writerow([name, "C%05d" % cid, pH, I, pMg, T,
                              '%.1f' % dG0_prime[0, i]])
Esempio n. 25
0
 def WriteBiochemicalReactionEnergiesToCsv(self, csv_fname):
     """Write the transformed energy of every balanced KEGG reaction to a CSV file.

     One row per reaction is written with the columns rid, formula, pH, I,
     pMg, T and dG0, using the conditions returned by self.GetConditions().

     Args:
         csv_fname: path of the CSV file to write.
     """
     kegg = Kegg.getInstance()
     pH, I, pMg, T = self.GetConditions()

     # compute everything first, so that a failure does not leave a
     # truncated file behind
     kegg_reactions = kegg.get_all_balanced_reactions()
     dG0_r = self.GetTransfromedKeggReactionEnergies(kegg_reactions)

     # the context manager flushes and closes the file; a bare open()
     # handed to csv.writer was never closed
     with open(csv_fname, 'w') as csv_file:
         writer = csv.writer(csv_file)
         writer.writerow(['rid', 'formula', 'pH', 'I', 'pMg', 'T', 'dG0'])
         for i, reaction in enumerate(kegg_reactions):
             # NOTE(review): the 'rid' column is filled with reaction.name -
             # confirm that the name is the intended identifier
             writer.writerow([reaction.name, reaction.FullReactionString(), pH, I, pMg, T, '%.1f' % float(dG0_r[0, i])])
Esempio n. 26
0
 def write_compound_and_coeff(cid, coeff, show_cids=True):
     """Format one compound of a reaction together with its coefficient.

     Args:
         cid: the KEGG compound ID.
         coeff: the stoichiometric coefficient.
         show_cids: if True the compound is written as its ID (Cxxxxx),
             otherwise as its KEGG name.

     Returns:
         The compound alone when the coefficient equals 1, otherwise
         "<coefficient> <compound>".
     """
     if not show_cids:
         # KEGG is only needed (and imported) for the name lookup
         from pygibbs.kegg import Kegg
         comp = Kegg.getInstance().cid2name(cid)
     else:
         comp = "C%05d" % cid

     if coeff == 1:
         return comp
     return "%g %s" % (coeff, comp)
Esempio n. 27
0
 def WriteChemicalFormationEnergiesToCsv(self, csv_fname):
     """Write the formation energy of every pseudoisomer to a CSV file.

     One row per pseudoisomer is written with the columns name, cid, nH,
     z, nMg and dG0; the compounds are in increasing CID order. Compounds
     without a formation energy are skipped with a warning.

     Args:
         csv_fname: path of the CSV file to write.
     """
     kegg = Kegg.getInstance()

     # the context manager flushes and closes the file; a bare open()
     # handed to csv.writer was never closed
     with open(csv_fname, 'w') as csv_file:
         writer = csv.writer(csv_file)
         writer.writerow(['name', 'cid', 'nH', 'z', 'nMg', 'dG0'])
         for cid in sorted(self.get_all_cids()):
             name = kegg.cid2name(cid)
             try:
                 pdata = self.cid2PseudoisomerMap(cid)
                 for nH, z, nMg, dG0 in pdata.ToMatrix():
                     writer.writerow([name, "C%05d" % cid, nH, z, nMg, '%.1f' % dG0])
             except MissingCompoundFormationEnergy as e:
                 logging.warning(str(e))
def main():
    """Transform the formation energies of a CSV table and write them out.

    The pseudoisomer map of every compound in FormationEnergyFileName is
    transformed to pH 7.0, I 0.25, pMg 14 and T 298.15, and the result is
    written to '../res/formation_energies_transformed.csv'.
    """
    ptable = PsuedoisomerTableThermodynamics.FromCsvFile(FormationEnergyFileName, label='testing')
    kegg = Kegg.getInstance()
    pH, I, pMg, T = (7.0, 0.25, 14, 298.15)

    # the context manager flushes and closes the file; a bare open()
    # handed to csv.writer was never closed
    with open('../res/formation_energies_transformed.csv', 'w') as out_file:
        output_csv = csv.writer(out_file)
        output_csv.writerow(["cid","name","dG'0","pH","I","pMg","T",
                             "anchor","compound_ref","remark"])
        for cid in ptable.get_all_cids():
            pmap = ptable.cid2PseudoisomerMap(cid)
            dG0_prime = pmap.Transform(pH=pH, I=I, pMg=pMg, T=T)
            # NOTE(review): the header has 10 columns but the rows have 9
            # (no value for 'remark') - confirm this is acceptable to the
            # readers of this file
            output_csv.writerow([cid, kegg.cid2name(cid), "%.1f" % dG0_prime, pH, I, pMg, T,
                                 1, ptable.cid2source_string[cid]])
def CreateElementMatrix(thermo):
    """Stack the atom vectors of the compounds known to an estimator.

    Compounds that KEGG/OpenBabel cannot handle, and compounds whose atom
    vector is None, are left out.

    Args:
        thermo: the estimator providing get_all_cids().

    Returns:
        A pair (cids, atom_matrix): the list of included CIDs and a numpy
        array with one atom vector per included compound, in that order.
    """
    kegg = Kegg.getInstance()
    kept_cids = []
    vectors = []
    for cid in thermo.get_all_cids():
        try:
            vector = kegg.cid2compound(cid).get_atom_vector()
        except (KeggParseException, OpenBabelError):
            continue
        if vector is None:
            continue
        kept_cids.append(cid)
        vectors.append(vector)
    return kept_cids, np.array(vectors)
Esempio n. 30
0
    def __init__(self, db, html_writer, thermodynamics):
        """Store the collaborators and register the generic electron carriers.

        Note that this adds two pseudoisomers (compounds 28 and 30) to the
        thermodynamics object that was passed in.

        Args:
            db: the database object.
            html_writer: the writer used for HTML output.
            thermodynamics: the thermodynamic data source (kept as
                self.thermo).
        """
        self.db = db
        self.html_writer = html_writer
        self.thermo = thermodynamics
        self.kegg = Kegg.getInstance()

        # Use a default standard redox potential and concentrations of 1M.
        # The formation energy is used only for the dG in the tables and
        # is later overridden by the value of 'redox', which is determined
        # by the Y-axis in the contour plot.
        default_E_prime = -0.32 # the E' of NAD(P) at pH 7
        carriers = (
            (28, 0),                     # oxidized electron carrier
            (30, -default_E_prime * F),  # reduced electron carrier
        )
        for carrier_cid, carrier_dG0 in carriers:
            self.thermo.AddPseudoisomer(carrier_cid, nH=0, z=0, nMg=0,
                                        dG0=carrier_dG0)
Esempio n. 31
0
    def __init__(self,
                 model,
                 thermodynamic_data,
                 metabolite_concentration_bounds,
                 optimization_status=OptimizationStatus.Successful(),
                 optimal_value=None,
                 optimal_ln_metabolite_concentrations=None):
        """Store the result of an optimization and derive the reaction energies.

        Args:
            model: the stoichiometric model that was optimized.
            thermodynamic_data: provides GetDGrTagZero_ForModel(model).
            metabolite_concentration_bounds: provides GetBoundsWithDefault().
            optimization_status: the status of the optimization.
                NOTE(review): the default object is created once, when the
                function is defined, and shared by all calls - confirm it
                is never mutated.
            optimal_value: the optimal value of the objective (may be None).
            optimal_ln_metabolite_concentrations: the log-concentrations at
                the optimum (may be None, in which case the concentration
                dependent attributes stay None).
        """
        self.model = model
        self.thermo = thermodynamic_data
        self.bounds = metabolite_concentration_bounds
        self.S = model.GetStoichiometricMatrix()
        # the shape of S is read as (compounds, reactions)
        self.Ncompounds, self.Nreactions = self.S.shape
        self.status = optimization_status
        self.opt_val = optimal_value
        self.ln_concentrations = optimal_ln_metabolite_concentrations

        # reaction energies as given by the thermodynamic data, kept both
        # as an array and as a flat list
        self.dGr0_tag = np.array(
            thermodynamic_data.GetDGrTagZero_ForModel(self.model))
        self.dGr0_tag_list = list(self.dGr0_tag.flatten())
        self.compound_ids = self.model.GetCompoundIDs()
        self.reaction_ids = self.model.GetReactionIDs()
        self.fluxes = self.model.GetFluxes()

        # names of the output files, all derived from the model's name
        self.slug_name = util.slugify(model.name)
        self.pathway_graph_filename = '%s_graph.svg' % self.slug_name
        self.thermo_profile_filename = '%s_thermo_profile.png' % self.slug_name
        self.conc_profile_filename = '%s_conc_profile.png' % self.slug_name
        self.kegg = Kegg.getInstance()

        # attributes that depend on the concentrations; they are only
        # filled in below when log-concentrations were supplied
        self.concentrations = None
        self.dGr_tag = None
        self.dGr_tag_list = None
        self.dGr_bio = None
        self.dGr_bio_list = None

        if (self.ln_concentrations is not None and self.dGr0_tag is not None):
            self.concentrations = np.exp(self.ln_concentrations)
            # NOTE(review): '*' is a matrix product only for np.matrix
            # operands (the bio correction below uses np.dot) - confirm
            # the types of ln_concentrations and S.
            conc_correction = RT * self.ln_concentrations * self.S
            self.dGr_tag = np.array(self.dGr0_tag + conc_correction)
            self.dGr_tag_list = list(self.dGr_tag.flatten())

            # the same correction with the concentration bounds, where
            # 1e-3 is passed as the default
            bio_concs = self.bounds.GetBoundsWithDefault(self.compound_ids,
                                                         default=1e-3)
            bio_correction = RT * np.dot(np.log(bio_concs), self.S)
            self.dGr_bio = np.array(self.dGr0_tag + bio_correction)
            self.dGr_bio_list = list(self.dGr_bio.flatten())
Esempio n. 32
0
def main():
    """Plot growth rates against SUMEX scores and oxidation energies.

    For every carbon source returned by LoadGrowthData() the exact mass,
    the number of carbon atoms and the energy of the reaction returned by
    GetFullOxidationReaction() (predicted with the 'UGC' estimator) are
    added to the record.  All figures are collected in
    '../res/growth_rates.pdf'.
    """
    kegg = Kegg.getInstance()
    thermo = LoadAllEstimators()['UGC']
    data = LoadGrowthData()

    pdf = PdfPages('../res/growth_rates.pdf')

    # annotate each record in place with the derived quantities
    for entry in data:
        cid = entry['cid']
        mol = kegg.cid2mol(cid)
        entry['mw'] = mol.GetExactMass()
        atom_bag, _charge = mol.GetAtomBagAndCharge()
        entry['numC'] = atom_bag['C']

        oxidation = GetFullOxidationReaction(cid)
        oxidation.Balance(balance_water=False,
                          balance_hydrogens=True,
                          exception_if_unknown=True)
        print(oxidation.FullReactionString(show_cids=False))
        entry['dG0'] = oxidation.PredictReactionEnergy(thermo)
        entry['total S'] = sum([abs(coeff)
                                for coeff in oxidation.sparse.values()])

    PlotEnergy(data, 'growth_rate', 'Specific Growth Rate [1/hr]', pdf)

    growth_label = 'Specific Growth Rate [1/h]'
    fig = plt.figure(figsize=(12, 6))

    # left panel: growth rate vs. SUMEX score, all records
    plt.subplot(1, 2, 1)
    CorrPlot([entry['sumex'] for entry in data],
             [entry['growth_rate'] for entry in data],
             [entry['carbon_source'] for entry in data],
             'SUMEX score',
             growth_label,
             figure=fig)

    # right panel: only records whose oxidation energy is a finite number
    finite = [entry for entry in data if np.isfinite(entry['dG0'])]
    plt.subplot(1, 2, 2)
    CorrPlot([-entry['dG0'] for entry in finite],
             [entry['growth_rate'] for entry in finite],
             [entry['carbon_source'] for entry in finite],
             r'Oxidation $-\Delta_r G^\circ$ [kJ/mol]',
             growth_label,
             figure=fig)
    fig.tight_layout()
    pdf.savefig(fig)

    PlotEnergy(data, 'sumex', 'SUMEX score', pdf)

    pdf.close()
Esempio n. 33
0
 def __init__(self, S, rids, fluxes, cids, formation_energies=None,
              reaction_energies=None, cid2bounds=None, c_range=None,
              T=default_T):
     """Initialize a pathway whose reactions and compounds carry IDs.

     Args:
         S: passed on to Pathway.__init__ as its first argument.
         rids: the reaction IDs; its length must equal self.Nr.
         fluxes: passed on to Pathway.__init__.
         cids: the compound IDs; its length must equal self.Nc.
         formation_energies: passed on to Pathway.__init__.
         reaction_energies: passed on to Pathway.__init__.
         cid2bounds: optional mapping from a compound ID to its bounds.
                     Compounds missing from it get (None, None).
         c_range: stored as is.
         T: stored as is.
     """
     Pathway.__init__(self, S, fluxes=fluxes,
                      formation_energies=formation_energies,
                      reaction_energies=reaction_energies)
     assert self.Nc == len(cids)
     assert self.Nr == len(rids)

     self.kegg = Kegg.getInstance()
     self.T = T
     self.c_range = c_range
     self.cid2bounds = cid2bounds
     self.cids = cids
     self.rids = rids

     # a per-compound list of bounds, in the same order as the compound IDs
     no_bounds = (None, None)
     self.bounds = ([cid2bounds.get(cid, no_bounds) for cid in cids]
                    if cid2bounds else None)
Esempio n. 34
0
def LoadGrowthData():
    """Load the measured growth rates and SUMEX scores per carbon source.

    Reads '../data/growth/growth_rates_adadi_2012.csv' and maps the name of
    every carbon source to a KEGG compound ID.

    Returns:
        A list with one dict per CSV row, holding the keys 'carbon_source',
        'cid', 'growth_rate' and 'sumex'.

    Raises:
        Exception: if a carbon source name cannot be mapped to a KEGG ID.
    """
    kegg = Kegg.getInstance()
    path = '../data/growth/growth_rates_adadi_2012.csv'
    data = []
    # the context manager closes the file even if a row fails to parse
    with open(path, 'r') as csv_file:
        for row in csv.DictReader(csv_file):
            carbon_source = row['carbon source']
            cid, _, _ = kegg.name2cid(carbon_source)
            if cid is None:
                raise Exception("Cannot map compound name to KEGG ID: " +
                                carbon_source)

            data.append({
                'carbon_source': carbon_source,
                'cid': cid,
                'growth_rate': float(row['maximum growth rate measured']),
                'sumex': float(row['SUMEX'])
            })
    return data
Esempio n. 35
0
    def __init__(self, db, html_writer=None, transformed=False):
        """Construct a GroupContribution instance.

        Args:
            db: the database handle to read from.
            html_writer: the HtmlWriter to write to. A NullHtmlWriter is
                         used when none is given.
            transformed: selects the prefix of all database table names:
                         'bgc' if true, 'pgc' otherwise.
        """
        PsuedoisomerTableThermodynamics.__init__(self,
                                                 name="Group Contribution")
        self.db = db
        self.html_writer = html_writer or NullHtmlWriter()
        self.transformed = transformed
        self.dissociation = None
        self.epsilon = 1e-10

        self.kegg = Kegg.getInstance()
        # work on a private copy, so the bounds held by Kegg stay untouched
        self.bounds = deepcopy(self.kegg.cid2bounds)

        # filled in later on
        self.obs_collection = None
        self.group_contributions = None
        self.group_nullspace = None
        self.cid2groupvec = None
        self.cid2error = {}

        prefix = 'bgc' if transformed else 'pgc'

        self.OBSERVATION_TABLE_NAME = '%s_observations' % prefix
        self.GROUPVEC_TABLE_NAME = '%s_groupvector' % prefix
        self.NULLSPACE_TABLE_NAME = '%s_nullspace' % prefix
        self.CONTRIBUTION_TABLE_NAME = '%s_contribution' % prefix
        self.REGRESSION_TABLE_NAME = '%s_regression' % prefix

        self.THERMODYNAMICS_TABLE_NAME = '%s_pseudoisomers' % prefix
        self.STOICHIOMETRIC_MATRIX_TABLE_NAME = '%s_stoichiometry' % prefix
        self.ANCHORED_CONTRIBUTIONS_TALBE_NAME = '%s_anchored_g' % prefix
        self.ANCHORED_CIDS_TABLE_NAME = '%s_anchored_cids' % prefix
        self.ANCHORED_P_L_TALBE_NAME = '%s_anchored_P_L' % prefix
def main():
    """Write the transformed formation energy of every compound to a CSV.

    The pseudoisomer table is loaded from FormationEnergyFileName, each
    compound is transformed to pH 7, I = 0.25, pMg = 14 and T = 298.15 and
    the result is written to '../res/formation_energies_transformed.csv'.
    """
    ptable = PsuedoisomerTableThermodynamics.FromCsvFile(
        FormationEnergyFileName, label='testing')
    kegg = Kegg.getInstance()
    pH, I, pMg, T = (7.0, 0.25, 14, 298.15)

    # the context manager guarantees the output is flushed and closed
    with open('../res/formation_energies_transformed.csv', 'w') as out_file:
        output_csv = csv.writer(out_file)
        output_csv.writerow([
            "cid", "name", "dG'0", "pH", "I", "pMg", "T", "anchor",
            "compound_ref", "remark"
        ])
        for cid in ptable.get_all_cids():
            pmap = ptable.cid2PseudoisomerMap(cid)
            dG0_prime = pmap.Transform(pH=pH, I=I, pMg=pMg, T=T)
            # the trailing None becomes an empty "remark" field, so every
            # row has as many fields as the header
            output_csv.writerow([
                cid,
                kegg.cid2name(cid),
                "%.1f" % dG0_prime, pH, I, pMg, T, 1,
                ptable.cid2source_string[cid], None
            ])
Esempio n. 37
0
def CalculateThermo():
    estimators = LoadAllEstimators()
    parser = MakeOpts(estimators)
    options, args = parser.parse_args(sys.argv)

    kegg = Kegg.getInstance()
    if options.rid is None:
        reaction = GetSparseReactionInput(args[-1], kegg)
    else:
        reaction = kegg.rid2reaction(options.rid)

    estimator = estimators[options.thermodynamics_source]
    pH, I, pMg, T = options.pH, options.I, options.pMg, options.T
    estimator.SetConditions(pH=pH, I=I, pMg=pMg, T=T)

    print "Thermodynamic source:", options.thermodynamics_source
    print('Parameters: pH=%.1f, pMg=%.1f, I=%.2fM, T=%.1fK' %
          (options.pH, options.pMg, options.I, options.T))
    print str(reaction)
    print 'dG\'0 = %.2f [kJ/mol]' % reaction.PredictReactionEnergy(estimator)
Esempio n. 38
0
def main():
    """Analyze every KEGG module and write an HTML report.

    The conditions, concentration range and bounds are read from the
    configuration file (entry "CONFIGURATION", or the first entry when
    that one is missing).  The results of AnalyzeKeggModule() are gathered
    in a table sorted by the 'OBD [kJ/mol]' column.
    """
    estimators = LoadAllEstimators()
    args = MakeArgParser(estimators).parse_args()

    thermo = estimators[args.thermodynamics_source]

    kegg_file = ParsedKeggFile.FromKeggFile(args.config_fname)
    entries = kegg_file.entries()
    if len(entries) == 0:
        raise ValueError('No entries in configuration file')

    entry = 'CONFIGURATION'
    if entry not in entries:
        entry = entries[0]
        logging.warning(
            'Configuration file does not contain the entry "CONFIGURATION". '
            'Using the first entry by default: %s' % entry)

    p_data = PathwayData.FromFieldMap(kegg_file[entry])
    thermo.SetConditions(pH=p_data.pH, I=p_data.I, T=p_data.T, pMg=p_data.pMg)
    thermo.c_range = p_data.c_range
    bounds = p_data.GetBounds()

    html_writer = HtmlWriter(args.output_prefix + ".html")

    headers = ['Module', 'Name', 'OBD [kJ/mol]', 'Length']
    rowdicts = []
    kegg = Kegg.getInstance()
    for mid in kegg.get_all_mids():
        module_name = kegg.get_module_name(mid)
        html_writer.write('<h2 id=M%05d>M%05d: %s</h2>' %
                          (mid, mid, module_name))
        try:
            row = AnalyzeKeggModule(thermo, mid, bounds, html_writer)
        except KeyError:
            # modules that raise a KeyError are left out of the table
            continue
        row['Module'] = '<a href="#M%05d">M%05d</a>' % (mid, mid)
        row['Name'] = module_name
        rowdicts.append(row)

    rowdicts.sort(key=lambda row: row['OBD [kJ/mol]'])
    html_writer.write_table(rowdicts, headers, decimal=1)
    html_writer.close()
Esempio n. 39
0
def AnalyzeKeggModule(thermo, mid, bounds, html_writer):
    """Run FindOBD() on a single KEGG module.

    Args:
        thermo: the thermodynamic estimator. Its 'bounds' attribute is
                overwritten with the bounds of the module's compounds.
        mid: the ID of the KEGG module.
        bounds: an object providing GetOldStyleBounds(cids).
        html_writer: receives the result tables of the pathway.

    Returns:
        A dict with the keys 'OBD [kJ/mol]' and 'Length'.  The former stays
        "N/A" when a reaction of the module cannot be balanced or some of
        the reaction energies are NaN.
    """
    result = {'OBD [kJ/mol]': "N/A"}

    kegg = Kegg.getInstance()
    S, rids, fluxes, cids = kegg.get_module(mid)

    thermo.bounds = bounds.GetOldStyleBounds(cids)
    result['Length'] = len(rids)

    # the S matrix already has the coefficients in the correct direction
    fluxes = [abs(flux) for flux in fluxes]

    # give up on the whole module if any of its reactions is unbalanced
    for rid in rids:
        reaction = kegg.rid2reaction(rid)
        try:
            reaction.Balance(balance_water=True, exception_if_unknown=True)
        except KeggReactionNotBalancedException:
            logging.warning(
                'R%05d is not a balanced reaction, skipping module' % rid)
            return result

    dG0_r_prime = thermo.GetTransfromedReactionEnergies(S, cids)
    if np.isnan(dG0_r_prime).any():
        logging.warning("Cannot analyze module M%05d because some of the "
                        "Gibbs energies cannot be calculated." % mid)
        return result

    keggpath = KeggPathway(S, rids, fluxes, cids,
                           reaction_energies=dG0_r_prime,
                           cid2bounds=thermo.bounds,
                           c_range=thermo.c_range)
    obd, params = keggpath.FindOBD()
    keggpath.WriteResultsToHtmlTables(html_writer,
                                      params['concentrations'],
                                      params['reaction prices'],
                                      params['compound prices'])
    result['OBD [kJ/mol]'] = obd
    return result
Esempio n. 40
0
def CalculateThermo():
    """Write the transformed energies of all reactions in a KEGG file as CSV.

    The reactions of every non-skipped entry of the input file are
    collected, their energies are calculated with the selected estimator
    and one CSV row per reaction is written to the 'csv_output_filename'
    option, or to stdout when that option is not given.

    Exits with status -1 (after printing the usage) when no input file is
    given.
    """
    estimators = LoadAllEstimators()
    parser = MakeOpts(estimators)
    options, _ = parser.parse_args(sys.argv)
    if options.input_filename is None:
        sys.stderr.write(parser.get_usage())
        sys.exit(-1)

    estimator = estimators[options.thermodynamics_source]

    pH, I, pMg, T = options.pH, options.I, options.pMg, options.T
    kegg = Kegg.getInstance()

    # remember whether the stream is ours to close; sys.stdout is not
    close_output = options.csv_output_filename is not None
    if close_output:
        out_fp = open(options.csv_output_filename, 'w')
        print("writing results to %s ... " % options.csv_output_filename)
    else:
        out_fp = sys.stdout

    try:
        entry2fields_map = ParsedKeggFile.FromKeggFile(options.input_filename)
        all_reactions = []
        for key in sorted(entry2fields_map.keys()):
            field_map = entry2fields_map[key]
            p_data = PathwayData.FromFieldMap(field_map)
            if p_data.skip:
                continue

            cid_mapping = p_data.cid_mapping
            field_map = p_data.field_map
            _, _, _, reactions = kegg.parse_explicit_module_to_reactions(
                field_map, cid_mapping)
            all_reactions += reactions
        S, cids = kegg.reaction_list_to_S(all_reactions)
        dG0_r = estimator.GetTransfromedReactionEnergies(S, cids)

        csv_writer = csv.writer(out_fp)
        csv_writer.writerow(['reaction', 'dG0\'', 'pH', 'I', 'pMg', 'T'])

        for r, reaction in enumerate(all_reactions):
            csv_writer.writerow(
                [reaction.FullReactionString(), dG0_r[r, 0], pH, I, pMg, T])
    finally:
        # flush and release the output file, also when an error occurred
        if close_output:
            out_fp.close()
Esempio n. 41
0
def ExportCSVFiles():
    """Export the KEGG compounds and reactions as CSV files.

    The estimator named by the 'thermo_estimator' option is set to the
    pH, I, pMg and T given on the command line and handed, together with
    all KEGG compounds (respectively reactions), to WriteCompoundCSV()
    and WriteReactionCSV().
    """
    estimators = LoadAllEstimators()
    options, _ = MakeOpts(estimators).parse_args(sys.argv)

    print("Using the thermodynamic estimations of: " +
          options.thermo_estimator)
    thermo = estimators[options.thermo_estimator]
    thermo.pH = float(options.pH)
    thermo.I = float(options.I)
    thermo.pMg = float(options.pMg)
    thermo.T = float(options.T)

    # Make sure we have all the data.
    kegg = Kegg.getInstance()

    # the progress messages name the format that is actually written
    print('Exporting KEGG compounds as CSV.')
    WriteCompoundCSV(kegg.AllCompounds(), thermo,
                     options.compounds_out_filename)

    print('Exporting KEGG reactions as CSV.')
    WriteReactionCSV(kegg.AllReactions(), thermo,
                     options.reactions_out_filename)
Esempio n. 42
0
def dissociation_decomposition_test():
    """
        Verifies that the decomposition of the compounds in the dissociation
        table match the nH of each species. Compounds without an atom bag
        are only reported in the debug log.
    """
    db = SqliteDatabase('../res/gibbs.sqlite')
    dissociation = DissociationConstants.FromPublicDB()
    group_decomposer = GroupDecomposer(GroupsData.FromDatabase(db))
    kegg = Kegg.getInstance()

    for cid in dissociation.GetAllCids():
        label = "C%05d (%s)" % (cid, kegg.cid2name(cid))
        if kegg.cid2compound(cid).get_atom_bag() is None:
            logging.debug('%s: has no explicit formula' % label)
            continue

        diss = dissociation.GetDissociationTable(cid,
                                                 create_if_missing=False)
        test_dissociation_table(diss, group_decomposer, label,
                                ignore_missing_smiles=True)
Esempio n. 43
0
 def ToDatabase(self, db, table_name, error_table_name=None):
     """Store the pseudoisomers of all compounds in a database table.

     Args:
         db: the database to write to.
         table_name: the table receiving one row per pseudoisomer.
         error_table_name: optional table receiving one row for each
             compound that raises MissingCompoundFormationEnergy. When
             omitted, these errors are logged as warnings instead.
     """
     kegg = Kegg.getInstance()
     columns = ("cid INT, nH INT, z INT, nMg INT, "
                "dG0 REAL, compound_ref TEXT, pseudoisomer_ref TEXT, "
                "anchor BOOL")
     db.CreateTable(table_name, columns)

     store_errors = error_table_name is not None
     if store_errors:
         db.CreateTable(error_table_name, 'cid INT, name TEXT, error TEXT')

     for cid in self.get_all_cids():
         compound_ref = self.cid2SourceString(cid)
         try:
             pmap = self.cid2PseudoisomerMap(cid)
             for nH, z, nMg, dG0 in pmap.ToMatrix():
                 row = [cid, nH, z, nMg, dG0, compound_ref,
                        pmap.GetRef(nH, z, nMg), cid in self.anchors]
                 db.Insert(table_name, row)
         except MissingCompoundFormationEnergy as e:
             if store_errors:
                 db.Insert(error_table_name,
                           [cid, kegg.cid2name(cid), str(e)])
             else:
                 logging.warning(str(e))
     db.Commit()
def main():
    """Transform the training formation energies using dissociation tables.

    Each compound in '../data/thermodynamics/formation_energies.csv' must
    have exactly one pseudoisomer.  Its formation energy and charge are
    set in the compound's dissociation table, which is then transformed
    to pH 7, I = 0.25, pMg = 14 and T = 298.15.  The results are written
    to '../res/formation_energies_transformed.csv'.

    Raises:
        Exception: if a compound has more than one pseudoisomer or has no
                   dissociation table.
    """
    pH, I, pMg, T = 7.0, 0.25, 14.0, 298.15

    dissociation = DissociationConstants.FromPublicDB()
    kegg = Kegg.getInstance()
    obs_fname = "../data/thermodynamics/formation_energies.csv"
    res_fname = "../res/formation_energies_transformed.csv"

    train_species = PsuedoisomerTableThermodynamics.FromCsvFile(
        obs_fname, label='testing')

    # the context manager flushes and closes the output, also on errors
    with open(res_fname, 'w') as res_file:
        csv_out = csv.writer(res_file)
        csv_out.writerow([
            'cid', 'name', "dG'0", 'pH', 'I', 'pMg', 'T', 'anchor',
            'compound_ref', 'remark'
        ])
        for cid in train_species.get_all_cids():
            pmap = train_species.cid2PseudoisomerMap(cid)
            source = train_species.cid2source_string[cid]
            # ToMatrix returns tuples of (nH, z, nMg, dG0)
            pmatrix = pmap.ToMatrix()
            if len(pmatrix) != 1:
                raise Exception("multiple training species for C%05d" % cid)
            nH, charge, nMg, dG0 = pmatrix[0]
            name = "%s (%d)" % (kegg.cid2name(cid), nH)
            logging.info('Adding the formation energy of %s', name)
            diss_table = dissociation.GetDissociationTable(
                cid, create_if_missing=True)
            if diss_table is None:
                raise Exception("%s [C%05d, nH=%d, nMg=%d] does not have a "
                                "dissociation table" % (name, cid, nH, nMg))

            diss_table.SetFormationEnergyByNumHydrogens(dG0, nH, nMg)
            diss_table.SetCharge(nH, charge, nMg)
            dG0_prime = diss_table.Transform(pH, I, pMg, T)
            csv_out.writerow([
                cid,
                kegg.cid2name(cid),
                "%.1f" % dG0_prime, pH, I, pMg, T, True, source, None
            ])
Esempio n. 45
0
def example_formate(thermo, product_cid=22, co2_conc=1e-5):
    """Search for pathways that produce a compound from formate.

    Args:
        thermo: the thermodynamic estimator. Its bounds for C00011 and
                C00288 are overwritten (see below).
        product_cid: the KEGG compound ID of the desired product.
        co2_conc: the fixed concentration of C00011.
    """
    # fix the concentration of C00288 relative to the one of C00011, using
    # the energy of the hydration reaction
    co2_hydration = Reaction.FromFormula("C00011 + C00001 => C00288")
    co2_hydration_dG0_prime = float(
        thermo.GetTransfromedKeggReactionEnergies([co2_hydration]))
    carbonate_conc = co2_conc * np.exp(
        -co2_hydration_dG0_prime / (R*default_T))
    thermo.bounds[11] = (co2_conc, co2_conc)
    thermo.bounds[288] = (carbonate_conc, carbonate_conc)

    pl = Pathologic(db=SqliteDatabase('../res/gibbs.sqlite', 'r'),
                    public_db=SqliteDatabase('../data/public_data.sqlite'),
                    html_writer=HtmlWriter('../res/pathologic.html'),
                    thermo=thermo,
                    max_solutions=None,
                    max_reactions=20,
                    maximal_dG=0.0,
                    thermodynamic_method=OptimizationMethods.GLOBAL,
                    update_file=None)
    add_cofactor_reactions(pl, free_ATP_hydrolysis=True)
    add_redox_reactions(pl, NAD_only=False)

    excluded_rids = [
        134,   # formate:NADP+ oxidoreductase
        519,   # Formate:NAD+ oxidoreductase
        24,    # Rubisco
        581,   # L-serine:NAD+ oxidoreductase (deaminating)
        220,   # L-serine ammonia-lyase
        13,    # glyoxylate carboxy-lyase (dimerizing; tartronate-semialdehyde-forming)
        585,   # L-Serine:pyruvate aminotransferase
        1440,  # D-Xylulose-5-phosphate:formaldehyde glycolaldehydetransferase
        5338,  # 3-hexulose-6-phosphate synthase
    ]
    for rid in excluded_rids:
        pl.delete_reaction(rid)

    uptake_reactions = [
        ("C06265 => C00011", "CO2 uptake"),
        ("C06265 => C00288", "carbonate uptake"),
        ("C06265 => C00058", "formate uptake"),
    ]
    for formula, name in uptake_reactions:
        pl.add_reaction(Reaction.FromFormula(formula, name=name))

    # at least one formate to product
    r = Reaction.FromFormula("5 C06265 + C00058 => C%05d" % product_cid)
    #r.Balance()

    kegg = Kegg.getInstance()
    pl.find_path("formate to %s" % kegg.cid2name(product_cid), r)
Esempio n. 46
0
    def __init__(self, use_pKa=True):
        """Load the formation energies listed in HATZI_CSV_FNAME.

        Args:
            use_pKa: when true, the dissociation constants are loaded
                     from the public database; otherwise
                     self.dissociation is None.
        """
        if use_pKa:
            Thermodynamics.__init__(self, "Jankowski et al. (+pKa)")
            self.dissociation = DissociationConstants.FromPublicDB()
        else:
            Thermodynamics.__init__(self, "Jankowski et al.")
            self.dissociation = None
        self.db = SqliteDatabase('../res/gibbs.sqlite', 'w')
        self.cid2pmap_dict = {}

        # the conditions in which Hatzimanikatis makes his predictions
        self.Hatzi_pH = 7.0
        self.Hatzi_I = 0.0
        self.Hatzi_pMg = 14.0
        self.Hatzi_T = 298.15

        self.kegg = Kegg.getInstance()

        # for some reason, Hatzimanikatis doesn't indicate that H+ is zero,
        # so we add it here
        H_pmap = PseudoisomerMap()
        H_pmap.Add(0, 0, 0, 0)
        self.SetPseudoisomerMap(80, H_pmap)

        self.cid2dG0_tag_dict = {80: 0}
        self.cid2charge_dict = {80: 0}

        # the context manager closes the file even if a row fails to parse
        with open(HATZI_CSV_FNAME, 'r') as csv_file:
            for row in csv.DictReader(csv_file):
                # drop the first character of the entry to get the number
                cid = int(row['ENTRY'][1:])
                self.cid2source_string[cid] = 'Jankowski et al. 2008'
                if row['DELTAG'] == "Not calculated":
                    continue
                if cid == 3178:
                    # this compound, which is supposed to be
                    # "Tetrahydroxypteridine" seems to be mapped to
                    # something else by Hatzimanikatis
                    continue
                self.cid2dG0_tag_dict[cid] = float(row['DELTAG']) * J_per_cal
                self.cid2charge_dict[cid] = int(row['CHARGE'])
Esempio n. 47
0
 def WriteDataToHtml(self, html_writer):
     """Write a table with one row per pseudoisomer of every compound.

     Args:
         html_writer: the writer whose write_table() receives the rows.
     """
     kegg = Kegg.getInstance()
     headers = ['KEGG ID', 'name', 'nH', 'z', 'nMg', symbol_df_G0,
                'reference', 'anchor']

     rowdicts = []
     for cid in self.get_all_cids():
         pdata = self.cid2PseudoisomerMap(cid)
         for nH, z, nMg, dG0 in pdata.ToMatrix():
             rowdicts.append({
                 'KEGG ID': 'C%05d' % cid,
                 'name': kegg.cid2name(cid),
                 'nH': nH,
                 'z': z,
                 'nMg': nMg,
                 symbol_df_G0: dG0,
                 'reference': pdata.GetRef(nH, z, nMg),
                 'anchor': 'yes' if cid in self.anchors else 'no',
             })

     html_writer.write_table(rowdicts, headers, decimal=1)
Esempio n. 48
0
def nist_dissociation_test():
    """
        Verifies that all the compounds in NIST are covered by the
        dissociation table, including SMILES strings. Compounds without an
        atom bag are only reported in the debug log.
    """
    db = SqliteDatabase('../res/gibbs.sqlite')
    nist_regression = NistRegression(db, html_writer=NullHtmlWriter())
    dissociation = nist_regression.dissociation
    group_decomposer = GroupDecomposer(GroupsData.FromDatabase(db))
    kegg = Kegg.getInstance()

    for cid in nist_regression.nist.GetAllCids():
        label = "C%05d (%s)" % (cid, kegg.cid2name(cid))
        if kegg.cid2compound(cid).get_atom_bag() is None:
            logging.debug('%s: has no explicit formula' % label)
            continue

        diss = dissociation.GetDissociationTable(cid,
                                                 create_if_missing=False)
        test_dissociation_table(diss, group_decomposer, label,
                                ignore_missing_smiles=False)
Esempio n. 49
0
def ExportJSONFiles():
    """Export the nullspace vectors and the KEGG data as JSON files.

    Two thermodynamic sources are registered with Kegg, in order of
    priority: the estimator named by the 'thermodynamics_source' option
    and the table loaded from the 'thermodynamics_csv' option.
    """
    estimators = LoadAllEstimators()
    options, _ = MakeOpts(estimators).parse_args(sys.argv)

    thermo_list = [
        estimators[options.thermodynamics_source],
        PsuedoisomerTableThermodynamics.FromCsvFile(
            options.thermodynamics_csv),
    ]

    # Make sure we have all the data.
    kegg = Kegg.getInstance()
    for priority, thermo in enumerate(thermo_list, 1):
        print("Priority %d - formation energies of: %s" %
              (priority, thermo.name))
        kegg.AddThermodynamicData(thermo, priority=priority)

    db = SqliteDatabase('../res/gibbs.sqlite')

    print('Exporting Group Contribution Nullspace matrix as JSON.')
    nullspace_vectors = []
    for row in db.DictReader('ugc_conservations'):
        sparse = json.loads(row['json'])
        # each term is a [coefficient, "Cxxxxx"] pair
        terms = [[coeff, "C%05d" % int(cid)]
                 for cid, coeff in sparse.iteritems()]
        nullspace_vectors.append({'msg': row['msg'], 'reaction': terms})
    WriteJSONFile(nullspace_vectors, options.nullspace_out_filename)

    print('Exporting KEGG compounds as JSON.')
    WriteJSONFile(kegg.AllCompounds(), options.compounds_out_filename)

    print('Exporting KEGG reactions as JSON.')
    WriteJSONFile(kegg.AllReactions(), options.reactions_out_filename)

    print('Exporting KEGG enzymes as JSON.')
    WriteJSONFile(kegg.AllEnzymes(), options.enzymes_out_filename)
Esempio n. 50
0
def main():
    """Interactively print the energy and reversibility of KEGG reactions.

    All estimators are set to the conditions given on the command line.
    The function then keeps asking for a reaction ID and prints, for each
    estimator, the predicted reaction energy and the value returned by
    reversibility.CalculateReversability().  The loop never terminates on
    its own.
    """
    options, _ = flags.MakeOpts().parse_args(sys.argv)
    kegg = Kegg.getInstance()
    estimators = LoadAllEstimators()

    print ('Parameters: T=%f K, pH=%.2g, pMg=%.2g, '
           'I=%.2gmM, Median concentration=%.2gM' %
           (default_T, options.ph, options.pmg, options.i_s, options.c_mid))

    for thermo in estimators.values():
        thermo.c_mid = options.c_mid
        thermo.pH = options.ph
        thermo.pMg = options.pmg
        thermo.I = options.i_s
        thermo.T = default_T

    if options.ignore_cofactors:
        print('Not fixing concentrations of co-factors')
        cmap = {}
    else:
        print('Fixing concentrations of co-factors')
        cmap = reversibility.GetConcentrationMap()

    while True:
        rid = GetReactionIdInput()
        reaction = kegg.rid2reaction(rid)
        print('Reaction Name: %s' % reaction.name)
        print('\tKegg ID: R%05d' % rid)
        print('\tEC: %s' % str(reaction.ec_list))
        for key, thermo in estimators.iteritems():
            print("\t<< %s >>" % key)
            try:
                dG0_prime = reaction.PredictReactionEnergy(thermo)
                print("\t\tdG0'f = %.1f kJ/mol" % dG0_prime)
                rev = reversibility.CalculateReversability(
                    reaction, thermo, concentration_map=cmap)
                print('\t\tgamma = %.3g' % rev)
            except Exception as e:
                # a failing estimator must not stop the interactive session
                print('\tError: %s' % (str(e)))
Esempio n. 51
0
 def __init__(self):
     """Start with an empty mapping from compound IDs to tables."""
     self.kegg = Kegg.getInstance()
     self.cid2DissociationTable = dict()
Esempio n. 52
0
def main():
    options, _ = flags.MakeOpts().parse_args(sys.argv)
    c_mid = options.c_mid
    pH = options.ph
    pMg = options.pmg
    I = options.i_s
    T = default_T

    db = SqliteDatabase('../res/gibbs.sqlite')
    kegg = Kegg.getInstance()
    G = GroupContribution(db)
    G.init()

    print(
        'Parameters: T=%f K, pH=%.2g, pMg=%.2g, '
        'I=%.2gM, Median concentration=%.2gM' % (T, pH, pMg, I, c_mid))

    cmap = {}
    if not options.ignore_cofactors:
        if options.full_metabolites:
            print 'Fixing concentrations of all known metabolites'
            cmap = reversibility.GetFullConcentrationMap(G)
        else:
            print 'Fixing concentrations of co-factors'
            cmap = reversibility.GetConcentrationMap(kegg)
    else:
        print 'Not fixing concentrations of co-factors'

    if options.report_mode:
        print 'Output used metabolites concentrations'

    while True:
        mid = GetModuleIdInput()

        rid_flux_list = kegg.mid2rid_map[mid]

        for rid, flux in rid_flux_list:
            try:
                reaction = kegg.rid2reaction(rid)
                print 'Reaction Name', reaction.name
                print '\tKegg Id', reaction.rid
                print '\tEC', reaction.ec_list
                rev = reversibility.CalculateReversability(
                    reaction.sparse,
                    G,
                    pH=pH,
                    I=I,
                    pMg=pMg,
                    T=T,
                    concentration_map=cmap)
                if rev == None:
                    dG = G.estimate_dG_reaction(reaction.sparse,
                                                pH=pH,
                                                pMg=pMg,
                                                I=I,
                                                T=T,
                                                c0=c_mid,
                                                media='glucose')
                    print '\tReversibility: No free compounds, dG = %.2g' % dG
                else:
                    corrected_reversibility = flux * rev
                    print '\tReversibility %.2g' % corrected_reversibility

                if options.report_mode:
                    for cid, s in reaction.sparse.iteritems():
                        if cid in cmap:
                            print '(%d C%05d) %s\t: %.2g' % (
                                s, cid, kegg.cid2name(cid), cmap[cid])
                        else:
                            print '(%d C%05d) %s\t: Free concentration' % (
                                s, cid, kegg.cid2name(cid))
            except Exception:
                print '\tCouldn\'t calculate irreversibility'
Esempio n. 53
0
def main():
    kegg = Kegg.getInstance()
    options, args = MakeOpts().parse_args(sys.argv)
    print ('Parameters: T=%f K, pMg=%.2g, I=%.2gM' % 
           (options.T, options.pMg, options.I))
    print "reaction:", args[-1]

    estimators = LoadAllEstimators()
    
    plt.rcParams['legend.fontsize'] = 8
    plt.rcParams['font.family'] = 'sans-serif'
    plt.rcParams['font.size'] = 12
    plt.rcParams['lines.linewidth'] = 2
    
    colormap = {}
    colormap['markers'] = (64.0/255, 111.0/255, 29.0/255, 3.0)
    #colormap['hatzi_gc'] = (54.0/255, 182.0/255, 202.0/255, 1.0)
    colormap['UGC'] = (202.0/255, 101.0/255, 54.0/255, 1.0)
    #colormap['alberty'] = (202.0/255, 54.0/255, 101.0/255, 1.0)
    colormap['PGC'] = (101.0/255, 202.0/255, 54.0/255, 1.0)
    
    fig = plt.figure(figsize=(6,6), dpi=90)
    
    fig.hold(True)
    if options.rid is None:
        reaction = GetSparseReactionInput(args[-1], kegg)
    else:
        reaction = kegg.rid2reaction(options.rid)
    reaction.Balance()
    print 'Reaction: %s' % reaction.FullReactionString()

    nist = Nist()
    nist_rows = nist.SelectRowsFromNist(reaction, check_reverse=True)

    pH_min = 7.0 - options.pH/2.0
    pH_max = 7.0 + options.pH/2.0
    
    if nist_rows:
        dG0_list = []
        pH_list = []
        for row_data in nist_rows:
            pH_list.append(row_data.pH)
            if row_data.reaction == reaction:
                dG0_list.append(row_data.dG0_r)
            else:
                dG0_list.append(-row_data.dG0_r)
    
        plt.plot(pH_list, dG0_list, marker='.', linestyle='none',
                   label='measured data', markeredgecolor='none',
                   markerfacecolor=colormap['markers'], markersize=5)
        pH_max = max(pH_list + [pH_max])
        pH_min = min(pH_list + [pH_min])
    
    pH_range = np.arange(pH_min-0.1, pH_max+0.1, 0.02)
    for key, thermo in estimators.iteritems():
        if key not in colormap:
            continue
        print key, 'dG0 at pH=7: %.2f' % reaction.PredictReactionEnergy(thermo, 
                pH=7.0, pMg=options.pMg, I=options.I, T=options.T)
        dG0 = []
        for pH in pH_range:
            dG0.append(reaction.PredictReactionEnergy(thermo, 
                pH=pH, pMg=options.pMg, I=options.I, T=options.T))
        plt.plot(pH_range, dG0, marker='None', linestyle='solid', color=colormap[key],
                   figure=fig, label=thermo.name)

    plt.xlabel('pH')
    plt.ylabel(r'$\Delta_r G^\circ$ [kJ/mol]')
    plt.title(kegg.reaction2string(reaction), fontsize=8)
    plt.legend(loc='lower left')

    if not options.output:
        plt.tight_layout()
        plt.show()
    else:
        fig.savefig(options.output, format='svg')
Esempio n. 54
0
def main():
    """Start a DissociationThreads thread for every compound to calculate.

    The candidates are either the compounds of NIST, the redox carriers
    and the formation energy table (option 'nist'), or all KEGG
    compounds.  Compounds already present in the database table are left
    out.  The remaining ones are processed in order of increasing exact
    mass.
    """
    options, _ = MakeOpts().parse_args(sys.argv)
    db = SqliteDatabase(options.db_file)
    kegg = Kegg.getInstance()

    if options.override_table:
        db.Execute("DROP TABLE IF EXISTS " + options.table_name)

    DissociationConstants._CreateDatabase(
        db, options.table_name, drop_if_exists=options.override_table)

    cids_to_calculate = set()
    if not options.nist:
        cids_to_calculate.update(kegg.get_all_cids())
    else:
        cids_to_calculate.update(Nist().GetAllCids())
        cids_to_calculate.update(RedoxCarriers().GetAllCids())

        ptable = PsuedoisomerTableThermodynamics.FromCsvFile(
            "../data/thermodynamics/formation_energies.csv")
        cids_to_calculate.update(ptable.get_all_cids())

    # Do not recalculate pKas for CIDs that are already in the database
    query = "SELECT distinct(cid) FROM %s" % options.table_name
    for row in db.Execute(query):
        cids_to_calculate.discard(row[0])

    cid2smiles_and_mw = {}
    for cid in cids_to_calculate:
        # the compound CO is a special case where the conversion from InChI
        # to SMILES fails, so we add a specific override for it only
        if cid == 237:
            cid2smiles_and_mw[cid] = ("[C-]#[O+]", 28)
            continue

        try:
            mol = kegg.cid2compound(cid).GetMolecule()
            cid2smiles_and_mw[cid] = (mol.ToSmiles(), mol.GetExactMass())
        except KeggParseException:
            logging.debug("%s (C%05d) has no SMILES, skipping..." %
                          (kegg.cid2name(cid), cid))
        except OpenBabelError:
            logging.debug(
                "%s (C%05d) cannot be converted to SMILES, skipping..." %
                (kegg.cid2name(cid), cid))

    # order by exact mass, using the compound ID as the tie breaker
    cids_to_calculate = sorted(
        cid2smiles_and_mw,
        key=lambda cid: (cid2smiles_and_mw[cid][1], cid))

    db_lock = threading.Lock()
    semaphore = threading.Semaphore(options.n_threads)
    for cid in cids_to_calculate:
        smiles = cid2smiles_and_mw[cid][0]
        if not smiles:
            logging.info("The following compound is blacklisted: C%05d" % cid)
            continue

        thread = DissociationThreads(group=None,
                                     target=None,
                                     name=None,
                                     args=(cid, smiles, semaphore, db_lock,
                                           options),
                                     kwargs={})
        thread.start()
Esempio n. 55
0
def main():
    """Write the NIST validation report to ../res/nist/report.html.

    The report contains:
      * the number of NIST measurements, in total and inside the selected
        temperature range,
      * optional pairwise comparisons between estimators (the list of
        pairs is empty by default, so none are written),
      * for each estimator, the output of Nist.verify_results() together
        with the number of estimations and the RMSE it returns,
      * a summary table with the coverage of each estimator over the KEGG,
        FEIST and NIST reaction sets.
    """
    html_writer = HtmlWriter("../res/nist/report.html")
    estimators = LoadAllEstimators()
    nist = Nist()
    nist.T_range = (273.15 + 24, 273.15 + 40)
    #nist.override_I = 0.25
    #nist.override_pMg = 14.0
    #nist.override_T = 298.15

    html_writer.write('<p>\n')
    html_writer.write("Total number of reaction in NIST: %d</br>\n" %
                      len(nist.data))
    html_writer.write("Total number of reaction in range %.1fK < T < %.1fK: %d</br>\n" % \
                      (nist.T_range[0], nist.T_range[1], len(nist.SelectRowsFromNist())))
    html_writer.write('</p>\n')

    reactions = {}
    reactions['KEGG'] = []
    for reaction in Kegg.getInstance().AllReactions():
        try:
            reaction.Balance(balance_water=True, exception_if_unknown=True)
        except (KeggReactionNotBalancedException, KeggParseException,
                OpenBabelError):
            # reactions that cannot be balanced are deliberately left out
            # of the KEGG reaction set
            continue
        reactions['KEGG'].append(reaction)

    reactions['FEIST'] = Feist.FromFiles().reactions
    reactions['NIST'] = nist.GetUniqueReactionSet()

    # Pairs of estimator keys to compare against each other, e.g.
    # [('hatzi_gc', 'UGC'), ('PGC', 'PRC'), ('alberty', 'PRC')].
    # Left empty, so no two-way comparison is written by default.
    pairs = []
    for t1, t2 in pairs:
        logging.info('Writing the NIST report for %s vs. %s',
                     estimators[t1].name, estimators[t2].name)
        html_writer.write('<p><b>%s vs. %s</b> ' %
                          (estimators[t1].name, estimators[t2].name))
        html_writer.insert_toggle(start_here=True)
        two_way_comparison(html_writer=html_writer,
                           thermo1=estimators[t1],
                           thermo2=estimators[t2],
                           reaction_list=reactions['FEIST'],
                           name='%s_vs_%s' % (t1, t2))
        html_writer.div_end()
        html_writer.write('</p>')

    # disabled on purpose; flip the condition to compare over all of KEGG
    if False:
        estimators['alberty'].CompareOverKegg(
            html_writer,
            other=estimators['PRC'],
            fig_name='kegg_compare_alberty_vs_nist')

    # the first row of the summary table holds the size of each reaction set
    rowdicts = []
    rowdict = {'Method': 'Total'}
    for db_name, reaction_list in reactions.items():
        rowdict[db_name + ' coverage'] = len(reaction_list)
    rowdicts.append(rowdict)

    for name in ['UGC', 'PGC', 'PRC', 'alberty', 'merged', 'hatzi_gc']:
        thermo = estimators[name]
        logging.info('Writing the NIST report for %s', thermo.name)
        html_writer.write('<p><b>%s</b> ' % thermo.name)
        html_writer.insert_toggle(start_here=True)
        num_estimations, rmse = nist.verify_results(html_writer=html_writer,
                                                    thermodynamics=thermo,
                                                    name=name)
        html_writer.div_end()
        html_writer.write('N = %d, RMSE = %.1f</p>\n' %
                          (num_estimations, rmse))
        logging.info('N = %d, RMSE = %.1f', num_estimations, rmse)

        rowdict = {
            'Method': thermo.name,
            'RMSE (kJ/mol)': "%.1f (N=%d)" % (rmse, num_estimations)
        }
        for db_name, reaction_list in reactions.items():
            n_covered = thermo.CalculateCoverage(reaction_list)
            n_total = len(reaction_list)
            # an empty reaction set is reported as 0% covered instead of
            # aborting the whole report with a ZeroDivisionError
            if n_total:
                percent = n_covered * 100.0 / n_total
            else:
                percent = 0.0
            rowdict[db_name +
                    " coverage"] = "%.1f%% (%d)" % (percent, n_covered)
            logging.info("%s coverage = %.1f%%", db_name, percent)
        rowdicts.append(rowdict)

    headers = ['Method', 'RMSE (kJ/mol)'] + \
        [db_name + ' coverage' for db_name in reactions]
    html_writer.write_table(rowdicts, headers=headers)
Esempio n. 56
0
def thermodynamic_pathway_analysis(S, rids, fluxes, cids, thermodynamics,
                                   html_writer):
    """Analyze the thermodynamics of a pathway and write an HTML report.

    Runs the pCr and MTDF optimizations, plots the cumulative reaction
    energy profile and the optimized concentrations, and writes the
    formation and reaction energies as HTML tables.

    Args:
        S: the stoichiometric matrix. Its shape is unpacked as
           (number of reactions, number of compounds).
        rids: the KEGG reaction IDs (formatted with 'R%05d'), one per
              reaction.
        fluxes: the relative flux of each reaction. Only the absolute
                values are used.
        cids: the KEGG compound IDs (formatted with 'C%05d'), one per
              compound.
        thermodynamics: the source of the formation energies. This function
                        reads its GetTransformedFormationEnergies(),
                        bounds, c_mid and c_range members.
        html_writer: the writer that receives the figures and the tables.

    Returns:
        A dictionary mapping the name of each optimization ('pCr', 'MTDF')
        to its (dG_f, conc, score) result. Optimizations that were not
        reached because no feasible solution was found are missing.
    """
    Nr, Nc = S.shape

    # Only the magnitudes of the fluxes are used; S itself is not modified.
    # A real list is built because fluxes is indexed further down.
    fluxes = [abs(flux) for flux in fluxes]
    kegg = Kegg.getInstance()

    dG0_f = thermodynamics.GetTransformedFormationEnergies(cids)
    bounds = [thermodynamics.bounds.get(cid, (None, None)) for cid in cids]
    res = {}
    try:
        c_mid = thermodynamics.c_mid
        c_range = thermodynamics.c_range
        res['pCr'] = find_pCr(S, dG0_f, c_mid=c_mid, ratio=3.0, bounds=bounds)
        res['MTDF'] = find_mtdf(S, dG0_f, c_range=c_range, bounds=bounds)
    except LinProgNoSolutionException:
        html_writer.write(
            '<b>No feasible solution found, cannot calculate the Margin</b>')

    # plot the profile graph
    pylab.rcParams['text.usetex'] = False
    pylab.rcParams['legend.fontsize'] = 10
    pylab.rcParams['font.family'] = 'sans-serif'
    pylab.rcParams['font.size'] = 12
    pylab.rcParams['lines.linewidth'] = 2
    pylab.rcParams['lines.markersize'] = 5
    pylab.rcParams['figure.figsize'] = [8.0, 6.0]
    pylab.rcParams['figure.dpi'] = 100

    # plot the thermodynamic profile in standard conditions

    profile_fig = pylab.figure()
    # NOTE(review): Figure.hold() and pylab.find() appear to exist only in
    # old matplotlib releases - confirm the version this code targets.
    profile_fig.hold(True)

    pylab.title('Thermodynamic profile', figure=profile_fig)
    pylab.ylabel('cumulative dG [kJ/mol]', figure=profile_fig)
    pylab.xlabel('Reaction KEGG ID', figure=profile_fig)
    pylab.xticks(pylab.arange(1, Nr + 1),
                 ['R%05d' % rids[i] for i in range(Nr)],
                 fontproperties=FontProperties(size=8),
                 rotation=30)

    # standard reaction energies, using only the compounds that take part
    # in each reaction
    dG0_r = pylab.zeros((Nr, 1))
    for r in range(Nr):
        reactants = pylab.find(S[r, :])
        dG0_r[r, 0] = pylab.dot(S[r, reactants], dG0_f[reactants])

    nan_indices = pylab.find(pylab.isnan(dG0_r))
    finite_indices = pylab.find(pylab.isfinite(dG0_r))
    if (len(nan_indices) > 0):
        # reactions with an unknown energy contribute 0 to the cumulative
        # profile so that the rest of the curve can still be drawn
        dG0_r_finite = pylab.zeros((Nr, 1))
        dG0_r_finite[finite_indices] = dG0_r[finite_indices]
        cum_dG0_r = pylab.cumsum(
            [0] + [dG0_r_finite[r, 0] * fluxes[r] for r in range(Nr)])
    else:
        cum_dG0_r = pylab.cumsum([0] +
                                 [dG0_r[r, 0] * fluxes[r] for r in range(Nr)])
    pylab.plot(pylab.arange(0.5, Nr + 1),
               cum_dG0_r,
               figure=profile_fig,
               label='Standard [1M]')

    # plot the thermodynamic profile for the different optimization schemes

    pylab.grid(True, figure=profile_fig)
    for optimization in res:
        dG_f, conc, score = res[optimization]
        if score is None:
            continue

        dG_r = pylab.dot(S, dG_f)
        cum_dG_r = pylab.cumsum([0] +
                                [dG_r[i, 0] * fluxes[i] for i in range(Nr)])
        pylab.plot(pylab.arange(0.5, Nr + 1),
                   cum_dG_r,
                   figure=profile_fig,
                   label='%s = %.1f' % (optimization, score))

    pylab.legend()
    html_writer.embed_matplotlib_figure(profile_fig, width=480, height=360)

    # plot the optimal metabolite concentrations for the different optimization schemes
    ind_nan = pylab.find(pylab.isnan(dG0_f))
    for optimization in res:
        dG_f, conc, score = res[optimization]
        if score is None:
            continue

        # give all compounds with unknown dG0_f the middle concentration
        # value (this changes the array stored in res in place)
        conc[ind_nan] = thermodynamics.c_mid

        conc_fig = pylab.figure()
        conc_fig.suptitle('Concentrations (%s = %.1f)' % (optimization, score))
        pylab.xscale('log', figure=conc_fig)
        pylab.ylabel('Compound KEGG ID', figure=conc_fig)
        pylab.xlabel('Concentration [M]', figure=conc_fig)
        pylab.yticks(range(Nc, 0, -1), ["C%05d" % cid for cid in cids],
                     fontproperties=FontProperties(size=8))
        pylab.plot(conc, range(Nc, 0, -1), '*b', figure=conc_fig)

        x_min = conc.min() / 10
        x_max = conc.max() * 10
        y_min = 0
        y_max = Nc + 1

        for c in range(Nc):
            pylab.text(conc[c, 0] * 1.1, Nc - c, kegg.cid2name(cids[c]), \
                       figure=conc_fig, fontsize=6, rotation=0)
            # a missing bound is drawn up to the edge of the plotted range
            b_low, b_up = bounds[c]
            if b_low is None:
                b_low = x_min
            if b_up is None:
                b_up = x_max
            pylab.plot([b_low, b_up], [Nc - c, Nc - c], '-k', linewidth=0.4)

        if optimization.startswith('pCr'):
            c_range_opt = pC_to_range(score,
                                      c_mid=thermodynamics.c_mid,
                                      ratio=3.0)
            pylab.axvspan(c_range_opt[0],
                          c_range_opt[1],
                          facecolor='g',
                          alpha=0.3,
                          figure=conc_fig)
        else:
            pylab.axvspan(thermodynamics.c_range[0],
                          thermodynamics.c_range[1],
                          facecolor='r',
                          alpha=0.3,
                          figure=conc_fig)
        pylab.axis([x_min, x_max, y_min, y_max], figure=conc_fig)
        try:
            html_writer.embed_matplotlib_figure(conc_fig,
                                                width=420,
                                                height=360)
        except AttributeError:
            html_writer.write('<b>Failed to generate concentration figure</b>')

    # write all the results in tables as well

    for optimization in res:
        (dG_f, conc, score) = res[optimization]
        if score is None:
            # same guard as in the two plotting loops: without it, the
            # '%.1f' formatting of the score below raises a TypeError
            continue

        html_writer.write(
            '<p>Biochemical Compound Formation Energies (%s = %.1f)<br>\n' %
            (optimization, score))
        html_writer.write('<table border="1">\n')
        html_writer.write('  ' + '<td>%s</td>' * 5 %
                          ("KEGG CID", "Compound Name", "Concentration [M]",
                           "dG'0_f [kJ/mol]", "dG'_f [kJ/mol]") + '\n')
        for c in range(Nc):
            cid = cids[c]
            name = kegg.cid2name(cid)

            if (pylab.isnan(dG0_f[c, 0])):
                html_writer.write('<tr><td><a href="%s">C%05d</a></td><td>%s</td><td>%s</td><td>%s</td><td>%s</td></tr>\n' % \
                                  (kegg.cid2link(cid), cid, name, "N/A", "N/A", "N/A"))
            else:
                html_writer.write('<tr><td><a href="%s">C%05d</a></td><td>%s</td><td>%.2g</td><td>%.2f</td><td>%.2f</td></tr>\n' % \
                                  (kegg.cid2link(cid), cid, name, conc[c, 0], dG0_f[c, 0], dG_f[c, 0]))
        html_writer.write('</table></p>\n')

        html_writer.write(
            '<p>Biochemical Reaction Energies (%s = %.1f)<br>\n' %
            (optimization, score))
        html_writer.write('<table border="1">\n')
        html_writer.write('  ' + '<td>%s</td>' * 3 %
                          ("KEGG RID", "dG'0_r [kJ/mol]", "dG'_r [kJ/mol]") +
                          '\n')
        dG_r = pylab.dot(S, dG_f)
        for r in range(Nr):
            rid = rids[r]
            if (pylab.isnan(dG0_r[r, 0])):
                html_writer.write('<tr><td><a href="%s" title="%s">R%05d</a></td><td>%s</td><td>%.2f</td></tr>\n' % \
                                  (kegg.rid2link(rid), kegg.rid2name(rid), rid, "N/A", dG_r[r, 0]))
            else:
                html_writer.write('<tr><td><a href="%s" title="%s">R%05d</a></td><td>%.2f</td><td>%.2f</td></tr>\n' % \
                                  (kegg.rid2link(rid), kegg.rid2name(rid), rid, dG0_r[r, 0], dG_r[r, 0]))
        html_writer.write('</table></p>\n')

    return res
 def __init__(self, db, html_writer, thermodynamics, kegg=None):
     """Keep references to the objects this instance works with.

     Args:
         db: stored as self.db.
         html_writer: stored as self.html_writer.
         thermodynamics: stored as self.thermo.
         kegg: stored as self.kegg. When it is None (or any other falsy
               value) the instance returned by Kegg.getInstance() is
               used instead.
     """
     self.db, self.html_writer = db, html_writer
     self.thermo = thermodynamics
     if kegg:
         self.kegg = kegg
     else:
         self.kegg = Kegg.getInstance()
     # starts out empty
     self.pathways = dict()
 def row2hypertext(S_row, cids):
     """Convert one row of a stoichiometric matrix to hypertext.

     Args:
         S_row: the row of coefficients. Only its nonzero entries
                (as found by np.nonzero) are used.
         cids: the compound IDs, indexed by the same positions as S_row.

     Returns:
         The result of Kegg.sparse_to_hypertext() for the mapping of
         compound ID to nonzero coefficient, with show_cids=False.
     """
     kegg_instance = Kegg.getInstance()
     nonzero_positions = np.nonzero(S_row)[0]
     coefficients = {}
     for position in list(nonzero_positions.flat):
         coefficients[cids[position]] = S_row[position]
     return kegg_instance.sparse_to_hypertext(coefficients, show_cids=False)
Esempio n. 59
0
    #print m.ToFormat('mol2')
    #print m.ToFormat('smi')
    #print m.ToFormat('inchi')
    #print m.ToFormat('sdf')

    diss_table = Molecule._GetDissociationTable('C(=O)(O)CN',
                                                fmt='smiles',
                                                mid_pH=default_pH,
                                                min_pKa=0,
                                                max_pKa=14,
                                                T=default_T)
    print "glycine\n", diss_table

    html_writer = HtmlWriter('../res/molecule.html')
    from pygibbs.kegg import Kegg
    kegg = Kegg.getInstance()
    html_writer.write('<h1>pKa estimation using ChemAxon</h1>\n')
    for cid in [41]:
        m = kegg.cid2mol(cid)
        html_writer.write("<h2>C%05d : %s</h2>\n" % (cid, str(m)))
        diss_table = m.GetDissociationTable()
        pmap = diss_table.GetPseudoisomerMap()
        diss_table.WriteToHTML(html_writer)
        pmap.WriteToHTML(html_writer)
        html_writer.write("</p>\n")
        #print m.GetDissociationConstants()
        #print m.GetMacrospecies()

    #obmol = m.ToOBMol()
    #print 'atom bag = %s, charge = %d' % m.GetAtomBagAndCharge()
    #print 'no. e- =', m.GetNumElectrons()
Esempio n. 60
0
    def FromFileToDB(file_name, db, table_name):
        """
            Parses a CSV file that contains pKa and pKMg data for many
            compounds and inserts one row per CSV line into a database table.

            The table is (re)initialized with
            DissociationConstants._CreateDatabase() before the rows are
            inserted, and the database is committed once at the end.
            Lines without a CID are skipped. Nothing is returned.

            We support two CSV formats (legacy issues, sorry):
            1) cid, name, nH_below, nH_above, nMg_below, nMg_above, mol_below, mol_above, ddG, ref
            2) cid, name, type, T, nH_below, nH_above, nMg_below, nMg_above, mol_below, mol_above, pK, ref

            Args:
                file_name: the path of the CSV file.
                db: the database object (its Insert() and Commit() are used).
                table_name: the name of the table to write to.

            Raises:
                Exception: if the file has neither a 'ddG' nor a 'pK'
                    column, or if a line cannot be parsed (the message is
                    prefixed with the index of the line).
        """

        kegg = Kegg.getInstance()
        DissociationConstants._CreateDatabase(db, table_name)

        # the context manager closes the file even when a row fails to parse
        with open(file_name, 'r') as csv_file:
            for i, row in enumerate(csv.DictReader(csv_file)):
                if 'pK' not in row and 'ddG' not in row:
                    raise Exception(
                        "The CSV file is not in a recognized format: "
                        "there should be a column named ddG or pK")
                try:
                    if not row['cid']:
                        continue  # without a CID we cannot match this to the dG0 table
                    cid = int(row['cid'])
                    name = row['name'] or kegg.cid2name(cid)
                    logging.debug("Parsing row #%d, compound %s (C%05d)" %
                                  (i, name, cid))

                    nH_below = int(row['nH_below'])
                    nH_above = int(row['nH_above'])
                    nMg_below = int(row['nMg_below'])
                    nMg_above = int(row['nMg_above'])
                    mol_below = row['mol_below'] or None
                    mol_above = row['mol_above'] or None
                    ref = row['ref']

                    if 'ddG' in row:  # this is the 1st format type
                        ddG = float(row['ddG'])
                    elif 'pK' in row:  # this is the 2nd format type
                        pK = float(row['pK'] or 0)
                        T = float(row['T'] or default_T)
                        if row['type'] == 'acid-base':
                            # exactly one proton must be lost
                            if nMg_below != nMg_above or nH_below != nH_above + 1:
                                raise Exception('wrong nMg and nH values')
                            ddG = -R * T * np.log(10) * pK
                        elif row['type'] == 'Mg':
                            # exactly one Mg must be lost
                            if nMg_below != nMg_above + 1 or nH_below != nH_above:
                                raise Exception('wrong nMg and nH values')
                            ddG = -R * T * np.log(10) * pK + dG0_f_Mg
                        elif row['type'] == '':
                            # no dissociation: both states must be the same
                            if nMg_below != nMg_above or nH_below != nH_above:
                                raise Exception('wrong nMg and nH values')
                            ddG = None
                        else:
                            raise Exception('unknown dissociation type: ' +
                                            row['type'])

                except Exception as e:
                    raise Exception("Row %i: %s" % (i, str(e)))

                db.Insert(table_name, [
                    cid, name, nH_below, nH_above, nMg_below, nMg_above,
                    mol_below, mol_above, ddG, ref
                ])

        db.Commit()