Beispiel #1
0
def get_dgf_priors(mi: MaudInput) -> Tuple[pd.Series, pd.DataFrame]:
    """Derive a multivariate formation-energy prior from equilibrator.

    Every metabolite listed in ``mi.stan_coords.metabolites`` is looked up
    in equilibrator through the InChI key stored on the matching kinetic
    model metabolite. The prior means and the covariance matrix are both
    indexed by metabolite id and rounded to 10 decimals.

    :param mi: A MaudInput object
    :return: A ``(prior_mean_dgf, covariance)`` pair: a pandas Series and a
        pandas DataFrame.
    :raises ValueError: if a metabolite has no external id, or if
        equilibrator does not return a compound for that id.

    """
    cc = ComponentContribution()
    inchi_key_of = {
        met.id: met.inchi_key for met in mi.kinetic_model.metabolites
    }
    met_ix = pd.Index(mi.stan_coords.metabolites, name="metabolite")

    means, finite_rows, infinite_rows = [], [], []
    for met_id in met_ix:
        external_id = inchi_key_of[met_id]
        if external_id is None:
            raise ValueError(f"metabolite {met_id} has no external id.")
        compound = cc.get_compound(external_id)
        if not isinstance(compound, Compound):
            raise ValueError(
                f"cannot find compound for metabolite {met_id}"
                f" with external id {external_id}."
            )
        mean, finite_row, infinite_row = cc.standard_dg_formation(compound)
        means.append(mean)
        finite_rows.append(finite_row)
        infinite_rows.append(infinite_row)

    finite = np.array(finite_rows)
    infinite = np.array(infinite_rows)
    # The covariance is the finite part plus the "infinite" part, the latter
    # weighted by the fixed factor 1e6.
    finite_term = finite @ finite.T
    infinite_term = 1e6 * infinite @ infinite.T
    cov = finite_term + infinite_term

    prior_mean = pd.Series(means, index=met_ix, name="prior_mean_dgf")
    prior_cov = pd.DataFrame(cov, index=met_ix, columns=met_ix)
    return prior_mean.round(10), prior_cov.round(10)
Beispiel #2
0
def build_thermo_from_equilibrator(model, T=TEMPERATURE_0):
    """Assemble a pyTFA `thermo_data` dictionary for a cobra Model.

    The layout of the returned dictionary follows the pyTFA
    [documentation](https://pytfa.readthedocs.io/en/latest/thermoDB.html).
    The module-level compound cache ``ccache`` is created on first use and
    reused by later calls.

    :param model: cobra.Model
    :param T: temperature assigned to the eQuilibrator predictor; it is
        concatenated with the unit "K" before being parsed.
    :return thermo_data: dict
        to be passed as argument to initialize a `ThermoModel`.

    """
    global ccache
    if ccache is None:
        # Lazily populate the shared cache the first time it is needed.
        ccache = create_compound_cache_from_quilt()
        logger.debug("eQuilibrator compound cache is loaded.")

    cc = ComponentContribution()
    cc.temperature = Q_(f"{T}K")

    mapped_metabolites = compat.map_cobra_metabolites(ccache, model.metabolites)
    return {
        "name": "eQuilibrator",
        "units": "kJ/mol",
        "cues": {},
        "metabolites": [
            compound_to_entry(metabolite, cc) for metabolite in mapped_metabolites
        ],
    }
Beispiel #3
0
def search_equilibrator_compound(
    cc: ComponentContribution,
    id: str = None,
    inchikey: str = None,
    inchi: str = None,
    smiles: str = None,
    logger: Logger = getLogger(__name__)
) -> Dict[str, str]:
    """Look a compound up in eQuilibrator from whichever identifiers are given.

    The identifiers are tried in the order ``id``, ``inchikey``, ``inchi``,
    ``smiles`` with ``cc.get_compound``. On the first hit the caller's data
    is returned, with empty fields (``None`` or ``''``) filled in from the
    eQuilibrator compound and ``cc_key`` naming the field that matched.

    If nothing matches and an InChIKey was given, a last-resort search on
    the first block of the InChIKey is made with
    ``cc.search_compound_by_inchi_key``. In that case every field is taken
    from the first compound returned and ``cc_key`` is ``'inchi_key'``.

    :param cc: object providing ``get_compound`` and
        ``search_compound_by_inchi_key``
    :param id: compound identifier known to the caller
    :param inchikey: InChIKey of the compound
    :param inchi: InChI of the compound
    :param smiles: SMILES of the compound
    :param logger: accepted for interface compatibility; not used here
    :return: dict with keys ``id``, ``inchi_key``, ``inchi``, ``smiles`` and
        ``cc_key``, or an empty dict when the compound cannot be found
    """

    def copy_data(
        compound,
        data: Dict,
        overwrite: bool = False,
    ) -> Dict:
        """Return a copy of ``data`` completed with attributes of ``compound``.

        Only empty fields are filled unless ``overwrite`` is set. The helper
        deliberately reads nothing from the enclosing scope: relying on the
        search loop's variables made the fallback path write a stale value.
        """
        _compound = deepcopy(data)
        for k, v in _compound.items():
            if overwrite or v is None or v == '':
                _compound[k] = getattr(compound, k)
        return _compound

    data = {
        'id': id,
        'inchi_key': inchikey,
        'inchi': inchi,
        'smiles': smiles
    }
    for key, val in data.items():
        if not val:
            continue
        compound = cc.get_compound(val)
        if compound is None:
            continue
        # The matching field is non-empty, so copy_data leaves the caller's
        # value (the one eQuilibrator recognised) in place.
        _compound = copy_data(compound, data)
        _compound['cc_key'] = key
        return _compound

    if inchikey:
        # Last resort: search with the first block of the InChIKey only.
        compounds = cc.search_compound_by_inchi_key(inchikey.split('-')[0])
        # eQuilibrator returns a list of compounds; take the first one and
        # hope the list is sorted by decreasing relevance.
        if compounds:
            _compound = copy_data(compounds[0], data, overwrite=True)
            _compound['cc_key'] = 'inchi_key'
            return _compound

    return {}
Beispiel #4
0
    def test_reduction_potential(self):
        """Check E'0 and its uncertainty for oxaloacetate = malate."""
        # KEGG compound id -> stoichiometric coefficient (oxaloacetate = malate)
        stoichiometry = {'C00036': -1, 'C00149': 1}
        rxn = Reaction(stoichiometry)
        predictor = ComponentContribution(pH=7.0, ionic_strength=0.1)

        potential_mV, potential_uncertainty = predictor.E0_prime(rxn)

        # Both values are compared to one decimal place.
        self.assertAlmostEqual(potential_mV, -175.2, places=1)
        self.assertAlmostEqual(potential_uncertainty, 5.3, places=1)
Beispiel #5
0
    def __init__(self):
        """Create the eQuilibrator predictor and leave all other state unset."""
        # Mongo state: every handle starts out as None.
        self.mongo_uri = self.client = self._core = None

        # eQuilibrator state: only the predictor is created eagerly.
        self.CC = ComponentContribution()
        self.lc = self._water = None
Beispiel #6
0
def getFreeEnergyData(GEM,
                      work_directory='',
                      pH_i=None,
                      Ionic_strength=None,
                      dG0_uncertainty_threshold=None):
    """Collect eQuilibrator standard Gibbs energies for the reactions of a model.

    Reaction formulas are read from ``iJO1366_reactions.csv`` inside
    ``work_directory`` (index: lower-cased reaction id, column ``formula``).
    Reaction ids containing ``_forward`` / ``_backward`` are mapped to their
    base id; for backward reactions the sign of dG0 is flipped.

    :param GEM: model exposing ``reactions``, each with an ``id``
    :param work_directory: directory holding ``iJO1366_reactions.csv``; an
        empty string means the current working directory
    :param pH_i: pH, converted with ``Q_(str(pH_i))``
    :param Ionic_strength: ionic strength, converted with ``Q_(...)``
    :param dG0_uncertainty_threshold: keep an estimate only if its
        uncertainty is below ``abs(threshold * dG0)``; ``None`` disables the
        filter (previously ``None`` silently produced an empty result)
    :return: dict mapping reaction id to ``{'dG0': ..., 'error': ...}``.
        Reactions whose formula is missing, cannot be parsed or cannot be
        estimated are skipped and reported at debug log level.
    """
    import logging
    import os

    log = logging.getLogger(__name__)

    # Load equilibrator data
    eq_api = ComponentContribution(p_h=Q_(str(pH_i)),
                                   ionic_strength=Q_(Ionic_strength))

    # Get iJO1366/iML1515 reactions as KEGG reaction strings
    GEM2KEGG = pd.read_csv(
        os.path.join(work_directory, 'iJO1366_reactions.csv'), index_col=0)

    dG0_data = {}
    for model_rxn in GEM.reactions:
        rxn_id = model_rxn.id
        try:
            if 'forward' in rxn_id or 'backward' in rxn_id:
                base_id, direction = re.split(
                    '_(forward|backward)', rxn_id)[:2]
            else:
                # originally irreversible: the id is used as is
                base_id, direction = rxn_id, ''

            rxn = parse_reaction_formula(
                GEM2KEGG.loc[base_id.lower()]['formula'])

            # One estimate provides both the value and its uncertainty
            # (kJ/mol according to the original annotations).
            estimate = eq_api.standard_dg_prime(rxn)
            dG0_prime = estimate.value.magnitude
            dG0_uncertainty = estimate.error.magnitude
        except Exception as exc:
            # Best effort: a reaction that cannot be processed is skipped,
            # but no longer without any trace.
            log.debug('No dG0 data for reaction %s: %r', rxn_id, exc)
            continue

        if (dG0_uncertainty_threshold is not None
                and not dG0_uncertainty < abs(
                    dG0_uncertainty_threshold * dG0_prime)):
            # remove uncertain dG0 data
            continue

        dG0_data[rxn_id] = {
            'dG0': -dG0_prime if 'backward' in direction else dG0_prime,
            'error': dG0_uncertainty
        }

    return dG0_data
Beispiel #7
0
    def test_gibbs_energy(self):
        """Check dG'0 and its uncertainty for ATP + H2O = ADP + Pi."""
        # KEGG compound id -> stoichiometric coefficient (ATP + H2O = ADP + Pi)
        stoichiometry = {'C00002': -1, 'C00001': -1, 'C00008': 1, 'C00009': 1}
        rxn = Reaction(stoichiometry)
        predictor = ComponentContribution(pH=7.0, ionic_strength=0.1)

        energy, energy_uncertainty = predictor.dG0_prime(rxn)

        # Both values are compared to one decimal place.
        self.assertAlmostEqual(energy, -26.4, places=1)
        self.assertAlmostEqual(energy_uncertainty, 0.6, places=1)
def get_dgf_priors(mi: MaudInput) -> Tuple[pd.Series, pd.DataFrame]:
    """Derive a multivariate formation-energy prior from equilibrator.

    Each metabolite of the kinetic model is looked up in equilibrator by its
    InChI key, falling back to the metabolite id when no InChI key is set.
    The formation energy reported by equilibrator is shifted by the
    compound's ``transform`` evaluated at the predictor's pH, ionic
    strength, temperature and pMg. Results are reordered to follow
    ``mi.stan_coords.metabolites`` and rounded to 10 decimals.

    :param mi: A MaudInput object
    :return: A ``(prior_mean_dgf, covariance)`` pair: a pandas Series and a
        pandas DataFrame, both indexed by metabolite id.
    :raises ValueError: if equilibrator returns no compound for a metabolite.

    """
    cc = ComponentContribution()
    met_ix = pd.Index(mi.stan_coords.metabolites, name="metabolite")
    met_order = [met.id for met in mi.kinetic_model.metabolites]

    means, finite_rows, infinite_rows = [], [], []
    for met in mi.kinetic_model.metabolites:
        external_id = met.id if met.inchi_key is None else met.inchi_key
        compound = cc.get_compound(external_id)
        if not isinstance(compound, Compound):
            raise ValueError(
                f"cannot find compound for metabolite {met.id}"
                f" with external id {external_id}."
                "\nConsider setting the field metabolite_inchi_key"
                " if you haven't already."
            )
        mean, finite_row, infinite_row = cc.standard_dg_formation(compound)
        condition_shift = compound.transform(
            cc.p_h, cc.ionic_strength, cc.temperature, cc.p_mg
        ).m_as("kJ/mol")
        mean += condition_shift
        means.append(mean)
        finite_rows.append(finite_row)
        infinite_rows.append(infinite_row)

    finite = np.array(finite_rows)
    infinite = np.array(infinite_rows)
    # Finite part plus the "infinite" part weighted by the fixed factor 1e6.
    finite_term = finite @ finite.T
    infinite_term = 1e6 * infinite @ infinite.T
    cov_values = finite_term + infinite_term

    # Values were gathered in kinetic-model order; reindex to the Stan order.
    cov_frame = pd.DataFrame(cov_values, index=met_order, columns=met_order)
    cov = cov_frame.loc[met_ix, met_ix].round(10)
    mean_series = pd.Series(means, index=met_order, name="prior_mean_dgf")
    mu = mean_series.loc[met_ix].round(10)
    return mu, cov
def get_dGs(rxn_list: list,
            file_bigg_kegg_ids: str,
            pH: float = 7.0,
            ionic_strength: float = 0.1,
            digits: int = 2) -> dict:
    """
    Return the standard Gibbs energy and its uncertainty for each reaction.

    The reactions are converted to KEGG notation by ``convert_rxns_to_kegg`` using the bigg-to-kegg id mapping read
    from ``file_bigg_kegg_ids``, then evaluated with eQuilibrator. A message is printed for every unbalanced
    reaction; its energy is still computed and returned.

    Args:
        rxn_list: reactions to process, passed unchanged to ``convert_rxns_to_kegg`` (presumably plain text
            reactions such as 'R_FBA: m_g3p_c + m_dhap_c <-> m_fdp_c'; exchange reactions starting with 'R_EX_'
            are reportedly skipped, which is not visible in this function).
        file_bigg_kegg_ids: path to file with mapping between bigg and kegg ids.
        pH: pH value to use to calculate standard Gibbs energies.
        ionic_strength: ionic strength value (in M) to use to calculate standard Gibbs energies.
        digits: number of digits to round standard gibbs energies and respective uncertainty.

    Returns:
       Dictionary with reaction ids as keys and (standard Gibbs energy, uncertainty) as values.
    """

    bigg_to_kegg = pd.read_csv(file_bigg_kegg_ids, index_col=0)
    kegg_formulas = convert_rxns_to_kegg(rxn_list, bigg_to_kegg)

    results = {}
    predictor = ComponentContribution(p_h=Q_(pH),
                                      ionic_strength=Q_(ionic_strength, 'M'))

    for rxn_id, formula in kegg_formulas.items():
        parsed = parse_reaction_formula(formula)
        if not parsed.is_balanced():
            print(f'{rxn_id} is not balanced.')

        estimate = predictor.standard_dg_prime(parsed)
        energy = round(estimate.value.magnitude, digits)
        uncertainty = round(estimate.error.magnitude, digits)
        results[rxn_id] = (energy, uncertainty)

    return results
    def test_gibbs_energy(self):
        """Check dG'0, dG' at given concentrations and dG'm for ATP hydrolysis."""
        warnings.simplefilter('ignore', ResourceWarning)

        # ATP + H2O = ADP + Pi, as KEGG compound id -> coefficient
        stoichiometry = {'C00002': -1, 'C00001': -1, 'C00008': 1, 'C00009': 1}
        # KEGG compound id -> concentration used for the dG' estimate
        concentrations = {'C00002': 1e-3, 'C00009': 1e-4}
        rxn = Reaction(stoichiometry)
        predictor = ComponentContribution(pH=7.0, ionic_strength=0.1)

        standard_energy, standard_uncertainty = predictor.dG0_prime(rxn)
        self.assertAlmostEqual(standard_energy, -26.4, places=1)
        self.assertAlmostEqual(standard_uncertainty, 0.6, places=1)

        energy_at_conc, _ = predictor.dG_prime(rxn, concentrations)
        self.assertAlmostEqual(energy_at_conc, -32.1, places=1)

        energy_m, _ = predictor.dGm_prime(rxn)
        self.assertAlmostEqual(energy_m, -43.5, places=1)
Beispiel #11
0
def reaction_gibbs(equation, dfG_dict=None, pH=7.0, eq_api=None):
    """Calculate standard Gibbs reaction energy.

    Compartment tags are first removed from the compound ids in
    ``equation``. If ``dfG_dict`` is given (and non-empty), the reaction
    energy is the sum over products minus the sum over reactants of
    coefficient times formation energy, with the two sides taken from
    ``parse_equation``. Otherwise the eQuilibrator API is queried.

    :param equation: reaction equation string
    :param dfG_dict: mapping of compound id to formation energy; a ``None``
        value marks a compound without data
    :param pH: pH used when a new eQuilibrator object has to be created
    :param eq_api: eQuilibrator object to reuse; created with ionic
        strength 0.1 when not supplied
    :return: the reaction energy as float, or ``None`` if a formation energy
        is missing; the eQuilibrator result is rounded to one decimal
    :raises KeyError: if a compound of the equation is absent from
        ``dfG_dict``
    """
    # Remove compartment tags from compound IDs. Raw strings keep the
    # backslashes for the regex engine without invalid-escape warnings.
    equation = re.sub(r"_\[[a-z]{3}\]", "", equation)
    equation = re.sub(r"_[a-z]{1}", "", equation)
    if dfG_dict:
        s = parse_equation(equation)
        reactants, products = s[0], s[1]
        if any(dfG_dict[c[1]] is None for c in reactants + products):
            # Cannot calculate drG when dfG is missing
            return None
        # Decimal(str(...)) avoids accumulating binary floating point error.
        p = sum(c[0] * Decimal(str(dfG_dict[c[1]])) for c in products)
        r = sum(c[0] * Decimal(str(dfG_dict[c[1]])) for c in reactants)
        return float(p - r)

    # Use Equilibrator API
    if not eq_api:
        eq_api = ComponentContribution(pH=pH, ionic_strength=0.1)
    rxn = Reaction.parse_formula(equation)
    return round(eq_api.dG0_prime(rxn)[0], 1)
Beispiel #12
0
def initialze_cc(pH=7.0, pMg=3.0, temperature=298.15, ionic_strength=0.25):
    """Create a ComponentContribution configured with the given conditions.

    ``ionic_strength`` is suffixed with "M" and ``temperature`` with "K"
    before being parsed into quantities; ``pH`` and ``pMg`` are wrapped
    without a unit.

    :return: the configured ComponentContribution instance
    """
    component_contribution = ComponentContribution()
    component_contribution.p_h = Q_(pH)
    component_contribution.p_mg = Q_(pMg)
    component_contribution.ionic_strength = Q_(f"{ionic_strength}M")
    component_contribution.temperature = Q_(f"{temperature}K")
    return component_contribution
Beispiel #13
0
def predict_dG(model, pH=7, ionic_strength=0.1, fn=None):
    """Estimate dG'0 and dG'm for the reactions of a model and save them as CSV.

    For every reaction for which ``build_kegg_string`` yields a formula, the
    eQuilibrator estimates are computed. Reactions that cannot be parsed or
    estimated, or whose dG'm uncertainty exceeds ``dGMm_STD_THRESHOLD``, are
    reported on stdout and left out.

    :param model: model exposing ``reactions`` (each with ``id``, ``name``
        and an ``annotation`` containing ``"kegg.reaction"``)
    :param pH: pH handed to eQuilibrator (previously ignored in favour of a
        hard-coded 7.0)
    :param ionic_strength: ionic strength handed to eQuilibrator (previously
        ignored in favour of a hard-coded 0.1)
    :param fn: output CSV path; defaults to "eQuilibrator_reversibility.csv"
    :return: list of rows ``[id, name, KEGG id, reversibility, dG0_prime,
        dG0_prime_std, dGm_prime, dGm_prime_std]``
    """
    # float() keeps the default call identical to the former literal 7.0.
    eq_api = ComponentContribution(pH=float(pH), ionic_strength=ionic_strength)
    columns = [
        "Rxn", "Rxn Name", "Rxn KEGG ID", "Reversibility in model",
        "dG0_prime", "dG0_prime_std", "dGm_prime", "dGm_prime_std"
    ]
    lst = []
    for r in model.reactions:
        kegg_string = build_kegg_string(r)
        if not kegg_string:
            continue
        try:
            rxn = Reaction.parse_formula(kegg_string)
        except KeyError:
            print("eQuilibrator could not predict dG for {0}".format(r.id))
            continue
        try:
            dgm = eq_api.dGm_prime(rxn)
            dg0 = eq_api.dG0_prime(rxn)
        except Exception:
            # Best effort per reaction, but KeyboardInterrupt/SystemExit are
            # no longer swallowed as they were by the former bare except.
            print("eQuilibrator could not predict dG for {0}".format(r.id))
            continue
        # The last element of the dG'm result is its uncertainty.
        if dgm[-1] > dGMm_STD_THRESHOLD:
            print(
                "eQuilibrator could not predict dG for {0} with a reasonable uncertainty"
                .format(r.id))
            continue
        print(r.id, dgm)
        lst.append([
            r.id, r.name, r.annotation["kegg.reaction"],
            parse_reversibility(r), *dg0, *dgm
        ])
    # Store as df; passing the columns to the constructor also works when no
    # reaction produced a row (assigning df.columns afterwards raised then).
    df = pd.DataFrame(lst, columns=columns)
    if not fn:
        fn = "eQuilibrator_reversibility.csv"
    df.to_csv(fn, index=False, sep=",")
    print("Found the dG for {0} of {1} reactions".format(
        len(lst), len(model.reactions)))
    return lst
Beispiel #14
0
    def __init__(self, rpsbml=None, ph=7.5, ionic_strength=200, pMg=10.0, temp_k=298.15):
        """Constructor class for rpEquilibrator

        Creates the eQuilibrator ComponentContribution object, configures it
        with the given conditions and loads the default concentrations from
        ``data/mnx_default_conc.json`` located next to this module.

        :param rpsbml: rpSBML object (Default: None)
        :param ph: pH of the cell input from the rpSBML model input (Default: 7.5)
        :param ionic_strength: Ionic strength from the rpSBML model input, interpreted in mM (Default: 200)
        :param pMg: pMg value from the rpSBML model input (Default: 10.0)
        :param temp_k: Temperature from the rpSBML model input in Kelvin (Default: 298.15)

        :type rpsbml: rpSBML
        :type ph: float
        :type ionic_strength: float
        :type pMg: float
        :type temp_k: float

        .. document private functions
        .. automethod:: _makeSpeciesStr
        .. automethod:: _makeReactionStr
        .. automethod:: _speciesCmpQuery
        .. automethod:: _reactionCmpQuery
        .. automethod:: _reactionStrQuery
        """
        self.logger = logging.getLogger(__name__)
        self.logger.debug('Started instance of rpEquilibrator')
        self.cc = ComponentContribution()
        self.cc.p_h = Q_(ph)
        self.cc.ionic_strength = Q_(str(ionic_strength)+' mM')
        self.cc.p_mg = Q_(pMg)
        self.cc.temperature = Q_(str(temp_k)+' K')
        self.ph = ph
        self.ionic_strength = ionic_strength
        self.pMg = pMg
        self.temp_k = temp_k
        # Resolve the data file relative to this module rather than the
        # working directory, and close the handle as soon as it is parsed.
        default_conc_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data', 'mnx_default_conc.json')
        with open(default_conc_path, 'r') as default_conc_file:
            self.mnx_default_conc = json.load(default_conc_file)
        self.rpsbml = rpsbml
        # Cache of compounds already created from a structure, keyed by InChI / SMILES
        self.calc_cmp = {}
Beispiel #15
0
def initThermo(
    ph: float=DEFAULT_pH,
    ionic_strength: float=DEFAULT_ionic_strength,
    pMg: float=DEFAULT_pMg,
    logger: Logger=getLogger(__name__)
) -> ComponentContribution:
    """Create and configure the eQuilibrator ComponentContribution object.

    Progress is reported through ``print_title`` before and ``print_OK``
    after the object has been configured.

    :param ph: pH assigned to the predictor
    :param ionic_strength: ionic strength; suffixed with "M" before parsing
    :param pMg: pMg assigned to the predictor
    :param logger: logger handed to the progress helpers
    :return: the configured ComponentContribution
    """
    print_title(txt='Initialising eQuilibrator...', logger=logger, waiting=True)

    component_contribution = ComponentContribution()
    component_contribution.p_h = Q_(ph)
    component_contribution.ionic_strength = Q_(str(ionic_strength) + 'M')
    component_contribution.p_mg = Q_(pMg)
    # The temperature is not configured here: this function takes no
    # temperature argument.

    print_OK(logger)
    return component_contribution
Beispiel #16
0
    sys.stderr.write('I = %.1f M\n' % args.i)

    ids = []
    reactions = []
    with open('data/iJO1366_reactions.csv', 'r') as f:
        for row in csv.DictReader(f):
            ids.append(row['bigg.reaction'])
            try:
                reactions.append(Reaction.parse_formula(row['formula']))
            except ValueError as e:
                print('warning: cannot parse reaction %s because of %s' %
                      (row['bigg.reaction'], str(e)))
                reactions.append(Reaction({}))
                continue

    equilibrator = ComponentContribution(pH=args.ph, ionic_strength=args.i)

    dG0_prime, U = equilibrator.dG0_prime_multi(reactions)

    writer = csv.writer(args.outfile)
    header = [
        'reaction', 'pH', 'ionic strength [M]', 'dG\'0 [kJ/mol]',
        'uncertainty [kJ/mol]', 'ln(Reversibility Index)', 'comment'
    ]
    writer.writerow(header)
    for s, r, dg0, u in zip(ids, reactions, dG0_prime.flat, U.diagonal().flat):
        row = [s, args.ph, args.i]
        if r.is_empty():
            row += [nan, nan, nan, 'reaction is empty']
        elif r.check_full_reaction_balancing():
            ln_RI = r.calculate_reversibility_index_from_dG0_prime(dg0)
Beispiel #17
0
# Module-level configuration: thermodynamic constants, the shared
# eQuilibrator object and the cobra model selected by USE_CORE.

# NOTE(review): presumably the largest dG uncertainty accepted downstream;
# the constant is not used in the visible code - confirm.
DG_STD_MAX = 50  # kJ/mol

# Conditions handed to the eQuilibrator object created below.
CYTOPLASMIC_PH = 7.4
CYTOPLASMIC_IONIC_STRENGTH = 0.25  # in M

# Lower / upper bounds keyed by 'c', 'p' and 'e'.
# NOTE(review): the keys look like compartment codes (cytoplasm, periplasm,
# extracellular) and the values like concentration bounds - confirm.
LB_DICT = {'c': 1e-9, 'p': 1e-9, 'e': 1e-9}  # in M
UB_DICT = {'c': 1e-1, 'p': 1e-1, 'e': 1}  # in M

# Disabled debugging switch: change the condition to True to replace the
# bounds above with practically unbounded values.
if False:
    # override constraints for testing purposes
    LB_DICT = {'c': 1e-99, 'p': 1e-99, 'e': 1e-99}  # in M
    UB_DICT = {'c': 1e99, 'p': 1e99, 'e': 1e99}  # in M

# The local checkout is appended to sys.path before the import below, so the
# order of these statements matters.
sys.path.append(os.path.expanduser('~/git/equilibrator-api/'))
from equilibrator_api import ComponentContribution, Reaction
# Shared eQuilibrator object configured with the cytoplasmic conditions.
equilibrator = ComponentContribution(pH=CYTOPLASMIC_PH,
                                     ionic_strength=CYTOPLASMIC_IONIC_STRENGTH)

# Selects which model is loaded: the core SBML model (True) or the
# iJO1366 JSON model (False).
USE_CORE = False

if USE_CORE:
    from cobra.io import read_sbml_model
    cobra_model = read_sbml_model(settings.CORE_SBML_FNAME)
    # Id of the biomass reaction in the selected model.
    BM_RXN = 'BIOMASS_Ecoli_core_w_GAM'
else:
    from cobra.io import load_json_model
    cobra_model = load_json_model(settings.IJO1366_JSON_FNAME)
    # Id of the biomass reaction in the selected model.
    BM_RXN = 'BIOMASS_Ec_iJO1366_core_53p95M'

###############################################################################

Beispiel #18
0
"""
#INPUTS:
ECM_path = '/Users/ivand/Documents/GitHub/ECM_Yeast'
R    = 0.008314472 #Universal constant of gases
Temp = 298.15      #25 celsius
#Load libraries
import os
from os import path
from math import exp
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from equilibrator_api import ComponentContribution, Reaction, ReactionMatcher

#Main Script
eq_api = ComponentContribution(pH=7.5, ionic_strength=0.1)
#Load model data file
os.chdir(ECM_path+'/Models')
data  = pd.read_table('KEGGmodelScaffold.txt')
os.chdir(ECM_path)
i=0
output     = '!!SBtab TableName=Parameter TableType=Quantity Document=S. cervisiae central carbon metabolism SBtabVersion=1.0'+'\n'
output     = output+ '!QuantityType'+'\t'+'!Reaction'+'\t'+'!Compound'+'\t'+'!Value'+'\t'+'!Unit'+'\t'+'!Reaction:Identifiers:kegg.reaction'+'\t'+'!Compound:Identifiers:kegg.compound'+'\t'+'!ID'+'\n'
unit       = 'dimensionless'
compound   = ' '
compoundID = ' '
type       = 'equilibrium constant'

for formula in data.ReactionFormula:
    rxn         = Reaction.parse_formula(formula)
    name        = str(data.ID[i])
Beispiel #19
0
logging.getLogger().setLevel(logging.WARNING)

sys.stderr.write('pH = %.1f\n' % args.ph)
sys.stderr.write('I = %.1f M\n' % args.i)
sys.stderr.write('Reaction: %s\n' % args.reaction)
sys.stderr.flush()

# parse the reaction
try:
    reaction = Reaction.parse_formula(args.reaction)
except ValueError as e:
    logging.error(str(e))
    sys.exit(-1)

equilibrator = ComponentContribution(pH=args.ph, ionic_strength=args.i)

n_e = reaction.check_half_reaction_balancing()
if n_e is None:
    logging.error('reaction is not chemically balanced')
    sys.exit(-1)
elif n_e == 0:
    dG0_prime, dG0_uncertainty = equilibrator.dG0_prime(reaction)
    sys.stdout.write(u'\u0394G\'\u00B0 = %.1f \u00B1 %.1f kJ/mol\n' %
                     (dG0_prime, dG0_uncertainty))

    ln_RI = equilibrator.reversibility_index(reaction)
    sys.stdout.write(u'ln(Reversibility Index) = %.1f\n' % ln_RI)

else:  # treat as a half-reaction
    logging.warning('This reaction isn\'t balanced, but can still be treated'
Beispiel #20
0
class rpEquilibrator:
    """Class containing collection of functions to intereact between rpSBML files and equilibrator. Includes a function to convert an rpSBML file to a SBtab format for MDF analysis
    """
    def __init__(self, rpsbml=None, ph=7.5, ionic_strength=200, pMg=10.0, temp_k=298.15):
        """Constructor class for rpEquilibrator

        Creates the eQuilibrator ComponentContribution object, configures it
        with the given conditions and loads the default concentrations from
        ``data/mnx_default_conc.json`` located next to this module.

        :param rpsbml: rpSBML object (Default: None)
        :param ph: pH of the cell input from the rpSBML model input (Default: 7.5)
        :param ionic_strength: Ionic strength from the rpSBML model input, interpreted in mM (Default: 200)
        :param pMg: pMg value from the rpSBML model input (Default: 10.0)
        :param temp_k: Temperature from the rpSBML model input in Kelvin (Default: 298.15)

        :type rpsbml: rpSBML
        :type ph: float
        :type ionic_strength: float
        :type pMg: float
        :type temp_k: float

        .. document private functions
        .. automethod:: _makeSpeciesStr
        .. automethod:: _makeReactionStr
        .. automethod:: _speciesCmpQuery
        .. automethod:: _reactionCmpQuery
        .. automethod:: _reactionStrQuery
        """
        self.logger = logging.getLogger(__name__)
        self.logger.debug('Started instance of rpEquilibrator')
        self.cc = ComponentContribution()
        self.cc.p_h = Q_(ph)
        self.cc.ionic_strength = Q_(str(ionic_strength)+' mM')
        self.cc.p_mg = Q_(pMg)
        self.cc.temperature = Q_(str(temp_k)+' K')
        self.ph = ph
        self.ionic_strength = ionic_strength
        self.pMg = pMg
        self.temp_k = temp_k
        # Resolve the data file relative to this module rather than the
        # working directory, and close the handle as soon as it is parsed.
        default_conc_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data', 'mnx_default_conc.json')
        with open(default_conc_path, 'r') as default_conc_file:
            self.mnx_default_conc = json.load(default_conc_file)
        self.rpsbml = rpsbml
        # Cache of compounds already created from a structure, keyed by InChI / SMILES
        self.calc_cmp = {}
    

    ##################################################################################
    ############################### PRIVATE ##########################################
    ##################################################################################


    #TODO: metanetx.chemical:MNXM7 + bigg.metabolite:pi
    def _makeSpeciesStr(self, libsbml_species, ret_type='xref'):
        """Private function that makes a Equilibrator friendly string of a species

        :param libsbml_species: A libsbml species object
        :param ret_type: Type of output. Valid output include: ['name', 'id', 'xref']

        :type libsbml_species: libsbml.Species
        :type ret_type: str

        Take a libsbml species object, parse the MIRIAM or the brsynth (if present) to return 
        the equilibrator appropriate string. The order of preference is the following:
        example input MIRIAM annotation: {'inchikey': ['GPRLSGONYQIRFK-UHFFFAOYSA-N'], 'seed': ['cpd00067'], 'sabiork': ['39'], 'reactome': ['R-ALL-74722', 'R-ALL-70106', 'R-ALL-5668577', 'R-ALL-428548', 'R-ALL-428040', 'R-ALL-427899', 'R-ALL-425999', 'R-ALL-425978', 'R-ALL-425969', 'R-ALL-374900', 'R-ALL-372511', 'R-ALL-351626', 'R-ALL-2872447', 'R-ALL-2000349', 'R-ALL-194688', 'R-ALL-193465', 'R-ALL-163953', 'R-ALL-156540', 'R-ALL-1470067', 'R-ALL-113529', 'R-ALL-1132304'], 'metacyc': ['PROTON'], 'hmdb': ['HMDB59597'], 'chebi': ['5584', '13357', '10744', '15378'], 'bigg': ['M_h', 'h'], 'metanetx': ['MNXM89553', 'MNXM145872', 'MNXM1', 'MNXM01']}
        -KEGG
        -CHEBI
        #-bigg
        #-MNX
        -inchikey
        ret_type -> valid options (xref, id, name)

        :rtype: str
        :return: The string id of the species or False if fail
        """
        self.logger.debug('ret_type: '+str(ret_type))
        if ret_type=='name':
            return libsbml_species.getName()
        elif ret_type=='id':
            return libsbml_species.getId()
        elif ret_type=='xref':
            annot = libsbml_species.getAnnotation()
            if not annot:
                self.logger.error('Cannot retreive the annotation')
                return False
            miriam_dict = self.rpsbml.readMIRIAMAnnotation(annot)
            self.logger.debug('miriam_dict: '+str(miriam_dict))
            if not miriam_dict:
                self.logger.warning('The object annotation does not have any MIRIAM entries')
                return False
            if 'kegg' in miriam_dict:
                if miriam_dict['kegg']:
                    try:
                        #take the lowest value
                        int_list = [int(i.replace('C', '')) for i in miriam_dict['kegg']]
                        return 'KEGG:'+str(miriam_dict['kegg'][int_list.index(min(int_list))])
                    except ValueError:
                        self.logger.warning('There is a non int value in: '+str(miriam_dict['kegg']))
            elif 'chebi' in miriam_dict:
                if miriam_dict['chebi']:
                    try:
                        #take the lowest value
                        int_list = [int(i) for i in miriam_dict['chebi']]
                        return 'CHEBI:'+str(miriam_dict['chebi'][int_list.index(min(int_list))])
                    except ValueError:
                        self.logger.warning('There is a non int value in: '+str(miriam_dict['chebi']))
            elif 'metanetx' in miriam_dict:
                if miriam_dict['metanetx']:
                    try:
                        #take the lowest value
                        int_list = [int(i.replace('MNXM', '')) for i in miriam_dict['metanetx']]
                        return 'metanetx.chemical:'+str(miriam_dict['metanetx'][int_list.index(min(int_list))])
                    except ValueError:
                        self.logger.warning('There is a non int value in: '+str(miriam_dict['metanetx']))
            elif 'inchikey' in miriam_dict:
                if miriam_dict['inchikey']:
                    if len(miriam_dict['inchikey'])==1:
                        return miriam_dict['inchikey'][0]
                    else:
                        self.logger.warning('There are multiple values of inchikey: '+str(miriam_dict['inchikey']))
                        self.logger.warning('Taking the first one')
                        return miriam_dict['inchikey'][0]
            else:
                self.logger.warning('Could not extract string input for '+str(miriam_dict))
                return False
            self.logger.warning('The MIRIAM annotation does not have the required information')
            return False
        else:
            self.logger.warning('Cannot determine ret_type: '+str(ret_type))


    def _makeReactionStr(self, libsbml_reaction, ret_type='xref', ret_stoichio=True):
        """Make the reaction formulae string to query equilibrator

        :param libsbml_reaction: A libsbml reaction object
        :param ret_type: Type of output. Valid output include: ['name', 'id', 'xref'] (Default: xref)
        :param ret_stoichio: Return the stoichio or not (Default: True)

        :type libsbml_reaction: libsbml.Reaction
        :type ret_type: str
        :type ret_stoichio: bool

        :rtype: str
        :return: The string id of the reaction or False if fail
        """
        reac_str = ''
        for rea in libsbml_reaction.getListOfReactants():
            rea_str = self._makeSpeciesStr(self.rpsbml.model.getSpecies(rea.getSpecies()), ret_type)
            if rea_str:
                if ret_stoichio:
                    reac_str += str(rea.getStoichiometry())+' '+str(rea_str)+' + '
                else:
                    reac_str += str(rea_str)+' + '
            else:
                return False
        reac_str = reac_str[:-2]
        reac_str += '<=> ' #TODO: need to find a way to determine the reversibility of the reaction
        for pro in libsbml_reaction.getListOfProducts():
            pro_str = self._makeSpeciesStr(self.rpsbml.model.getSpecies(pro.getSpecies()), ret_type)
            if pro_str:
                if ret_stoichio:
                    reac_str += str(pro.getStoichiometry())+' '+str(pro_str)+' + '
                else:
                    reac_str += str(pro_str)+' + '
            else:
                return False
        reac_str = reac_str[:-2]
        self.logger.debug('reac_str: '+str(reac_str))
        return reac_str


    ################### Equilibrator component contribution queries instead of using the native functions ###########


    def _speciesCmpQuery(self, libsbml_species):
        """Use the native equilibrator-api compound contribution method

        :param libsbml_species: A libsbml species object

        :type libsbml_species: libsbml.Reaction

        :rtype: tuple
        :return: Tuple of size two with mu and sigma values in that order or (None, None) if fail
        """
        annot = libsbml_species.getAnnotation()
        if not annot:
            self.logger.warning('The annotation of '+str(libsbml_species)+' is None....')
            return None, None
        brs_annot = self.rpsbml.readBRSYNTHAnnotation(libsbml_species.getAnnotation())
        #TODO: handle the condition where there are no inchi values but there are SMILES -- should rarely, if ever happen
        self.logger.debug('libsbml_species: '+str(libsbml_species))
        #self.logger.debug('brs_annot: '+str(brs_annot))
        #Try to get the cmp from the ID
        spe_id = self._makeSpeciesStr(libsbml_species)
        spe_cmp = None
        if spe_id:
            self.logger.debug('Trying to find the CMP using the xref string: '+str(spe_id))
            spe_cmp = self.cc.ccache.get_compound(self._makeSpeciesStr(libsbml_species))
        if not spe_cmp:
            self.logger.debug('Trying to find the CMP using the structure')
            #try to find it in the local data - we do this because there are repeated species in many files
            if brs_annot['inchi'] in self.calc_cmp:
                spe_cmp = self.calc_cmp[brs_annot['inchi']]
            elif brs_annot['smiles'] in self.calc_cmp:
                spe_cmp = self.calc_cmp[brs_annot['smiles']]
            else:
                #if you cannot find it then calculate it
                try:
                    spe_cmp = get_or_create_compound(self.cc.ccache, brs_annot['inchi'], mol_format='inchi')
                    self.calc_cmp[brs_annot['inchi']] = spe_cmp
                    self.calc_cmp[brs_annot['smiles']] = spe_cmp
                except (OSError, KeyError, GroupDecompositionError) as e:
                    try:
                        spe_cmp = get_or_create_compound(self.cc.ccache, brs_annot['smiles'], mol_format='smiles')
                        self.calc_cmp[brs_annot['smiles']] = spe_cmp
                        self.calc_cmp[brs_annot['inchi']] = spe_cmp
                    except (OSError, KeyError, GroupDecompositionError) as e:
                        self.logger.warning('The following species does not have brsynth annotation InChI or SMILES: '+str(libsbml_species.getId()))
                        self.logger.warning('Or Equilibrator could not convert the structures')
                        self.logger.warning(e)
                        return None, None
        if spe_cmp.id==4: #this is H+ and can be ignored
            return 'h', 'h'
        self.logger.debug('spe_cmp: '+str(spe_cmp))
        #mu, sigma = self.cc.predictor.preprocess.get_compound_prediction(eq_cmp[0])
        mu, sigma = self.cc.predictor.preprocess.get_compound_prediction(spe_cmp)
        return mu, sigma


    def _reactionCmpQuery(self, libsbml_reaction, write_results=False, physio_param=1e-3):
        """This method makes a list of structure compounds and uses equilibrator to return the reaction dG

        :param libsbml_reaction: A libsbml reaction object
        :param write_results: Write the results to the rpSBML file (Default: False)
        :param physio_param: The physiological parameter, i.e. the concentration of the compounds to calculate the dG (Default: 1e-3)

        :type libsbml_reaction: libsbml.Reaction
        :type write_results: bool
        :type physio_param: float

        :rtype: tuple
        :return: Tuple of size three with dfG_prime_o, dfG_prime_m, uncertainty values in that order or False if fail
        """
        mus = []
        sigma_vecs = []
        S = []
        dfG_prime_o = None
        dfG_prime_m = None
        uncertainty = None
        for rea in libsbml_reaction.getListOfReactants():
            self.logger.debug('------------------- '+str(rea.getSpecies())+' --------------')
            mu, sigma = self._speciesCmpQuery(self.rpsbml.model.getSpecies(rea.getSpecies()))
            self.logger.debug('mu: '+str(mu))
            if not mu:
                self.logger.warning('Failed to calculate the reaction mu thermodynamics using compound query')
                if write_results:
                    self.rpsbml.addUpdateBRSynth(libsbml_reaction, 'dfG_prime_o', 0.0, 'kj_per_mol')
                    self.rpsbml.addUpdateBRSynth(libsbml_reaction, 'dfG_prime_m', 0.0, 'kj_per_mol')
                    self.rpsbml.addUpdateBRSynth(libsbml_reaction, 'dfG_uncert', 0.0, 'kj_per_mol')
                return False
            elif mu=='h': #skipping the Hydrogen
                continue
            mus.append(mu)
            sigma_vecs.append(sigma)
            S.append([-rea.getStoichiometry()])
        for pro in libsbml_reaction.getListOfProducts():
            mu, sigma = self._speciesCmpQuery(self.rpsbml.model.getSpecies(pro.getSpecies()))
            if not mu:
                self.logger.warning('Failed to calculate the reaction mu thermodynamics using compound query')
                if write_results:
                    self.rpsbml.addUpdateBRSynth(libsbml_reaction, 'dfG_prime_o', 0.0, 'kj_per_mol')
                    self.rpsbml.addUpdateBRSynth(libsbml_reaction, 'dfG_prime_m', 0.0, 'kj_per_mol')
                    self.rpsbml.addUpdateBRSynth(libsbml_reaction, 'dfG_uncert', 0.0, 'kj_per_mol')
                return False
            elif mu=='h': #skipping the Hydrogen
                continue
            mus.append(mu)
            sigma_vecs.append(sigma)
            S.append([pro.getStoichiometry()])
        mus = Q_(mus, 'kJ/mol')
        sigma_vecs = Q_(sigma_vecs, 'kJ/mol')
        np_S = np.array(S)
        dfG_prime_o = np_S.T@mus
        dfG_prime_o = float(dfG_prime_o.m[0])
        ###### adjust fot physio parameters to calculate the dGm'
        #TODO: check with Elad
        dfG_prime_m = float(dfG_prime_o)+float(self.cc.RT.m)*sum([float(sto[0])*float(np.log(co)) for sto, co in zip(S, [physio_param]*len(S))])
        uncertainty = np_S.T@sigma_vecs
        uncertainty = [email protected]
        uncertainty = uncertainty.m[0][0]
        if write_results:
            self.rpsbml.addUpdateBRSynth(libsbml_reaction, 'dfG_prime_o', dfG_prime_o, 'kj_per_mol')
            self.rpsbml.addUpdateBRSynth(libsbml_reaction, 'dfG_prime_m', dfG_prime_m, 'kj_per_mol')
            self.rpsbml.addUpdateBRSynth(libsbml_reaction, 'dfG_uncert', uncertainty, 'kj_per_mol')
        return dfG_prime_o, dfG_prime_m, uncertainty

    
    '''
    ## Not sure if we should implement such a function -- recommended by Elad I guess
    #
    #
    def pathwayCmpQuery(self, write_results=False):
        #1) build the stochio matrix taking into account all the species of the reaction -- must keep track
        pass

    #################### native equilibrator-api functions ###############

    def speciesStrQuery(self, libsbml_species, write_results=False):
        """
        Return the formation energy of a chemical species
        """
        return False
    '''

    #TODO: when an inchikey is passed, (and you don't have any other xref) and equilibrator finds the correct species then update the MIRIAM annotations
    def _reactionStrQuery(self, libsbml_reaction, write_results=False):
        """Build the string reaction from a libSBML reaction object to send to equilibrator and return the different thermodynamics analysis available

        :param libsbml_reaction: A libsbml reaction object
        :param write_results: Write the results to the rpSBML file (Default: False)

        :type libsbml_reaction: libsbml.Reaction
        :type write_results: bool

        :rtype: bool
        :return: Success or failue of the function
        """
        reac_str = ''
        try:
            reac_str = self._makeReactionStr(libsbml_reaction)
            self.logger.debug('The reaction string is: '+str(reac_str))
            if not reac_str:
                self.logger.warning('Could not generate the reaction string for: '+str(libsbml_reaction))
                if write_results:
                    self.logger.warning('Writing the 0 results to the file')
                    self.rpsbml.addUpdateBRSynth(libsbml_reaction, 'dfG_prime_o', 0.0, 'kj_per_mol')
                    self.rpsbml.addUpdateBRSynth(libsbml_reaction, 'dfG_prime_m', 0.0, 'kj_per_mol')
                    self.rpsbml.addUpdateBRSynth(libsbml_reaction, 'dfG_uncert', 0.0, 'kj_per_mol')
                    #Are there default values for these?
                    #self.rpsbml.addUpdateBRSynth(libsbml_reaction, 'reversibility_index', 0.0)
                    #self.rpsbml.addUpdateBRSynth(libsbml_reaction, 'balanced', rxn.is_balanced())
                return False
            rxn = self.cc.parse_reaction_formula(reac_str)
            standard_dg = self.cc.standard_dg(rxn)
            standard_dg_prime = self.cc.standard_dg_prime(rxn)
            physiological_dg_prime = self.cc.physiological_dg_prime(rxn)
            ln_reversibility_index = self.cc.ln_reversibility_index(rxn)
            if type(ln_reversibility_index)==float:
                self.logger.warning('The reversibility index is infinite: '+str(ln_reversibility_index))
                ln_reversibility_index = None
                ln_reversibility_index_error = None
            else:
                ln_reversibility_index_error = ln_reversibility_index.error.m
                ln_reversibility_index = ln_reversibility_index.value.m
            self.logger.debug(rxn.is_balanced())
            self.logger.debug('ln_reversibility_index: '+str(ln_reversibility_index))
            self.logger.debug('standard_dg.value.m: '+str(standard_dg.value.m))
            self.logger.debug('standard_dg.error.m: '+str(standard_dg.error.m))
            self.logger.debug('standard_dg_prime.value.m: '+str(standard_dg_prime.value.m))
            self.logger.debug('standard_dg_prime.error.m: '+str(standard_dg_prime.error.m))
            self.logger.debug('physiological_dg_prime.value.m: '+str(physiological_dg_prime.value.m))
            self.logger.debug('physiological_dg_prime.error.m: '+str(physiological_dg_prime.error.m))
            if write_results:
                self.rpsbml.addUpdateBRSynth(libsbml_reaction, 'dfG_prime_o', standard_dg_prime.value.m, 'kj_per_mol')
                self.rpsbml.addUpdateBRSynth(libsbml_reaction, 'dfG_prime_m', physiological_dg_prime.value.m, 'kj_per_mol')
                self.rpsbml.addUpdateBRSynth(libsbml_reaction, 'dfG_uncert', standard_dg.error.m, 'kj_per_mol')
                if ln_reversibility_index:
                    self.rpsbml.addUpdateBRSynth(libsbml_reaction, 'reversibility_index', ln_reversibility_index)
                self.rpsbml.addUpdateBRSynth(libsbml_reaction, 'balanced', rxn.is_balanced())
            return (rxn.is_balanced(),
                   (ln_reversibility_index, ln_reversibility_index_error),
                   (float(standard_dg.value.m), float(standard_dg.error.m)),
                   (float(standard_dg_prime.value.m), float(standard_dg_prime.error.m)), 
                   (float(physiological_dg_prime.value.m), float(physiological_dg_prime.error.m)))
        except equilibrator_cache.exceptions.ParseException:
            self.logger.warning('One of the reaction species cannot be parsed by equilibrator: '+str(reac_str))
            return False
        except equilibrator_cache.exceptions.MissingDissociationConstantsException:
            self.logger.warning('Some of the species have not been pre-caclulated using ChemAxon')


    ################################################################################
    ########################### PUBLIC FUNCTIONS ###################################
    ################################################################################


    #WARNING: taking the sum of the reaction thermodynamics is perhaps not the best way to do it
    def pathway(self, pathway_id='rp_pathway', write_results=True):
        """Calculate the dG of a heterologous pathway

        Every reaction of the pathway is first sent to _reactionStrQuery. When that fails,
        _reactionCmpQuery is tried, and when that fails as well the reaction contributes zeros.

        :param pathway_id: The id of the heterologous pathway of interest (Default: rp_pathway)
        :param write_results: Write the results to the rpSBML file (Default: True)

        :type pathway_id: str
        :type write_results: bool

        :rtype: tuple
        :return: Tuple of three (sum, std) pairs, in order: dG_prime_o, dG_prime_m and dG_prime_o again. Also False if function error.
        """
        groups = self.rpsbml.model.getPlugin('groups')
        #a model without the groups plugin cannot contain the pathway
        rp_pathway = groups.getGroup(pathway_id) if groups is not None else None
        if not rp_pathway:
            self.logger.error('Cannot retreive the pathway: '+str(pathway_id))
            return False
        #the balanced flag and the reversibility index are written to the SBML by _reactionStrQuery
        #but are not aggregated at the pathway level
        dg_prime_o = []
        dg_prime_o_error = []
        dg_prime_m = []
        dg_prime_m_error = []
        for react in [self.rpsbml.model.getReaction(i.getIdRef()) for i in rp_pathway.getListOfMembers()]:
            self.logger.debug('Sending the following reaction to _reactionStrQuery: '+str(react))
            res = self._reactionStrQuery(react, write_results)
            self.logger.debug('The result is :'+str(res))
            if res:
                #WARNING: the uncertainty for the three thermo calculations should be the same
                dg_prime_o.append(res[3][0])
                dg_prime_o_error.append(res[3][1])
                dg_prime_m.append(res[4][0])
                dg_prime_m_error.append(res[4][1])
                continue
            self.logger.info('Native equilibrator string query failed')
            self.logger.info('Trying equilibrator_api component contribution')
            self.logger.debug('Trying to calculate using CC: '+str(react))
            res = self._reactionCmpQuery(react, write_results)
            if res:
                #the compound query returns a single uncertainty that is used for both values
                dg_prime_o.append(res[0])
                dg_prime_o_error.append(res[2])
                dg_prime_m.append(res[1])
                dg_prime_m_error.append(res[2])
            else:
                self.logger.warning('Cannot calculate the thermodynmics for the reaction: '+str(react))
                self.logger.warning('Setting everything to 0')
                dg_prime_o.append(0.0)
                dg_prime_o_error.append(0.0)
                dg_prime_m.append(0.0)
                dg_prime_m_error.append(0.0)
        sum_o, std_o = np.sum(dg_prime_o), np.std(dg_prime_o)
        sum_m, std_m = np.sum(dg_prime_m), np.std(dg_prime_m)
        if write_results:
            self.rpsbml.addUpdateBRSynth(rp_pathway, 'dfG_prime_o', sum_o, 'kj_per_mol')
            self.rpsbml.addUpdateBRSynth(rp_pathway, 'dfG_prime_o_std', std_o, 'kj_per_mol')
            self.rpsbml.addUpdateBRSynth(rp_pathway, 'dfG_prime_m', sum_m, 'kj_per_mol')
            self.rpsbml.addUpdateBRSynth(rp_pathway, 'dfG_prime_m_std', std_m, 'kj_per_mol')
            self.rpsbml.addUpdateBRSynth(rp_pathway, 'dfG_uncert', np.mean(dg_prime_o_error), 'kj_per_mol')
            self.rpsbml.addUpdateBRSynth(rp_pathway, 'dfG_uncert_std', np.std(dg_prime_o_error), 'kj_per_mol')
        #NOTE(review): the third pair repeats the dG_prime_o values; it is kept as is because callers
        #may rely on it -- confirm whether the uncertainty was meant to be returned instead
        return (sum_o, std_o), (sum_m, std_m), (sum_o, std_o)


    def toNetworkSBtab(self, output, pathway_id='rp_pathway', thermo_id='dfG_prime_o', fba_id='fba_obj_fraction', stdev_factor=1.96):
        """Convert an SBML pathway to a simple network for input to equilibrator-pathway for MDF

        The file is written as a sequence of SBtab tables: Configuration, Reaction, Compound, Flux,
        ConcentrationConstraint and, when thermo_id is set, Thermodynamics. When the function fails
        part of the way, the output file is left partially written.

        :param output: Output path of the TSV file
        :param pathway_id: The id of the heterologous pathway of interest (Default: rp_pathway)
        :param thermo_id: The id of the thermodynamics result to be exported to the SBtab file (Default: dfG_prime_o, Valid Options: [dfG_prime_o, dfG_prime_m])
        :param fba_id: The id of the FBA value to be exported to SBtab (Default: fba_obj_fraction)
        :param stdev_factor: The standard deviation factor (Default: 1.96)

        :type output: str
        :type pathway_id: str
        :type thermo_id: str
        :type fba_id: str
        :type stdev_factor: float

        :rtype: bool
        :return: Success or failure of the function
        """
        groups = self.rpsbml.model.getPlugin('groups')
        #a model without the groups plugin cannot contain the pathway
        rp_pathway = groups.getGroup(pathway_id) if groups is not None else None
        if not rp_pathway:
            self.logger.error('Cannot retreive the pathway: '+str(pathway_id))
            return False
        with open(output, 'w') as fo:
            ####################### Make the header of the document ##############
            fo.write("!!!SBtab DocumentName='E. coli central carbon metabolism - balanced parameters' SBtabVersion='1.0'\t\t\t\n")
            fo.write("!!SBtab TableID='Configuration' TableType='Config'\t\t\t\n")
            fo.write("!Option\t!Value\t!Comment\t\n")
            fo.write("algorithm\tMDF\tECM, or MDF\t\n")
            fo.write("p_h\t"+str(self.ph)+"\t\t\n")
            fo.write("ionic_strength\t"+str(self.ionic_strength)+" mM\t\t\n")
            fo.write("p_mg\t"+str(self.pMg)+"\t\t\n")
            #tab separated like every other option row, otherwise option and value end up in a single cell
            fo.write("stdev_factor\t"+str(stdev_factor)+"\t\t\n")
            fo.write("\t\t\t\n")
            ####################### Make the reaction list ######################
            fo.write("!!SBtab TableID='Reaction' TableType='Reaction'\t\t\t\n")
            fo.write("!ID\t!ReactionFormula\t\t\n")
            pathway_reactions = [self.rpsbml.model.getReaction(i.getIdRef()) for i in rp_pathway.getListOfMembers()]
            #TODO: use the rpGraph to sort the pathway in the right order
            #the reactions are written by decreasing numerical id (the ids are expected to look like RP<int>)
            ordered_react = sorted(pathway_reactions, key=lambda x: int(x.getId().replace('RP', '')), reverse=True)
            for react in ordered_react:
                react_str = self._makeReactionStr(react, 'id', True)
                if not react_str:
                    self.logger.error('Cannot build the reaction: '+str(react))
                    return False
                fo.write(str(react.getId())+"\t"+str(react_str)+"\n")
            fo.write("\t\t\t\n")
            fo.write("\t\t\t\n")
            ########################## Make the species list ###################
            fo.write("!!SBtab TableID='Compound' TableType='Compound'\t\t\t\n")
            fo.write("!ID\t!Identifiers\t\t\n")
            rp_species = self.rpsbml.readUniqueRPspecies(pathway_id)
            #databases by order of preference: (MIRIAM key, prefix removed to get the number, SBtab prefix)
            xref_priority = (('kegg', 'C', 'KEGG:'),
                             ('chebi', '', 'CHEBI:'),
                             ('metanetx', 'MNXM', 'metanetx.chemical:'))
            for spe_id in rp_species:
                spe = self.rpsbml.model.getSpecies(spe_id)
                miriam_dict = self.rpsbml.readMIRIAMAnnotation(spe.getAnnotation())
                if not miriam_dict:
                    self.logger.warning('The object annotation does not have any MIRIAM entries')
                    return False
                iden_str = None
                for db_key, id_prefix, sbtab_prefix in xref_priority:
                    if iden_str or db_key not in miriam_dict or not miriam_dict[db_key]:
                        continue
                    try:
                        #take the lowest value
                        int_list = [int(i.replace(id_prefix, '') if id_prefix else i) for i in miriam_dict[db_key]]
                        iden_str = sbtab_prefix+str(miriam_dict[db_key][int_list.index(min(int_list))])
                    except ValueError:
                        self.logger.warning('There is a non int value in: '+str(miriam_dict[db_key]))
                if not iden_str and 'inchikey' in miriam_dict and miriam_dict['inchikey']:
                    if len(miriam_dict['inchikey'])!=1:
                        self.logger.warning('There are multiple values of inchikey: '+str(miriam_dict['inchikey']))
                        self.logger.warning('Taking the first one')
                    iden_str = miriam_dict['inchikey'][0]
                if not iden_str:
                    #the row is still written, with the literal string None as identifier
                    self.logger.warning('Could not extract string input for '+str(miriam_dict))
                fo.write(str(spe_id)+"\t"+str(iden_str)+"\t\t\n")
            fo.write("\t\t\t\n")
            ################## Add FBA values ##############################
            #TODO: perhaps find a better way than just setting this to 1
            fo.write("!!SBtab TableID='Flux' TableType='Quantity' Unit='mM/s'\t\t\t\n")
            fo.write("!QuantityType\t!Reaction\t!Value\t\n")
            for react in pathway_reactions:
                brs_annot = self.rpsbml.readBRSYNTHAnnotation(react.getAnnotation())
                if fba_id and fba_id in brs_annot:
                    #the saved value is mmol_per_gDW_per_hr while rpEq required mmol_per_s
                    #WARNING: rpEq says that these values are mMol/s while here we have mMol/gDw/h. However not changing since this would mean doing /3600 and
                    #given the values that seems wrong
                    fo.write("rate of reaction\t"+str(react.getId())+"\t"+str(brs_annot[fba_id]['value'])+"\t\n")
                else:
                    if fba_id:
                        self.logger.warning('Cannot retreive the FBA value '+str(fba_id)+'. Setting a default value of 1.')
                    fo.write("rate of reaction\t"+str(react.getId())+"\t1\t\n")
            ################## Add the concentration bounds ##############################
            fo.write("\t\t\t\n")
            fo.write("!!SBtab TableID='ConcentrationConstraint' TableType='Quantity' Unit='mM'\t\t\t\n")
            fo.write("!QuantityType\t!Compound\t!Min\t!Max\n")
            for spe_id in rp_species:
                self.logger.debug('========= '+str(spe_id)+' ========')
                spe = self.rpsbml.model.getSpecies(spe_id)
                miriam_dict = self.rpsbml.readMIRIAMAnnotation(spe.getAnnotation())
                self.logger.debug(miriam_dict)
                if not miriam_dict:
                    self.logger.warning('The object annotation does not have any MIRIAM entries')
                    continue
                is_found = False
                if 'metanetx' in miriam_dict:
                    self.logger.debug(miriam_dict['metanetx'])
                    for mnx in miriam_dict['metanetx']:
                        #only the first cross-reference with a known range is used
                        if mnx in self.mnx_default_conc:
                            self.logger.debug('Found default concentration range for '+str(spe.getId())+' ('+str(mnx)+'): '+str(self.mnx_default_conc[mnx]))
                            fo.write("concentration\t"+spe.getId()+"\t"+str(self.mnx_default_conc[mnx]['c_min'])+"\t"+str(self.mnx_default_conc[mnx]['c_max'])+"\n")
                            is_found = True
                            break
                if not is_found:
                    self.logger.debug('Using default values for '+str(spe.getId()))
                    fo.write("concentration\t"+spe.getId()+"\t0.001\t10\n")
            fo.write("\t\t\t\n")
            ############################ Add the thermo value ###########################
            #TODO: perform on the fly thermodynamic calculations when the values are not included within the SBML file
            if thermo_id:
                fo.write("!!SBtab TableID='Thermodynamics' TableType='Quantity' StandardConcentration='M'\t\t\t\n")
                fo.write("!QuantityType\t!Reaction\t!Compound\t!Value\t!Unit\n")
                for react in pathway_reactions:
                    brs_annot = self.rpsbml.readBRSYNTHAnnotation(react.getAnnotation())
                    if thermo_id not in brs_annot:
                        self.logger.error('There is no '+str(thermo_id)+' in the reaction '+str(react.getId()))
                        return False
                    if not brs_annot[thermo_id]:
                        self.logger.error(str(thermo_id)+' is empty. Was rpThermodynamics run on this SBML? Aborting...')
                        return False
                    try:
                        #TODO: switch to dfG_prime_m when you are sure how to calculate it using the native equilibrator function
                        #NOTE(review): the exported value is always dfG_prime_o, whatever thermo_id is -- confirm this is still intended
                        thermo_value = brs_annot['dfG_prime_o']['value']
                    except KeyError:
                        self.logger.error('The reaction '+str(react.getId())+' does not seem to have the following thermodynamic value: '+str(thermo_id))
                        return False
                    fo.write("reaction gibbs energy\t"+str(react.getId())+"\t\t"+str(thermo_value)+"\tkJ/mol\n")
        return True


    def MDF(self, pathway_id='rp_pathway', thermo_id='dfG_prime_o', fba_id='fba_obj_fraction', stdev_factor=1.96, write_results=True):
        """Perform MDF analysis on the rpSBML file

        The pathway is exported to a temporary SBtab file with toNetworkSBtab, which is then loaded
        with equilibrator-pathway to calculate the MDF.

        :param pathway_id: The id of the heterologous pathway of interest (Default: rp_pathway)
        :param thermo_id: The id of the thermodynamics result to be exported to the SBtab file (Default: dfG_prime_o, Valid Options: [dfG_prime_o, dfG_prime_m])
        :param fba_id: The id of the FBA value to be exported to SBtab (Default: fba_obj_fraction)
        :param stdev_factor: The standard deviation factor (Default: 1.96)
        :param write_results: Write the results to the rpSBML file (Default: True)

        :type pathway_id: str
        :type thermo_id: str
        :type fba_id: str
        :type stdev_factor: float
        :type write_results: bool

        :rtype: float
        :return: MDF of the pathway, or 0.0 if it cannot be calculated
        """
        groups = self.rpsbml.model.getPlugin('groups')
        rp_pathway = groups.getGroup(pathway_id) if groups is not None else None
        if not rp_pathway:
            self.logger.error('Cannot retreive the pathway: '+str(pathway_id))
            return 0.0

        def failed_mdf():
            #a failure is stored as an MDF of 0.0, but the model is only modified when write_results is set
            if write_results:
                self.rpsbml.addUpdateBRSynth(rp_pathway, 'MDF', 0.0, 'kj_per_mol')
            return 0.0

        with tempfile.TemporaryDirectory() as tmpOutputFolder:
            path_sbtab = os.path.join(tmpOutputFolder, 'tmp_sbtab.tsv')
            sbtab_status = self.toNetworkSBtab(path_sbtab, pathway_id=pathway_id, thermo_id=thermo_id, fba_id=fba_id, stdev_factor=stdev_factor)
            if not sbtab_status:
                self.logger.error('There was a problem generating the SBtab... aborting')
                return 0.0
            try:
                pp = Pathway.from_sbtab(path_sbtab, comp_contrib=self.cc)
                pp.update_standard_dgs()
                try:
                    mdf_sol = pp.calc_mdf()
                except Exception as e:
                    #the solver can fail in several ways; KeyboardInterrupt and SystemExit are not swallowed
                    self.logger.warning('The calc_mdf function failed')
                    self.logger.warning('Exception: Cannot solve MDF primal optimization problem')
                    self.logger.warning(e)
                    return failed_mdf()
                to_ret_mdf = float(mdf_sol.mdf.m)
                if write_results:
                    self.rpsbml.addUpdateBRSynth(rp_pathway, 'MDF', to_ret_mdf, 'kj_per_mol')
            except KeyError as e:
                self.logger.warning('Cannot calculate MDF')
                self.logger.warning(e)
                return failed_mdf()
            except equilibrator_cache.exceptions.MissingDissociationConstantsException as e:
                self.logger.warning('Some species are invalid: '+str(e))
                return failed_mdf()
        return to_ret_mdf
def main():
    """Build the KEGG reaction/compound tables and map UniRef90 ids to compounds.

    The intermediate and final tables are written as TSV files under
    ``./database/``; ``./database/kegg_generic_cpd.tsv`` is read as input.
    """

    def split_rows(frame, column):
        """Expand the comma-separated values of *column* into one row each."""
        key_columns = frame.columns.drop(column).tolist()
        return (frame.set_index(key_columns)[column]
                .str.split(',', expand=True)
                .stack()
                .reset_index()
                .rename(columns={0: column})
                .loc[:, frame.columns])

    ## Obtain an equation from KEGG reaction ID
    rxn_df = kegg.get_individual('reaction')
    equation_df = pd.DataFrame(
        map(gre.get_keggrxn_equation,
            gre.queue_pop_column_from_df(rxn_df, 'reaction')))
    equation_df.to_csv("./database/keggrxn_equation.tsv", sep='\t', index=None)

    ## Get compounds from reaction equations
    # NOTE(review): `cc` is not used below; it is kept in case creating it in
    # the parent process matters for the workers -- confirm before removing.
    cc = ComponentContribution()
    # NOTE(review): these counters are never updated in this function, so the
    # two summary lines below always report 0.
    non_dg = 0
    non_fdg = 0

    # Raw strings: these are regular expressions, and `\d`, `\w`, `\(` are
    # invalid escape sequences in ordinary string literals.
    coef_list = [r'\d\d C', r'\d C']
    n_list = [
        r' \w ', r' ', r' \W \w ', r' \d \w ', r' \w\-\d \w ',
        r' \(side \d\) ', r' \(\w\) ', r' \d\w ', r' \(\w\-\w\) ',
        r' \(\w\+\w\) ', r' \(\w\-\d\) ', r' \(\w\+\d\) '
    ]

    for coef in coef_list:
        equation_df.replace(coef, 'C', regex=True, inplace=True)
    for n in n_list:
        equation_df.replace(n, ' ', regex=True, inplace=True)

    # The context manager releases the workers even when `map` raises.
    with Pool(processes=4) as pool:
        result = pool.map(partial(gcr.get_dg_product, dataframe=equation_df),
                          gcr.count_df_columns(equation_df))
    print(f" # of Non-ΔG'0 = {non_dg}")
    print(f" # of Non-ΔfG' = {non_fdg}")
    cpd_df = pd.DataFrame(result)
    total_df = pd.concat([equation_df, cpd_df], axis=1)

    # A compound listed several times for a reaction is kept only once,
    # ex) Reaction : C04150 + G10509 = C02970 + G10509
    #     Reaction's product : C04150,C02970,G10509
    total_df.cpd = [
        ','.join(set(individual_cpds.split(',')))
        for individual_cpds in total_df.cpd
    ]
    total_df.to_csv("./database/kegg_rxn_cpd_equilibrator.tsv",
                    sep='\t',
                    index=None)

    ## Assign glycans to compounds
    rxn_mixcpd = total_df[['ENTRY', 'cpd']]
    rxn_mixcpd.columns = ['reaction', 'cpd']
    rxn_mixcpd = split_rows(rxn_mixcpd, 'cpd')

    # Ids containing a 'G' are handled as glycans and mapped to compounds.
    is_glycan = rxn_mixcpd.cpd.str.contains('G')
    rxn_cpd_nog = rxn_mixcpd[~is_glycan]
    rxn_cpd_g = rxn_mixcpd[is_glycan]
    rxn_cpd_g.columns = ['reaction', 'glycan']

    glycan_cpd = kegg.get_interaction("cpd", "glycan")
    rxn_glycan_cpd = pd.merge(rxn_cpd_g, glycan_cpd, on='glycan')

    rxn_cpd = rxn_glycan_cpd[['reaction', 'cpd']]
    concat_cpd = pd.concat([rxn_cpd_nog, rxn_cpd]).drop_duplicates()
    concat_cpd.to_csv("./database/kegg_rxn_cpd_nog.tsv", sep='\t', index=None)

    ## Removal of generic compound
    generic_cpd = pd.read_csv("./database/kegg_generic_cpd.tsv", sep='\t')
    kegg_rxn_cpd_noggeneric = concat_cpd[~concat_cpd.cpd.isin(
        generic_cpd.kegg)]
    kegg_rxn_cpd_noggeneric.to_csv("./database/kegg_rxn_cpd_noggeneric.tsv",
                                   sep='\t',
                                   index=None)

    ## Map uniref90 to compound

    # EC - Reaction - Cpd
    uniref90_ec = renew.renew_humann_ec()
    ec_rn = kegg.get_interaction("reaction", "ec")

    uni_ec_rn = pd.merge(uniref90_ec, ec_rn, on='ec')
    uni_ec_rn_cpd = pd.merge(uni_ec_rn, kegg_rxn_cpd_noggeneric, on='reaction')
    erc = uni_ec_rn_cpd[['uniref90', 'cpd']].drop_duplicates()

    # KO - EC - Reaction - Cpd
    uniref90_ko = humann.humann_api("ko", "uniref90")
    ko_ec = kegg.get_interaction("ec", "ko")

    uni_ko_ec = pd.merge(uniref90_ko, ko_ec, on='ko')
    uni_ko_ec_rn = pd.merge(uni_ko_ec, ec_rn, on='ec')
    uni_ko_ec_rn_cpd = pd.merge(uni_ko_ec_rn,
                                kegg_rxn_cpd_noggeneric,
                                on='reaction')
    kerc = uni_ko_ec_rn_cpd[['uniref90', 'cpd']].drop_duplicates()

    # KO - Reaction - Cpd
    ko_rn = kegg.get_interaction("reaction", "ko")

    uni_ko_rn = pd.merge(uniref90_ko, ko_rn, on='ko')
    uni_ko_rn_cpd = pd.merge(uni_ko_rn, kegg_rxn_cpd_noggeneric, on='reaction')
    krc = uni_ko_rn_cpd[['uniref90', 'cpd']].drop_duplicates()

    # Merge the three mappings and expand comma-separated uniref90 ids
    kerkc = pd.concat([erc, kerc, krc]).drop_duplicates()
    kerkc = split_rows(kerkc, 'uniref90')

    kerkc.to_csv("./database/uniref_com_kerk_oc.tsv", sep='\t', index=None)
Beispiel #22
0
class Thermodynamics:
    """Class to calculate thermodynamics of Pickaxe runs.

    Thermodynamics allows for the calculation of:
        1) Standard ∆G' of formation
        2) Standard ∆G'o of reaction
        3) Physiological ∆G'm of reaction
        4) Adjusted ∆G' of reaction

    eQuilibrator objects can also be obtained from r_ids and c_ids.

    Parameters
    ----------
    mongo_uri: str
        URI of the mongo database.
    client: MongoClient
        Connection to Mongo.
    CC: ComponentContribution
        eQuilibrator Component Contribution object to calculate ∆G with.
    lc: LocalCompoundCache
        The local compound cache to generate eQuilibrator compounds from.
    """

    def __init__(
        self,
    ):
        # Mongo params (populated by load_mongo)
        self.mongo_uri = None
        self.client = None
        self._core = None

        # eQ params (lc and _water are populated by load_thermo_from_*)
        self.CC = ComponentContribution()
        self.lc = None
        self._water = None

    def load_mongo(self, mongo_uri: Union[str, None] = None):
        """Open the Mongo connection and bind the "core" database.

        Parameters
        ----------
        mongo_uri : Union[str, None], optional
            URI to connect to. When falsy, ``MongoClient()`` is created with
            its defaults and ``mongo_uri`` is recorded as "localhost:27017".
        """
        if mongo_uri:
            self.mongo_uri = mongo_uri
            self.client = MongoClient(mongo_uri)
        else:
            self.mongo_uri = "localhost:27017"
            self.client = MongoClient()

        self._core = self.client["core"]

    def _all_dbs_loaded(self):
        """Return True when Mongo and the local compound cache are loaded.

        Prints a hint and returns False otherwise.
        """
        # Compare with None explicitly: PyMongo Database objects refuse
        # truth-value testing (bool(db) raises NotImplementedError).
        if (
            self.client is not None
            and self._core is not None
            and self.lc is not None
        ):
            return True

        print("Load connection to Mongo and eQuilibrator local cache.")
        return False

    def _eq_loaded(self):
        """Return True when the local compound cache is loaded.

        Prints a hint and returns False otherwise.
        """
        if self.lc is not None:
            return True

        print("Load eQulibrator local cache.")
        return False

    def _reset_CC(self):
        """Reset CC back to the default physiological conditions."""
        self.CC.p_h = default_physiological_p_h
        self.CC.p_mg = default_physiological_p_mg
        self.CC.temperature = default_physiological_temperature
        self.CC.ionic_strength = default_physiological_ionic_strength

    def load_thermo_from_postgres(
        self, postgres_uri: str = "postgresql:///eq_compounds"
    ) -> None:
        """Load a LocalCompoundCache from a postgres uri for equilibrator.

        Parameters
        ----------
        postgres_uri : str, optional
            uri of the postgres DB to use, by default "postgresql:///eq_compounds"
        """
        self.lc = LocalCompoundCache()
        self.lc.ccache = CompoundCache(create_engine(postgres_uri))

        # "O" is looked up once and reused as the water entry for reactions.
        self._water = self.lc.get_compounds("O")

    def load_thermo_from_sqlite(
        self, sqlite_filename: str = "compounds.sqlite"
    ) -> None:
        """Load a LocalCompoundCache from a sqlite file for equilibrator.

        compounds.sqlite can be generated through LocalCompoundCache's method
        generate_local_cache_from_default_zenodo

        Parameters
        ----------
        sqlite_filename: str
            filename of the sqlite file to load.
        """
        self.lc = LocalCompoundCache()
        self.lc.load_cache(sqlite_filename)

        # "O" is looked up once and reused as the water entry for reactions.
        self._water = self.lc.get_compounds("O")

    def get_eQ_compound_from_cid(
        self, c_id: str, pickaxe: Pickaxe = None, db_name: str = None
    ) -> Union[Compound, None]:
        """Get an equilibrator compound for a given c_id from the core.

        Attempts to retrieve a compound from the core or a specified db_name.

        Parameters
        ----------
        c_id : str
            compound ID for MongoDB lookup of a compound.
        pickaxe : Pickaxe
            pickaxe object to look for the compound in, by default None.
        db_name : str
            Database to look for compound in before core database, by default None.

        Returns
        -------
        equilibrator_assets.compounds.Compound
            eQuilibrator Compound, or None when no usable SMILES is found
            (missing compound, SMILES containing "*") or when the local
            compound cache has not been loaded.
        """
        compound_smiles = None

        # Find locally in pickaxe; a pickaxe miss never falls back to Mongo
        if pickaxe:
            if c_id in pickaxe.compounds:
                compound_smiles = pickaxe.compounds[c_id]["SMILES"]
            else:
                return None

        # Find in mongo db
        elif self._all_dbs_loaded():
            if db_name:
                compound = self.client[db_name].compounds.find_one(
                    {"_id": c_id}, {"SMILES": 1}
                )
                if compound:
                    compound_smiles = compound["SMILES"]

            # No cpd smiles from database name, fall back to core
            if not compound_smiles:
                compound = self._core.compounds.find_one({"_id": c_id}, {"SMILES": 1})
                if compound:
                    compound_smiles = compound["SMILES"]

        # No compound_smiles at all, or a SMILES containing "*"
        if not compound_smiles or "*" in compound_smiles:
            return None

        # The pickaxe path does not go through _all_dbs_loaded, so the cache
        # may still be missing here; report it instead of failing on None.
        if not self._eq_loaded():
            return None

        return self.lc.get_compounds(
            compound_smiles, bypass_chemaxon=True, save_empty_compounds=True
        )

    def standard_dg_formation_from_cid(
        self, c_id: str, pickaxe: Pickaxe = None, db_name: str = None
    ) -> Union[float, None]:
        """Get standard ∆Gfo for a compound.

        Parameters
        ----------
        c_id : str
            Compound ID to get the ∆Gf for.
        pickaxe : Pickaxe
            pickaxe object to look for the compound in, by default None.
        db_name : str
            Database to look for compound in before core database, by default None.

        Returns
        -------
        Union[float, None]
            ∆Gf'o for a compound, or None if unavailable.
        """
        eQ_cpd = self.get_eQ_compound_from_cid(c_id, pickaxe, db_name)
        if not eQ_cpd:
            return None

        # Only the first element of the result is returned to the caller.
        return self.CC.standard_dg_formation(eQ_cpd)[0]

    def get_eQ_reaction_from_rid(
        self, r_id: str, pickaxe: Pickaxe = None, db_name: str = None
    ) -> Union[PhasedReaction, None]:
        """Get an eQuilibrator reaction object from an r_id.

        Parameters
        ----------
        r_id : str
            Reaction id to get object for.
        pickaxe : Pickaxe
            pickaxe object to look for the compound in, by default None.
        db_name : str
            Database to look for reaction in.

        Returns
        -------
        PhasedReaction
            eQuilibrator reaction to calculate ∆Gr with, or None when the
            reaction or any of its compounds cannot be resolved.
        """
        if pickaxe:
            if r_id in pickaxe.reactions:
                reaction_info = pickaxe.reactions[r_id]
            else:
                return None
        elif db_name:
            # Without a Mongo connection and compound cache nothing can be
            # resolved; bail out instead of subscripting a None client.
            if not self._all_dbs_loaded():
                return None
            mine = self.client[db_name]
            reaction_info = mine.reactions.find_one({"_id": r_id})
            if not reaction_info:
                return None
        else:
            return None

        # Each entry is indexed as (stoichiometry, compound id)
        reactants = reaction_info["Reactants"]
        products = reaction_info["Products"]

        lhs = " + ".join(f"{r[0]} {r[1]}" for r in reactants)
        rhs = " + ".join(f"{p[0]} {p[1]}" for p in products)
        reaction_string = " => ".join([lhs, rhs])

        compounds = {r[1] for r in reactants}
        compounds.update(p[1] for p in products)

        eQ_compound_dict = {
            c_id: self.get_eQ_compound_from_cid(c_id, pickaxe, db_name)
            for c_id in compounds
        }

        # Every participant must resolve to an eQuilibrator compound
        if not all(eQ_compound_dict.values()):
            return None

        # NOTE(review): the hard-coded id is presumably the c_id of water;
        # when absent, a "water" entry is supplied for the parser. Confirm.
        if "X73bc8ef21db580aefe4dbc0af17d4013961d9d17" not in compounds:
            eQ_compound_dict["water"] = self._water

        eq_reaction = Reaction.parse_formula(eQ_compound_dict.get, reaction_string)

        return eq_reaction

    def physiological_dg_prime_from_rid(
        self, r_id: str, pickaxe: Pickaxe = None, db_name: str = None
    ) -> Union[pint.Measurement, None]:
        """Calculate the ∆Gm' of a reaction.

        Parameters
        ----------
        r_id : str
            ID of the reaction to calculate.
        pickaxe : Pickaxe
            pickaxe object to look for the compound in, by default None.
        db_name : str
            MINE the reaction is found in.

        Returns
        -------
        pint.Measurement
            The calculated ∆G'm, or None if the reaction cannot be built.
        """
        eQ_reaction = self.get_eQ_reaction_from_rid(r_id, pickaxe, db_name)
        if not eQ_reaction:
            return None

        return self.CC.physiological_dg_prime(eQ_reaction)

    def standard_dg_prime_from_rid(
        self, r_id: str, pickaxe: Pickaxe = None, db_name: str = None
    ) -> Union[pint.Measurement, None]:
        """Calculate the ∆G'o of a reaction.

        Parameters
        ----------
        r_id : str
            ID of the reaction to calculate.
        pickaxe : Pickaxe
            pickaxe object to look for the compound in, by default None.
        db_name : str
            MINE the reaction is found in.

        Returns
        -------
        pint.Measurement
            The calculated ∆G'o, or None if the reaction cannot be built.
        """
        eQ_reaction = self.get_eQ_reaction_from_rid(r_id, pickaxe, db_name)
        if not eQ_reaction:
            return None

        return self.CC.standard_dg_prime(eQ_reaction)

    def dg_prime_from_rid(
        self,
        r_id: str,
        pickaxe: Pickaxe = None,
        db_name: str = None,
        p_h: Q_ = default_physiological_p_h,
        p_mg: Q_ = default_physiological_p_mg,
        ionic_strength: Q_ = default_physiological_ionic_strength,
    ) -> Union[pint.Measurement, None]:
        """Calculate the ∆G' of a reaction.

        Parameters
        ----------
        r_id : str
            ID of the reaction to calculate.
        pickaxe : Pickaxe
            pickaxe object to look for the compound in, by default None.
        db_name : str
            MINE the reaction is found in.
        p_h : Q_
            pH of system.
        p_mg: Q_
            pMg of the system.
        ionic_strength: Q_
            ionic strength of the system.

        Returns
        -------
        pint.Measurement
            The calculated ∆G', or None if the reaction cannot be built.
        """
        eQ_reaction = self.get_eQ_reaction_from_rid(r_id, pickaxe, db_name)
        if not eQ_reaction:
            return None

        # CC is shared by every method of this object, so the custom
        # conditions must be rolled back even if the calculation raises.
        try:
            self.CC.p_h = p_h
            self.CC.p_mg = p_mg
            self.CC.ionic_strength = ionic_strength

            dG_prime = self.CC.dg_prime(eQ_reaction)
        finally:
            self._reset_CC()

        return dG_prime
def create_reaction_id_to_dG0_mapping_json(model: cobra.Model, json_path: str) -> None:
    """Creates a reaction ID<->dG0 mapping using the Equilibrator API.

    This function uses the pregenerated BIGG ID to MetaNetX ID mapping, and
    lets the Equilibrator API calculate the dG0 values for each reaction using
    the MetaNetX IDs for the metabolites.

    This function is specifically written for the E. coli models used in CommModelPy's
    publication as its selection of dG0 values is based on BiGG identifiers and as it
    contains specific dG0 corrections for special reactions as referenced in the
    comments of this function.
    However, the general basis of this function can be easily adapted for other types of models.

    Every reaction with a dG0 is stored three times, under the reaction ID
    suffixed with "_ecoli1", "_ecoli2" and "_ecoli3", each entry holding a
    "dG0" value and an "uncertainty" of 0. Progress and the final statistics
    are printed to stdout.

    Arguments:
    * model: cobra.Model ~ The cobrapy model instance for which the dG0 mapping shall be created.
    * json_path: str ~ The path to the dG0 JSON that is going to be created.

    Output:
    * No variable but a JSON file with the mapping at json_path
    """
    bigg_id_to_metanetx_id = json_load("./publication_runs/ecoli_models/bigg_id_to_metanetx_mapping_json/bigg_id_to_metanetx_id_mapping_from_iML1515.json")
    reaction_id_dG0_mapping: Dict[str, Dict[str, float]] = {}
    cc = ComponentContribution()
    cc.p_h = Q_(7.0)
    cc.ionic_strength = Q_("0.25M")
    cc.temperature = Q_("298.15K")
    cc.p_mg = Q_(3.0)

    # Suffixes under which each reaction's dG0 is stored in the mapping
    organism_suffixes = ("_ecoli1", "_ecoli2", "_ecoli3")

    # Membrane-bound reaction corrections according to formula 9 in
    # Hamilton, J. J., Dwivedi, V., & Reed, J. L. (2013).
    # Quantitative assessment of thermodynamic constraints on
    # the solution space of genome-scale metabolic models.
    # Biophysical journal, 105(2), 512-522.
    # Formula is:
    # c_j * F * dPsi - 2.3 * h_j * R * T * dpH
    # c_j = net charge transported from outside to inside
    # h_j = Number of protons transported across membrane
    # NOTE(review): 0.02306 kcal/mV/mol * 4.184 is about 0.0965 kJ/mV/mol,
    # not 0.10026; the value is kept unchanged here - confirm its source.
    F = 0.10026  # kJ/mV/mol, or 0.02306; kcal/mV/mol
    dPsi = -130  # mV
    dpH = 0.4  # dimensionless
    R = 8.314e-3  # kJ⋅K⁻1⋅mol⁻1
    T = 298.15  # K
    # (reaction ID prefix, c_j, h_j); c_j and h_j are taken from
    # supplementary table 3 of the mentioned publication (Hamilton et al., 2013)
    membrane_coefficients = (
        ("ATPS4", 4.0, 4.0),  # ATP synthase
        ("NADH16", -3.5, -3.5),  # NADH dehydrogenases
        ("NADH17", -2.0, -2.0),
        ("NADH18", -2.8, -2.8),
        ("CYTBD", -2.0, -2.0),  # Cytochrome dehydrogenases
        ("CYTBO3", -2.5, -2.5),
    )

    reaction_ids_without_dG0: List[str] = []
    for reaction in model.reactions:
        print("==dG0 GENERATION ATTEMPT FOR REACTION "+reaction.id+"==")
        # Exclude exchange reactions
        if reaction.id.startswith("EX_"):
            print("INFO: Reaction is identified as exchange reaction.")
            print("      No dG0 for this reaction!")
            reaction_ids_without_dG0.append(reaction.id)
            continue
        # Exclude special transport reactions
        # tpp: Facilitated transport or (proton) symport
        # t1pp: Facilitated transport or (proton) symport
        # tex: "Via diffusion"
        padded_id = reaction.id + "_"
        is_special_transport = (
            any(tag in padded_id for tag in ("tpp_", "t1pp_", "tex_"))
            or reaction.id.endswith("tex")
        )
        if is_special_transport:
            print("INFO: Reaction is identified as special transport reaction.")
            print("      No dG0 for this reaction!")
            reaction_ids_without_dG0.append(reaction.id)
            continue
        # Exclude demand reactions
        if reaction.id.startswith("DM_"):
            print("INFO: Reaction is identified as sink (demand) reaction.")
            print("      No dG0 for this reaction!")
            reaction_ids_without_dG0.append(reaction.id)
            continue

        # Translate the stoichiometry into a MetaNetX-ID reaction string
        educt_strings: List[str] = []
        product_strings: List[str] = []
        missing_metabolite_id = None
        for metabolite, coefficient in reaction.metabolites.items():
            # Strip the last "_"-separated token from the metabolite ID.
            # NOTE(review): the remaining tokens are joined with "", which
            # also drops underscores inside the base ID; presumably the
            # mapping keys were generated the same way - verify.
            base_metabolite_id = "".join(metabolite.id.split("_")[:-1])

            if base_metabolite_id.endswith("BIO"):
                base_metabolite_id = base_metabolite_id[:-len("BIO")]

            if base_metabolite_id not in bigg_id_to_metanetx_id:
                missing_metabolite_id = base_metabolite_id
                break

            metanetx_id = bigg_id_to_metanetx_id[base_metabolite_id]
            if coefficient < 0:
                educt_strings.append(str(-coefficient) + " " + metanetx_id)
            else:
                product_strings.append(str(coefficient) + " " + metanetx_id)
        if missing_metabolite_id is not None:
            print("INFO: MetanetX ID missing for " + missing_metabolite_id)
            print("      No dG0 can be given for this reaction!")
            reaction_ids_without_dG0.append(reaction.id)
            continue
        educt_string = " + ".join(educt_strings)
        product_string = " + ".join(product_strings)
        reaction_string = educt_string + " = " + product_string
        print("Reaction string with MetanetX IDs: " + reaction_string)
        try:
            parsed_reaction = cc.parse_reaction_formula(reaction_string)
        except Exception as e:
            print("INFO: Equilibrator reaction parsing error")
            print(e)
            print("      No dG0 can be given for this reaction!")
            reaction_ids_without_dG0.append(reaction.id)
            continue
        try:
            dG0 = cc.standard_dg_prime(parsed_reaction)
            # Reject estimates whose standard deviation exceeds 10000
            if dG0.m.s > 10000:
                print("INFO: dG0 standard deviation too high")
                print("      No dG0 can be given for this reaction!")
                reaction_ids_without_dG0.append(reaction.id)
                continue
            for suffix in organism_suffixes:
                # A separate dict per suffix so later corrections stay independent
                reaction_id_dG0_mapping[reaction.id+suffix] = {
                    "dG0": dG0.m.n,
                    "uncertainty": 0,
                }
            print("dG0 calculation successful \\o/")
            print(f"dG0: {dG0}")
        except Exception as e:
            print("INFO:")
            print(e)
            print("      No dG0 can be given for this reaction!")
            reaction_ids_without_dG0.append(reaction.id)
            continue

        # Look up the membrane coefficients of this reaction, if any
        c_j = 0.0
        h_j = 0.0
        for id_prefix, charge, protons in membrane_coefficients:
            if reaction.id.startswith(id_prefix):
                c_j, h_j = charge, protons
                break

        if (c_j != 0) and (h_j != 0):
            print("Correcting dG0 for special membrane-bound reaction...")
            dG0_correction = c_j * F * dPsi - 2.3 * h_j * R * T * dpH
            print(f"Correction factor is {dG0_correction}")
            for suffix in organism_suffixes:
                reaction_id_dG0_mapping[reaction.id+suffix]["dG0"] += dG0_correction
            print("New dG0 is " + str(reaction_id_dG0_mapping[reaction.id+organism_suffixes[0]]["dG0"]))

    num_reactions_without_dG0 = len(reaction_ids_without_dG0)
    num_reactions = len(model.reactions)
    # An empty model must not abort with ZeroDivisionError before the JSON is written
    fraction_without_dG0 = num_reactions_without_dG0/num_reactions if num_reactions else 0.0
    print("\n==FINAL STATISTICS==")
    print("No dG0 for the following reactions:")
    print("\n".join(reaction_ids_without_dG0))
    print("Total number: ", str(num_reactions_without_dG0))
    print("Fraction from all reactions: ", fraction_without_dG0)

    json_write(json_path, reaction_id_dG0_mapping)
Beispiel #24
0
from equilibrator_api import ComponentContribution
from equilibrator_api.model import StoichiometricModel
import numpy as np
import scipy.io
from scipy.linalg import pinvh
import sbtab
import pandas as pd
import sys

# Script: estimate standard reaction energies for the reactions of an SBtab
# model and collect them, with their standard deviations, in a table.
cc = ComponentContribution()

# Load the model from the SBtab file
pp = StoichiometricModel.from_sbtab("MDF_model_noFA_noEX.tsv", comp_contrib=cc)

# recalculate the standard_dg using the 'minimize_norm' option;
# the uncertainty is requested as a covariance matrix ("cov")
mu, dg_cov = cc.standard_dg_prime_multi(pp.reactions,
                                        uncertainty_representation="cov",
                                        minimize_norm=True)
# magnitudes expressed in kJ/mol, rounded to 2 decimals
standard_dg_prime_in_kJ_per_mol = mu.m_as("kJ/mol").round(2)
# per-reaction standard deviation: square root of the covariance diagonal
dg_sigma_in_kJ_per_mol = np.sqrt(np.diag(dg_cov.m_as("kJ**2/mol**2"))).round(2)

# collect the estimated values, one row per reaction, for an SBtab file (out1.tsv)
reaction_df = pd.DataFrame(zip(pp.reaction_ids, pp.reaction_formulas,
                               standard_dg_prime_in_kJ_per_mol,
                               dg_sigma_in_kJ_per_mol),
                           columns=[
                               "reaction_id", "reaction_formula",
                               "standard_dg_prime_in_kJ_per_mol",
                               "dg_sigma_in_kJ_per_mol"
                           ])
# empty SBtab document, created here for use by the rest of the script
sbtabdoc = sbtab.SBtab.SBtabDocument()
Beispiel #25
0
import optlang
from cobra import Model
from cobra.core.dictlist import DictList
from equilibrator_api import ComponentContribution
from scipy import stats
from six import iteritems

from ..util.constraints import delG_indicator, directionality
from ..util.linalg_fun import *
from ..util.thermo_constants import *
from .compound import Thermo_met
from .reaction import thermo_reaction
from .solution import get_legacy_solution, get_solution

# Module-wide eQuilibrator interface, created once at import time.
api = ComponentContribution()

# present_dir = os.path.normpath(os.path.dirname(os.path.abspath(__file__)))
cwd = os.getcwd()

import logging

# Pickled compound cache, located in the "data" directory two levels above this file.
cache_file = Path(__file__).parent.parent / "data" / "compounds_cache.pickle"

# Log files go to a "logs" directory inside the current working directory.
logs_dir = cwd + os.sep + "logs"
# exist_ok=True replaces the separate exists() check, which could race with
# another process creating the directory between the check and makedirs().
os.makedirs(logs_dir, exist_ok=True)

logging.basicConfig(filename=logs_dir + os.sep + "thermo_model.log",
                    level=logging.DEBUG)
Beispiel #26
0
def runThermo(
    pathway: rpPathway,
    cc: ComponentContribution=None,
    ph: float=DEFAULT_pH,
    ionic_strength: float=DEFAULT_ionic_strength,
    pMg: float=DEFAULT_pMg,
    compound_substitutes: Dict = None,
    logger: Logger = getLogger(__name__)
) -> Dict:
    """Compute eQuilibrator thermodynamics for a pathway.

    Intermediate compounds are removed (where possible) from the pathway
    reactions, every species is matched to an eQuilibrator compound, and
    thermodynamic values are computed for each species, each reaction and
    the net reaction. The results are also written into the pathway.

    :param pathway: The pathway to analyse
    :param cc: eQuilibrator ComponentContribution to use; when None, one is
        created with initThermo from ph, ionic_strength and pMg
    :param ph: The pH of the host organism (Default: DEFAULT_pH)
    :param ionic_strength: Ionic strength of the host organism
        (Default: DEFAULT_ionic_strength)
    :param pMg: The pMg of the host organism (Default: DEFAULT_pMg)
    :param compound_substitutes: Search values ('id', 'inchikey', 'inchi')
        keyed by compound id, used instead of the species' own values; when
        None, read from data/compound_substitutes.csv next to this module
    :param logger: Logger used for messages

    :type pathway: rpPathway
    :type cc: ComponentContribution
    :type ph: float
    :type ionic_strength: float
    :type pMg: float
    :type compound_substitutes: Dict
    :type logger: Logger

    :rtype: Dict
    :return: Thermodynamics results with keys 'net_reaction',
        'optimized_net_reaction', 'reactions', 'optimized_reactions',
        'species' and 'substituted_species'
    """

    print_title(
        txt='Pathway Reactions',
        logger=logger,
        waiting=False
    )
    for rxn in pathway.get_list_of_reactions():
        print_reaction(
            rxn=rxn,
            logger=logger
        )

    ## INTERMEDIATE COMPOUNDS
    # Optimise the production of target
    # and remove (if possible) intermediate compounds
    reactions = remove_compounds(
        compounds=pathway.get_intermediate_species(),
        reactions=pathway.get_list_of_reactions(),
        rxn_target_id=pathway.get_target_rxn_id(),
        logger=logger
    )

    ## eQuilibrator
    if cc is None:
        cc = initThermo(
            ph,
            ionic_strength,
            pMg,
            logger
        )

    # Search for the key ID known by eQuilibrator
    cc_species = {}
    substituted_species = {}
    sep = '__64__'
    if compound_substitutes is None:
        compound_substitutes = read_compound_substitutes(
            os_path.join(
                os_path.dirname(os_path.realpath(__file__)),
                'data',
                'compound_substitutes.csv'
            )
        )
    for spe in pathway.get_species():
        spe_id = spe.get_id()
        # Species ids may look like CMPD_NAME__64__COMPID. Such an id matches
        # substitute key K when it equals K + sep + <second token>, so strip
        # that suffix and look K up directly instead of rebuilding (or
        # deep-copying) the whole substitutes table for every species.
        spe_split = spe_id.split(sep)
        if len(spe_split) > 1:
            suffix = sep + spe_split[1]
            if spe_id.endswith(suffix):
                substitute_key = spe_id[:-len(suffix)]
            else:
                substitute_key = None
        else:
            substitute_key = spe_id
        # If the specie is listed in substitutes file, then take search values from it
        if substitute_key is not None and substitute_key in compound_substitutes:
            substitute = compound_substitutes[substitute_key]
            cc_species[spe_id] = search_equilibrator_compound(
                cc=cc,
                id=substitute['id'],
                inchikey=substitute['inchikey'],
                inchi=substitute['inchi'],
                logger=logger
            )
        # Else, take search values from rpCompound
        else:
            cc_species[spe_id] = search_equilibrator_compound(
                cc=cc,
                id=spe_id,
                inchikey=spe.get_inchikey(),
                inchi=spe.get_inchi(),
                smiles=spe.get_smiles(),
                logger=logger
            )
        cc_spe = cc_species[spe_id]
        if cc_spe != {}:
            # Record species that eQuilibrator knows under a different id
            if spe_id != cc_spe['id']:
                substituted_species[spe_id] = cc_spe[cc_spe['cc_key']]
        else:
            logger.warning(f'Compound {spe_id} has not been found within eQuilibrator cache')

    # Store thermo values for the net reactions
    # and for each of the reactions within the pathway
    results = {
        'net_reaction': {},
        'optimized_net_reaction': Reaction.sum_stoichio(reactions),
        'reactions': {},
        'optimized_reactions': {
            rxn.get_id(): rxn
            for rxn in reactions
        },
        'species': {},
        'substituted_species': substituted_species
    }

    # Get the formation energy for each compound
    for spe_id, cc_spe in cc_species.items():
        try:
            value = cc.standard_dg_formation(
                cc.get_compound(
                    cc_spe[cc_spe['cc_key']]
                )
            )[0]  # get .mu
        except Exception as e:
            # Also reached for species that were not found (empty dict)
            value = None
            logger.debug(e)
        if value is None:
            value = 'NaN'
        results['species'][spe_id] = {
            'standard_dg_formation': {
                'value': value,
                'units': 'kilojoule / mole'
            }
        }

    # Build the list of IDs known by eQuilibrator
    # (species that were not found keep their own id)
    species_cc_ids = {}
    for spe_id, cc_spe in cc_species.items():
        if cc_spe == {}:
            species_cc_ids[spe_id] = spe_id
        else:
            species_cc_ids[spe_id] = cc_spe[cc_spe['cc_key']]

    ## REACTIONS
    # Compute thermo for each reaction
    for rxn in pathway.get_list_of_reactions():
        results['reactions'][rxn.get_id()] = eQuilibrator(
            species_stoichio=rxn.get_species(),
            species_ids=species_cc_ids,
            cc=cc,
            logger=logger
        )

    ## THERMO
    print_title(
        txt='Computing thermodynamics (eQuilibrator)...',
        logger=logger,
        waiting=True
    )

    results['net_reaction'] = eQuilibrator(
        species_stoichio=Reaction.sum_stoichio(reactions),
        species_ids=species_cc_ids,
        cc=cc,
        logger=logger
    )

    print_OK(logger)

    # Write results into the pathway
    write_results_to_pathway(pathway, results, logger)

    return results
        reactions_dict[rxn]["stoichiometry"])

    All_Mol = True
    Some_Mol = False
    for rgt in rxn_cpds_array:
        if (rgt['compound'] not in seed_mnx_structural_map):
            All_Mol = False
            Some_Mol = True

    if (All_Mol is True):
        complete_mol_rxns_dict[rxn] = 1
    elif (Some_Mol is True):
        incomplete_mol_rxns_dict[rxn] = 1

# Component-contribution calculator configured for pH 7.0,
# an ionic strength of 0.25 M and a temperature of 298.15 K.
equilibrator_calculator = ComponentContribution(p_h=Q_(7.0),
                                                ionic_strength=Q_("0.25M"),
                                                temperature=Q_("298.15K"))
# Output table, located under thermodynamics_root.
output_name = thermodynamics_root + 'eQuilibrator/MetaNetX_Reaction_Energies.tbl'
# NOTE(review): opened without a context manager - confirm the handle is
# closed further down in the script.
output_handle = open(output_name, 'w')
for rxn in reactions_dict:
    if (reactions_dict[rxn]['status'] == "EMPTY"):
        continue

    notes_list = reactions_dict[rxn]['notes']
    if (not isinstance(notes_list, list)):
        notes_list = list()

    if (rxn not in complete_mol_rxns_dict):

        #'EQ' means equilibrator approach to calculating energies
        #'P' means partial, as in some of the reagents have energies calculated thus
Beispiel #28
0
def create_drG_dict(equations, dfG_dict=None, pH=7.0):
    """Map every equation to the value reaction_gibbs computes for it.

    A single ComponentContribution (given pH, ionic strength 0.1) is created
    and shared by all reaction_gibbs calls.

    :param equations: iterable of hashable equations; it is traversed only
        once, so one-shot iterators and generators are supported
    :param dfG_dict: passed through to reaction_gibbs unchanged
    :param pH: pH used for the ComponentContribution and for reaction_gibbs
    :return: dict keyed by equation; for a repeated equation the last
        computed value is kept
    """
    eq_api = ComponentContribution(pH=pH, ionic_strength=0.1)
    # One pass over `equations`: zipping the iterable against a list built
    # from the same iterable returned {} for a one-shot iterator.
    return {
        equation: reaction_gibbs(equation, dfG_dict, pH, eq_api)
        for equation in equations
    }