Beispiel #1
0
def get_dgf_priors(mi: MaudInput) -> Tuple[pd.Series, pd.DataFrame]:
    """Build a multivariate formation-energy prior from eQuilibrator.

    Each metabolite listed in ``mi.stan_coords.metabolites`` is looked up
    in eQuilibrator using the ``inchi_key`` stored on the matching
    metabolite of ``mi.kinetic_model``.

    :param mi: A MaudInput object
    :return: A pandas Series of prior means and a pandas DataFrame of
        covariances, both indexed by metabolite id and rounded to 10
        decimal places.
    :raises ValueError: If a metabolite has no ``inchi_key`` or if
        eQuilibrator does not return a ``Compound`` for it.

    """
    cc = ComponentContribution()
    inchi_key_by_id = {
        met.id: met.inchi_key for met in mi.kinetic_model.metabolites
    }
    met_ix = pd.Index(mi.stan_coords.metabolites, name="metabolite")

    # One (mean, finite sigma, infinite sigma) tuple per metabolite,
    # collected in the order of met_ix.
    estimates = []
    for met_id in met_ix:
        external_id = inchi_key_by_id[met_id]
        if external_id is None:
            raise ValueError(f"metabolite {met_id} has no external id.")
        compound = cc.get_compound(external_id)
        if not isinstance(compound, Compound):
            raise ValueError(f"cannot find compound for metabolite {met_id}"
                             f" with external id {external_id}.")
        mean, sigma_fin, sigma_inf = cc.standard_dg_formation(compound)
        estimates.append((mean, sigma_fin, sigma_inf))

    prior_means = [estimate[0] for estimate in estimates]
    fin = np.array([estimate[1] for estimate in estimates])
    inf = np.array([estimate[2] for estimate in estimates])
    # The second term is scaled by 1e6 before being added to the first.
    covariance = fin @ fin.T + 1e6 * inf @ inf.T

    prior_mean_series = pd.Series(
        prior_means, index=met_ix, name="prior_mean_dgf"
    ).round(10)
    covariance_frame = pd.DataFrame(
        covariance, index=met_ix, columns=met_ix
    ).round(10)
    return prior_mean_series, covariance_frame
Beispiel #2
0
def search_equilibrator_compound(
    cc: ComponentContribution,
    id: str = None,
    inchikey: str = None,
    inchi: str = None,
    smiles: str = None,
    logger: Logger = getLogger(__name__)
) -> Dict[str, str]:
    """Look a compound up in eQuilibrator from any of its identifiers.

    The identifiers are tried in the order ``id``, ``inchikey``,
    ``inchi``, ``smiles``; empty ones are skipped. The first one for which
    ``cc.get_compound`` returns something is kept as the lookup key. If
    none matches and ``inchikey`` is given, a last attempt is made with
    ``cc.search_compound_by_inchi_key`` on the part of the InChIKey that
    precedes the first ``-``; the first compound of the returned list is
    used.

    :param cc: Object providing ``get_compound`` and
        ``search_compound_by_inchi_key``
    :param id: Compound identifier
    :param inchikey: InChIKey of the compound
    :param inchi: InChI of the compound
    :param smiles: SMILES of the compound
    :param logger: Logger (currently unused by this function)

    :return: ``{}`` if nothing is found. Otherwise a dict with the keys
        ``id``, ``inchi_key``, ``inchi``, ``smiles`` and ``cc_key``, where
        ``cc_key`` names the field whose value is known by eQuilibrator.
        On a direct match, only the fields that were ``None`` or ``''``
        are filled from the eQuilibrator compound; on the InChIKey-prefix
        fallback every field is taken from the eQuilibrator compound and
        ``cc_key`` is ``'inchi_key'``.
    """

    def copy_data(
        compound,
        data: Dict,
        cc_key: str,
        overwrite: bool = False,
    ) -> Dict:
        # Start from the caller's data and pull from the eQuilibrator
        # compound only the fields that are missing (or all of them when
        # `overwrite` is set).
        merged = {
            field: (
                getattr(compound, field)
                if overwrite or value is None or value == ''
                else value
            )
            for field, value in data.items()
        }
        # The key is passed in explicitly rather than read from the
        # enclosing loop: once that loop is over its variables point at
        # the last field tried, not at the field that actually matched.
        merged['cc_key'] = cc_key
        return merged

    data = {
        'id': id,
        'inchi_key': inchikey,
        'inchi': inchi,
        'smiles': smiles
    }
    for key, val in data.items():
        if not val:
            continue
        compound = cc.get_compound(val)
        # First identifier known by eQuilibrator wins
        if compound is not None:
            return copy_data(compound, data, cc_key=key)

    if inchikey:
        # In last resort, search only with the first part of the InChIKey
        compounds = cc.search_compound_by_inchi_key(
            inchikey.split('-')[0]
        )
        # eQuilibrator returns a list of compounds; take the first one,
        # hoping the list is sorted by decreasing relevance
        if compounds:
            return copy_data(
                compounds[0],
                data,
                cc_key='inchi_key',
                overwrite=True
            )

    return {}
def get_dgf_priors(mi: MaudInput) -> Tuple[pd.Series, pd.DataFrame]:
    """Build a multivariate formation-energy prior from eQuilibrator.

    Metabolites are looked up in the order of ``mi.kinetic_model``, using
    their ``inchi_key`` when it is set and their ``id`` otherwise. The
    value returned by ``Compound.transform`` for the ``p_h``,
    ``ionic_strength``, ``temperature`` and ``p_mg`` of the
    ``ComponentContribution`` object is added to each mean, in kJ/mol.
    Results are then reordered to follow ``mi.stan_coords.metabolites``.

    :param mi: A MaudInput object
    :return: A pandas Series of prior means and a pandas DataFrame of
        covariances, both indexed by metabolite id and rounded to 10
        decimal places.
    :raises ValueError: If eQuilibrator does not return a ``Compound``
        for a metabolite.

    """
    cc = ComponentContribution()
    met_ix = pd.Index(mi.stan_coords.metabolites, name="metabolite")
    met_order = [met.id for met in mi.kinetic_model.metabolites]

    # One (mean, finite sigma, infinite sigma) tuple per metabolite,
    # collected in kinetic-model order.
    estimates = []
    for m in mi.kinetic_model.metabolites:
        external_id = m.inchi_key if m.inchi_key is not None else m.id
        compound = cc.get_compound(external_id)
        if not isinstance(compound, Compound):
            raise ValueError(
                f"cannot find compound for metabolite {m.id}"
                f" with external id {external_id}."
                "\nConsider setting the field metabolite_inchi_key"
                " if you haven't already."
            )
        mean, sigma_fin, sigma_inf = cc.standard_dg_formation(compound)
        correction = compound.transform(
            cc.p_h, cc.ionic_strength, cc.temperature, cc.p_mg
        )
        mean += correction.m_as("kJ/mol")
        estimates.append((mean, sigma_fin, sigma_inf))

    prior_means = [estimate[0] for estimate in estimates]
    fin = np.array([estimate[1] for estimate in estimates])
    inf = np.array([estimate[2] for estimate in estimates])
    # The second term is scaled by 1e6 before being added to the first.
    covariance = fin @ fin.T + 1e6 * inf @ inf.T

    # Label with the order used for the lookups, then reorder to the
    # Stan coordinate order.
    covariance_in_model_order = pd.DataFrame(
        covariance, index=met_order, columns=met_order
    )
    cov = covariance_in_model_order.loc[met_ix, met_ix].round(10)
    means_in_model_order = pd.Series(
        prior_means, index=met_order, name="prior_mean_dgf"
    )
    mu = means_in_model_order.loc[met_ix].round(10)
    return mu, cov
Beispiel #4
0
def _find_compound_substitute(
    spe_id: str,
    compound_substitutes: Dict,
    sep: str
):
    """Return the substitutes entry that applies to `spe_id`, or None.

    An id without `sep` is looked up as is. For an id of the form
    ``NAME<sep>SUFFIX`` the entry registered under ``NAME`` is returned,
    i.e. the entry whose key followed by ``<sep>SUFFIX`` equals the id.
    """
    parts = spe_id.split(sep)
    if len(parts) == 1:
        return compound_substitutes.get(spe_id)
    suffix = sep + parts[1]
    # With more than one separator the id only matches if it ends with
    # the first suffix (e.g. 'A<sep>B<sep>B')
    if not spe_id.endswith(suffix):
        return None
    return compound_substitutes.get(spe_id[:-len(suffix)])


def runThermo(
    pathway: rpPathway,
    cc: ComponentContribution=None,
    ph: float=DEFAULT_pH,
    ionic_strength: float=DEFAULT_ionic_strength,
    pMg: float=DEFAULT_pMg,
    compound_substitutes: Dict = None,
    logger: Logger = getLogger(__name__)
) -> Dict:
    """Compute thermodynamics values for a pathway with eQuilibrator.

    The species of the pathway are searched in eQuilibrator, the
    formation energy of each of them is collected, then thermodynamics
    are computed for every reaction of the pathway and for the net
    reaction obtained after intermediate compounds have been removed.
    Results are written into the pathway and returned.

    :param pathway: The pathway to analyse
    :param cc: eQuilibrator object to use. If None, one is created with
        initThermo from ph, ionic_strength and pMg
    :param ph: pH passed to initThermo when cc is None
        (Default: DEFAULT_pH)
    :param ionic_strength: Ionic strength passed to initThermo when cc
        is None (Default: DEFAULT_ionic_strength)
    :param pMg: pMg passed to initThermo when cc is None
        (Default: DEFAULT_pMg)
    :param compound_substitutes: Search values ('id', 'inchikey',
        'inchi') to use instead of the ones of the species, indexed by
        species ID. If None, read from 'data/compound_substitutes.csv'
        located next to this file
    :param logger: Logger

    :type pathway: rpPathway
    :type cc: ComponentContribution
    :type ph: float
    :type ionic_strength: float
    :type pMg: float
    :type compound_substitutes: Dict
    :type logger: Logger

    :rtype: Dict
    :return: Thermodynamics results with keys 'net_reaction',
        'optimized_net_reaction', 'reactions', 'optimized_reactions',
        'species' and 'substituted_species'
    """

    print_title(
        txt='Pathway Reactions',
        logger=logger,
        waiting=False
    )
    for rxn in pathway.get_list_of_reactions():
        print_reaction(
            rxn=rxn,
            logger=logger
        )

    ## INTERMEDIATE COMPOUNDS
    # Optimise the production of target
    # and remove (if possible) intermediate compounds
    reactions = remove_compounds(
        compounds=pathway.get_intermediate_species(),
        reactions=pathway.get_list_of_reactions(),
        rxn_target_id=pathway.get_target_rxn_id(),
        logger=logger
    )

    ## eQuilibrator
    if cc is None:
        cc = initThermo(
            ph,
            ionic_strength,
            pMg,
            logger
        )

    # Search for the key ID known by eQuilibrator
    cc_species = {}
    substituted_species = {}
    sep = '__64__'
    if compound_substitutes is None:
        compound_substitutes = read_compound_substitutes(
            os_path.join(
                os_path.dirname(os_path.realpath(__file__)),
                'data',
                'compound_substitutes.csv'
            )
        )
    for spe in pathway.get_species():
        spe_id = spe.get_id()
        # Species IDs may look like CMPD_NAME__64__COMPID, in which case
        # the substitutes entry of CMPD_NAME applies
        substitute = _find_compound_substitute(
            spe_id,
            compound_substitutes,
            sep
        )
        # If the specie is listed in substitutes, take search values from it
        if substitute is not None:
            cc_spe = search_equilibrator_compound(
                cc=cc,
                id=substitute['id'],
                inchikey=substitute['inchikey'],
                inchi=substitute['inchi'],
                logger=logger
            )
        # Else, take search values from rpCompound
        else:
            cc_spe = search_equilibrator_compound(
                cc=cc,
                id=spe_id,
                inchikey=spe.get_inchikey(),
                inchi=spe.get_inchi(),
                smiles=spe.get_smiles(),
                logger=logger
            )
        cc_species[spe_id] = cc_spe
        if cc_spe != {}:
            # Keep track of species known under another ID by eQuilibrator
            if spe_id != cc_spe['id']:
                substituted_species[spe_id] = cc_spe[cc_spe['cc_key']]
        else:
            logger.warning(f'Compound {spe_id} has not been found within eQuilibrator cache')

    # Store thermo values for the net reactions
    # and for each of the reactions within the pathway
    results = {
        'net_reaction': {},
        'optimized_net_reaction': Reaction.sum_stoichio(reactions),
        'reactions': {},
        'optimized_reactions': {
            rxn.get_id(): rxn
            for rxn in reactions
        },
        'species': {},
        'substituted_species': substituted_species
    }

    # Get the formation energy for each compound
    # and build the list of IDs known by eQuilibrator
    species_cc_ids = {}
    for spe_id, cc_spe in cc_species.items():
        value = None
        if cc_spe:
            cc_id = cc_spe[cc_spe['cc_key']]
            species_cc_ids[spe_id] = cc_id
            try:
                value = cc.standard_dg_formation(
                    cc.get_compound(cc_id)
                )[0]  # get .mu
            except Exception as e:
                # Best effort: a failing compound is reported as 'NaN'
                logger.debug(e)
        else:
            # Not found in eQuilibrator: keep the pathway ID, no value
            species_cc_ids[spe_id] = spe_id
        if value is None:
            value = 'NaN'
        results['species'][spe_id] = {
            'standard_dg_formation': {
                'value': value,
                'units': 'kilojoule / mole'
            }
        }

    ## REACTIONS
    # Compute thermo for each reaction
    for rxn in pathway.get_list_of_reactions():
        results['reactions'][rxn.get_id()] = eQuilibrator(
            species_stoichio=rxn.get_species(),
            species_ids=species_cc_ids,
            cc=cc,
            logger=logger
        )

    ## THERMO
    print_title(
        txt='Computing thermodynamics (eQuilibrator)...',
        logger=logger,
        waiting=True
    )

    results['net_reaction'] = eQuilibrator(
        species_stoichio=Reaction.sum_stoichio(reactions),
        species_ids=species_cc_ids,
        cc=cc,
        logger=logger
    )

    print_OK(logger)

    # Write results into the pathway
    write_results_to_pathway(pathway, results, logger)

    return results