def test_existing_metabolite(iJO1366):
    """Check metabolite lookup across namespaces and its failure mode.

    The fixture yields a ``(model, biomass_reaction, is_ec_model)`` tuple; only
    the model is needed here.
    """
    model, _biomass_reaction, _is_ec_model = iJO1366

    # The ChEBI id and the BiGG id must resolve to the same extracellular
    # metabolite.
    by_chebi = find_metabolite(model, "CHEBI:17790", "chebi", "e")
    by_bigg = find_metabolite(model, "meoh", "bigg.metabolite", "e")
    assert by_chebi == by_bigg

    # The resolved object carries the expected chemical formula.
    succinate = find_metabolite(model, "succ", "bigg.metabolite", "e")
    assert succinate.formula == "C4H4O4"

    # Unknown identifiers/namespaces must raise rather than return nothing.
    with pytest.raises(MetaboliteNotFound):
        find_metabolite(model, "wrong_id", "wrong_namespace", "e")
def apply_medium(model, is_ec_model, medium):
    """
    Apply a medium to a metabolic model.

    The medium is applied by adding salt mappings, resolving the corresponding
    exchange reaction for the resulting medium compounds, setting their uptake
    rate, and then applying that to the model.

    Parameters
    ----------
    model: cobra.Model
    is_ec_model: bool
        A boolean indicating if the model is enzyme-constrained.
    medium: list(dict)
        The medium definition, a list of dicts matching the `MediumCompound`
        schema.

    Returns
    -------
    tuple (operations, warnings, errors)
        Operations is a list of model operations necessary to apply the medium
        to the model. See also the `Operations` schema.
        Warnings is a list of human-readable strings of potential issues.
        If errors is not an empty list, it was not possible to apply the
        medium. Errors then contains a list of string messages describing the
        problem(s).
    """
    operations = []
    warnings = []
    errors = []

    # Convert the list of dicts to a set of namedtuples to avoid duplicates, as
    # looking up metabolites in the model is a somewhat expensive operation.
    Compound = namedtuple("Compound", ["id", "namespace"])
    medium = {
        Compound(id=c["identifier"], namespace=c["namespace"]) for c in medium
    }

    # Detect salt compounds and split them into their ions and metals.
    # Iterate over a snapshot so the set itself can be mutated in the loop.
    for compound in medium.copy():
        if compound.id not in SALTS:
            continue
        salt = SALTS[compound.id]
        logger.info(
            f"Replacing {compound.id} with ions: {salt['ions']} and metals: "
            f"{salt['metals']}")
        medium.remove(compound)
        medium.update(
            Compound(id=ion, namespace="chebi") for ion in salt["ions"])
        medium.update(
            Compound(id=metal, namespace="chebi") for metal in salt["metals"])

        if salt["ions_missing_smiles"]:
            warning = (
                f"Unable to add ions, smiles id could not be mapped: "
                f"{salt['ions_missing_smiles']}")
            warnings.append(warning)
            logger.warning(warning)
        if salt["metals_missing_inchi"]:
            warning = (
                f"Unable to add metals; inchi string could not be mapped: "
                f"{salt['metals_missing_inchi']}")
            warnings.append(warning)
            logger.warning(warning)

    # Add trace metals
    medium.update([
        Compound(id="CHEBI:25517", namespace="chebi"),
        Compound(id="CHEBI:25368", namespace="chebi"),
    ])

    try:
        extracellular = find_external_compartment(model)
    except RuntimeError as error:
        # cobrapy throws RuntimeError if it for any reason is unable to find an
        # external compartment. See:
        # https://github.com/opencobra/cobrapy/blob/95d920d135fa824e6087f1fcbc88d50882da4dab/cobra/medium/boundary_types.py#L26
        message = (
            f"Cannot find an external compartment in model {model.id}: {str(error)}"
        )
        errors.append(message)
        logger.error(message)
        # Cannot continue without knowing the external compartment, so
        # immediately return the error.
        return operations, warnings, errors

    # `model.medium` is a computed property; read it once instead of rebuilding
    # it (up to twice) for every compound. The model is not modified until the
    # mapping is assigned below, so the snapshot stays valid for the whole loop.
    current_medium = model.medium

    # Create a map of exchange reactions and corresponding fluxes to apply to
    # the medium.
    medium_mapping = {}
    for compound in medium:
        try:
            extracellular_metabolite = find_metabolite(
                model, compound.id, compound.namespace, extracellular)
        except MetaboliteNotFound:
            warning = (
                f"Cannot add medium compound '{compound.id}' - metabolite not found in "
                f"extracellular compartment '{extracellular}'")
            warnings.append(warning)
            logger.warning(warning)
            continue

        exchange_reaction = get_exchange_reaction(
            extracellular_metabolite, is_ec_model, consumption=True)

        # If someone already figured out the uptake rate for the compound, it's
        # likely more accurate than our assumptions, so keep it
        if exchange_reaction.id in current_medium:
            medium_mapping[exchange_reaction.id] = current_medium[
                exchange_reaction.id]
            continue

        if not extracellular_metabolite.formula:
            warning = (
                f"No formula for metabolite '{extracellular_metabolite.id}', cannot"
                f" check if it is a carbon source")
            warnings.append(warning)
            logger.warning(warning)
            # If we don't know, it's most likely that the metabolite does not
            # have a higher uptake rate than a carbon source, so set the bound
            # still to 10
            medium_mapping[exchange_reaction.id] = 10
        elif "C" in extracellular_metabolite.elements:
            # Limit the uptake rate for carbon sources to 10
            medium_mapping[exchange_reaction.id] = 10
        else:
            medium_mapping[exchange_reaction.id] = 1000

    # Apply the medium to the model, letting cobrapy deal with figuring out the
    # correct bounds to change
    model.medium = medium_mapping

    # Add all exchange reactions to operations, to make sure any changed bounds
    # is properly updated
    for reaction in model.exchanges:
        operations.append({
            "operation": "modify",
            "type": "reaction",
            "id": reaction.id,
            "data": reaction_to_dict(reaction),
        })

    return operations, warnings, errors
def apply_measurements(
        model,
        biomass_reaction,
        fluxomics,
        metabolomics,
        uptake_secretion_rates,
        molar_yields,
        growth_rate,
):
    """
    Apply omics measurements to a metabolic model.

    For each measured flux (production-rate / uptake-rate), constrain the model
    by forcing their upper and lower bounds to the measured values.

    Parameters
    ----------
    model: cobra.Model
    biomass_reaction: str
        The id of the biomass reaction in the given model.
    fluxomics: list(dict)
        List of measurements matching the `Fluxomics` schema.
    metabolomics: list(dict)
        List of measurements matching the `Metabolomics` schema.
    uptake_secretion_rates: list(dict)
        List of measurements matching the `UptakeSecretionRates` schema.
    molar_yields: list(dict)
        List of measurements matching the `MolarYields` schema.
    growth_rate: dict
        Growth rate, matching the `GrowthRate` schema.

    Returns
    -------
    tuple (operations, warnings, errors)
        Operations is a list of model operations necessary to apply the
        measurements to the model. See also the `Operations` schema.
        Warnings is a list of human-readable strings of potential issues.
        If errors is not an empty list, it was not possible to apply the
        measurements. Errors then contains a list of string messages describing
        the problem(s).
    """
    operations = []
    warnings = []
    errors = []

    def bounds(measurement, uncertainty):
        """Return resolved bounds based on measurement and uncertainty"""
        if uncertainty:
            return (measurement - uncertainty, measurement + uncertainty)
        else:
            return (measurement, measurement)

    # First, improve the fluxomics dataset by minimizing the distance to a
    # feasible problem. If there is no objective constraint, skip minimization
    # as it can yield unreliable results.
    if growth_rate:
        growth_rate, fluxomics = minimize_distance(model, biomass_reaction,
                                                   growth_rate, fluxomics)

    # Constrain the model with the observed growth rate
    if growth_rate:
        reaction = model.reactions.get_by_id(biomass_reaction)
        reaction.bounds = bounds(growth_rate["measurement"],
                                 growth_rate["uncertainty"])
        operations.append({
            "operation": "modify",
            "type": "reaction",
            "id": reaction.id,
            "data": reaction_to_dict(reaction),
        })

    for measure in fluxomics:
        try:
            reaction = model.reactions.get_by_id(measure["identifier"])
        except KeyError:
            errors.append(
                f"Cannot find reaction '{measure['identifier']}' in the model")
        else:
            reaction.bounds = bounds(measure["measurement"],
                                     measure["uncertainty"])
            operations.append({
                "operation": "modify",
                "type": "reaction",
                "id": reaction.id,
                "data": reaction_to_dict(reaction),
            })

    for metabolomics_measure in metabolomics:
        warning = (
            f"Cannot apply metabolomics measure for '{metabolomics_measure['identifier']}'; "
            f"feature has not yet been implemented")
        warnings.append(warning)
        logger.warning(warning)

    for uptake_rate in uptake_secretion_rates:
        try:
            metabolite = find_metabolite(model, uptake_rate["identifier"],
                                         uptake_rate["namespace"], "e")
        except MetaboliteNotFound as error:
            errors.append(str(error))
            continue

        exchange_reactions = metabolite.reactions.intersection(
            model.exchanges)
        if len(exchange_reactions) != 1:
            # Report the identifier from the measurement itself; `metabolite`
            # is the object resolved from the model, not the measurement dict.
            errors.append(
                f"Measured metabolite '{uptake_rate['identifier']}' has "
                f"{len(exchange_reactions)} exchange reactions in the model; "
                f"expected 1")
            continue
        exchange_reaction = next(iter(exchange_reactions))
        lower_bound, upper_bound = bounds(uptake_rate["measurement"],
                                          uptake_rate["uncertainty"])

        # Data is adjusted assuming a forward exchange reaction, i.e. x -->
        # (coefficient sign = -1). If the reaction is instead written --> x,
        # negate the bounds AND swap them, so that lower <= upper still holds
        # when there is uncertainty, e.g. measurement = 3, uncertainty = 0.3:
        # lb, ub = -1*(3 + 0.3), -1*(3 - 0.3) = -3.3, -2.7
        direction = exchange_reaction.metabolites[metabolite]
        if direction > 0:
            lower_bound, upper_bound = -1 * upper_bound, -1 * lower_bound
        exchange_reaction.bounds = lower_bound, upper_bound
        operations.append({
            "operation": "modify",
            "type": "reaction",
            "id": exchange_reaction.id,
            "data": reaction_to_dict(exchange_reaction),
        })

    for molar_yield in molar_yields:
        warning = (
            f"Cannot apply molar yield measurement for '"
            f"{molar_yield['product_identifier']}/{molar_yield['substrate_identifier']}"
            f"'; feature has not yet been implemented")
        warnings.append(warning)
        logger.warning(warning)

    return operations, warnings, errors
def apply_measurements(
        model,
        biomass_reaction,
        is_ec_model,
        fluxomics,
        metabolomics,
        proteomics,
        uptake_secretion_rates,
        molar_yields,
        growth_rate,
):
    """
    Constrain a metabolic model with omics measurements.

    Measured fluxes (growth rate, fluxomics, uptake/secretion rates) are
    imposed as reaction bounds; proteomics measurements cap the enzyme exchange
    reactions of enzyme-constrained models. Metabolomics and molar yields are
    only reported as warnings.

    Parameters
    ----------
    model: cobra.Model
    biomass_reaction: str
        The id of the biomass reaction in the given model.
    is_ec_model: bool
        A boolean indicating if the model is enzyme-constrained.
    fluxomics: list(dict)
        List of measurements matching the `Fluxomics` schema.
    metabolomics: list(dict)
        List of measurements matching the `Metabolomics` schema.
    proteomics: list(dict)
        List of measurements matching the `Proteomics` schema.
    uptake_secretion_rates: list(dict)
        List of measurements matching the `UptakeSecretionRates` schema.
    molar_yields: list(dict)
        List of measurements matching the `MolarYields` schema.
    growth_rate: dict
        Growth rate, matching the `GrowthRate` schema.

    Returns
    -------
    tuple (operations, warnings, errors)
        Operations is a list of model operations necessary to apply the
        measurements to the model. See also the `Operations` schema.
        Warnings is a list of human-readable strings of potential issues.
        If errors is not an empty list, it was not possible to apply the
        measurements. Errors then contains a list of string messages describing
        the problem(s).
    """
    operations, warnings, errors = [], [], []

    def bounds(measurement, uncertainty):
        """Return resolved bounds based on measurement and uncertainty"""
        if not uncertainty:
            return (measurement, measurement)
        return (measurement - uncertainty, measurement + uncertainty)

    def modify_operation(target):
        """Build the `modify` operation describing the reaction's new state"""
        return {
            "operation": "modify",
            "type": "reaction",
            "id": target.id,
            "data": reaction_to_dict(target),
        }

    def warn(message):
        """Record a warning for the caller and emit it to the log"""
        warnings.append(message)
        logger.warning(message)

    # Without an objective constraint the distance minimization can yield
    # unreliable results, so the fluxomics data is only adjusted towards a
    # feasible problem when a growth rate is available.
    if growth_rate:
        growth_rate, fluxomics = minimize_distance(model, biomass_reaction,
                                                   growth_rate, fluxomics)

    # For an enzyme-constrained model with proteomics, flexibilize the
    # proteomics data and redefine the growth rate based on simulations.
    if growth_rate and proteomics and is_ec_model:
        growth_rate, proteomics, prot_warnings = flexibilize_proteomics(
            model, biomass_reaction, growth_rate, proteomics,
            uptake_secretion_rates)
        for prot_warning in prot_warnings:
            warn(prot_warning)

    # Pin the biomass reaction to the observed growth rate.
    if growth_rate:
        biomass = model.reactions.get_by_id(biomass_reaction)
        biomass.bounds = bounds(growth_rate["measurement"],
                                growth_rate["uncertainty"])
        operations.append(modify_operation(biomass))

    for flux in fluxomics:
        try:
            measured_reaction = model.reactions.get_by_id(flux["identifier"])
        except KeyError:
            errors.append(
                f"Cannot find reaction '{flux['identifier']}' in the model")
            continue
        measured_reaction.bounds = bounds(flux["measurement"],
                                          flux["uncertainty"])
        operations.append(modify_operation(measured_reaction))

    for concentration in metabolomics:
        warn(
            f"Cannot apply metabolomics measure for '{concentration['identifier']}'; "
            f"feature has not yet been implemented")

    for protein in proteomics:
        if not is_ec_model:
            # One warning is enough; the remaining measurements are skipped.
            warn(f"Cannot apply proteomics measurements for "
                 f"non enzyme-constrained model {model.id}")
            break
        try:
            enzyme_exchange = model.reactions.get_by_id(
                f"prot_{protein['identifier']}_exchange")
        except KeyError:
            warn(f"Cannot find protein '{protein['identifier']}' in the model")
            continue
        # measurement only modifies the upper bound (enzymes can be unsaturated)
        _, enzyme_limit = bounds(protein["measurement"],
                                 protein["uncertainty"])
        enzyme_exchange.bounds = 0, enzyme_limit
        operations.append(modify_operation(enzyme_exchange))

    for rate in uptake_secretion_rates:
        try:
            measured_metabolite = find_metabolite(
                model, rate["identifier"], rate["namespace"], "e")
        except MetaboliteNotFound as error:
            errors.append(str(error))
            continue
        exchange_reaction = get_exchange_reaction(
            measured_metabolite, is_ec_model,
            consumption=rate["measurement"] < 0)
        low, high = bounds(rate["measurement"], rate["uncertainty"])

        # data is adjusted assuming a forward exchange reaction, i.e. x -->
        # (sign = -1), so if we instead actually have --> x, then multiply with
        # -1 and flip lower bound and upper bound, to properly adjust for
        # uncertainty, e.g. if measurement = 3 and uncertainty = 0.3, then:
        # lb, ub = -1*(3 + 0.3), -1*(3 - 0.3) = -3.3, -2.7
        coefficient = exchange_reaction.metabolites[measured_metabolite]
        if coefficient > 0:
            low, high = -1 * high, -1 * low
        exchange_reaction.bounds = low, high
        operations.append(modify_operation(exchange_reaction))

    for molar_yield in molar_yields:
        warn(
            f"Cannot apply molar yield measurement for '"
            f"{molar_yield['product_identifier']}/{molar_yield['substrate_identifier']}"
            f"'; feature has not yet been implemented")

    return operations, warnings, errors
def flexibilize_proteomics(model, biomass_reaction, growth_rate, proteomics,
                           uptake_secretion_rates):
    """
    Replace proteomics measurements with a set that enables the model to grow.

    Proteins are removed from the set iteratively based on sensitivity analysis
    (shadow prices).

    Note that `growth_rate` and `proteomics` are modified in place (and also
    returned), and that the bounds of the model's exchange, biomass and protein
    reactions are changed as part of the simulations.

    NOTE(review): this function calls a `bounds(measurement, uncertainty)`
    helper that is not defined in this block; confirm it is available at module
    scope.

    Parameters
    ----------
    model: cobra.Model
        The enzyme-constrained model.
    biomass_reaction: str
        The id of the biomass reaction in the given model.
    growth_rate: dict
        Growth rate, matching the `GrowthRate` schema.
    proteomics: list(dict)
        List of measurements matching the `Proteomics` schema.
    uptake_secretion_rates: list(dict)
        List of measurements matching the `UptakeSecretionRates` schema.

    Returns
    -------
    growth_rate: dict
        New growth rate (will change if the model couldn't grow at the inputted
        value).
    proteomics: list(dict)
        Filtered list of proteomics.
    warnings: list(str)
        List of warnings with all flexibilized proteins.
    """
    warnings = []

    for rate in uptake_secretion_rates:
        try:
            metabolite = find_metabolite(model, rate["identifier"],
                                         rate["namespace"], "e")
        except MetaboliteNotFound:
            # This simulation will not be completed as the adapter will return
            # an error, so the flexibilization can be interrupted:
            return growth_rate, proteomics, warnings
        else:
            exchange_reaction = get_exchange_reaction(
                metabolite, True, consumption=rate["measurement"] < 0)
            # All exchange reactions in an ec_model have only positive fluxes,
            # so we can simply assign the absolute value of the measurement:
            exchange_reaction.bounds = bounds(abs(rate["measurement"]),
                                              rate["uncertainty"])

    # reset growth rate in model:
    model.reactions.get_by_id(biomass_reaction).bounds = (0, 1000)

    # build a table with protein ids, met ids in model and values to constrain
    # with. The rows are collected in plain lists and the frame is created
    # once: growing a DataFrame row by row is quadratic, and `DataFrame.append`
    # is no longer available in pandas >= 2.0.
    protein_ids = []
    met_ids = []
    values = []
    for protein in proteomics:
        protein_id = protein["identifier"]
        _, upper_bound = bounds(protein["measurement"], protein["uncertainty"])
        for met in model.metabolites.query(lambda m: protein_id in m.id):
            protein_ids.append(protein_id)
            met_ids.append(met.id)
            values.append(upper_bound)
    # Passing the columns explicitly keeps "met_id" and "value" present even
    # when no protein matched any metabolite in the model.
    prot_df = pd.DataFrame({"met_id": met_ids, "value": values},
                           index=protein_ids)

    # constrain the model with all proteins and optimize:
    limit_proteins(model, prot_df["value"])
    solution = model.optimize()
    new_growth_rate = solution.objective_value

    # define the minimal growth required by the flexibilization based on the
    # lower bound of the growth rate, plus an extra 5% to ensure feasible
    # simulations later on:
    minimal_growth, _ = bounds(growth_rate["measurement"],
                               growth_rate["uncertainty"])
    minimal_growth *= 1.05

    # while the model cannot grow to the desired level, remove the protein with
    # the highest shadow price:
    prots_to_remove = []
    while new_growth_rate < minimal_growth and not prot_df.empty:
        # get most influential protein in model:
        top_protein = top_shadow_prices(solution, list(prot_df["met_id"]))
        top_protein = top_protein.index[0]
        top_protein = prot_df.index[prot_df["met_id"] == top_protein][0]

        # update data: append protein to list, remove from current dataframe
        # and increase the corresponding upper bound to +1000:
        prots_to_remove.append(top_protein)
        prot_df = prot_df.drop(labels=top_protein)
        limit_proteins(model, pd.Series(data=[1000], index=[top_protein]))
        warning = (
            f"Removed protein '{top_protein}' from the proteomics data for feasible "
            f"simulations")
        warnings.append(warning)

        # re-compute solution:
        solution = model.optimize()
        if solution.objective_value == new_growth_rate:  # the algorithm is stuck
            break
        new_growth_rate = solution.objective_value

    # update growth rate if optimization was not successful:
    if new_growth_rate < minimal_growth:
        if growth_rate["uncertainty"]:
            growth_rate[
                "measurement"] = new_growth_rate + growth_rate["uncertainty"]
        else:
            growth_rate["measurement"] = new_growth_rate

    # update proteomics by removing flexibilized proteins. Slice assignment
    # keeps the caller's list object updated in place, and filtering by
    # identifier cannot fail on a missing entry or leave a duplicate behind.
    removed = set(prots_to_remove)
    proteomics[:] = [
        protein for protein in proteomics
        if protein["identifier"] not in removed
    ]

    return growth_rate, proteomics, warnings