def _try_complete(args):
    """Try to complete the medium for a model."""
    file, med, growth, max_import, mip, w = args
    mod = load_model(file)
    exc = find_external_compartment(mod)
    try:
        fixed = mm.complete_medium(
            mod,
            med,
            growth,
            max_import=max_import,
            minimize_components=mip,
            weights=w,
        )
        added = sum(i not in med.index for i in fixed.index)
        can_grow = True
        logger.info("Could grow `%s` by adding %d imports." % (file, added))
    except OptimizationError:
        fixed = pd.Series(float("nan"), index=med.index)
        added = float("nan")
        can_grow = False
        logger.info("Could not grow `%s`." % file)
    # rewrite external-compartment suffixes to the shared `_m` medium suffix
    fixed.index = [
        re.sub(
            "(_{}$)|([^a-zA-Z0-9 :]{}[^a-zA-Z0-9 :]$)".format(exc, exc),
            "_m",
            rid,
        )
        for rid in fixed.index
    ]
    return (can_grow, added, fixed)
def test_download(tmpdir):
    print(tmpdir.dirpath())
    util.download_model(URL, str(tmpdir))
    assert tmpdir.join("e_coli_core.xml.gz").check()
    model = util.load_model(URL)
    assert len(model.reactions) == 95
    assert len(model.metabolites) == 72
def reaction_matrix(files):
    """Create a matrix of reactions x models."""
    ids = []
    for f in files:
        model = load_model(f)
        ids.extend([(r.id, model.name) for r in model.reactions])
    rlist = pd.DataFrame(ids, columns=["reaction", "id"])
    rlist["value"] = 1
    rlist = rlist.pivot_table(values="value", index="id", columns="reaction")
    return rlist.fillna(0).astype(int)
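

# Illustrative usage sketch (not part of the module): the file paths below are
# hypothetical placeholders. The returned frame has one row per model name and
# one 0/1 column per reaction ID, so shared reactions can be read off directly.
def _example_reaction_matrix():
    model_files = ["e_coli_core.xml.gz", "b_subtilis.xml.gz"]  # placeholder paths
    presence = reaction_matrix(model_files)
    # keep only the reactions present in every model
    return presence.loc[:, presence.all(axis=0)]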
def _grow(args):
    """Get the maximum growth rate under a given medium."""
    file, med = args
    mod = load_model(file)
    good = med[med.index.isin([r.id for r in mod.exchanges])]
    if len(good) == 0:
        logger.warning(
            "Could not find any reactions from the medium in `%s`. "
            "Maybe a mismatch in IDs?" % file
        )
    mod.medium = good
    rate = mod.slim_optimize()
    return rate
def test_join_models():
    single = util.load_model(tax.file[0])
    single_coefs = {
        v.name: coef
        for v, coef in single.objective.get_linear_coefficients(
            single.variables
        ).items()
    }
    mod = util.join_models(tax.file, id="test_model")
    coefs = {
        v.name: coef
        for v, coef in mod.objective.get_linear_coefficients(
            mod.variables
        ).items()
    }
    assert len(mod.reactions) == len(single.reactions) + 1  # added biomass
    assert len(mod.metabolites) == len(single.metabolites)
    assert np.allclose(single.slim_optimize(), mod.slim_optimize())
def _fix_medium(args):
    """Get the fixed medium for a model."""
    mid, file, medium, min_growth, max_import, min_c = args
    model = load_model(file)
    for r in model.reactions:
        r.id = clean_ids(r.id)
    try:
        fixed = complete_medium(
            model,
            medium,
            min_growth=min_growth,
            max_import=max_import,
            minimize_components=min_c,
        )
    except Exception:
        fixed = medium.copy()
    if model.solver.status != OPTIMAL:
        logger.warning(
            "Can't reach the specified growth rate for model %s." % mid)
        fixed = medium.copy()
    fixed.name = mid
    return fixed
def test_load_model():
    row = tax.loc[0]
    model = util.load_model(row.file)
    assert len(model.reactions) == 95
    assert len(model.metabolites) == 72
def __init__(self, taxonomy, id=None, name=None, rel_threshold=1e-6,
             solver=None, progress=True, max_exchange=100, mass=1):
    """Create a new community object.

    `micom` builds a community from a taxonomy which, in its simplest form,
    may simply be a list of model files. Usually, the taxonomy will contain
    additional information such as annotations for the individuals (for
    instance phylum, organism or species) and abundances.

    Notes
    -----
    `micom` will automatically add exchange fluxes and a community objective
    maximizing the overall growth rate of the community.

    Parameters
    ----------
    taxonomy : pandas.DataFrame
        The taxonomy used for building the model. Must have at least the two
        columns "id" and "file" which specify an ID and the filepath for each
        model. Valid file extensions are ".pickle", ".xml", ".xml.gz" and
        ".json". If the taxonomy includes a column named "abundance" it will
        be used to quantify each individual in the community. If absent,
        `micom` will assume all individuals are present in the same amount.
    id : str, optional
        The ID for the community. Should only contain letters and numbers,
        otherwise it will be formatted as such.
    name : str, optional
        The name for the community.
    rel_threshold : float < 1, optional
        The relative abundance threshold that will be used. Describes the
        smallest relative amount of an individual that will be considered
        non-zero. All individuals with a smaller relative amount will be
        omitted.
    solver : str, optional
        Which solver to use. Will default to cplex if available which is
        better suited for large problems.
    progress : bool, optional
        Show a progress bar.
    max_exchange : positive float, optional
        During model construction exchange reactions are duplicated into
        internal and external exchange reactions. This specifies the new
        import flux bound for the *internal* exchange reaction. Import rates
        for the exchanges between the medium and outside are still
        maintained.
    mass : positive float, optional
        The total mass of the community in gDW. Used to adjust import fluxes
        which are assumed to be given as mmol/gDW*h for the entire community.
        As a consequence all import fluxes will be divided by that number.

    Attributes
    ----------
    species : list
        A list of species IDs in the community.
""" super(Community, self).__init__(id, name) logger.info("building new micom model {}.".format(id)) if not solver: self.solver = ("cplex" if "cplex" in cobra.util.solver.solvers else "glpk") else: self.solver = solver adjust_solver_config(self.solver) if not (isinstance(taxonomy, pd.DataFrame) and all(col in taxonomy.columns for col in _taxonomy_cols)): raise ValueError("`taxonomy` must be a pandas DataFrame with at" "least columns id and file :(") self._rtol = rel_threshold self._modification = None self.mass = mass taxonomy = taxonomy.copy() if "abundance" not in taxonomy.columns: taxonomy["abundance"] = 1 taxonomy.abundance /= taxonomy.abundance.sum() logger.info("{} individuals with abundances below threshold".format( (taxonomy.abundance <= self._rtol).sum())) taxonomy = taxonomy[taxonomy.abundance > self._rtol] if taxonomy.id.str.contains(r"[^A-Za-z0-9_]", regex=True).any(): logger.warning("taxonomy IDs contain prohibited characters and" " will be reformatted") taxonomy.id = taxonomy.id.replace([r"[^A-Za-z0-9_\s]", r"\s+"], ["", "_"], regex=True) self.__taxonomy = taxonomy self.__taxonomy.index = self.__taxonomy.id obj = Zero self.species = [] index = self.__taxonomy.index index = tqdm(index, unit="models") if progress else index for idx in index: row = self.__taxonomy.loc[idx] if isinstance(row.file, list): if len(row.file) > 1: model = join_models(row.file) logger.info("joined {} models".format(len(row.file))) else: model = load_model(row.file[0]) else: model = load_model(row.file) suffix = "__" + idx.replace(" ", "_").strip() logger.info("converting IDs for {}".format(idx)) for r in model.reactions: r.global_id = re.sub("__\\d__", "_", r.id).strip(" _-") r.id = r.global_id + suffix r.community_id = idx for m in model.metabolites: m.global_id = re.sub("__\\d+__", "_", m.id).strip(" _-") m.id = m.global_id + suffix m.compartment += suffix m.community_id = idx logger.info("adding reactions for {} to community".format(idx)) self.add_reactions(model.reactions) o = self.solver.interface.Objective.clone(model.objective, model=self.solver) obj += o.expression * row.abundance self.species.append(idx) species_obj = self.problem.Constraint(o.expression, name="objective_" + idx, lb=0.0) self.add_cons_vars([species_obj]) self.__add_exchanges(model.reactions, row, internal_exchange=max_exchange) self.solver.update() # to avoid dangling refs due to lazy add com_obj = add_var_from_expression(self, "community_objective", obj, lb=0) self.objective = self.problem.Objective(com_obj, direction="max")
def __init__(
    self,
    taxonomy,
    model_db=None,
    id=None,
    name=None,
    rel_threshold=1e-6,
    solver=None,
    progress=True,
    max_exchange=100,
    mass=1,
):
    """Create a new community object.

    `micom` builds a community from a taxonomy which, in its simplest form,
    may simply be a list of model files. Usually, the taxonomy will contain
    additional information such as annotations for the individuals (for
    instance phylum, organism or species) and abundances.

    The recommended way to build a micom model is to supply a quantification
    of taxa (called "taxonomy" here) which specifies the taxonomic ranks for
    a taxon and its abundance, and a model database for a specific rank (for
    instance "genus"). MICOM will match the ranks from your taxonomy to the
    model database and assemble the community models from that. You will
    also get information about the construction process by calling
    `Community.build_metrics`.

    The most customizable way only takes a single table where summarization
    and matching to the reference database has already occurred. In this
    case you will also provide paths to model files for each taxon. This is
    the "old" way but may still be applicable if you want to use a custom
    database or want full control of matching your data to reference models.

    Notes
    -----
    `micom` will automatically add exchange fluxes and a community objective
    maximizing the overall growth rate of the community.

    Parameters
    ----------
    taxonomy : pandas.DataFrame
        The taxonomy used for building the model. Must have at least the
        column "id". If no model database is specified in the next argument
        it furthermore requires a column "file" which specifies a filepath
        for each model. Valid file extensions are ".pickle", ".xml",
        ".xml.gz" and ".json". If a model database is specified this must
        contain at least a column with the same name as the rank used in the
        model database. Thus, for a genus-level database you will need a
        column `genus`. Additional taxa ranks can also be specified and will
        be used to be more stringent in taxa matching. Finally, the taxonomy
        should contain a column `abundance`. It will be used to quantify
        each individual in the community. If absent, MICOM will assume all
        individuals are present in the same amount.
    model_db : str
        A pre-built model database. If ending in `.qza` must be a Qiime 2
        artifact of type `MetabolicModels[JSON]`. Can also be a folder, a
        zip file (must end in `.zip`), or None if the taxonomy contains a
        column `file`.
    id : str, optional
        The ID for the community. Should only contain letters and numbers,
        otherwise it will be formatted as such.
    name : str, optional
        The name for the community.
    rel_threshold : float < 1, optional
        The relative abundance threshold that will be used. Describes the
        smallest relative amount of an individual that will be considered
        non-zero. All individuals with a smaller relative amount will be
        omitted.
    solver : str, optional
        Which solver to use. Will default to cplex if available which is
        better suited for large problems.
    progress : bool, optional
        Show a progress bar.
    max_exchange : positive float, optional
        During model construction exchange reactions are duplicated into
        internal and external exchange reactions. This specifies the new
        import flux bound for the *internal* exchange reaction. Import rates
        for the exchanges between the medium and outside are still
        maintained.
    mass : positive float, optional
        The total mass of the community in gDW. Used to adjust import fluxes
        which are assumed to be given as mmol/gDW*h for the entire community.
        As a consequence all import fluxes will be divided by that number.

    Attributes
    ----------
    taxa : list
        A list of taxa IDs in the community.
    """
    super(Community, self).__init__(id, name)
    logger.info("building new micom model {}.".format(id))
    if not solver:
        solver = [
            s for s in ["cplex", "osqp", "gurobi", "glpk"]
            if s in cobra.util.solver.solvers
        ][0]
    logger.info("using the %s solver." % solver)
    if solver == "glpk":
        logger.warning(
            "No QP solver found, will use GLPK. A lot of functionality "
            "in MICOM will require a QP solver :/")
    self.solver.configuration.lp_method = "auto"
    self.solver.configuration.qp_method = "auto"
    self.solver.configuration.presolve = False
    self.solver = solver
    self._rtol = rel_threshold
    self._modification = None
    self.mass = mass
    self.__db_metrics = None
    adjust_solver_config(self.solver)
    taxonomy = taxonomy.copy()
    if "abundance" not in taxonomy.columns:
        taxonomy["abundance"] = 1
    taxonomy.abundance /= taxonomy.abundance.sum()
    logger.info("{} individuals with abundances below threshold".format(
        (taxonomy.abundance <= self._rtol).sum()))
    taxonomy = taxonomy[taxonomy.abundance > self._rtol]
    if not (isinstance(taxonomy, pd.DataFrame) and "id" in taxonomy.columns):
        raise ValueError("`taxonomy` must be a pandas DataFrame with at "
                         "least a column `id` :(")
    if model_db is None and "file" not in taxonomy.columns:
        raise ValueError(
            "If no model database is specified you need to pass "
            "file names for models in a `file` column as well.")
    compressed = False
    if model_db is not None:
        compressed = model_db.endswith(".qza") or model_db.endswith(".zip")
        if compressed:
            tdir = TemporaryDirectory(prefix="micom_")
        if "file" in taxonomy.columns:
            del taxonomy["file"]
        if model_db.endswith(".qza"):
            manifest = load_qiime_model_db(model_db, tdir.name)
        elif model_db.endswith(".zip"):
            manifest = load_zip_model_db(model_db, tdir.name)
        else:
            manifest = load_manifest(model_db)
        rank = manifest["summary_rank"][0]
        if rank not in taxonomy.columns:
            raise ValueError(
                "Missing the column `%s` from the taxonomy." % rank)
        keep_cols = [
            r for r in _ranks[0:(_ranks.index(rank) + 1)]
            if r in taxonomy.columns and r in manifest.columns
        ]
        manifest = manifest[keep_cols + ["file"]]
        merged = pd.merge(taxonomy, manifest, on=keep_cols)
        self.__db_metrics = pd.Series({
            "found_taxa": merged.shape[0],
            "total_taxa": taxonomy.shape[0],
            "found_fraction": merged.shape[0] / taxonomy.shape[0],
            "found_abundance_fraction": merged.abundance.sum(),
        })
        logger.info(
            "Matched %g%% of total abundance in model DB." %
            (100.0 * self.__db_metrics["found_abundance_fraction"]))
        if self.__db_metrics["found_abundance_fraction"] < 0.5:
            logger.warning(
                "Less than 50%% of the abundance could be matched to the "
                "model database. "
                "Model `%s` may not be representative "
                "of the sample" % self.id
            )
        taxonomy = merged
        taxonomy["abundance"] /= taxonomy["abundance"].sum()
    if taxonomy.id.str.contains(r"[^A-Za-z0-9_]", regex=True).any():
        logger.warning("Taxa IDs contain prohibited characters and"
                       " will be reformatted.")
        taxonomy.id = taxonomy.id.replace(
            r"[^A-Za-z0-9_\s]+", "_", regex=True)
    self.__taxonomy = taxonomy
    self.__taxonomy.index = self.__taxonomy.id

    obj = Zero
    self.taxa = []
    index = self.__taxonomy.index
    index = tqdm(index, unit="models") if progress else index
    for idx in index:
        row = self.__taxonomy.loc[idx]
        if isinstance(row.file, list):
            if len(row.file) > 1:
                model = join_models(row.file)
                logger.info("joined {} models".format(len(row.file)))
            else:
                model = load_model(row.file[0])
        else:
            model = load_model(row.file)
        suffix = "__" + idx.replace(" ", "_").strip()
        logger.info("converting IDs for {}".format(idx))
        external = cobra.medium.find_external_compartment(model)
        logger.info(
            "Identified %s as the external compartment for %s. "
            "If that is wrong you may be in trouble..." % (external, idx))
        for r in model.reactions:
            r.global_id = clean_ids(r.id)
            r.id = r.global_id + suffix
            r.community_id = idx
            # avoids https://github.com/opencobra/cobrapy/issues/926
            r._compartments = None
            # SBO terms may not be maintained
            if "sbo" in r.annotation:
                del r.annotation["sbo"]
        for m in model.metabolites:
            m.global_id = clean_ids(m.id)
            m.id = m.global_id + suffix
            m.compartment += suffix
            m.community_id = idx
        logger.info("adding reactions for {} to community".format(idx))
        self.add_reactions(model.reactions)
        o = self.solver.interface.Objective.clone(model.objective,
                                                  model=self.solver)
        obj += o.expression * row.abundance
        self.taxa.append(idx)
        taxa_obj = self.problem.Constraint(
            o.expression, name="objective_" + idx, lb=0.0)
        self.add_cons_vars([taxa_obj])
        self.__add_exchanges(
            model.reactions,
            row,
            external_compartment=external,
            internal_exchange=max_exchange,
        )
        self.solver.update()  # to avoid dangling refs due to lazy add
    if compressed:
        tdir.cleanup()

    com_obj = add_var_from_expression(
        self, "community_objective", obj, lb=0)
    self.objective = self.problem.Objective(com_obj, direction="max")
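

# Illustrative sketch (not part of the package): uses the database-backed path
# described in the docstring above. The `.qza` path is a hypothetical
# placeholder for a genus-level model database, so the taxonomy needs a
# `genus` column in addition to `id` and `abundance`.
def _example_build_from_database():
    import pandas as pd
    from micom import Community

    taxonomy = pd.DataFrame({
        "id": ["taxon_1", "taxon_2"],
        "genus": ["Escherichia", "Bacteroides"],
        "abundance": [0.7, 0.3],
    })
    com = Community(taxonomy, model_db="genus_models.qza", progress=False)
    # `Community.build_metrics` (see the docstring above) reports how much of
    # the sample abundance could be matched to the database
    return com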
def _annotate(f):
    """Get annotation for a model."""
    mod = load_model(f)
    return annotate_metabolites_from_exchanges(mod)