def set_abundance(self, value, normalize=True):
    """Change abundances for one or more taxa.

    Parameters
    ----------
    value : array-like object
        The new abundances. Must contain one value for each taxon. Can
        be a named object like a pandas Series.
    normalize : boolean, optional
        Whether to normalize the abundances to a total of 1.0. Many things
        in micom assume that this is always the case. Only change this
        if you know what you are doing :O

    Raises
    ------
    ValueError
        If `value` can not be assigned to the abundance column.
    """
    try:
        self.__taxonomy.abundance = value
    except Exception:
        raise ValueError("value must be an iterable with an entry for "
                         "each species/tissue")

    logger.info("setting new abundances for %s" % self.id)
    if normalize:
        total = self.__taxonomy.abundance.sum()
        self.__taxonomy.abundance /= total
        # Read the column again *after* normalizing. A handle taken before
        # the division is only up to date when pandas hands out a view of
        # the column; with copy semantics it would still hold the raw
        # values and the tolerance check below would use the wrong numbers.
        ab = self.__taxonomy.abundance
        small = ab < self._rtol
        logger.info("adjusting abundances for %s to %g" %
                    (str(self.__taxonomy.index[small]), self._rtol))
        self.__taxonomy.loc[small, "abundance"] = self._rtol
    self.__update_exchanges()
    self.__update_community_objective()
def _growth(args):
    """Worker: growth rates and exchange fluxes for one pickled community.

    `args` is a tuple `(p, tradeoff, medium)`: the pickle to load, the
    tradeoff fraction and the medium (indexed by exchange reaction id).
    Returns a dict with the keys "growth" and "exchanges" or None if the
    first cooperative tradeoff could not be solved.
    """
    p, tradeoff, medium = args
    com = load_pickle(p)

    # Only medium components with a matching exchange can be applied.
    exchange_ids = [rxn.id for rxn in com.exchanges]
    logger.info(
        "%d/%d import reactions found in model.",
        medium.index.isin(exchange_ids).sum(),
        len(medium),
    )
    com.medium = medium[medium.index.isin(exchange_ids)]

    # Get growth rates
    try:
        sol = com.cooperative_tradeoff(fraction=tradeoff)
        rates = sol.members
        rates["taxon"] = rates.index
        rates["tradeoff"] = tradeoff
        rates["sample_id"] = com.id
    except Exception:
        logger.warning("Could not solve cooperative tradeoff for %s." % com.id)
        return None

    # Get the minimal medium at 95% of the community growth rate
    minimal = minimal_medium(com, 0.95 * sol.growth_rate)

    # Apply the (strictly positive part of the) medium and reoptimize
    com.medium = minimal[minimal > 0]
    sol = com.cooperative_tradeoff(fraction=tradeoff, fluxes=True, pfba=False)
    is_exchange = sol.fluxes.columns.str.startswith("EX_")
    exchange_fluxes = sol.fluxes.loc[:, is_exchange].copy()
    exchange_fluxes["sample_id"] = com.id
    return {"growth": rates, "exchanges": exchange_fluxes}
def _apply_min_growth(community, min_growth, atol=1e-6, rtol=1e-6):
    """Set minimum growth constraints on a model.

    Will integrate with the context: if a context is active the previous
    lower bound of every `objective_<taxon>` constraint is restored (and the
    upper bound removed) when the context exits.

    Arguments
    ---------
    community : micom.Community
        The community whose growth constraints are modified.
    min_growth : indexable by taxon id
        The requested minimum growth rate for each taxon.
    atol : float
        Requests not larger than this are treated as zero. Also subtracted
        from the applied lower bound.
    rtol : float
        Relative slack; the lower bound is scaled by `1 - rtol`.
    """
    context = get_context(community)

    def reset(taxon, lb):
        logger.info("resetting growth rate constraint for %s" % taxon)
        constraint = community.constraints["objective_" + taxon]
        constraint.ub = None
        constraint.lb = lb

    for taxon in community.taxa:
        logger.info("setting growth rate constraint for %s" % taxon)
        constraint = community.constraints["objective_" + taxon]
        if context:
            # Remember the bound as it is *now* so it can be restored later.
            context(partial(reset, taxon, constraint.lb))
        requested = min_growth[taxon]
        if requested > atol:
            constraint.lb = (1.0 - rtol) * requested - atol
            continue
        logger.info("minimal growth rate smaller than tolerance, setting to zero.")
        constraint.lb = 0
def solve(community, fluxes=True, pfba=True, raise_error=False, atol=1e-6, rtol=1e-6):
    """Get all fluxes stratified by taxa.

    Optimizes the community and wraps the result in a `CommunitySolution`.

    Arguments
    ---------
    community : micom.Community
        The community to optimize.
    fluxes : boolean
        Whether to return a full solution. If False a slim solution is
        created.
    pfba : boolean
        Whether to add the pFBA objective and optimize a second time. Only
        used if `fluxes` is True.
    raise_error : boolean
        Whether an acceptable but non-optimal solver status raises an
        `OptimizationError` instead of only being logged.
    atol, rtol : float
        Tolerances passed on to `add_pfba_objective`.

    Returns
    -------
    CommunitySolution or None
        The solution or None if the solver status is not acceptable.
    """
    community.solver.optimize()
    status = community.solver.status

    # Guard: anything outside the accepted statuses is a failure.
    if status not in good:
        logger.warning("solver encountered an error %s" % status)
        return None

    if status != OPTIMAL:
        if raise_error:
            raise OptimizationError("solver returned the status %s." % status)
        logger.info("solver returned the status %s," % status + " returning the solution anyway.")

    if fluxes and pfba:
        add_pfba_objective(community, atol, rtol)
        community.solver.optimize()

    if not fluxes:
        return CommunitySolution(community, slim=True)
    return CommunitySolution(community)
def _try_complete(args):
    """Try to complete the medium for a model.

    `args` is a tuple `(file, med, growth, max_import, mip, w)`. Returns a
    tuple `(can_grow, added, fixed)` where `fixed` is the completed medium
    with the external compartment suffix of each index entry replaced by
    `_m`. If the completion fails with an `OptimizationError` `fixed` is
    all NaN, `added` is NaN and `can_grow` is False.
    """
    model_file, med, growth, max_import, mip, w = args
    model = load_model(model_file)
    external = find_external_compartment(model)

    try:
        fixed = mm.complete_medium(
            model,
            med,
            growth,
            max_import=max_import,
            minimize_components=mip,
            weights=w,
        )
        added = sum(rid not in med.index for rid in fixed.index)
        can_grow = True
        logger.info("Could grow `%s` by adding %d import." % (model_file, added))
    except OptimizationError:
        nan = float("nan")
        fixed = pd.Series(nan, index=med.index)
        added = nan
        can_grow = False
        logger.info("Could not grow `%s`." % model_file)

    # Compile the suffix pattern once and apply it to every reaction id.
    suffix = re.compile(
        "(_{}$)|([^a-zA-Z0-9 :]{}[^a-zA-Z0-9 :]$)".format(external, external)
    )
    fixed.index = [suffix.sub("_m", rid) for rid in fixed.index]
    return (can_grow, added, fixed)
def _tradeoff(args):
    """Worker: growth rates across several tradeoff values for one sample.

    `args` is a tuple `(p, tradeoffs, medium)`. The first set of rates comes
    from a plain `optimize()` and gets a NaN tradeoff, followed by one set
    per cooperative tradeoff solution. Returns the concatenated growth
    rates without the "medium" rows or None if the cooperative tradeoff
    raised.
    """
    p, tradeoffs, medium = args
    com = load_pickle(p)

    exchange_ids = [rxn.id for rxn in com.exchanges]
    logger.info(
        "%d/%d import reactions found in model.",
        medium.index.isin(exchange_ids).sum(),
        len(medium),
    )
    com.medium = medium[medium.index.isin(exchange_ids)]

    def label(members, tradeoff):
        # Annotates the member table in place, same columns for every entry.
        members["taxon"] = members.index
        members["tradeoff"] = tradeoff
        members["sample_id"] = com.id
        return members

    collected = [label(com.optimize().members, np.nan)]

    # Get growth rates
    try:
        sol = com.cooperative_tradeoff(fraction=tradeoffs)
    except Exception as e:
        logger.warning("Sample %s could not be optimized\n %s" % (com.id, str(e)))
        return None

    for i, single in enumerate(sol.solution):
        members = single.members
        collected.append(label(members, sol.tradeoff[i]))

    combined = pd.concat(collected)
    return combined[combined.taxon != "medium"]
def serialize_models(files, dir="."):
    """Convert several models to Python pickles.

    Arguments
    ---------
    files : iterable of str
        Paths of the model files to convert. Each one is read with
        `load_model`.
    dir : str
        The output directory. Each model is written to
        `<dir>/<basename up to the first dot>.pickle`.
    """
    for f in files:
        fname = path.basename(f).split(".")[0]
        model = load_model(f)
        logger.info("serializing {}".format(f))
        # The context manager guarantees the pickle is flushed and the file
        # handle closed even if `pickle.dump` raises.
        with open(path.join(dir, fname + ".pickle"), "wb") as out:
            pickle.dump(model, out, protocol=2)  # required for Python 2 compat
def euclidean(inclusion):
    """Calculate euclidean distances for a community.

    Returns the matrix of pairwise euclidean distances between the rows of
    the 2D input `inclusion`.
    """
    logger.info("calculating euclidean distance for {}x{} input matrix".format(
        *inclusion.shape))

    def squared_distances(row):
        # Squared distance of one row to every row of the input.
        return ((row - inclusion)**2).sum(1)

    squared = np.apply_along_axis(squared_distances, 1, inclusion)
    return np.sqrt(squared)
def _growth(args):
    """Worker: growth rates, exchange fluxes and annotations for one sample.

    `args` is a tuple `(p, tradeoff, medium, weights, atol, rtol)`. If
    `atol` or `rtol` are None the solver's optimality tolerance is used.

    Returns
    -------
    dict or None
        A dict with the keys "growth", "exchanges" and "annotations" or
        None if the model uses GLPK, the cooperative tradeoff fails or no
        minimal medium could be found.
    """
    p, tradeoff, medium, weights, atol, rtol = args
    # Unpickle only once; loading a community model is expensive.
    com = load_pickle(p)

    if atol is None:
        atol = com.solver.configuration.tolerances.optimality
    if rtol is None:
        rtol = com.solver.configuration.tolerances.optimality

    if "glpk" in interface_to_str(com.solver.interface):
        logger.error(
            "Community models were not built with a QP-capable solver. "
            "This means that you did not install CPLEX or Gurobi. "
            "If you did install one of the two please file a bug report "
            "at https://github.com/micom-dev/micom/issues."
        )
        return None

    ex_ids = [r.id for r in com.exchanges]
    logger.info(
        "%d/%d import reactions found in model.",
        medium.index.isin(ex_ids).sum(),
        len(medium),
    )
    com.medium = medium[medium.index.isin(ex_ids)]

    # Get growth rates
    try:
        sol = com.cooperative_tradeoff(fraction=tradeoff)
        rates = sol.members
        rates["taxon"] = rates.index
        rates["tradeoff"] = tradeoff
        rates["sample_id"] = com.id
    except Exception:
        logger.warning(
            "Could not solve cooperative tradeoff for %s. "
            "This can often be fixed by choosing more permissive atol and "
            "rtol arguments." % com.id)
        return None

    # Get the minimal medium and the solution at the same time
    result = minimal_medium(
        com,
        exchanges=None,
        community_growth=sol.growth_rate,
        min_growth=rates.growth_rate.drop("medium"),
        solution=True,
        weights=weights,
        atol=atol,
        rtol=rtol,
    )
    if result is None:
        # `minimal_medium` signals a failed optimization with None, which
        # must not be subscripted.
        logger.warning("Could not find a minimal medium for %s." % com.id)
        return None
    sol = result["solution"]

    fluxes = sol.fluxes.loc[:, sol.fluxes.columns.str.startswith("EX_")].copy()
    fluxes["sample_id"] = com.id
    fluxes["tolerance"] = atol
    anns = annotate_metabolites_from_exchanges(com)
    return {"growth": rates, "exchanges": fluxes, "annotations": anns}
def jaccard(inclusion):
    """Calculate jaccard distances for a community.

    Returns one minus the ratio of shared (`&`) to combined (`|`) entries
    for each pair of rows of the 2D input `inclusion`.
    """
    logger.info("calculating jaccard distance for {}x{} input matrix".format(
        *inclusion.shape))

    def shared(row):
        return (row & inclusion).sum(1)

    def combined(row):
        return (row | inclusion).sum(1)

    intersection = np.apply_along_axis(shared, 1, inclusion)
    union = np.apply_along_axis(combined, 1, inclusion)
    similarity = intersection / union
    return 1 - similarity
def reset_solver(community):
    """Reset the solver.

    The action depends on the solver interface: CPLEX is switched to the
    `network` LP method, Gurobi problems are reset and GLPK gets a new
    advanced basis. Other interfaces are left untouched.
    """
    interface = interface_to_str(community.solver.interface)
    logger.info("resetting solver, hoping for the best.")

    if interface == "cplex":
        logger.warning("switching cplex LP algorithm to `network`.")
        community.solver.configuration.lp_method = "network"
        return
    if interface == "gurobi":
        community.solver.problem.reset()
        return
    if interface == "glpk":
        glp_adv_basis(community.solver.problem, 0)
def __update_exchanges(self):
    """Update exchanges.

    For every non-boundary reaction touching a metabolite of the "m"
    compartment the metabolite's coefficient is overwritten with the
    abundance of the reaction's taxon: positive if the metabolite is a
    product of the reaction, negative otherwise.
    """
    logger.info("updating exchange reactions for %s" % self.id)

    def in_medium(metabolite):
        return metabolite.compartment == "m"

    for met in self.metabolites.query(in_medium):
        for rxn in met.reactions:
            if not rxn.boundary:
                abundance = self.__taxonomy.loc[rxn.community_id, "abundance"]
                signed = abundance if met in rxn.products else -abundance
                # combine=False replaces the coefficient instead of adding.
                rxn.add_metabolites({met: signed}, combine=False)
def reset_solver(community):
    """Reset the solver.

    The action depends on the solver interface: for CPLEX the LP method is
    set to `network` and then to `barrier`, Gurobi and OSQP problems are
    reset and GLPK gets a new advanced basis. Other interfaces are left
    untouched.
    """
    interface = interface_to_str(community.solver.interface)
    logger.info("resetting solver, hoping for the best.")

    if interface == "cplex":
        # NOTE(review): both assignments are kept in their original order;
        # presumably switching the method is what triggers the reset.
        for lp_method in ("network", "barrier"):
            community.solver.configuration.lp_method = lp_method
        return
    if interface in ("gurobi", "osqp"):
        community.solver.problem.reset()
        return
    if interface == "glpk":
        glp_adv_basis(community.solver.problem, 0)
def check_db_medium(model_db, medium, threads=1):
    """Check growth on a medium for all models in a database.

    Arguments
    ---------
    model_db : str
        A pre-built model database. If ending in `.qza` must be a Qiime 2
        artifact of type `MetabolicModels[JSON]`. Can also be a zip file
        (must end in `.zip`) or anything accepted by `load_manifest`.
    medium : pd.DataFrame
        A growth medium. Must have columns "reaction" and "flux" denoting
        exchange reactions and their respective maximum flux. Can not be
        sample specific.
    threads : int >=1
        The number of parallel workers to use. As a rule of thumb you will
        need around 1GB of RAM for each thread.

    Returns
    -------
    pd.DataFrame
        Returns an annotated manifest file with a column `can_grow` that
        tells you whether the model can grow on the medium (growth rate
        larger than 1e-6), and a column `growth_rate` that gives the growth
        rate.
    """
    medium = process_medium(medium, ["dummy"])
    medium.index = medium.global_id

    is_qza = model_db.endswith(".qza")
    is_zip = model_db.endswith(".zip")
    # Archives are unpacked into a temporary directory that has to live
    # until all workers are done reading the model files.
    tdir = TemporaryDirectory(prefix="micom_") if (is_qza or is_zip) else None
    try:
        if is_qza:
            manifest = load_qiime_model_db(model_db, tdir.name)
        elif is_zip:
            manifest = load_zip_model_db(model_db, tdir.name)
        else:
            manifest = load_manifest(model_db)
        rank = manifest["summary_rank"][0]
        logger.info("Checking %d %s-level models on a medium with %d components." %
                    (manifest.shape[0], rank, len(medium)))

        args = [(f, medium.flux) for f in manifest.file]
        results = workflow(_grow, args, threads)
        manifest["growth_rate"] = results
        manifest["can_grow"] = (
            manifest.growth_rate.notna() & (manifest.growth_rate > 1e-6)
        )
    finally:
        # Clean up even if loading or one of the workers failed.
        if tdir is not None:
            tdir.cleanup()

    return manifest
def __update_community_objective(self):
    """Update the community objective.

    Replaces the `community_objective_equality` constraint so that the
    community objective variable equals the abundance-weighted sum of the
    individual `objective_<species>` expressions.
    """
    logger.info("updating the community objective for %s" % self.id)
    community_var = self.variables.community_objective

    # Drop the outdated equality before building its replacement.
    outdated = self.constraints.community_objective_equality
    self.remove_cons_vars([outdated])

    weighted_sum = Zero
    for species in self.species:
        abundance = self.__taxonomy.loc[species, "abundance"]
        expression = self.constraints["objective_" + species].expression
        weighted_sum = weighted_sum + abundance * expression

    equality = self.problem.Constraint(
        (community_var - weighted_sum).expand(),
        lb=0,
        ub=0,
        name="community_objective_equality",
    )
    self.add_cons_vars([equality])
def db_annotations(
    model_db,
    threads=1,
):
    """Get metabolite annotations from a model DB.

    Arguments
    ---------
    model_db : str
        A pre-built model database. If ending in `.qza` must be a Qiime 2
        artifact of type `MetabolicModels[JSON]`. Can also be a zip file
        (must end in `.zip`) or anything accepted by `load_manifest`.
    threads : int >=1
        The number of parallel workers to use. As a rule of thumb you will
        need around 1GB of RAM for each thread.

    Returns
    -------
    pd.DataFrame
        The de-duplicated annotations collected from all models.
    """
    is_qza = model_db.endswith(".qza")
    is_zip = model_db.endswith(".zip")
    # Archives are unpacked into a temporary directory that has to live
    # until all workers are done reading the model files.
    tdir = TemporaryDirectory(prefix="micom_") if (is_qza or is_zip) else None
    try:
        if is_qza:
            manifest = load_qiime_model_db(model_db, tdir.name)
        elif is_zip:
            manifest = load_zip_model_db(model_db, tdir.name)
        else:
            manifest = load_manifest(model_db)
        rank = manifest["summary_rank"][0]
        logger.info("Getting annotations from %d %s-level models ." %
                    (manifest.shape[0], rank))

        args = manifest.file.tolist()
        results = workflow(_annotate, args, threads)
        anns = pd.concat(results).drop_duplicates()
    finally:
        # Clean up even if loading or one of the workers failed.
        if tdir is not None:
            tdir.cleanup()

    return anns
def knockout_taxa(
    community, taxa, fraction, method, progress, diag=True
):
    """Knockout a taxon from the community.

    Arguments
    ---------
    community : micom.Community
        The community to run the knockouts in. All changes are reverted
        when the function returns.
    taxa : iterable of str
        The taxa to knock out, one after the other.
    fraction : float
        The tradeoff passed to `optimize_with_fraction` and used to scale
        the minimal community growth rate for the L2 regularization.
    method : str
        If it contains "change" the growth rates before the knockout are
        subtracted. If it contains "relative" the result is additionally
        divided by the growth rates before the knockout.
    progress : boolean
        Whether to show a progress bar.
    diag : boolean
        Whether to keep the diagonal. If False it is set to NaN.

    Returns
    -------
    pandas.DataFrame
        One row per knocked out taxon and one column per community member
        (without "medium"), both sorted.
    """
    with community as com:
        check_modification(com)
        min_growth = _format_min_growth(0.0, com.taxa)
        _apply_min_growth(com, min_growth)

        com.objective = com.scale * com.variables.community_objective
        community_min_growth = (
            optimize_with_retry(com, "could not get community growth rate.")
            / com.scale
        )

        regularize_l2_norm(com, fraction * community_min_growth)
        old = com.optimize().members["growth_rate"]
        results = []

        # Only the iteration is wrapped, `taxa` itself stays untouched so it
        # can be used as the index of the result.
        iterator = track(taxa, description="Knockouts") if progress else taxa
        for sp in iterator:
            with com:
                logger.info("getting growth rates for " "%s knockout." % sp)
                for r in com.reactions.query(lambda ri: ri.community_id == sp):
                    r.knock_out()

                sol = optimize_with_fraction(com, fraction)
                new = sol.members["growth_rate"]
                if "change" in method:
                    new = new - old
                if "relative" in method:
                    new = new / old
                results.append(new)

        # `axis` has to be passed by keyword, the positional form is gone
        # in pandas >= 2.
        ko = pd.DataFrame(results, index=taxa).drop("medium", axis=1)
        ko = ko.loc[ko.index.sort_values(), ko.columns.sort_values()]
        if not diag:
            # `ko.values` may be a copy or a read-only view, so fill an
            # explicit copy and rebuild the frame instead of writing through.
            values = ko.values.copy()
            np.fill_diagonal(values, np.nan)
            ko = pd.DataFrame(values, index=ko.index, columns=ko.columns)

        return ko
def knockout_species(community, species, fraction, method, progress, diag=True):
    """Knockout a species from the community.

    Arguments
    ---------
    community : micom.Community
        The community to run the knockouts in. All changes are reverted
        when the function returns.
    species : iterable of str
        The species to knock out, one after the other.
    fraction : float
        The tradeoff passed to `optimize_with_fraction` and used to scale
        the minimal community growth rate for the L2 regularization.
    method : str
        If it contains "change" the growth rates before the knockout are
        subtracted. If it contains "relative" the result is additionally
        divided by the growth rates before the knockout.
    progress : boolean
        Whether to show a progress bar.
    diag : boolean
        Whether to keep the diagonal. If False it is set to NaN.

    Returns
    -------
    pandas.DataFrame
        One row per knocked out species and one column per community
        member (without "medium").
    """
    with community as com:
        check_modification(com)
        min_growth = _format_min_growth(0.0, com.species)
        _apply_min_growth(com, min_growth)

        com.objective = 1000.0 * com.variables.community_objective
        community_min_growth = (
            optimize_with_retry(com, "could not get community growth rate.")
            / 1000.0)

        regularize_l2_norm(com, fraction * community_min_growth)
        old = com.optimize().members["growth_rate"]
        results = []

        # Wrap only the iteration. Rebinding `species` to the progress bar
        # would make the bar the index of the result below.
        iterator = tqdm(species, unit="knockout(s)") if progress else species
        for sp in iterator:
            with com:
                logger.info("getting growth rates for " "%s knockout." % sp)
                for r in com.reactions.query(lambda ri: ri.community_id == sp):
                    r.knock_out()

                sol = optimize_with_fraction(com, fraction)
                new = sol.members["growth_rate"]
                if "change" in method:
                    new = new - old
                if "relative" in method:
                    new = new / old
                results.append(new)

        # `axis` has to be passed by keyword, the positional form is gone
        # in pandas >= 2.
        ko = pd.DataFrame(results, index=species).drop("medium", axis=1)
        if not diag:
            # `ko.values` may be a copy or a read-only view, so fill an
            # explicit copy and rebuild the frame instead of writing through.
            values = ko.values.copy()
            np.fill_diagonal(values, np.nan)
            ko = pd.DataFrame(values, index=ko.index, columns=ko.columns)

        return ko
def fix_community_medium(
    tax,
    medium,
    min_growth=0.1,
    max_import=1,
    minimize_components=True,
    n_jobs=4,
):
    """Augment a growth medium so all community members can grow in it.

    Arguments
    ---------
    tax : pandas.Dataframe
        A taxonomy specification as passed to `micom.Community`. Must have
        the columns `id` and `file`.
    medium : pandas.Series
        A growth medium with exchange reaction IDs as index and positive
        import fluxes as values. Is not modified.
    min_growth : positive float
        The minimum biomass production required for growth.
    max_import : positive float
        The maximum import rate for added imports.
    minimize_components : boolean
        Whether to minimize the number of media components rather than the
        total flux.
    n_jobs: int
        The number of processes to use.

    Returns
    -------
    pandas.Series
        A new growth medium with the smallest amount of augmentations such
        that all members of the community can grow in it. This is the
        maximum over the media fixed for each individual model.
    """
    # Test the mask itself. Calling `.any()` on the selected *values* is
    # False for exact zeros, which are the rates that need adjusting most.
    too_small = medium < 1e-6
    if too_small.any():
        medium = medium.copy()  # never touch the caller's medium
        medium[too_small] = 1e-6
        logger.info(
            "Some import rates were to small and were adjusted to 1e-6.")
    args = [(row.id, row.file, medium, min_growth, max_import,
             minimize_components) for _, row in tax.iterrows()]
    res = workflow(_fix_medium, args, n_jobs=n_jobs, unit="model(s)")
    return pd.concat(res, axis=1).max(axis=1)
def optimize_single(self, id):
    """Optimize growth rate for one individual.

    `optimize_single` will calculate the maximal growth rate for one
    individual member of the community.

    Notes
    -----
    This might well mean that growth rates for all other individuals are
    low since the individual may use up all available resources.

    Parameters
    ----------
    id : str or int
        The ID of the individual to be optimized or its non-negative
        position in the taxonomy.

    Returns
    -------
    float
        The maximal growth rate for the given species.

    Raises
    ------
    ValueError
        If `id` is a string that is not in the taxonomy or is neither a
        string nor a valid position.
    """
    taxonomy = self.__taxonomy
    if isinstance(id, six.string_types):
        if id not in taxonomy.index:
            raise ValueError(id + " not in taxonomy!")
        info = taxonomy.loc[id]
    elif isinstance(id, int) and 0 <= id < len(taxonomy):
        info = taxonomy.iloc[id]
    else:
        raise ValueError("`id` must be an id or positive index!")

    # The name of the selected row is the id of the individual.
    individual = info.name
    logger.info("optimizing for {}".format(individual))

    growth = self.constraints["objective_" + individual]
    # The objective is swapped only inside the context.
    with self as model:
        model.objective = growth.expression
        model.solver.optimize()
        return model.objective.value
def crossover(community, sol, fluxes=False, pfba=False):
    """Get the crossover solution.

    Maximizes the community objective while capping each individual growth
    rate at its value in `sol` and the community growth rate at the one from
    `sol` plus 1e-6. If the first attempt fails the solver is reset and the
    optimization is tried once more.

    Arguments
    ---------
    community : micom.Community
        The community to optimize. Changes are made inside a context.
    sol : micom.CommunitySolution
        The solution providing the growth rates.
    fluxes : boolean
        Whether to return a full solution. A slim one is returned otherwise.
    pfba : boolean
        Not used by this function.

    Returns
    -------
    micom.CommunitySolution
        The crossover solution with the objective value scaled back by 1000.

    Raises
    ------
    OptimizationError
        If no solution was found, even after resetting the solver.
    """
    individual_rates = sol.members.growth_rate.drop("medium")
    community_rate = sol.growth_rate
    logger.info("Starting crossover...")

    with community as com:
        logger.info("constraining growth rates.")
        context = get_context(community)
        if context is not None:
            context(partial(reset_min_community_growth, com))
        reset_min_community_growth(com)

        objective_var = com.variables.community_objective
        objective_var.lb = 0.0
        objective_var.ub = community_rate + 1e-6
        com.objective = 1000.0 * com.variables.community_objective

        for taxon in com.species:
            com.constraints["objective_" + taxon].ub = individual_rates[taxon]

        logger.info("finding closest feasible solution")
        result = com.optimize()
        if result is None:
            reset_solver(com)
            result = com.optimize()
        if result is not None:
            result = CommunitySolution(com, slim=not fluxes)

        # Lift the caps again before leaving the context.
        for taxon in com.species:
            com.constraints["objective_" + taxon].ub = None

    if result is None:
        raise OptimizationError("crossover could not converge (status = %s)." %
                                community.solver.status)
    result.objective_value /= 1000.0
    return result
def regularize_l2_norm(community, min_growth):
    """Add an objective to find the most "egoistic" solution.

    This adds an optimization objective finding a solution that maintains a
    (sub-)optimal community growth rate but is the closest solution to the
    community members individual maximal growth rates. So it basically finds
    the best possible tradeoff between maximizing community growth and
    individual (egoistic) growth. Here the objective is the negative sum of
    the scaled squared growth expressions of all taxa.

    Arguments
    ---------
    community : micom.Community
        The community to modify.
    min_growth : positive float
        The minimal community growth rate that has to be maintained.
    """
    logger.info("adding L2 norm to %s" % community.id)
    penalty = Zero
    community.variables.community_objective.lb = min_growth

    context = get_context(community)
    if context is not None:
        context(partial(reset_min_community_growth, community))

    for taxon in community.taxa:
        constraint = community.constraints["objective_" + taxon]
        # Variables with (almost) identical bounds are treated as fixed and
        # left out of the growth expression.
        free = [v for v in constraint.variables if (v.ub - v.lb) > 1e-6]
        growth = sum(free)
        penalty = penalty + (community.scale * (growth ** 2)).expand()

    community.objective = -penalty
    community.modification = "l2 regularization"
    logger.info("finished adding tradeoff objective to %s" % community.id)
def _apply_min_growth(community, min_growth):
    """Set minimum growth constraints on a model.

    Will integrate with the context: if a context is active the previous
    lower bound of every `objective_<species>` constraint is restored (and
    the upper bound removed) when the context exits. Requested rates not
    larger than 1e-6 are set to zero.
    """
    context = get_context(community)

    def reset(species, lb):
        logger.info("resetting growth rate constraint for %s" % species)
        constraint = community.constraints["objective_" + species]
        constraint.ub = None
        constraint.lb = lb

    for species in community.species:
        logger.info("setting growth rate constraint for %s" % species)
        constraint = community.constraints["objective_" + species]
        if context:
            # Remember the bound as it is *now* so it can be restored later.
            context(partial(reset, species, constraint.lb))
        requested = min_growth[species]
        if requested > 1e-6:
            constraint.lb = requested
            continue
        logger.info("minimal growth rate smaller than tolerance, setting to zero.")
        constraint.lb = 0
def load_model(filepath):
    """Load a cobra model from several file types.

    Arguments
    ---------
    filepath : str
        A local path or a URL. Anything with a scheme and a network
        location is downloaded to a temporary directory first.

    Returns
    -------
    The model as returned by `_read_model`.
    """
    logger.info("reading model from {}".format(filepath))
    parsed = urlparse(filepath)
    if parsed.scheme and parsed.netloc:
        tmpdir = tempfile.mkdtemp()
        logger.info("created temporary directory {}".format(tmpdir))
        try:
            filepath = download_model(filepath, folder=tmpdir)
            model = _read_model(filepath)
        finally:
            # Remove the download even if fetching or parsing failed,
            # otherwise every failed attempt leaves a directory behind.
            rmtree(tmpdir)
            logger.info("deleted temporary directory {}".format(tmpdir))
    else:
        model = _read_model(filepath)
    return model
def reset(taxon, lb):
    # Undo a growth rate constraint: remove the upper bound and put the
    # given lower bound back. `community` comes from the enclosing scope.
    logger.info("resetting growth rate constraint for %s" % taxon)
    constraint = community.constraints["objective_" + taxon]
    constraint.ub = None
    constraint.lb = lb
def minimal_medium(
    community,
    community_growth,
    min_growth=0.0,
    exports=False,
    exchanges=None,
    minimize_components=False,
    open_exchanges=False,
    solution=False,
    weights=None,
    atol=None,
    rtol=None,
):
    """Find the minimal growth medium for the community.

    Finds the minimal growth medium for the community which allows for
    community as well as individual growth. Here, a minimal medium can either
    be the medium requiring the smallest total import flux or the medium
    requiring the least components (ergo ingredients).

    Arguments
    ---------
    community : micom.Community
        The community to modify.
    community_growth : positive float
        The minimum community-wide growth rate. Not referenced by this
        implementation, only the individual `min_growth` constraints are
        applied.
    min_growth : positive float or array-like object.
        The minimum growth rate for each individual in the community. Either
        a single value applied to all individuals or one value for each.
    exports : boolean
        Whether to include export fluxes in the returned medium. Defaults to
        False which will only return import fluxes.
    exchanges : list of cobra.Reactions
        The list of exchange reactions that are penalized. If None all
        exchanges of the community are used.
    minimize_components : boolean
        Whether to minimize the number of components instead of the total
        import flux. Might be more intuitive if set to True but may also be
        slow to calculate for large communities.
    open_exchanges : boolean or number
        Whether to ignore currently set bounds and make all exchange
        reactions in the model possible. If set to a number all exchange
        reactions will be opened with (-number, number) as bounds.
    solution : boolean
        Whether to also return the entire solution and all fluxes for the
        minimal medium.
    weights : str
        Will scale the fluxes by a weight factor. Can either be "mass" which
        will scale by molecular mass, a single element which will scale by
        the elemental content (for instance "C" to scale by carbon content).
        If None every metabolite will receive the same weight.
        Will be ignored if `minimize_components` is True.
    atol : float
        Absolute tolerance for the growth rates. If None will use the solver
        tolerance.
    rtol : float
        Relative tolerance for the growth rates. If None will use the
        solver tolerance.

    Returns
    -------
    pandas.Series or dict
        A series {rid: flux} giving the import flux for each required import
        reaction. If `solution` is True returns a dictionary
        {"medium": pandas.Series, "solution": micom.CommunitySolution}.
        Returns None if the optimization was not successful.
    """
    logger.info("calculating minimal medium for %s" % community.id)

    if atol is None:
        atol = community.solver.configuration.tolerances.optimality
    if rtol is None:
        rtol = community.solver.configuration.tolerances.optimality

    if exchanges is None:
        boundary_rxns = community.exchanges
    else:
        boundary_rxns = community.reactions.get_by_any(exchanges)
    if isinstance(open_exchanges, bool):
        open_bound = 1000
    else:
        open_bound = open_exchanges
    min_growth = _format_min_growth(min_growth, community.taxa)
    with community as com:
        if open_exchanges:
            logger.info("opening exchanges for %d imports" % len(boundary_rxns))
            for rxn in boundary_rxns:
                rxn.bounds = (-open_bound, open_bound)
        logger.info("applying growth rate constraints")
        _apply_min_growth(community, min_growth, atol, rtol)
        com.objective = Zero
        logger.info("adding new media objective")
        if minimize_components:
            add_mip_obj(com, boundary_rxns)
        else:
            scales = weight(boundary_rxns, weights)
            add_linear_obj(com, boundary_rxns, scales)
        sol = com.optimize(fluxes=True, pfba=False)
        if sol is None:
            logger.warning("minimization of medium was unsuccessful")
            return None

        logger.info("formatting medium")
        # Only reactions that are exchanges of the community have a flux in
        # the "medium" row. Walk the penalized reactions in their own order
        # (instead of iterating a set) so the result is reproducible.
        community_exchanges = set(com.exchanges)
        fluxes = {}
        for rxn in boundary_rxns:
            if rxn not in community_exchanges:
                continue
            flux = sol.fluxes.loc["medium", rxn.id]
            if abs(flux) < atol:
                continue
            # A single reactant marks an export reaction; flip the sign so
            # imports are always positive.
            export = len(rxn.reactants) == 1
            fluxes[rxn.id] = -flux if export else flux
        # An explicit dtype keeps the series numeric even if it is empty.
        medium = pd.Series(fluxes, dtype="float64")
        if not exports:
            medium = medium[medium > 0.0]

    if solution:
        return {"medium": medium, "solution": sol}
    else:
        return medium
def add_moma_optcom(community, min_growth, linear=False):
    """Add a dualized MOMA version of OptCom.

    Solves a MOMA (minimization of metabolic adjustment) formulation of
    OptCom given by::

        minimize cooperativity_cost
        s.t. maximize community_objective
             s.t. Sv = 0
                  lb <= v <= ub
        where cooperativity_cost = sum (growth_rate - max_growth)**2
              if linear=False or
              cooperativity_cost = sum |growth_rate - max_growth|
              if linear=True

    The community is modified in place: dual variables and constraints are
    added via `fast_dual`, the objective is replaced and
    `community.modification` is set to "moma optcom".

    Arguments
    ---------
    community : micom.Community
        The community to modify.
    min_growth : positive float or array-like object.
        The minimum growth rate for each individual in the community. Either
        a single value applied to all individuals or one value for each.
    linear : boolean
        Whether to use a non-linear (sum of squares) or linear version of the
        cooperativity cost. If set to False requires a QP-capable solver.

    """
    logger.info("adding dual %s moma to %s" % ("linear" if linear else "quadratic", community.id))
    check_modification(community)
    min_growth = _format_min_growth(min_growth, community.taxa)

    prob = community.solver.interface
    # Coefficients of the current (primal) objective. They are read before
    # the dual variables are added to the model.
    old_obj = community.objective
    coefs = old_obj.get_linear_coefficients(old_obj.variables)

    # Get maximum individual growth rates
    max_gcs = community.optimize_all(progress=False)

    _apply_min_growth(community, min_growth)
    dual_coefs = fast_dual(community)
    # primal objective - dual objective = 0 is enforced by the constraint
    # below.
    coefs.update({v: -coef for v, coef in dual_coefs.items()})
    obj_constraint = prob.Constraint(Zero, lb=0, ub=0, name="optcom_suboptimality")
    community.add_cons_vars([obj_constraint])
    # The constraint has to be known to the solver before its coefficients
    # can be set.
    community.solver.update()
    obj_constraint.set_linear_coefficients(coefs)
    obj_expr = Zero
    logger.info("adding expressions for %d taxa" % len(community.taxa))
    for sp in community.taxa:
        # A variable fixed to the maximum growth rate of the taxon (lower
        # and upper bound are identical).
        v = prob.Variable("gc_constant_" + sp, lb=max_gcs[sp], ub=max_gcs[sp])
        community.add_cons_vars([v])
        taxa_obj = community.constraints["objective_" + sp]
        ex = v - taxa_obj.expression
        # NOTE(review): no absolute value is taken in the linear case; this
        # presumably relies on the growth rate never exceeding its maximum.
        if not linear:
            ex = ex**2
        obj_expr += ex.expand()
    community.objective = prob.Objective(obj_expr, direction="min")
    community.modification = "moma optcom"
    logger.info("finished dual moma to %s" % community.id)
def add_dualized_optcom(community, min_growth):
    """Add dual Optcom variables and constraints to a community.

    Uses the original formulation of OptCom and solves the following
    multi-objective problem::

        maximize community_growth
        s.t. maximize growth_rate_i for all i
             s.t. Sv_i = 0
                  lb_i <= v_i <= ub_i

    The community is modified in place: dual variables and constraints are
    added via `fast_dual`, one `optcom_suboptimality_<taxon>` constraint is
    added for each taxon and `community.modification` is set to
    "dual optcom". The objective itself is the same before and after.

    Notes
    -----
    This method will only find one arbitrary solution from the Pareto front.
    There may exist several other optimal solutions.

    Arguments
    ---------
    community : micom.Community
        The community to modify.
    min_growth : positive float or array-like object.
        The minimum growth rate for each individual in the community. Either
        a single value applied to all individuals or one value for each.

    """
    logger.info("adding dual optcom to %s" % community.id)
    check_modification(community)
    min_growth = _format_min_growth(min_growth, community.taxa)

    prob = community.solver.interface

    # Temporarily substitute objective with sum of individual objectives
    # for correct dual variables
    old_obj = community.objective
    community.objective = Zero
    for sp in community.taxa:
        taxa_obj = community.constraints["objective_" + sp]
        community.objective += taxa_obj.expression

    _apply_min_growth(community, min_growth)
    dual_coefs = fast_dual(community)

    logger.info("adding expressions for %d taxa" % len(community.taxa))
    for sp in community.taxa:
        primal_const = community.constraints["objective_" + sp]
        coefs = primal_const.get_linear_coefficients(primal_const.variables)
        # Dual variables are assigned to a taxon by a substring match of
        # the taxon id in the variable name.
        coefs.update({
            dual_var: -coef for dual_var, coef in
            dual_coefs.items() if sp in dual_var.name
        })
        obj_constraint = prob.Constraint(Zero, lb=0, ub=0, name="optcom_suboptimality_" + sp)
        community.add_cons_vars([obj_constraint])
        # The constraint has to be known to the solver before its
        # coefficients can be set.
        community.solver.update()
        obj_constraint.set_linear_coefficients(coefs)

    # Put the original objective back.
    community.objective = old_obj
    community.modification = "dual optcom"
    logger.info("finished adding dual optcom to %s" % community.id)
def fast_dual(model, prefix="dual_"):
    """Add dual formulation to the problem.

    A mathematical optimization problem can be viewed as a primal and a dual
    problem. If the primal problem is a minimization problem the dual is a
    maximization problem, and the optimal value of the dual is a lower bound of
    the optimal value of the primal. For linear problems, strong duality holds,
    which means that the optimal values of the primal and dual are equal
    (duality gap = 0).

    This function takes a model representing a primal linear problem and adds
    the variables and constraints of the dual problem to that same model. No
    new model is created and the objective of the model is not changed.
    The provided model must have a linear objective, linear constraints and
    only continuous variables. Furthermore, the problem must be in standard
    form, i.e. all variables should be non-negative. Both minimization and
    maximization problems are allowed.

    Attributes
    ----------
    model : cobra.Model
        The model to be dualized.
    prefix : str
        The string that will be prepended to the names of all added dual
        variables and constraints.

    Returns
    -------
    dict
        The coefficients for the new dual objective, mapping the dual
        variables to their (non-zero) coefficient.

    Raises
    ------
    ValueError
        If a constraint is not linear, a constraint expression is neither a
        sum nor a product, a variable is not continuous or a variable has a
        negative lower bound.
    """
    logger.info("adding dual variables")
    if len(model.variables) > 1e5:
        logger.warning("the model has a lot of variables,"
                       "dual optimization will be extremely slow :O")
    prob = model.problem

    maximization = model.objective.direction == "max"

    if maximization:
        sign = 1
    else:
        sign = -1

    # primal variable name -> {dual variable name: coefficient}
    coefficients = {}
    # dual variable name -> coefficient in the dual objective
    dual_objective = {}
    # Dual variables are collected and added to the model in one go.
    to_add = []

    # Add dual variables from primal constraints:
    for constraint in model.constraints:
        if constraint.expression == 0:
            continue  # Skip empty constraint
        if not constraint.is_Linear:
            raise ValueError("Non-linear problems are not supported: " + str(constraint))
        if constraint.lb is None and constraint.ub is None:
            logger.warning("skipped free constraint %s" % constraint.name)
            continue  # Skip free constraint
        if constraint.lb == constraint.ub:
            # Equality constraint: a single unbounded dual variable.
            const_var = prob.Variable(prefix + constraint.name + "_constraint", lb=None, ub=None)
            to_add.append(const_var)
            if constraint.lb != 0:
                dual_objective[const_var.name] = sign * constraint.lb
            coefs = constraint.get_linear_coefficients(constraint.variables)
            for variable, coef in coefs.items():
                coefficients.setdefault(variable.name, {})[const_var.name] = (sign * coef)
        else:
            # Inequality constraint: one non-negative dual variable for each
            # bound that is set.
            if constraint.lb is not None:
                lb_var = prob.Variable(prefix + constraint.name + "_constraint_lb", lb=0, ub=None)
                to_add.append(lb_var)
                if constraint.lb != 0:
                    dual_objective[lb_var.name] = -sign * constraint.lb
            if constraint.ub is not None:
                ub_var = prob.Variable(prefix + constraint.name + "_constraint_ub", lb=0, ub=None)
                to_add.append(ub_var)
                if constraint.ub != 0:
                    dual_objective[ub_var.name] = sign * constraint.ub

            if not (constraint.expression.is_Add or constraint.expression.is_Mul):
                raise ValueError("Invalid expression type: " + str(type(constraint.expression)))
            if constraint.expression.is_Add:
                coefficients_dict = constraint.get_linear_coefficients(constraint.variables)
            else:  # constraint.expression.is_Mul:
                # NOTE(review): assumes a product has the form
                # `coefficient * variable` in exactly that order - confirm.
                args = constraint.expression.args
                coefficients_dict = {args[1]: args[0]}

            for variable, coef in coefficients_dict.items():
                if constraint.lb is not None:
                    coefficients.setdefault(variable.name, {})[lb_var.name] = (-sign * coef)
                if constraint.ub is not None:
                    coefficients.setdefault(variable.name, {})[ub_var.name] = (sign * coef)

    # Add dual variables from primal bounds
    for variable in model.variables:
        if not variable.type == "continuous":
            raise ValueError("Integer variables are not supported: " + str(variable))
        if variable.lb is not None and variable.lb < 0:
            raise ValueError("Problem is not in standard form (" + variable.name + " can be negative)")
        # NOTE(review): a lower bound of None passes the check above but is
        # compared to 0 here - confirm that this can not happen.
        if variable.lb > 0:
            bound_var = prob.Variable(prefix + variable.name + "_lb", lb=0, ub=None)
            to_add.append(bound_var)
            coefficients.setdefault(variable.name, {})[bound_var.name] = -sign
            dual_objective[bound_var.name] = -sign * variable.lb
        if variable.ub is not None:
            bound_var = prob.Variable(prefix + variable.name + "_ub", lb=0, ub=None)
            to_add.append(bound_var)
            coefficients.setdefault(variable.name, {})[bound_var.name] = sign
            if variable.ub != 0:
                dual_objective[bound_var.name] = sign * variable.ub

    model.add_cons_vars(to_add)

    # Add dual constraints from primal objective
    primal_objective_dict = model.objective.get_linear_coefficients(model.objective.variables)
    for variable in model.objective.variables:
        obj_coef = primal_objective_dict[variable]
        if maximization:
            const = prob.Constraint(S.Zero, lb=obj_coef, name=prefix + variable.name)
        else:
            const = prob.Constraint(S.Zero, ub=obj_coef, name=prefix + variable.name)
        model.add_cons_vars([const])
        # The constraint has to be known to the solver before its
        # coefficients can be set.
        model.solver.update()
        coefs = {
            model.variables[vid]: coef
            for vid, coef in coefficients[variable.name].items()
        }
        const.set_linear_coefficients(coefs)

    # Make dual objective
    coefs = {
        model.variables[vid]: coef
        for vid, coef in dual_objective.items() if coef != 0
    }
    logger.info("dual model has {} terms in objective".format(len(coefs)))

    return coefs
def minimal_medium(
    community,
    community_growth,
    exchanges=None,
    min_growth=0.0,
    exports=False,
    minimize_components=False,
    open_exchanges=False,
    solution=False,
):
    """Find the minimal growth medium for the community.

    Finds the minimal growth medium for the community which allows for
    community as well as individual growth. Here, a minimal medium can either
    be the medium requiring the smallest total import flux or the medium
    requiring the least components (ergo ingredients).

    Arguments
    ---------
    community : micom.Community
        The community to modify.
    community_growth : positive float
        The minimum community-wide growth rate.
    exchanges : list of cobra.Reactions, optional
        The list of exchange reactions that are penalized in the
        minimization objective. Defaults to None, in which case all of
        ``community.exchanges`` are penalized. Opening of bounds and the
        reported medium always cover all of ``community.exchanges``.
    min_growth : positive float or array-like object.
        The minimum growth rate for each individual in the community. Either
        a single value applied to all individuals or one value for each.
    exports : boolean
        Whether to include export fluxes in the returned medium. Defaults to
        False which will only return import fluxes.
    minimize_components : boolean
        Whether to minimize the number of components instead of the total
        import flux. Might be more intuitive if set to True but may also be
        slow to calculate for large communities.
    open_exchanges : boolean or number
        Whether to ignore currently set bounds and make all exchange
        reactions in the model possible. If True, all exchange reactions are
        opened with (-1000, 1000) as bounds. If set to a number all exchange
        reactions will be opened with (-number, number) as bounds.
    solution : boolean
        Whether to also return the entire solution and all fluxes for the
        minimal medium.

    Returns
    -------
    pandas.Series, dict or None
        A series {rid: flux} giving the import flux for each required import
        reaction (imports are positive, exports negative when `exports` is
        True). If `solution` is True returns a dictionary
        {"medium": pandas.Series, "solution": micom.CommunitySolution}.
        Returns None if the optimization did not yield a solution.

    """
    logger.info("calculating minimal medium for %s", community.id)
    boundary_rxns = community.exchanges
    # Only the objective is restricted to the user-supplied reactions; bounds
    # are opened and fluxes are reported for every community exchange.
    penalized_rxns = boundary_rxns if exchanges is None else exchanges
    # `True` means "open with the default bound"; a number is the bound itself.
    open_bound = 1000 if isinstance(open_exchanges, bool) else open_exchanges
    min_growth = _format_min_growth(min_growth, community.species)

    # All modifications happen inside the model context so they are reverted
    # once the block is left.
    with community as com:
        if open_exchanges:
            logger.info(
                "opening exchanges for %d imports", len(boundary_rxns)
            )
            for rxn in boundary_rxns:
                rxn.bounds = (-open_bound, open_bound)
        logger.info("applying growth rate constraints")
        context = get_context(community)
        if context is not None:
            # Register the reset callback before changing the bound so the
            # change is undone together with the context.
            context(partial(reset_min_community_growth, com))
            com.variables.community_objective.lb = community_growth
        _apply_min_growth(community, min_growth)
        com.objective = Zero
        logger.info("adding new media objective")
        if minimize_components:
            add_mip_obj(com, penalized_rxns)
        else:
            add_linear_obj(com, penalized_rxns)
        sol = com.optimize(fluxes=True, pfba=False)

    if sol is None:
        logger.warning("minimization of medium was unsuccessful")
        return None

    logger.info("formatting medium")
    tol = community.solver.configuration.tolerances.feasibility
    import_fluxes = {}
    for rxn in boundary_rxns:
        flux = sol.fluxes.loc["medium", rxn.id]
        # Fluxes below the solver's feasibility tolerance count as zero.
        if abs(flux) < tol:
            continue
        # A reaction with exactly one reactant is written in the export
        # direction, so its flux is negated to make imports positive.
        export_oriented = len(rxn.reactants) == 1
        import_fluxes[rxn.id] = -flux if export_oriented else flux
    # Build the series once with an explicit float dtype; an empty result
    # would otherwise fall back to pandas' default dtype for empty Series.
    medium = pd.Series(import_fluxes, dtype="float64")
    if not exports:
        medium = medium[medium > 0]

    if solution:
        return {"medium": medium, "solution": sol}
    return medium