def solve(community, fluxes=True, pfba=True, raise_error=False, atol=1e-6, rtol=1e-6):
    """Optimize the community and wrap the result in a CommunitySolution.

    Parameters
    ----------
    community : micom.Community
        The community whose solver is optimized.
    fluxes : bool
        If truthy a full solution is built, otherwise a slim one
        (``slim=True``).
    pfba : bool
        If truthy (and `fluxes` is truthy as well) a pFBA objective is added
        and the solver is run a second time before building the solution.
    raise_error : bool
        Whether an acceptable but non-optimal status raises an error instead
        of only being logged.
    atol, rtol : float
        Tolerances forwarded to `add_pfba_objective`.

    Returns
    -------
    CommunitySolution or None
        None if the solver status is not one of the accepted statuses.

    Raises
    ------
    OptimizationError
        If the status is accepted but not optimal and `raise_error` is set.
    """
    community.solver.optimize()
    status = community.solver.status

    # Statuses outside the accepted set end the call right away.
    if status not in good:
        logger.warning("solver encountered an error %s" % status)
        return None

    if status != OPTIMAL:
        if raise_error:
            raise OptimizationError("solver returned the status %s." % status)
        logger.info(
            "solver returned the status %s," % status
            + " returning the solution anyway."
        )

    if not fluxes:
        return CommunitySolution(community, slim=True)

    if pfba:
        # Second optimization pass with the pFBA objective in place.
        add_pfba_objective(community, atol, rtol)
        community.solver.optimize()
    return CommunitySolution(community)
def _fix_medium(args):
    """Complete the medium for one pickled community and tabulate it.

    `args` is a 7-tuple ``(sid, p, min_growth, max_import, min_c, medium,
    weights)``. Returns a DataFrame with the columns "reaction", "flux",
    "metabolite", "description" and "sample_id", or None if completing the
    medium raised an exception.
    """
    sid, p, min_growth, max_import, min_c, medium, weights = args
    com = load_pickle(p)

    try:
        completed = mm.complete_medium(
            com,
            medium,
            min_growth=min_growth,
            max_import=max_import,
            minimize_components=min_c,
            weights=weights,
        )
    except Exception:
        logger.warning("Can't reach the specified growth rates for model %s." % sid)
        return None

    table = pd.DataFrame({"reaction": completed.index, "flux": completed.values})

    # The first metabolite of each reaction supplies the ID and the name.
    first_mets = [
        list(com.reactions.get_by_id(rid).metabolites.keys())[0]
        for rid in table.reaction
    ]
    table["metabolite"] = [met.id for met in first_mets]
    table["description"] = [met.name for met in first_mets]
    table["sample_id"] = sid
    return table
def medium(self, fluxes):
    """Set the medium for the community.

    Only the entries whose ID matches one of the community's exchange
    reactions are handed on to the parent class setter.

    Parameters
    ----------
    fluxes : dict or pandas.Series
        The largest allowed import flux for the component. Index or key
        must be a reaction ID and the flux should be positive.

    Raises
    ------
    ValueError
        If none of the given IDs is an exchange reaction of the community.
    """
    if isinstance(fluxes, pd.Series):
        fluxes = fluxes.to_dict()

    known = {rxn.id for rxn in self.exchanges}
    requested = {key for key in fluxes}
    matched = requested & known
    missing = requested - known

    if not matched:
        raise ValueError(
            "No ID from the medium could be found in the exchange reactions. "
            "This means you probably have mismatched IDs..."
        )
    if missing:
        logger.warning(
            "I could not find the following exchanges "
            "in your model: %s" % ", ".join(missing)
        )

    usable = {rid: fluxes[rid] for rid in matched}
    super(Community, type(self)).medium.fset(self, usable)
def _tradeoff(args):
    """Collect member growth rates for a plain optimization and all tradeoffs.

    `args` is a 3-tuple ``(p, tradeoffs, medium)``. The rates of the plain
    optimization are stored with a NaN tradeoff. Returns the concatenated
    table without the "medium" rows, or None if the cooperative tradeoff
    raised an exception.
    """
    p, tradeoffs, medium = args
    com = load_pickle(p)

    exchange_ids = [rxn.id for rxn in com.exchanges]
    in_model = medium.index.isin(exchange_ids)
    logger.info(
        "%d/%d import reactions found in model.",
        in_model.sum(),
        len(medium),
    )
    com.medium = medium[in_model]

    def annotate(members, tradeoff):
        # Adds the bookkeeping columns to the members table in place.
        members["taxon"] = members.index
        members["tradeoff"] = tradeoff
        members["sample_id"] = com.id
        return members

    tables = [annotate(com.optimize().members, np.nan)]

    # Get growth rates
    try:
        result = com.cooperative_tradeoff(fraction=tradeoffs)
    except Exception as e:
        logger.warning("Sample %s could not be optimized\n %s" % (com.id, str(e)))
        return None

    for position, single in enumerate(result.solution):
        tables.append(annotate(single.members, result.tradeoff[position]))

    combined = pd.concat(tables)
    return combined[combined.taxon != "medium"]
def _growth(args):
    """Get growth rates and exchange fluxes for one pickled community.

    `args` is a 3-tuple ``(p, tradeoff, medium)``. Returns a dict with the
    keys "growth" and "exchanges", or None if the first cooperative tradeoff
    raised an exception.
    """
    p, tradeoff, medium = args
    com = load_pickle(p)

    exchange_ids = [rxn.id for rxn in com.exchanges]
    in_model = medium.index.isin(exchange_ids)
    logger.info(
        "%d/%d import reactions found in model.",
        in_model.sum(),
        len(medium),
    )
    com.medium = medium[in_model]

    # Get growth rates
    try:
        first = com.cooperative_tradeoff(fraction=tradeoff)
        rates = first.members
        rates["taxon"] = rates.index
        rates["tradeoff"] = tradeoff
        rates["sample_id"] = com.id
    except Exception:
        logger.warning("Could not solve cooperative tradeoff for %s." % com.id)
        return None

    # Get the minimal medium for 95% of the achieved community growth rate
    minimal = minimal_medium(com, 0.95 * first.growth_rate)

    # Apply medium and reoptimize
    com.medium = minimal[minimal > 0]
    final = com.cooperative_tradeoff(fraction=tradeoff, fluxes=True, pfba=False)
    all_fluxes = final.fluxes
    is_exchange = all_fluxes.columns.str.startswith("EX_")
    fluxes = all_fluxes.loc[:, is_exchange].copy()
    fluxes["sample_id"] = com.id
    return {"growth": rates, "exchanges": fluxes}
def _growth(args):
    """Get growth rates, exchange fluxes and annotations for one community.

    `args` is a 6-tuple ``(p, tradeoff, medium, weights, atol, rtol)``. If
    `atol` or `rtol` is None the solver's optimality tolerance is used.

    Returns
    -------
    dict or None
        A dict with the keys "growth", "exchanges" and "annotations". None
        if the model uses a GLPK interface, if the cooperative tradeoff
        raised an exception, or if no minimal medium could be obtained.
    """
    p, tradeoff, medium, weights, atol, rtol = args
    # The pickle is loaded once; reading the tolerances does not modify it.
    com = load_pickle(p)

    if atol is None:
        atol = com.solver.configuration.tolerances.optimality
    if rtol is None:
        rtol = com.solver.configuration.tolerances.optimality

    if "glpk" in interface_to_str(com.solver.interface):
        logger.error(
            "Community models were not built with a QP-capable solver. "
            "This means that you did not install CPLEX or Gurobi. "
            "If you did install one of the two please file a bug report "
            "at https://github.com/micom-dev/micom/issues."
        )
        return None

    ex_ids = [r.id for r in com.exchanges]
    in_model = medium.index.isin(ex_ids)
    logger.info(
        "%d/%d import reactions found in model.",
        in_model.sum(),
        len(medium),
    )
    com.medium = medium[in_model]

    # Get growth rates
    try:
        sol = com.cooperative_tradeoff(fraction=tradeoff)
        rates = sol.members
        rates["taxon"] = rates.index
        rates["tradeoff"] = tradeoff
        rates["sample_id"] = com.id
    except Exception:
        logger.warning(
            "Could not solve cooperative tradeoff for %s. "
            "This can often be fixed by choosing more permissive atol and rtol "
            "arguments." % com.id
        )
        return None

    # Get the minimal medium and the solution at the same time
    minimal = minimal_medium(
        com,
        exchanges=None,
        community_growth=sol.growth_rate,
        min_growth=rates.growth_rate.drop("medium"),
        solution=True,
        weights=weights,
        atol=atol,
        rtol=rtol,
    )
    if minimal is None:
        # minimal_medium signals a failed minimization with None; follow the
        # failure convention used above instead of raising a TypeError.
        logger.warning("Could not obtain a minimal medium for %s." % com.id)
        return None
    sol = minimal["solution"]

    fluxes = sol.fluxes.loc[:, sol.fluxes.columns.str.startswith("EX_")].copy()
    fluxes["sample_id"] = com.id
    fluxes["tolerance"] = atol
    anns = annotate_metabolites_from_exchanges(com)
    return {"growth": rates, "exchanges": fluxes, "annotations": anns}
def _grow(args):
    """Get the maximum growth rate under a given medium.

    `args` is a 2-tuple ``(file, med)``: the model file to load and the
    medium, indexed by reaction ID. Only the medium entries that match an
    exchange reaction of the model are applied.

    Returns
    -------
    The value returned by ``slim_optimize`` on the loaded model.
    """
    file, med = args
    mod = load_model(file)

    exchange_ids = [r.id for r in mod.exchanges]
    good = med[med.index.isin(exchange_ids)]
    if len(good) == 0:
        # The model file is passed as the argument for the `%s` placeholder.
        logger.warning(
            "Could not find any reactions from the medium in `%s`. "
            "Maybe a mismatch in IDs?",
            file,
        )

    # Reuse the filtered medium rather than filtering a second time.
    mod.medium = good
    rate = mod.slim_optimize()
    return rate
def reset_solver(community):
    """Reset the solver of the community.

    What is done depends on the solver interface: for "cplex" the LP method
    is switched to "network", for "gurobi" the underlying problem is reset
    and for "glpk" `glp_adv_basis` is called on the problem. Any other
    interface is left untouched.
    """
    backend = interface_to_str(community.solver.interface)
    logger.info("resetting solver, hoping for the best.")

    def _cplex():
        logger.warning("switching cplex LP algorithm to `network`.")
        community.solver.configuration.lp_method = "network"

    def _gurobi():
        community.solver.problem.reset()

    def _glpk():
        glp_adv_basis(community.solver.problem, 0)

    handlers = {"cplex": _cplex, "gurobi": _gurobi, "glpk": _glpk}
    handler = handlers.get(backend)
    if handler is not None:
        handler()
def add_mip_obj(community, exchanges):
    """Add a mixed-integer version of a minimal medium to the community.

    Changes the optimization objective to finding the medium with the least
    components::

        minimize size(R) where R part of import_reactions

    Every reaction gets a binary indicator variable and a constraint
    ``flux_variable - indicator * M <= 0`` where M is the largest absolute
    bound over all given reactions.

    Arguments
    ---------
    community : micom.Community
        The community to modify.
    exchanges : list of cobra.Reaction
        The reactions to constrain.
    """
    check_modification(community)
    if len(community.variables) > 1e4:
        logger.warning(
            "the MIP version of minimal media is extremely slow for"
            " models that large :("
        )

    # Big-M: the largest absolute bound found on any of the reactions.
    big_m = max(np.max(np.abs(rxn.bounds)) for rxn in exchanges)
    prob = community.problem
    objective_coefs = {}
    new_items = []

    for rxn in exchanges:
        is_export = len(rxn.reactants) == 1 or (
            len(rxn.reactants) == 2 and rxn.products[0].compartment == "m"
        )
        indicator = prob.Variable("ind_" + rxn.id, lb=0, ub=1, type="binary")
        # Exports are linked via the reverse variable, everything else via
        # the forward variable.
        flux_var = rxn.reverse_variable if is_export else rxn.forward_variable
        link = prob.Constraint(
            flux_var - indicator * big_m, ub=0, name="ind_constraint_" + rxn.id
        )
        new_items.append(indicator)
        new_items.append(link)
        objective_coefs[indicator] = 1

    community.add_cons_vars(new_items)
    community.solver.update()
    community.objective.set_linear_coefficients(objective_coefs)
    community.objective.direction = "min"
    community.modification = "minimal medium mixed-integer"
def _fix_medium(args):
    """Get the fixed medium for a model.

    `args` is a 6-tuple ``(mid, file, medium, min_growth, max_import,
    min_c)``. All reaction IDs of the loaded model are passed through
    `clean_ids` first. Falls back to a copy of the input medium if completing
    the medium raised an exception or if the solver status is not optimal.
    The returned medium is named after `mid`.
    """
    mid, file, medium, min_growth, max_import, min_c = args
    model = load_model(file)
    for rxn in model.reactions:
        rxn.id = clean_ids(rxn.id)

    try:
        result = complete_medium(
            model,
            medium,
            min_growth=min_growth,
            max_import=max_import,
            minimize_components=min_c,
        )
    except Exception:
        result = medium.copy()

    # A non-optimal status always discards whatever was obtained above.
    if model.solver.status != OPTIMAL:
        logger.warning("Can't reach the specified growth rate for model %s." % mid)
        result = medium.copy()

    result.name = mid
    return result
def media_and_gcs(sam):
    """Get the minimal medium, growth rates and fluxes for one sample.

    Loads the pickle "data/models/<sam>.pickle". Returns a dict with the keys
    "medium", "gcs" and "fluxes", or None if the first cooperative tradeoff
    raised an exception.
    """
    com = load_pickle("data/models/" + sam + ".pickle")

    # Get growth rates
    try:
        first = com.cooperative_tradeoff(fraction=0.5)
        growth = first.members["growth_rate"].copy()
        growth["community"] = first.growth_rate
        growth.name = sam
    except Exception:
        logger.warning("Could not solve cooperative tradeoff for %s." % sam)
        return None

    # Get the minimal medium for 95% of the achieved community growth rate
    minimal = minimal_medium(com, 0.95 * first.growth_rate, exports=True)
    minimal.name = sam

    # Apply medium and reoptimize
    com.medium = minimal[minimal > 0]
    final = com.cooperative_tradeoff(fraction=0.5, fluxes=True, pfba=False)
    flux_table = final.fluxes
    flux_table["sample"] = sam

    return {"medium": minimal, "gcs": growth, "fluxes": flux_table}
def media_and_gcs(sam):
    """Get the minimal medium, growth rates and fluxes for one sample.

    Loads the pickle ``pickles_path + "/" + sam`` and uses the module-level
    `trade_off` as the tradeoff fraction for both the initial solve and the
    re-optimization on the minimal medium.

    Returns
    -------
    dict or None
        A dict with the keys "medium", "gcs" and "fluxes", or None if the
        first cooperative tradeoff raised an exception.
    """
    com = load_pickle(pickles_path + "/" + sam)

    # Get growth rates
    try:
        sol = com.cooperative_tradeoff(fraction=trade_off)
        rates = sol.members["growth_rate"].copy()
        rates["community"] = sol.growth_rate
        rates.name = sam
    except Exception:
        logger.warning("Could not solve cooperative tradeoff for %s." % sam)
        return None

    # Get the minimal medium
    med = minimal_medium(com, 0.95 * sol.growth_rate, exports=True)
    med.name = sam

    # Apply medium and reoptimize with the same tradeoff as the first solve,
    # so that the reported fluxes belong to the reported growth rates.
    # Uses the 'classic' FBA instead of the parsimonious FBA.
    com.medium = med[med > 0]
    sol = com.cooperative_tradeoff(fraction=trade_off, fluxes=True, pfba=False)
    fluxes = sol.fluxes
    fluxes["sample"] = sam
    return {"medium": med, "gcs": rates, "fluxes": fluxes}
def __add_exchanges(self, reactions, info, exclude=default_excludes, external_compartment="e", internal_exchange=1000):
    """Add exchange reactions for a new model.

    Every reaction that passes the exchange sanity checks is linked to a
    metabolite in the medium compartment "m". The medium metabolite and its
    "EX_" reaction are created on first use; otherwise the bounds of the
    existing medium exchange are widened.

    Parameters
    ----------
    reactions : iterable of cobra.Reaction
        The reactions of the model being added.
    info : object with an `abundance` attribute
        The abundance is used as the stoichiometric coefficient of the medium
        metabolite.
    exclude : iterable of str
        Substrings of reaction IDs, and medium IDs, that are skipped.
    external_compartment : str
        Prefix of the external compartment ID.
    internal_exchange : number
        Bound applied to the internal side of the exchange reaction.
    """
    for r in reactions:
        # Some sanity checks for whether the reaction is an exchange
        ex = external_compartment + "__" + r.community_id
        if (
            not r.boundary
            or any(bad in r.id for bad in exclude)
            or ex not in r.compartments
        ):
            continue
        if not r.id.lower().startswith("ex"):
            logger.warning(
                "Reaction %s seems to be an exchange " % r.id
                + "reaction but its ID does not start with 'EX_'..."
            )

        export = len(r.reactants) == 1
        if export:
            lb = r.lower_bound / self.mass
            ub = r.upper_bound
        else:
            lb = -r.upper_bound / self.mass
            ub = -r.lower_bound

        # Bounds with a magnitude below 1e-6 are pushed out to +/-1e-6.
        if lb < 0.0 and lb > -1e-6:
            logger.info(
                "lower bound for %r below numerical accuracy "
                "-> adjusting to stabilize model.",
                r.id,
            )
            lb = -1e-6
        if ub > 0.0 and ub < 1e-6:
            logger.info(
                "upper bound for %r below numerical accuracy "
                "-> adjusting to stabilize model.",
                r.id,
            )
            ub = 1e-6

        met = (r.reactants + r.products)[0]
        # Strip the compartment suffix to get the base ID for the medium.
        medium_id = re.sub("_{}$".format(met.compartment), "", met.id)
        if medium_id in exclude:
            continue
        medium_id += "_m"
        if medium_id == met.id:
            # Avoid clashing with a metabolite that already ends in "_m".
            medium_id += "_medium"

        if medium_id not in self.metabolites:
            # If metabolite does not exist in medium add it to the model
            # and also add an exchange reaction for the medium
            logger.info("adding metabolite %s to external medium" % medium_id)
            medium_met = met.copy()
            medium_met.id = medium_id
            medium_met.compartment = "m"
            medium_met.global_id = medium_id
            medium_met.community_id = "medium"
            ex_medium = cobra.Reaction(
                id="EX_" + medium_met.id,
                name=medium_met.id + " medium exchange",
                lower_bound=lb,
                upper_bound=ub,
            )
            ex_medium.add_metabolites({medium_met: -1})
            ex_medium.global_id = ex_medium.id
            ex_medium.community_id = "medium"
            self.add_reactions([ex_medium])
        else:
            logger.info(
                "updating import rate for external metabolite %s" % medium_id
            )
            medium_met = self.metabolites.get_by_id(medium_id)
            ex_medium = self.reactions.get_by_id("EX_" + medium_met.id)
            ex_medium.lower_bound = min(lb, ex_medium.lower_bound)
            ex_medium.upper_bound = max(ub, ex_medium.upper_bound)

        coef = info.abundance
        r.add_metabolites({medium_met: coef if export else -coef})
        if export:
            r.lower_bound = -internal_exchange
        else:
            r.upper_bound = internal_exchange
def minimal_medium(
    community,
    community_growth,
    min_growth=0.0,
    exports=False,
    exchanges=None,
    minimize_components=False,
    open_exchanges=False,
    solution=False,
    weights=None,
    atol=None,
    rtol=None,
):
    """Find the minimal growth medium for the community.

    Finds the minimal growth medium for the community which allows for
    community as well as individual growth. Here, a minimal medium can either
    be the medium requiring the smallest total import flux or the medium
    requiring the least components (ergo ingredients).

    Arguments
    ---------
    community : micom.Community
        The community to modify.
    community_growth : positive float
        The minimum community-wide growth rate.
        NOTE(review): this argument is not referenced anywhere in the body of
        this function - confirm whether the constraint is applied elsewhere.
    min_growth : positive float or array-like object.
        The minimum growth rate for each individual in the community. Either
        a single value applied to all individuals or one value for each.
    exports : boolean
        Whether to include export fluxes in the returned medium. Defaults to
        False which will only return import fluxes.
    exchanges : list of cobra.Reactions
        The list of exchange reactions that are penalized. If None all
        exchanges of the community are used.
    minimize_components : boolean
        Whether to minimize the number of components instead of the total
        import flux. Might be more intuitive if set to True but may also be
        slow to calculate for large communities.
    open_exchanges : boolean or number
        Whether to ignore currently set bounds and make all exchange
        reactions in the model possible. If set to a number all exchange
        reactions will be opened with (-number, number) as bounds.
    solution : boolean
        Whether to also return the entire solution and all fluxes for the
        minimal medium.
    weights : str
        Will scale the fluxes by a weight factor. Can either be "mass" which
        will scale by molecular mass, a single element which will scale by
        the elemental content (for instance "C" to scale by carbon content).
        If None every metabolite will receive the same weight.
        Will be ignored if `minimize_components` is True.
    atol : float
        Absolute tolerance for the growth rates. If None will use the solver
        tolerance.
    rtol : float
        Relative tolerance for the growth rates. If None will use the solver
        tolerance.

    Returns
    -------
    pandas.Series or dict or None
        A series {rid: flux} giving the import flux for each required import
        reaction. If `solution` is True returns a dictionary
        {"medium": pandas.Series, "solution": micom.CommunitySolution}.
        None if the optimization did not yield a solution.
    """
    logger.info("calculating minimal medium for %s" % community.id)
    if atol is None:
        atol = community.solver.configuration.tolerances.optimality
    if rtol is None:
        rtol = community.solver.configuration.tolerances.optimality

    if exchanges is None:
        boundary_rxns = community.exchanges
    else:
        boundary_rxns = community.reactions.get_by_any(exchanges)

    # `True` opens the exchanges with the default bound of 1000.
    if isinstance(open_exchanges, bool):
        open_bound = 1000
    else:
        open_bound = open_exchanges

    min_growth = _format_min_growth(min_growth, community.taxa)
    with community as com:
        if open_exchanges:
            logger.info("opening exchanges for %d imports" % len(boundary_rxns))
            for rxn in boundary_rxns:
                rxn.bounds = (-open_bound, open_bound)
        logger.info("applying growth rate constraints")
        _apply_min_growth(community, min_growth, atol, rtol)
        com.objective = Zero
        logger.info("adding new media objective")
        if minimize_components:
            add_mip_obj(com, boundary_rxns)
        else:
            scales = weight(boundary_rxns, weights)
            add_linear_obj(com, boundary_rxns, scales)
        sol = com.optimize(fluxes=True, pfba=False)

    if sol is None:
        logger.warning("minimization of medium was unsuccessful")
        return None

    logger.info("formatting medium")
    required = {}
    for rxn in set(com.exchanges) & set(boundary_rxns):
        flux = sol.fluxes.loc["medium", rxn.id]
        if abs(flux) < atol:
            continue
        # A single reactant marks an export; its sign is flipped so that
        # imports end up positive in the medium.
        export = len(rxn.reactants) == 1
        required[rxn.id] = -flux if export else flux

    # An explicit dtype keeps an empty medium numeric.
    medium = pd.Series(required, dtype="float64")
    if not exports:
        medium = medium[medium > 0.0]

    if solution:
        return {"medium": medium, "solution": sol}
    return medium
def fast_dual(model, prefix="dual_"):
    """Add dual formulation to the problem.

    A mathematical optimization problem can be viewed as a primal and a dual
    problem. If the primal problem is a minimization problem the dual is a
    maximization problem, and the optimal value of the dual is a lower bound
    of the optimal value of the primal. For linear problems, strong duality
    holds, which means that the optimal values of the primal and dual are
    equal (duality gap = 0).

    This function adds the dual variables and dual constraints directly to
    the given model and returns the coefficients of the dual objective. The
    provided model must have a linear objective, linear constraints and only
    continuous variables. Furthermore, the problem must be in standard form,
    i.e. all variables should be non-negative. Both minimization and
    maximization problems are allowed.

    Parameters
    ----------
    model : cobra.Model
        The model to be dualized. It is modified in place.
    prefix : str
        The string that will be prepended to all variable and constraint
        names that are added for the dual problem.

    Returns
    -------
    dict
        The coefficients for the new dual objective, keyed by variable.

    Raises
    ------
    ValueError
        If a constraint is non-linear or has an unsupported expression type,
        if a variable is not continuous, or if a variable has a negative or
        missing lower bound.
    """
    logger.info("adding dual variables")
    if len(model.variables) > 1e5:
        logger.warning(
            "the model has a lot of variables, "
            "dual optimization will be extremely slow :O"
        )
    prob = model.problem

    maximization = model.objective.direction == "max"

    if maximization:
        sign = 1
    else:
        sign = -1

    # {primal variable name: {dual variable name: coefficient}}
    coefficients = {}
    # {dual variable name: objective coefficient}
    dual_objective = {}
    to_add = []

    # Add dual variables from primal constraints:
    for constraint in model.constraints:
        if constraint.expression == 0:
            continue  # Skip empty constraint
        if not constraint.is_Linear:
            raise ValueError(
                "Non-linear problems are not supported: " + str(constraint)
            )
        if constraint.lb is None and constraint.ub is None:
            logger.warning("skipped free constraint %s" % constraint.name)
            continue  # Skip free constraint
        if constraint.lb == constraint.ub:
            # Equality constraint -> a single unbounded dual variable.
            const_var = prob.Variable(
                prefix + constraint.name + "_constraint", lb=None, ub=None
            )
            to_add.append(const_var)
            if constraint.lb != 0:
                dual_objective[const_var.name] = sign * constraint.lb
            coefs = constraint.get_linear_coefficients(constraint.variables)
            for variable, coef in coefs.items():
                coefficients.setdefault(variable.name, {})[const_var.name] = (
                    sign * coef
                )
        else:
            # Inequality constraint -> one non-negative dual variable for
            # each bound that is present.
            if constraint.lb is not None:
                lb_var = prob.Variable(
                    prefix + constraint.name + "_constraint_lb", lb=0, ub=None
                )
                to_add.append(lb_var)
                if constraint.lb != 0:
                    dual_objective[lb_var.name] = -sign * constraint.lb
            if constraint.ub is not None:
                ub_var = prob.Variable(
                    prefix + constraint.name + "_constraint_ub", lb=0, ub=None
                )
                to_add.append(ub_var)
                if constraint.ub != 0:
                    dual_objective[ub_var.name] = sign * constraint.ub

            if not (
                constraint.expression.is_Add or constraint.expression.is_Mul
            ):
                raise ValueError(
                    "Invalid expression type: " + str(type(constraint.expression))
                )
            if constraint.expression.is_Add:
                coefficients_dict = constraint.get_linear_coefficients(
                    constraint.variables
                )
            else:  # constraint.expression.is_Mul:
                args = constraint.expression.args
                coefficients_dict = {args[1]: args[0]}

            for variable, coef in coefficients_dict.items():
                if constraint.lb is not None:
                    coefficients.setdefault(variable.name, {})[lb_var.name] = (
                        -sign * coef
                    )
                if constraint.ub is not None:
                    coefficients.setdefault(variable.name, {})[ub_var.name] = (
                        sign * coef
                    )

    # Add dual variables from primal bounds
    for variable in model.variables:
        if not variable.type == "continuous":
            raise ValueError(
                "Integer variables are not supported: " + str(variable)
            )
        # A missing lower bound means the variable can be negative as well,
        # so it is rejected here instead of failing on `None > 0` below.
        if variable.lb is None or variable.lb < 0:
            raise ValueError(
                "Problem is not in standard form ("
                + variable.name
                + " can be negative)"
            )
        if variable.lb > 0:
            bound_var = prob.Variable(prefix + variable.name + "_lb", lb=0, ub=None)
            to_add.append(bound_var)
            coefficients.setdefault(variable.name, {})[bound_var.name] = -sign
            dual_objective[bound_var.name] = -sign * variable.lb
        if variable.ub is not None:
            bound_var = prob.Variable(prefix + variable.name + "_ub", lb=0, ub=None)
            to_add.append(bound_var)
            coefficients.setdefault(variable.name, {})[bound_var.name] = sign
            if variable.ub != 0:
                dual_objective[bound_var.name] = sign * variable.ub

    model.add_cons_vars(to_add)

    # Add dual constraints from primal objective
    primal_objective_dict = model.objective.get_linear_coefficients(
        model.objective.variables
    )
    for variable in model.objective.variables:
        obj_coef = primal_objective_dict[variable]
        if maximization:
            const = prob.Constraint(
                S.Zero, lb=obj_coef, name=prefix + variable.name
            )
        else:
            const = prob.Constraint(
                S.Zero, ub=obj_coef, name=prefix + variable.name
            )
        model.add_cons_vars([const])
        # The solver is updated before the coefficients of the freshly added
        # constraint are set.
        model.solver.update()
        coefs = {
            model.variables[vid]: coef
            for vid, coef in coefficients[variable.name].items()
        }
        const.set_linear_coefficients(coefs)

    # Make dual objective
    coefs = {
        model.variables[vid]: coef
        for vid, coef in dual_objective.items()
        if coef != 0
    }
    logger.info("dual model has {} terms in objective".format(len(coefs)))

    return coefs
def minimal_medium(
    community,
    community_growth,
    exchanges=None,
    min_growth=0.0,
    exports=False,
    minimize_components=False,
    open_exchanges=False,
    solution=False,
):
    """Find the minimal growth medium for the community.

    Finds the minimal growth medium for the community which allows for
    community as well as individual growth. Here, a minimal medium can either
    be the medium requiring the smallest total import flux or the medium
    requiring the least components (ergo ingredients).

    Arguments
    ---------
    community : micom.Community
        The community to modify.
    community_growth : positive float
        The minimum community-wide growth rate.
    exchanges : list of cobra.Reactions
        The list of exchange reactions that are penalized.
        NOTE(review): this argument is not referenced in the body of this
        function, all exchanges of the community are always used - confirm
        whether that is intended.
    min_growth : positive float or array-like object.
        The minimum growth rate for each individual in the community. Either
        a single value applied to all individuals or one value for each.
    exports : boolean
        Whether to include export fluxes in the returned medium. Defaults to
        False which will only return import fluxes.
    minimize_components : boolean
        Whether to minimize the number of components instead of the total
        import flux. Might be more intuitive if set to True but may also be
        slow to calculate for large communities.
    open_exchanges : boolean or number
        Whether to ignore currently set bounds and make all exchange
        reactions in the model possible. If set to a number all exchange
        reactions will be opened with (-number, number) as bounds.
    solution : boolean
        Whether to also return the entire solution and all fluxes for the
        minimal medium.

    Returns
    -------
    pandas.Series or dict or None
        A series {rid: flux} giving the import flux for each required import
        reaction. If `solution` is True returns a dictionary
        {"medium": pandas.Series, "solution": micom.CommunitySolution}.
        None if the optimization did not yield a solution.
    """
    logger.info("calculating minimal medium for %s" % community.id)
    boundary_rxns = community.exchanges

    # `True` opens the exchanges with the default bound of 1000.
    if isinstance(open_exchanges, bool):
        open_bound = 1000
    else:
        open_bound = open_exchanges

    min_growth = _format_min_growth(min_growth, community.species)
    with community as com:
        if open_exchanges:
            logger.info("opening exchanges for %d imports" % len(boundary_rxns))
            for rxn in boundary_rxns:
                rxn.bounds = (-open_bound, open_bound)
        logger.info("applying growth rate constraints")
        context = get_context(community)
        if context is not None:
            # Register the reset first so the bound change below is undone
            # when the context is left.
            context(partial(reset_min_community_growth, com))
            com.variables.community_objective.lb = community_growth
        _apply_min_growth(community, min_growth)
        com.objective = Zero
        logger.info("adding new media objective")
        if minimize_components:
            add_mip_obj(com, boundary_rxns)
        else:
            add_linear_obj(com, boundary_rxns)
        sol = com.optimize(fluxes=True, pfba=False)

    if sol is None:
        logger.warning("minimization of medium was unsuccessful")
        return None

    logger.info("formatting medium")
    tol = community.solver.configuration.tolerances.feasibility
    required = {}
    for rxn in boundary_rxns:
        flux = sol.fluxes.loc["medium", rxn.id]
        if abs(flux) < tol:
            continue
        # A single reactant marks an export; its sign is flipped so that
        # imports end up positive in the medium.
        export = len(rxn.reactants) == 1
        required[rxn.id] = -flux if export else flux

    # An explicit dtype keeps an empty medium numeric.
    medium = pd.Series(required, dtype="float64")
    if not exports:
        medium = medium[medium > 0]

    if solution:
        return {"medium": medium, "solution": sol}
    return medium
def plot_fit(
    results,
    phenotype,
    variable_type="binary",
    variable_name="phenotype",
    filename="fit_%s.html" % datetime.now().strftime("%Y%m%d"),
    flux_type="production",
    min_coef=0.001,
    atol=1e-6,
):
    """Test for differential metabolite production.

    This will fit the `phenotype` response using L1-regularized linear models
    with log-fluxes as features. Will use LASSO regression for a continuous
    response and L1-regularized Logistic regression for a binary response.

    Parameters
    ----------
    results : micom.workflows.GrowthResults
        The results returned by the `grow` workflow.
    phenotype : pandas.Series
        The data to be fitted. Its index must correspond to `sample_id` in
        `exchanges`.
    variable_type : str of ["binary", "continuous"]
        The type of the variable.
    variable_name : str
        A short description of the phenotype for instance "disease_status".
    filename : str
        The HTML file where the visualization will be saved. Note that the
        date in the default value is computed once, when the function is
        defined, not on every call.
    flux_type : str of ["import", "production"]
        Whether to fit using import or production fluxes.
    min_coef : float in [0.0, Inf]
        Only report coefficient that are at least that large.
    atol : float
        Tolerance to consider a flux different from zero. Should be roughly
        equivalent to the solver tolerance.

    Returns
    -------
    Visualization
        A MICOM visualization. Can be served with `viz.view`.

    Raises
    ------
    ValueError
        If no flux passes the tolerance threshold or if `phenotype` does not
        match the given `variable_type`.
    RuntimeError
        If no coefficient reaches `min_coef`.
    """
    exchanges = results.exchanges
    # Work on a copy so the annotations stored in `results` keep their index.
    anns = results.annotations.copy()
    anns.index = anns.metabolite

    if flux_type == "import":
        exchanges = exchanges[
            (exchanges.taxon == "medium") & (exchanges.direction == "import")
        ].copy()
        exchanges["flux"] = exchanges.flux.abs()
    else:
        exchanges = exchanges[
            (exchanges.taxon != "medium") & (exchanges.direction == "export")
        ]
        # Abundance-weighted sum of the absolute fluxes per metabolite and
        # sample.
        exchanges = (
            exchanges.groupby(["reaction", "metabolite", "sample_id"])
            .apply(
                lambda df: pd.Series({"flux": sum(df.abundance * df.flux.abs())})
            )
            .reset_index()
        )

    exchanges = exchanges.loc[exchanges.flux > atol]
    # The filter above removes rows, so the row count is what must be checked.
    if exchanges.shape[0] < 1:
        raise ValueError("None of the fluxes passed the tolerance threshold :(")

    if variable_type == "binary" and phenotype.nunique() != 2:
        raise ValueError(
            "Binary variables must have exactly two unique values, yours "
            "has: %s." % ", ".join(str(value) for value in phenotype.unique())
        )
    elif variable_type == "continuous" and not is_numeric_dtype(phenotype):
        raise ValueError(
            "Continuous variables must have a numeric type, but yours is"
            " of type `%s`." % phenotype.dtype
        )
    elif variable_type not in ["binary", "continuous"]:
        raise ValueError(
            "Unsupported variable type. Must be either `binary` or "
            "`continuous`."
        )

    # Samples x metabolites; missing fluxes are filled with `atol` so the log
    # stays finite.
    fluxes = exchanges.pivot_table(
        index="sample_id", columns="metabolite", values="flux", fill_value=atol
    )
    fluxes = np.log(fluxes)
    meta = phenotype[fluxes.index]

    # Standard deviation per metabolite (column) so that the mask lines up
    # with the column selection below.
    stds = fluxes.std(axis=0)
    bad = stds < 1e-6
    if bad.any():
        logger.warning("Removing %d fluxes due to zero variance." % bad.sum())
        fluxes = fluxes.loc[:, ~bad]
    scaled = StandardScaler().fit_transform(fluxes)

    if variable_type == "binary":
        # Pick the regularization strength by cross-validation, then refit a
        # plain model with it.
        model = LogisticRegressionCV(
            penalty="l1",
            scoring="accuracy",
            solver="liblinear",
            cv=2,
            Cs=np.power(10.0, np.arange(-6, 6, 0.5)),
            max_iter=10000,
        )
        fit = model.fit(scaled, meta)
        model = LogisticRegression(
            penalty="l1",
            solver="liblinear",
            C=fit.C_[0],
            max_iter=10000,
        )
        fit = model.fit(scaled, meta)
        score = cross_val_score(model, X=scaled, y=meta, cv=LeaveOneOut())
        coefs = pd.DataFrame(
            {"coef": fit.coef_[0, :], "metabolite": fluxes.columns}
        )
    else:
        model = LassoCV(cv=2, max_iter=10000)
        fit = model.fit(scaled, meta)
        model = Lasso(alpha=fit.alpha_, max_iter=10000)
        fit = model.fit(scaled, meta)
        score = cross_val_score(model, X=scaled, y=meta, cv=3)
        coefs = pd.DataFrame({"coef": fit.coef_, "metabolite": fluxes.columns})

    coefs["description"] = anns.loc[coefs.metabolite, "name"].values
    # [mean CV score, std of CV scores, score on the full data]
    score = [np.mean(score), np.std(score)]
    score.append(model.score(scaled, meta))

    if all(coefs.coef.abs() < min_coef):
        raise RuntimeError(
            "Unfortunately no metabolite flux was predictive for the "
            "chosen phenotype and a cutoff of %g :(" % min_coef
        )

    # The exported data keeps all fluxes and all coefficients.
    data = {"fluxes": exchanges, "coefficients": coefs}
    coefs = coefs[coefs.coef.abs() >= min_coef].sort_values(by="coef")
    predicted = cross_val_predict(model, scaled, meta, cv=LeaveOneOut())
    fitted = pd.DataFrame({"real": meta, "predicted": predicted}, index=meta.index)

    exchanges = exchanges.loc[
        exchanges.metabolite.isin(coefs.metabolite.values)
    ].copy()
    exchanges["meta"] = meta[exchanges.sample_id].values
    exchanges["description"] = anns.loc[exchanges.metabolite, "name"].values
    var_type = "nominal" if variable_type == "binary" else "quantitative"

    viz = Visualization(filename, data, "tests.html")
    viz.save(
        fitted=fitted.to_json(orient="records"),
        coefs=coefs.to_json(orient="records"),
        exchanges=exchanges.to_json(orient="records"),
        metabolites=json.dumps(coefs.metabolite.tolist()),
        variable=variable_name,
        type=var_type,
        score=score,
        width=400,
        height=300,
        cheight=max(2 * coefs.shape[0], 40),
        cwidth=max(8 * coefs.shape[0], 160),
    )

    return viz
def __init__(
    self,
    taxonomy,
    model_db=None,
    id=None,
    name=None,
    rel_threshold=1e-6,
    solver=None,
    progress=True,
    max_exchange=100,
    mass=1,
):
    """Create a new community object.

    `micom` builds a community from a taxonomy which may simply be a list
    of model files in its simplest form. Usually, the taxonomy will contain
    additional information such as annotations for the individuals (for
    instance phylum, organisms or species) and abundances.

    The recommended way to build a micom model is to supply a
    quantification of taxa (called "taxonomy" here) which specifies the
    taxonomic ranks for a taxon and its abundance, and a model database for
    a specific rank (for instance "genus"). MICOM will match the ranks from
    your taxonomy to the model database and assemble the community models
    from that. You will also get information about the construction process
    by calling `Community.build_metrics`.

    The most customizable way only takes a single table where
    summarization and matching to the reference database has already
    occurred. In this case you will also provide paths to model files for
    each taxon. This is the "old" way but may still be applicable if you
    want to use a custom database or want full control of matching your
    data to reference models.

    Notes
    -----
    `micom` will automatically add exchange fluxes and a community
    objective maximizing the overall growth rate of the community.

    Parameters
    ----------
    taxonomy : pandas.DataFrame
        The taxonomy used for building the model. Must have at least the
        column "id". If no model database is specified in the next argument
        it furthermore requires a column "file" which specifies a filepath
        for each model. Valid file extensions are ".pickle", ".xml",
        ".xml.gz" and ".json". If a model database is specified this must
        contain at least a column with the same name as the rank used in
        the model database. Thus, for a genus-level database you will need
        a column `genus`. Additional taxa ranks can also be specified and
        will be used to be more stringent in taxa matching.
        Finally, the taxonomy should contain a column `abundance`. It will
        be used to quantify each individual in the community. If absent,
        MICOM will assume all individuals are present in the same amount.
    model_db : str
        A pre-built model database. If ending in `.qza` must be a Qiime 2
        artifact of type `MetabolicModels[JSON]`. Can also be a folder,
        zip (must end in `.zip`) file or None if the taxonomy contains a
        column `file`.
    id : str, optional
        The ID for the community. Should only contain letters and numbers,
        otherwise it will be formatted as such.
    name : str, optional
        The name for the community.
    rel_threshold : float < 1, optional
        The relative abundance threshold that will be used. Describes the
        smallest relative amount of an individual that will be considered
        non-zero. All individuals with a smaller relative amount will be
        omitted.
    solver : str, optional
        Which solver to use. If not given, the first available one of
        cplex, osqp, gurobi and glpk is used (in that order).
    progress : bool, optional
        Show a progress bar.
    max_exchange : positive float, optional
        During model constructions exchange reactions are duplicated into
        internal and external exchange reactions. This specifies the new
        import flux bound for the *internal* exchange reaction. Import
        rates for the exchanges between the medium and outside are still
        maintained.
    mass : positive float, optional
        The total mass of the community in gDW. Used to adjust import
        fluxes which are assumed to be given as mmol/gDW*h for the
        entire community. As a consequence all import fluxes will be
        divided by that number.

    Attributes
    ----------
    taxa : list
        A list of taxa IDs in the community.

    Raises
    ------
    ValueError
        If `taxonomy` is not a DataFrame with an `id` column, if neither a
        model database nor a `file` column is provided, or if the taxonomy
        lacks the column named after the summary rank of the database.
    """
    super(Community, self).__init__(id, name)

    logger.info("building new micom model %s.", id)
    if not solver:
        solver = [
            s for s in ["cplex", "osqp", "gurobi", "glpk"]
            if s in cobra.util.solver.solvers
        ][0]
    logger.info("using the %s solver.", solver)
    if solver == "glpk":
        logger.warning(
            "No QP solver found, will use GLPK. A lot of functionality "
            "in MICOM will require a QP solver :/")
    # NOTE(review): these settings are applied before `self.solver` is
    # reassigned below - confirm they carry over to the new interface.
    self.solver.configuration.lp_method = "auto"
    self.solver.configuration.qp_method = "auto"
    self.solver.configuration.presolve = False
    self.solver = solver
    self._rtol = rel_threshold
    self._modification = None
    self.mass = mass
    self.__db_metrics = None
    adjust_solver_config(self.solver)

    # Validate *before* the first use of `taxonomy` so that a wrong input
    # type surfaces as the documented ValueError and not as an
    # AttributeError from `.copy()` / `.abundance`.
    if not (isinstance(taxonomy, pd.DataFrame)
            and "id" in taxonomy.columns):
        raise ValueError("`taxonomy` must be a pandas DataFrame with at "
                         "least a column `id` :(")
    if model_db is None and "file" not in taxonomy.columns:
        raise ValueError(
            "If no model database is specified you need to pass "
            "file names for models in a `file` column as well.")

    # Work on a copy so the caller's table is never modified.
    taxonomy = taxonomy.copy()
    if "abundance" not in taxonomy.columns:
        taxonomy["abundance"] = 1
    taxonomy.abundance /= taxonomy.abundance.sum()
    logger.info("%s individuals with abundances below threshold",
                (taxonomy.abundance <= self._rtol).sum())
    taxonomy = taxonomy[taxonomy.abundance > self._rtol]

    # Compressed databases are unpacked into a temporary directory that
    # has to outlive model loading; the `finally` below removes it even
    # when building the community fails half-way.
    tdir = None
    try:
        if model_db is not None:
            if model_db.endswith(".qza") or model_db.endswith(".zip"):
                tdir = TemporaryDirectory(prefix="micom_")
            # File paths always come from the database manifest here.
            if "file" in taxonomy.columns:
                del taxonomy["file"]
            if model_db.endswith(".qza"):
                manifest = load_qiime_model_db(model_db, tdir.name)
            elif model_db.endswith(".zip"):
                manifest = load_zip_model_db(model_db, tdir.name)
            else:
                manifest = load_manifest(model_db)
            rank = manifest["summary_rank"][0]
            if rank not in taxonomy.columns:
                raise ValueError(
                    "Missing the column `%s` from the taxonomy." % rank)
            # Match on every rank up to the database rank that is present
            # in both tables.
            keep_cols = [
                r for r in _ranks[0:(_ranks.index(rank) + 1)]
                if r in taxonomy.columns and r in manifest.columns
            ]
            manifest = manifest[keep_cols + ["file"]]
            merged = pd.merge(taxonomy, manifest, on=keep_cols)

            self.__db_metrics = pd.Series({
                "found_taxa": merged.shape[0],
                "total_taxa": taxonomy.shape[0],
                "found_fraction": merged.shape[0] / taxonomy.shape[0],
                "found_abundance_fraction": merged.abundance.sum(),
            })
            # Label-based access: positional `[3]` on a string-indexed
            # Series is deprecated in pandas.
            found_abundance = self.__db_metrics["found_abundance_fraction"]
            logger.info("Matched %g%% of total abundance in model DB.",
                        100.0 * found_abundance)
            if found_abundance < 0.5:
                logger.warning(
                    "Less than 50%% of the abundance could be matched to "
                    "the model database. Model `%s` may not be "
                    "representative of the sample", self.id)
            taxonomy = merged
            taxonomy["abundance"] /= taxonomy["abundance"].sum()

        if taxonomy.id.str.contains(r"[^A-Za-z0-9_]", regex=True).any():
            logger.warning("Taxa IDs contain prohibited characters and"
                           " will be reformatted.")
            taxonomy.id = taxonomy.id.replace(r"[^A-Za-z0-9_\s]+", "_",
                                              regex=True)

        self.__taxonomy = taxonomy
        self.__taxonomy.index = self.__taxonomy.id

        obj = Zero
        self.taxa = []
        index = self.__taxonomy.index
        index = tqdm(index, unit="models") if progress else index
        for idx in index:
            row = self.__taxonomy.loc[idx]
            if isinstance(row.file, list):
                if len(row.file) > 1:
                    model = join_models(row.file)
                    logger.info("joined %s models", len(row.file))
                else:
                    model = load_model(row.file[0])
            else:
                model = load_model(row.file)
            # Every ID of this taxon gets a taxon-specific suffix.
            suffix = "__" + idx.replace(" ", "_").strip()
            logger.info("converting IDs for %s", idx)
            external = cobra.medium.find_external_compartment(model)
            logger.info(
                "Identified %s as the external compartment for %s. "
                "If that is wrong you may be in trouble...",
                external, idx)
            for r in model.reactions:
                r.global_id = clean_ids(r.id)
                r.id = r.global_id + suffix
                r.community_id = idx
                # avoids https://github.com/opencobra/cobrapy/issues/926
                r._compartments = None
                # SBO terms may not be maintained
                if "sbo" in r.annotation:
                    del r.annotation["sbo"]
            for m in model.metabolites:
                m.global_id = clean_ids(m.id)
                m.id = m.global_id + suffix
                m.compartment += suffix
                m.community_id = idx
            logger.info("adding reactions for %s to community", idx)
            self.add_reactions(model.reactions)
            o = self.solver.interface.Objective.clone(model.objective,
                                                      model=self.solver)
            # Community objective: abundance-weighted sum of the
            # individual objectives.
            obj += o.expression * row.abundance
            self.taxa.append(idx)
            taxa_obj = self.problem.Constraint(o.expression,
                                               name="objective_" + idx,
                                               lb=0.0)
            self.add_cons_vars([taxa_obj])
            self.__add_exchanges(
                model.reactions,
                row,
                external_compartment=external,
                internal_exchange=max_exchange,
            )
            self.solver.update()  # to avoid dangling refs due to lazy add
    finally:
        if tdir is not None:
            tdir.cleanup()

    com_obj = add_var_from_expression(self, "community_objective", obj,
                                      lb=0)
    self.objective = self.problem.Objective(com_obj, direction="max")
def __init__(self, taxonomy, id=None, name=None, rel_threshold=1e-6,
             solver=None, progress=True, max_exchange=100, mass=1):
    """Create a new community object.

    `micom` builds a community from a taxonomy which may simply be a list
    of model files in its simplest form. Usually, the taxonomy will contain
    additional information such as annotations for the individuals (for
    instance phylum, organisms or species) and abundances.

    Notes
    -----
    `micom` will automatically add exchange fluxes and a community
    objective maximizing the overall growth rate of the community.

    Parameters
    ----------
    taxonomy : pandas.DataFrame
        The taxonomy used for building the model. Must have at least the
        two columns "id" and "file" which specify an ID and the filepath
        for each model. Valid file extensions are ".pickle", ".xml",
        ".xml.gz" and ".json". If the taxonomy includes a column named
        "abundance" it will be used to quantify each individual in the
        community. If absent `micom` will assume all individuals are
        present in the same amount.
    id : str, optional
        The ID for the community. Should only contain letters and numbers,
        otherwise it will be formatted as such.
    name : str, optional
        The name for the community.
    rel_threshold : float < 1, optional
        The relative abundance threshold that will be used. Describes the
        smallest relative amount of an individual that will be considered
        non-zero. All individuals with a smaller relative amount will be
        omitted.
    solver : str, optional
        Which solver to use. Will default to cplex if available which is
        better suited for large problems, and to glpk otherwise.
    progress : bool, optional
        Show a progress bar.
    max_exchange : positive float, optional
        During model constructions exchange reactions are duplicated into
        internal and external exchange reactions. This specifies the new
        import flux bound for the *internal* exchange reaction. Import
        rates for the exchanges between the medium and outside are still
        maintained.
    mass : positive float, optional
        The total mass of the community in gDW. Used to adjust import
        fluxes which are assumed to be given as mmol/gDW*h for the
        entire community. As a consequence all import fluxes will be
        divided by that number.

    Attributes
    ----------
    species : list
        A list of species IDs in the community.

    Raises
    ------
    ValueError
        If `taxonomy` is not a DataFrame containing all required columns.
    """
    super(Community, self).__init__(id, name)

    logger.info("building new micom model %s.", id)
    if not solver:
        self.solver = ("cplex" if "cplex" in cobra.util.solver.solvers
                       else "glpk")
    else:
        self.solver = solver
    adjust_solver_config(self.solver)

    if not (isinstance(taxonomy, pd.DataFrame)
            and all(col in taxonomy.columns for col in _taxonomy_cols)):
        raise ValueError("`taxonomy` must be a pandas DataFrame with at "
                         "least columns id and file :(")

    self._rtol = rel_threshold
    self._modification = None
    self.mass = mass

    # Work on a copy so the caller's table is never modified.
    taxonomy = taxonomy.copy()
    if "abundance" not in taxonomy.columns:
        taxonomy["abundance"] = 1
    taxonomy.abundance /= taxonomy.abundance.sum()
    logger.info("%s individuals with abundances below threshold",
                (taxonomy.abundance <= self._rtol).sum())
    taxonomy = taxonomy[taxonomy.abundance > self._rtol]

    if taxonomy.id.str.contains(r"[^A-Za-z0-9_]", regex=True).any():
        logger.warning("taxonomy IDs contain prohibited characters and"
                       " will be reformatted")
        # Drop prohibited characters, then collapse whitespace to "_".
        taxonomy.id = taxonomy.id.replace(
            [r"[^A-Za-z0-9_\s]", r"\s+"], ["", "_"], regex=True)

    self.__taxonomy = taxonomy
    self.__taxonomy.index = self.__taxonomy.id

    obj = Zero
    self.species = []
    index = self.__taxonomy.index
    index = tqdm(index, unit="models") if progress else index
    for idx in index:
        row = self.__taxonomy.loc[idx]
        if isinstance(row.file, list):
            if len(row.file) > 1:
                model = join_models(row.file)
                logger.info("joined %s models", len(row.file))
            else:
                model = load_model(row.file[0])
        else:
            model = load_model(row.file)
        # Every ID of this taxon gets a taxon-specific suffix.
        suffix = "__" + idx.replace(" ", "_").strip()
        logger.info("converting IDs for %s", idx)
        for r in model.reactions:
            # Strip numeric escape codes such as "__45__". The pattern
            # uses `\d+` exactly like the metabolite clean-up below; the
            # former single-digit `\d` left multi-digit codes in
            # reaction IDs only.
            r.global_id = re.sub(r"__\d+__", "_", r.id).strip(" _-")
            r.id = r.global_id + suffix
            r.community_id = idx
        for m in model.metabolites:
            m.global_id = re.sub(r"__\d+__", "_", m.id).strip(" _-")
            m.id = m.global_id + suffix
            m.compartment += suffix
            m.community_id = idx
        logger.info("adding reactions for %s to community", idx)
        self.add_reactions(model.reactions)
        o = self.solver.interface.Objective.clone(model.objective,
                                                  model=self.solver)
        # Community objective: abundance-weighted sum of the individual
        # objectives.
        obj += o.expression * row.abundance
        self.species.append(idx)
        species_obj = self.problem.Constraint(o.expression,
                                              name="objective_" + idx,
                                              lb=0.0)
        self.add_cons_vars([species_obj])
        self.__add_exchanges(model.reactions, row,
                             internal_exchange=max_exchange)
        self.solver.update()  # to avoid dangling refs due to lazy add

    com_obj = add_var_from_expression(self, "community_objective", obj,
                                      lb=0)
    self.objective = self.problem.Objective(com_obj, direction="max")
def __add_exchanges(
    self,
    reactions,
    info,
    external_compartment="e",
    internal_exchange=1000,
):
    """Add exchange reactions for a new model.

    Every exchange reaction of the taxon is connected to a metabolite in
    the shared medium compartment "m". The medium metabolite and its
    `EX_` medium exchange reaction are created on first use; on later uses
    the bounds of the existing medium exchange are widened.

    Parameters
    ----------
    reactions : iterable of cobra.Reaction
        The reactions of the taxon. Each must already carry the
        `community_id` attribute, and its metabolites `global_id`.
    info : pandas.Series
        The taxonomy entry of the taxon. Only `abundance` is read.
    external_compartment : str, optional
        ID of the external compartment in the original (unsuffixed) model.
    internal_exchange : positive float, optional
        The import flux bound set on the taxon's own exchange reaction.
    """
    for r in reactions:
        # Some sanity checks for whether the reaction is an exchange
        ex = external_compartment + "__" + r.community_id
        if not cobra.medium.is_boundary_type(r, "exchange", ex):
            continue
        if not r.id.lower().startswith("ex"):
            logger.warning(
                "Reaction %s seems to be an exchange "
                "reaction but its ID does not start with 'EX_'...", r.id)

        # A single reactant means the reaction is written in the export
        # direction; otherwise the bounds are mirrored.
        export = len(r.reactants) == 1
        if export:
            lb = r.lower_bound / self.mass
            ub = r.upper_bound
        else:
            lb = -r.upper_bound / self.mass
            ub = -r.lower_bound
        if lb < 0.0 and lb > -1e-6:
            logger.info("lower bound for %r below numerical accuracy "
                        "-> adjusting to stabilize model.", r.id)
            lb = -1e-6
        if ub > 0.0 and ub < 1e-6:
            logger.info("upper bound for %r below numerical accuracy "
                        "-> adjusting to stabilize model.", r.id)
            ub = 1e-6

        met = (r.reactants + r.products)[0]
        old_compartment = met.compartment.replace("__" + r.community_id,
                                                  "")
        # Strip the compartment suffix ("_e", "[e]", "(e)", ...) from the
        # metabolite ID. The compartment is escaped so that regex
        # metacharacters in its name are matched literally.
        comp = re.escape(old_compartment)
        medium_id = re.sub(
            "(_{}$)|([^a-zA-Z0-9 :]{}[^a-zA-Z0-9 :]$)".format(comp, comp),
            "",
            met.global_id,
        )
        medium_id += "_m"
        # Avoid clashing with the taxon's own metabolite ID.
        if medium_id == met.id:
            medium_id += "_medium"
        if medium_id not in self.metabolites:
            # If metabolite does not exist in medium add it to the model
            # and also add an exchange reaction for the medium
            logger.info("adding metabolite %s to external medium",
                        medium_id)
            medium_met = met.copy()
            medium_met.id = medium_id
            medium_met.compartment = "m"
            medium_met.global_id = medium_id
            medium_met.community_id = "medium"
            ex_medium = cobra.Reaction(
                id="EX_" + medium_met.id,
                name=medium_met.id + " medium exchange",
                lower_bound=lb,
                upper_bound=ub,
            )
            ex_medium.add_metabolites({medium_met: -1})
            ex_medium.global_id = ex_medium.id
            ex_medium.community_id = "medium"
            self.add_reactions([ex_medium])
        else:
            logger.info("updating import rate for external metabolite %s",
                        medium_id)
            medium_met = self.metabolites.get_by_id(medium_id)
            ex_medium = self.reactions.get_by_id("EX_" + medium_met.id)
            # Keep the most permissive bounds seen across all taxa.
            ex_medium.lower_bound = min(lb, ex_medium.lower_bound)
            ex_medium.upper_bound = max(ub, ex_medium.upper_bound)

        # Couple the taxon's exchange to the medium metabolite, scaled by
        # the taxon's abundance.
        coef = info.abundance
        r.add_metabolites({medium_met: coef if export else -coef})
        if export:
            r.lower_bound = -internal_exchange
        else:
            r.upper_bound = internal_exchange