def optgp(model):
    """Build a single-process, unthinned OptGPSampler for use in tests.

    The freshly built sampler is sanity-checked before it is returned: it
    must have at least one warmup point, no more than twice the number of
    model variables, and every warmup point must validate as "v".
    """
    sampler = OptGPSampler(model, processes=1, thinning=1)
    assert 0 < sampler.n_warmup <= 2 * len(model.variables)
    warmup_codes = sampler.validate(sampler.warmup)
    assert all(warmup_codes == "v")
    return sampler
def test_complicated_model(self):
    """Sample a small flux-split model with both samplers.

    Difficult model since the online mean calculation is numerically
    unstable so many samples weakly violate the equality constraints.

    The model has a single metabolite A that is consumed by V1 and V2 and
    produced by V3.
    """
    model = Model('flux_split')
    reaction1 = Reaction('V1')
    reaction2 = Reaction('V2')
    reaction3 = Reaction('V3')
    reaction1.lower_bound = 0
    reaction2.lower_bound = 0
    reaction3.lower_bound = 0
    reaction1.upper_bound = 6
    reaction2.upper_bound = 8
    reaction3.upper_bound = 10
    A = Metabolite('A')
    reaction1.add_metabolites({A: -1})
    reaction2.add_metabolites({A: -1})
    reaction3.add_metabolites({A: 1})
    model.add_reactions([reaction1])
    model.add_reactions([reaction2])
    model.add_reactions([reaction3])

    optgp = OptGPSampler(model, 1, seed=42)
    achr = ACHRSampler(model, seed=42)
    optgp_samples = optgp.sample(100)
    achr_samples = achr.sample(100)

    # At least one pair of fluxes must not be perfectly correlated.  The
    # boolean frame is reduced explicitly: the builtin any() applied to a
    # DataFrame iterates over its column labels, which are always truthy,
    # so the check could never fail.
    assert (optgp_samples.corr().abs() < 1.0).values.any()
    assert (achr_samples.corr().abs() < 1.0).values.any()
    # > 95% are valid
    assert sum(optgp.validate(optgp_samples) == "v") > 95
    assert sum(achr.validate(achr_samples) == "v") > 95
def test_complicated_model():
    """Test a complicated model.

    Difficult model since the online mean calculation is numerically
    unstable so many samples weakly violate the equality constraints.

    The model has a single metabolite A that is consumed by V1 and V2 and
    produced by V3.
    """
    model = Model('flux_split')
    reaction1 = Reaction('V1')
    reaction2 = Reaction('V2')
    reaction3 = Reaction('V3')
    reaction1.bounds = (0, 6)
    reaction2.bounds = (0, 8)
    reaction3.bounds = (0, 10)
    A = Metabolite('A')
    reaction1.add_metabolites({A: -1})
    reaction2.add_metabolites({A: -1})
    reaction3.add_metabolites({A: 1})
    model.add_reactions([reaction1, reaction2, reaction3])

    optgp = OptGPSampler(model, 1, seed=42)
    achr = ACHRSampler(model, seed=42)
    optgp_samples = optgp.sample(100)
    achr_samples = achr.sample(100)

    # At least one pair of fluxes must not be perfectly correlated.  The
    # boolean frame is reduced explicitly: the builtin any() applied to a
    # DataFrame iterates over its column labels, which are always truthy,
    # so the check could never fail.
    assert (optgp_samples.corr().abs() < 1.0).values.any()
    assert (achr_samples.corr().abs() < 1.0).values.any()
    # > 95% are valid
    assert sum(optgp.validate(optgp_samples) == "v") > 95
    assert sum(achr.validate(achr_samples) == "v") > 95
def capitulo_7():
    """Run the flux-sampling walkthrough and log results to a text file.

    Samples the "textbook" test model with the high-level ``sample`` helper
    and with the ACHR and OptGP sampler classes directly, validates some
    samples, and writes two results to ``resultados_capitulo_7.txt``:
    the batch growth statistics and the biomass fluxes sampled after a
    minimum-growth constraint has been added.

    The output file is opened (and truncated) before any sampling starts
    and is closed even if one of the sampling steps raises.
    """
    with open("resultados_capitulo_7.txt", "w") as results_file:
        model = create_test_model("textbook")

        s = sample(model, 100)
        s.head()

        print("One process:")
        s = sample(model, 1000)
        print("Two processes:")
        s = sample(model, 1000, processes=2)

        s = sample(model, 100, method="achr")

        from cobra.flux_analysis.sampling import OptGPSampler, ACHRSampler

        achr = ACHRSampler(model, thinning=10)
        optgp = OptGPSampler(model, processes=4)

        s1 = achr.sample(100)
        optgp.sample(100)

        import numpy as np

        # Validate one random flux vector and the ACHR samples; the
        # validation results themselves are not used.
        bad = np.random.uniform(-1000, 1000, size=len(model.reactions))
        achr.validate(np.atleast_2d(bad))
        achr.validate(s1)

        # Fraction of samples per batch whose biomass flux exceeds 0.1.
        counts = [
            np.mean(s.Biomass_Ecoli_core > 0.1)
            for s in optgp.batch(100, 10)
        ]
        results_file.write("Usually {:.2f}% +- {:.2f}% grow...".format(
            np.mean(counts) * 100.0, np.std(counts) * 100.0))
        results_file.write("\n")

        # Require a biomass flux of at least 0.1 and sample again.
        co = model.problem.Constraint(
            model.reactions.Biomass_Ecoli_core.flux_expression, lb=0.1)
        model.add_cons_vars([co])

        s = sample(model, 10)
        # write() only accepts text, so the sampled column is rendered
        # with str() before it is written.
        results_file.write(str(s.Biomass_Ecoli_core))
        results_file.write("\n")
def test_complicated_model(self):
    """Sample a small flux-split model with both samplers.

    Difficult model since the online mean calculation is numerically
    unstable so many samples weakly violate the equality constraints.

    The model has a single metabolite A that is consumed by V1 and V2 and
    produced by V3.
    """
    model = Model('flux_split')
    reaction1 = Reaction('V1')
    reaction2 = Reaction('V2')
    reaction3 = Reaction('V3')
    reaction1.lower_bound = 0
    reaction2.lower_bound = 0
    reaction3.lower_bound = 0
    reaction1.upper_bound = 6
    reaction2.upper_bound = 8
    reaction3.upper_bound = 10
    A = Metabolite('A')
    reaction1.add_metabolites({A: -1})
    reaction2.add_metabolites({A: -1})
    reaction3.add_metabolites({A: 1})
    model.add_reactions([reaction1])
    model.add_reactions([reaction2])
    model.add_reactions([reaction3])

    optgp = OptGPSampler(model, 1, seed=42)
    achr = ACHRSampler(model, seed=42)
    optgp_samples = optgp.sample(100)
    achr_samples = achr.sample(100)

    # At least one pair of fluxes must not be perfectly correlated.  The
    # boolean frame is reduced explicitly: the builtin any() applied to a
    # DataFrame iterates over its column labels, which are always truthy,
    # so the check could never fail.
    assert (optgp_samples.corr().abs() < 1.0).values.any()
    assert (achr_samples.corr().abs() < 1.0).values.any()
    # > 95% are valid
    assert sum(optgp.validate(optgp_samples) == "v") > 95
    assert sum(achr.validate(achr_samples) == "v") > 95
def getFluxSample(self, nsamples=5000):
    """
    Draw flux samples for ``self.GEM`` and keep only the valid ones.

    An OptGPSampler (thinning=100, 3 processes) draws ``nsamples`` samples;
    only the rows that the sampler validates as "v" are returned, so the
    result may contain fewer than ``nsamples`` rows.
    """
    sampler = OptGPSampler(self.GEM, thinning=100, processes=3)
    drawn = sampler.sample(nsamples)
    is_valid = sampler.validate(drawn) == "v"
    return drawn[is_valid]
def setup_class(self):
    """Create checked ACHR and OptGP samplers for the textbook model.

    Each sampler must have between 1 and ``2 * len(model.variables)``
    warmup points, all of which validate as "v".  The samplers are stored
    as ``self.achr`` and ``self.optgp``.
    """
    from . import create_test_model

    model = create_test_model("textbook")
    max_warmup = 2 * len(model.variables)

    achr_sampler = ACHRSampler(model, thinning=1)
    assert 0 < achr_sampler.n_warmup <= max_warmup
    assert all(achr_sampler.validate(achr_sampler.warmup) == "v")
    self.achr = achr_sampler

    optgp_sampler = OptGPSampler(model, processes=1, thinning=1)
    assert 0 < optgp_sampler.n_warmup <= max_warmup
    assert all(optgp_sampler.validate(optgp_sampler.warmup) == "v")
    self.optgp = optgp_sampler
def setup_class(self):
    """Create checked ARCH and OptGP samplers for the textbook model.

    Each sampler must have between 1 and ``2 * len(model.reactions)``
    warmup points, all of which validate as "v".  The samplers are stored
    as ``self.arch`` and ``self.optgp``.
    """
    from . import create_test_model

    model = create_test_model("textbook")
    max_warmup = 2 * len(model.reactions)

    arch_sampler = ARCHSampler(model, thinning=1)
    assert 0 < arch_sampler.n_warmup <= max_warmup
    assert all(arch_sampler.validate(arch_sampler.warmup) == "v")
    self.arch = arch_sampler

    optgp_sampler = OptGPSampler(model, processes=1, thinning=1)
    assert 0 < optgp_sampler.n_warmup <= max_warmup
    assert all(optgp_sampler.validate(optgp_sampler.warmup) == "v")
    self.optgp = optgp_sampler
def fluxSampling(model):
    """Walk through flux sampling on the "textbook" test model.

    Note that the ``model`` argument is not used: it is replaced by the
    textbook test model on the first line of the body.  The function
    prints the batch growth statistics and the biomass fluxes sampled
    after a minimum-growth constraint has been added; it returns nothing.
    """
    from cobra.test import create_test_model
    from cobra.flux_analysis import sample

    model = create_test_model("textbook")

    # High-level helper: the second argument is the number of samples.
    samples = sample(model, 100)
    samples.head()
    samples = sample(model, 1000)

    # Lower-level control: use the sampler classes directly.
    from cobra.flux_analysis.sampling import OptGPSampler, ACHRSampler

    # "Thinning" means only recording samples every n iterations.  A higher
    # thinning factor gives less correlated samples but takes longer.
    achr = ACHRSampler(model, thinning=10)
    # `processes` sets how many processes create parallel sampling chains.
    # For OptGPSampler the number of samples should be a multiple of the
    # number of processes, otherwise it is increased to the nearest
    # multiple automatically.
    optgp = OptGPSampler(model, processes=4)

    achr_samples = achr.sample(100)
    optgp.sample(100)

    # Sampling and validation
    import numpy as np

    random_fluxes = np.random.uniform(-1000, 1000, size=len(model.reactions))
    achr.validate(np.atleast_2d(random_fluxes))  # should not be feasible
    achr.validate(achr_samples)

    # Batch sampling: fraction of growing samples in each batch.
    counts = []
    for batch in optgp.batch(100, 10):
        counts.append(np.mean(batch.Biomass_Ecoli_core > 0.1))
    growth_mean = np.mean(counts) * 100.0
    growth_std = np.std(counts) * 100.0
    print("Usually {:.2f}% +- {:.2f}% grow...".format(growth_mean, growth_std))

    # Adding constraints.  This is only for demonstration purposes; usually
    # the lower bound of the reaction could be set directly instead of
    # creating a new constraint.
    biomass_flux = model.reactions.Biomass_Ecoli_core.flux_expression
    min_growth = model.problem.Constraint(biomass_flux, lb=0.1)
    model.add_cons_vars([min_growth])

    samples = sample(model, 10)
    print(samples.Biomass_Ecoli_core)
def test_optgp_init_benchmark(model, benchmark):
    """Benchmark initial OptGP sampling."""

    def build_sampler():
        # Constructing the sampler is the operation being timed.
        return OptGPSampler(model, processes=2)

    benchmark(build_sampler)
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Annotate the reactions of two models with sampled flux statistics.

Created on Wed Jul 19 10:58:51 2017

@author: rohanroy
"""

from pandas import *
from cobra.flux_analysis.sampling import OptGPSampler

# NOTE: dnamodel1 and dnamodel2 are not defined in this script; they must
# already exist in the namespace it is run in.
sampler1 = OptGPSampler(dnamodel1, processes=2, thinning=200)
sampler2 = OptGPSampler(dnamodel2, processes=2, thinning=200)

dnamodel1samples = sampler1.sample(4500)
dnamodel2samples = sampler2.sample(4500)

#%%
# Store mean, standard deviation and median of each reaction's sampled
# fluxes as attributes on the reaction object itself.
for reaction in dnamodel1.reactions:
    fluxes = dnamodel1samples[reaction.id]
    reaction.averageflux = fluxes.mean()
    reaction.stdflux = fluxes.std()
    reaction.medianflux = fluxes.median()

for reaction in dnamodel2.reactions:
    fluxes = dnamodel2samples[reaction.id]
    reaction.averageflux = fluxes.mean()
    reaction.stdflux = fluxes.std()
    reaction.medianflux = fluxes.median()
def test_optgp_init_benchmark(self, model, benchmark):
    """Benchmark construction of a two-process OptGPSampler."""

    def build_sampler():
        # Constructing the sampler is the operation being timed.
        return OptGPSampler(model, processes=2)

    benchmark(build_sampler)
def pfba_gapfill(model, reaction_bag, obj=None, obj_lb=10., obj_constraint=False,
                 iters=1, tasks=None, task_lb=0.05,
                 add_exchanges=True, extracellular='e'):
    '''
    Gapfill a model using pFBA solutions over a universal reaction bag.

    The model's reactions are merged into a temporary copy of the reaction
    bag (inside a ``with reaction_bag`` context), the combined network is
    optimized, and every bag reaction that carries flux (|flux| > 1e-6) is
    added to a deep copy of the model.

    Parameters
    ----------
    model : cobra.Model
        Model to be gapfilled
    reaction_bag : cobra.Model
        Reaction bag reference to use during gapfilling
    obj : string
        Reaction ID for objective function in model to be gapfilled.
        When None, the ID is obtained from get_objective(model).
    obj_lb : float
        Lower bound for objective function; applied only when
        obj_constraint is False
    obj_constraint : bool
        Sets objective as constraint which must be maximized
    iters : int
        When greater than 1, that many flux distributions are sampled with
        OptGPSampler and the reactions active in any of them are collected
        and added simultaneously to the model; otherwise the single
        optimized solution is used
    tasks : list or None
        List of reactions IDs (strings) of metabolic tasks to set a minimum
        lower bound for. IDs that are not found are reported and skipped.
    task_lb : float
        Lower bound for any metabolic tasks
    add_exchanges : bool
        Identifies extracellular metabolites added during gapfilling that
        are not associated with exchange reactions and creates them
    extracellular : string
        Label for extracellular compartment of model

    Returns
    -------
    cobra.Model
        Deep copy of ``model`` with the gapfilled metabolites and reactions
        added. The input model object is not the one returned.
    '''
    start_time = time.time()

    # Save some basic network info for downstream membership testing
    orig_rxn_ids = {str(x.id) for x in model.reactions}
    orig_cpd_ids = {str(y.id) for y in model.metabolites}
    univ_rxn_ids = {str(z.id) for z in reaction_bag.reactions}

    # Find overlap in model and reaction bag
    overlap_rxn_ids = univ_rxn_ids.intersection(orig_rxn_ids)

    # Get model objective reaction ID
    if obj is None:
        obj = get_objective(model)

    # Modify universal reaction bag
    new_rxn_ids = set()
    print('Creating universal model...')
    with reaction_bag as universal:

        # Remove overlapping reactions from universal bag, and reset objective if needed
        for rxn in overlap_rxn_ids:
            universal.reactions.get_by_id(rxn).remove_from_model()

        # Set objective in universal if told by user
        # Made constraint as fraction of minimum in next step
        if obj_constraint:
            universal.add_reactions([model.reactions.get_by_id(obj)])
            universal.objective = obj
            # The objective is already in the universal model, so it is
            # excluded from the set of original reactions copied over.
            orig_rxn_ids.remove(obj)
            orig_rxns = [copy.deepcopy(model.reactions.get_by_id(rxn))
                         for rxn in orig_rxn_ids]
        else:
            orig_rxns = list(copy.deepcopy(model.reactions))

        # Add pFBA to universal model and add model reactions
        add_pfba(universal)
        universal.add_reactions(orig_rxns)

        # If previous objective not set as constraint, set minimum lower bound
        if not obj_constraint:
            universal.reactions.get_by_id(obj).lower_bound = obj_lb

        # Set metabolic tasks that must carry flux in gapfilled solution
        if tasks is not None:
            for task in tasks:
                # Only a missing reaction ID is tolerated here; any other
                # failure is a real error and is allowed to propagate.
                try:
                    task_rxn = universal.reactions.get_by_id(task)
                except KeyError:
                    print(task + ' not found in model. Ignoring.')
                    continue
                task_rxn.lower_bound = task_lb

        # Run FBA and save solution
        print('Optimizing model with combined reactions...')
        solution = universal.optimize()

        if iters > 1:
            print('Generating flux sampling object...')
            optgp_object = OptGPSampler(universal, processes=4)

            # Assess the sampled flux distributions
            print('Sampling ' + str(iters) + ' flux distributions...')
            flux_samples = optgp_object.sample(iters)
            rxns = list(flux_samples.columns)
            for _, distribution in flux_samples.iterrows():
                for rxn_id, flux in zip(rxns, distribution):
                    if abs(flux) > 1e-6 and rxn_id not in orig_rxn_ids:
                        new_rxn_ids.add(rxn_id)
        else:
            for rxn_id, flux in zip(solution.fluxes.index, solution.fluxes):
                if abs(flux) > 1e-6:
                    new_rxn_ids.add(rxn_id)

    # Screen new reaction IDs: drop the objective and anything that was
    # already part of the original model
    new_rxn_ids.discard(obj)
    new_rxn_ids -= orig_rxn_ids

    # Get reactions and metabolites to be added to the model
    print('Gapfilling model...')
    new_rxns = copy.deepcopy(
        [reaction_bag.reactions.get_by_id(rxn) for rxn in new_rxn_ids])
    new_cpd_ids = set()
    for rxn in new_rxns:
        new_cpd_ids |= {str(x.id) for x in rxn.metabolites}
    new_cpd_ids = new_cpd_ids.difference(orig_cpd_ids)
    new_cpds = copy.deepcopy(
        [reaction_bag.metabolites.get_by_id(cpd) for cpd in new_cpd_ids])

    # Copy model and gapfill
    new_model = copy.deepcopy(model)
    new_model.add_metabolites(new_cpds)
    new_model.add_reactions(new_rxns)

    # Identify extracellular metabolites with no exchanges
    if add_exchanges:
        new_exchanges = extend_exchanges(new_model, new_cpd_ids, extracellular)
        if len(new_exchanges) > 0:
            new_rxn_ids |= new_exchanges

    duration = int(round(time.time() - start_time))
    print('Took ' + str(duration) + ' seconds to gapfill ' + str(len(new_rxn_ids)) +
          ' reactions and ' + str(len(new_cpd_ids)) + ' metabolites.')

    new_obj_val = new_model.slim_optimize()
    if new_obj_val > 1e-6:
        print('Gapfilled model objective now carries flux (' + str(new_obj_val) + ').')
    else:
        print('Gapfilled model objective still does not carry flux.')

    return new_model
def sampling_analysis(self, measurements):
    """Constrain the model with ``measurements`` and draw 10000 flux samples.

    ``measurements`` is passed to ``self.add_constraint``; the samples are
    then drawn from ``self.model`` with an 8-process OptGPSampler and
    returned.
    """
    self.add_constraint(measurements)
    sampler = OptGPSampler(self.model, processes=8)
    return sampler.sample(10000)