def test_basic_gecko_adjustment():
    """Limit a multi-pool model with ten measured fractions and check the outcome.

    Seven measurements are listed under ``in_model`` and three under
    ``not_in_model``; every measurement has the fraction 0.1.
    """
    fraction = 0.1
    in_model = dict.fromkeys(
        ('P00549', 'P31373', 'P31382', 'P39708', 'P39714', 'P39726', 'Q01574'),
        fraction)
    not_in_model = dict.fromkeys(('P10591', 'P31383', 'P32471'), fraction)
    series_parts = [pd.Series(in_model), pd.Series(not_in_model)]
    measurements = pd.concat(series_parts)

    model = GeckoModel('multi-pool')
    model.limit_proteins(fractions=pd.Series(measurements))
    sol = model.optimize()

    # The limited model must still reach a non-trivial objective value.
    assert sol.objective_value > 0.05
    # Proteins that are not pool proteins must be exactly the ones listed in in_model.
    assert len(model.proteins) - len(model.pool_proteins) == len(in_model)
    # Every individual protein exchange must have been given a positive upper bound.
    for rxn in model.individual_protein_exchanges:
        assert rxn.upper_bound > 0
def essential_prot():
    """Print the proteins whose draw reaction is required by the single-pool model.

    For every protein the reaction ``draw_prot_<protein>`` is closed (both
    bounds set to 0) inside a model context, the model is optimized with the
    cplex solver, and the protein is printed together with the objective value
    when that value is at most 1e-10.
    """
    threshold = 1e-10
    model = GeckoModel("single-pool")
    model.solver = 'cplex'
    print("Essential proteins with cplex, normal model (single-pool)")
    for protein_id in model.proteins:
        with model as scoped:
            draw = scoped.reactions.get_by_id("draw_prot_" + protein_id)
            draw.lower_bound = 0
            draw.upper_bound = 0
            solution = scoped.optimize()
            objective = solution.objective_value
            if objective <= threshold:
                print(protein_id, ",", objective)
def analysis_ko(resFileName):
    """
    Simulate 100 random sets of 10 proteins limited to zero and save the results.

    Each run draws 10 proteins at random, passes them to ``limit_proteins``
    with a value of 0, optimizes the single-pool model and stores the drawn
    set together with the objective value. The table (columns ``ko`` and
    ``Biomass``) is written as CSV.

    :param str resFileName: path of the CSV file to write
    """
    n_runs = 100
    ko_size = 10
    model = GeckoModel('single-pool')
    df = pandas.DataFrame(index=range(n_runs), columns=["ko", "Biomass"])
    for i in range(n_runs):
        # random.sample needs a sequence; materialising the proteins keeps this
        # working if model.proteins is a set (rejected by Python >= 3.11).
        proteins = random.sample(list(model.proteins), ko_size)
        dic = {p: 0 for p in proteins}
        # NOTE(review): the model is not reset between runs, so limits applied
        # in earlier iterations may still be active here -- confirm intended.
        model.limit_proteins(pandas.Series(dic))
        res = model.optimize()
        df.loc[i] = (dic, res.objective_value)
    df.to_csv(resFileName)
def test_adjust_pool_bounds():
    """Check that ``adjust_pool_bounds`` only changes the model when run in place.

    Two proteins are measured at 0.0 (``essential``) next to seven proteins at
    0.1. The limited model is expected to have an objective below 1e-3 until
    the bounds are adjusted in place, after which it should be close to 0.05
    and the adjusted reactions should be the exchanges of the two proteins
    measured at 0.0.
    """
    in_model = dict.fromkeys(
        ('P00549', 'P31373', 'P31382', 'P39708', 'P39714', 'P39726', 'Q01574'),
        0.1)
    essential = dict.fromkeys(('P00498', 'P00815'), 0.)
    expected = {'prot_{}_exchange'.format(pool_id) for pool_id in essential}

    measurements = pd.concat([pd.Series(in_model), pd.Series(essential)])
    model = GeckoModel('multi-pool')
    model.limit_proteins(fractions=pd.Series(measurements))

    # Before any adjustment the objective is (almost) zero.
    assert model.slim_optimize() < 1e-3

    # A non-inplace adjustment must leave the model itself untouched.
    model.adjust_pool_bounds(inplace=False)
    assert model.slim_optimize() < 1e-3

    # The in-place adjustment changes the model and reports what it changed.
    adjustment = model.adjust_pool_bounds(inplace=True)
    observed = set(adjustment['reaction'])
    assert abs(model.slim_optimize() - 0.05) < 1e-3
    assert expected == observed
def prot_ko_optim(prot_measure_fractions=None, prot_measure_ggdw=None, constraints=None, isMultiProc=False, size=1):
    """
    Run a knock-out strain optimization on the yeast GECKO model.

    Without any measurements the "single-pool" model is used. Otherwise the
    "multi-pool" model is loaded and limited with the measurements; fractions
    take precedence over ggdw when both are supplied. Results are written to
    ``Results/optim_KO_Gecko_Yeast_SUCC_max5.csv`` below ``basePath``.

    :param prot_measure_fractions: measurements passed to
        ``limit_proteins(fractions=...)``, or None
    :param prot_measure_ggdw: measurements passed to
        ``limit_proteins(ggdw=...)``, or None
    :param constraints: forwarded to ``GeckoSimulationProblem``
    :param bool isMultiProc: forwarded to ``gecko_strain_optim``
    :param int size: forwarded to ``gecko_strain_optim`` as ``candidateSize``
    """
    # load model
    if prot_measure_fractions is None and prot_measure_ggdw is None:
        model = GeckoModel("single-pool")
    else:
        model = GeckoModel("multi-pool")
        # Explicit None check: the truth value of a pandas Series is ambiguous
        # and raises ValueError, so `if prot_measure_fractions:` cannot be used.
        if prot_measure_fractions is not None:
            model.limit_proteins(fractions=prot_measure_fractions)
        else:
            model.limit_proteins(ggdw=prot_measure_ggdw)

    fileRes = basePath + "Results/optim_KO_Gecko_Yeast_SUCC_max5.csv"
    simulProb = GeckoSimulationProblem(model, constraints=constraints)
    # max succ exchange
    evalFunc = build_evaluation_function("BPCY", "r_2111", "r_2056", "r_1714_REV")
    # levels=None: KO_Reaction by default
    gecko_strain_optim(simulProb,
                       evaluationFunc=evalFunc,
                       levels=None,
                       isMultiProc=isMultiProc,
                       candidateSize=size,
                       resultFile=fileRes)
def essential_prot_scale():
    """
    Print the objective value of every single-protein knock-out on a scaled model.

    All reaction bounds of the single-pool model are multiplied by 100000
    first. Then, for each protein, the reaction ``draw_prot_<protein>`` is
    closed inside a model context, the model is optimized with cplex and the
    protein is printed together with the objective value.
    """
    scale = 100000
    model = GeckoModel("single-pool")
    model.solver = 'cplex'
    print("Essential proteins with cplex, scale model (single-pool)")

    for r in model.reactions:
        # Set both bounds in one step: scaling the lower bound on its own can
        # temporarily exceed the still unscaled upper bound (e.g. for a fixed
        # positive flux), which cobra either rejects or "repairs" by moving the
        # upper bound, so that it would then be scaled twice.
        r.bounds = (r.lower_bound * scale, r.upper_bound * scale)

    for p in model.proteins:
        # The context restores the draw reaction's bounds after each knock-out.
        with model:
            r = model.reactions.get_by_id("draw_prot_" + p)
            r.lower_bound = 0
            r.upper_bound = 0
            res = model.optimize()
            print(p, ",", res.objective_value)
def simulate_prot():
    """Close the draw reaction of selected proteins and print the resulting fluxes.

    The single-pool model is optimized with cplex inside a model context and
    the objective value plus the fluxes of ``r_2111``, ``r_2056`` and
    ``r_1714_REV`` are printed.
    """
    # Alternative set tried before: ["P53685", "Q01574"]
    knocked_out = ['P33421']
    reported = (
        (" --> r_2111 ", "r_2111"),
        (" --> r_2056 ", "r_2056"),
        (" --> r_1714 ", "r_1714_REV"),
    )

    model = GeckoModel("single-pool")
    model.solver = 'cplex'
    with model:
        for protein_id in knocked_out:
            draw = model.reactions.get_by_id("draw_prot_" + protein_id)
            draw.lower_bound = 0
            draw.upper_bound = 0
        res = model.optimize()
        print(" --> growth " + str(res.objective_value))
        for label, reaction_id in reported:
            print(label + str(res.fluxes[reaction_id]))
        print(" ------------ ")
def simulate_wt():
    """
    Optimize the single-pool model, then again with one protein draw restricted.

    The unrestricted solution is printed first. Afterwards the reaction
    ``draw_prot_P38066`` is limited to the range [0, 0.000001] inside a model
    context, the model is optimized again, and the objective value and the
    bounds of that reaction are printed.
    """
    model = GeckoModel('single-pool')
    res = model.optimize()
    print(res)

    p = "P38066"
    with model:
        r = model.reactions.get_by_id("draw_prot_" + p)
        r.lower_bound = 0
        r.upper_bound = 0.000001
        res = model.optimize()
        print(p + " wt simulation1 " + str(res.objective_value))
        # Printed while the context is still open, i.e. with the bounds set above.
        print(str(r.lower_bound) + " --> " + str(r.upper_bound))
def analysis_growth(resFileName):
    """
    Tabulate the objective value for several upper bounds on each protein draw.

    For every protein except ``P38066`` the reaction ``draw_prot_<protein>``
    is restricted to ``[0, level]`` for each level in turn and the single-pool
    model is optimized. The objective values are collected in a table with
    one row per protein and one column per level, which is written as CSV.

    :param str resFileName: path of the CSV file to write
    """
    levels = [0, 1e-10, 1e-8, 1e-6, 1e-4, 1e-2, 0.1]
    model = GeckoModel('single-pool')
    proteins = model.proteins
    df = pandas.DataFrame(index=proteins, columns=levels)
    for p in proteins:
        print(p)
        # NOTE(review): P38066 is skipped without a stated reason; its row stays empty.
        if p == "P38066":
            continue
        r = model.reactions.get_by_id("draw_prot_" + p)
        lb = r.lower_bound
        ub = r.upper_bound
        for level in levels:
            r.lower_bound = 0
            r.upper_bound = level
            res = model.optimize()
            # Single .loc call: the chained form df.loc[p][level] = ... may
            # assign into a temporary copy and leave df unchanged.
            df.loc[p, level] = res.objective_value
        # Put the original bounds back before moving on to the next protein.
        r.lower_bound = lb
        r.upper_bound = ub
    df.to_csv(resFileName)
def test_gecko_adjustment_sanchez_etal():
    """Limit the multi-pool model with the ``sanchez-mmol_gdw.csv`` measurements.

    Checks that limiting proteins lowers the objective, that the model stores
    the measured concentrations and exchange bounds, and that the derived
    pool quantities stay above the thresholds noted next to each assertion.
    """
    data_file = os.path.join(
        os.path.dirname(__file__), '../geckopy/data_files/sanchez-mmol_gdw.csv')
    # The `squeeze` keyword of read_csv was removed in pandas 2.0; squeezing
    # the single data column afterwards yields the same Series.
    mmol_gdw = pd.read_csv(data_file, index_col=0, header=None).squeeze("columns")
    ggdw = pd.Series(PROTEIN_PROPERTIES.loc[mmol_gdw.index, 'mw'] / 1000.) * pd.Series(mmol_gdw)

    model = GeckoModel('multi-pool')
    growth_rate_unlimited_protein = model.slim_optimize()
    model.limit_proteins(ggdw=pd.Series(ggdw))
    growth_rate_limited_protein = model.slim_optimize()
    # should be smaller, but how much..
    assert growth_rate_limited_protein < 0.8 * growth_rate_unlimited_protein

    measured_in_model = set(mmol_gdw.index).intersection(model.proteins)
    assert sum(model.concentrations[p] - ggdw[p] for p in measured_in_model) < 1e-10
    assert sum(
        abs(rxn.upper_bound - mmol_gdw[rxn.annotation['uniprot']])
        for rxn in model.individual_protein_exchanges) < 1e-6
    # Summing coefficient + mw / 1000 checks the pool coefficient against -mw / 1000.
    assert sum(
        rxn.metabolites[model.common_protein_pool] +
        PROTEIN_PROPERTIES.loc[rxn.annotation['uniprot'], 'mw'] / 1000.
        for rxn in model.pool_protein_exchanges) < 1e-6

    # With yeast 8.1.3 -> p_measured = 0.296
    assert model.p_measured > 0.25
    # With yeast 8.1.3 -> f = 0.304
    assert model.f_mass_fraction_measured_matched_to_total > 0.25
    # With yeast 8.1.3 -> pool_exchange = 0.0212
    assert model.protein_pool_exchange.upper_bound > 0.015
def test_gecko_adjustment_sanchez_etal():
    """Limit the multi-pool model with the ``sanchez-mmol_gdw.csv`` measurements.

    Checks the objective before and after limiting, the stored concentrations
    and exchange bounds, the derived pool quantities, and the coefficients of
    reaction ``r_4041`` against ``sanchez-biomass.csv``.
    """

    def read_series(relative_path):
        """Read a headerless two-column CSV as a Series indexed by the first column."""
        # pd.Series.from_csv was removed in pandas 1.0. This mirrors it, except
        # for its date parsing of the index, which is not attempted here.
        frame = pd.read_csv(
            os.path.join(os.path.dirname(__file__), relative_path),
            index_col=0, header=None)
        series = frame.iloc[:, 0].rename_axis(None)
        series.name = None
        return series

    mmol_gdw = read_series('../geckopy/data_files/sanchez-mmol_gdw.csv')
    ggdw = pd.Series(PROTEIN_PROPERTIES.loc[mmol_gdw.index, 'mw'] / 1000.) * pd.Series(mmol_gdw)

    model = GeckoModel('multi-pool')
    growth_rate_unlimited_protein = model.slim_optimize()
    model.limit_proteins(ggdw=pd.Series(ggdw))
    growth_rate_limited_protein = model.slim_optimize()
    # should be smaller, but how much..
    assert growth_rate_limited_protein < 0.8 * growth_rate_unlimited_protein

    measured_in_model = set(mmol_gdw.index).intersection(model.proteins)
    assert sum(model.concentrations[p] - ggdw[p] for p in measured_in_model) < 1e-10
    assert sum(
        abs(rxn.upper_bound - mmol_gdw[rxn.annotation['uniprot']])
        for rxn in model.individual_protein_exchanges) < 1e-6
    # section 3.2.6 reports 0.283
    assert abs(model.p_measured - 0.296) < 1e-2
    assert sum(
        rxn.metabolites[model.common_protein_pool] +
        PROTEIN_PROPERTIES.loc[rxn.annotation['uniprot'], 'mw'] / 1000.
        for rxn in model.pool_protein_exchanges) < 1e-6
    # FIXME: section 3.2.6 reports 0.2154
    assert abs(model.f_mass_fraction_measured_matched_to_total - 0.291) < 1e-2
    # FIXME: provided model had 0.0168, value including p_base term is 0.0507 as in matlab..
    assert abs(model.protein_pool_exchange.upper_bound - 0.0203) < 1e-2

    sanchez_biomass = read_series('../geckopy/data_files/sanchez-biomass.csv')
    biomass = pd.Series(
        {m.id: v for m, v in model.reactions.r_4041.metabolites.items()})
    # FIXME: poor match with provided model for ATP, ADP, H+, H20, P
    assert sum((biomass - sanchez_biomass).abs()) < 20
def analysis_growth(resFileName, scale=False):
    """
    Repeat the single-protein knock-out scan 100 times and save the outcomes.

    In every repetition each protein's ``draw_prot_<protein>`` reaction is
    closed inside a model context and the single-pool model is optimized with
    cplex. The table holds one row per protein and one column per repetition,
    with 0 when the objective value is below 1e-4 and 1 otherwise, and is
    written as CSV.

    :param str resFileName: path of the CSV file to write
    :param bool scale: multiply all reaction bounds by 100000 before the scan
    """
    n_repeats = 100
    model = GeckoModel('single-pool')
    model.solver = 'cplex'

    # scale model
    if scale:
        for r in model.reactions:
            # Both bounds are set in one step so that neither can cross the
            # other, still unscaled, bound while the scaling is in progress.
            r.bounds = (r.lower_bound * 100000, r.upper_bound * 100000)

    proteins = model.proteins
    df = pandas.DataFrame(index=proteins, columns=range(n_repeats))
    for i in range(n_repeats):
        print(i)
        for p in proteins:
            # The context restores the draw reaction's bounds after each knock-out.
            with model as m:
                r = m.reactions.get_by_id("draw_prot_" + p)
                r.lower_bound = 0
                r.upper_bound = 0
                res = m.optimize()
                # Single .loc call: the chained form df.loc[p][i] = ... may
                # assign into a temporary copy and leave df unchanged.
                df.loc[p, i] = 0 if res.objective_value < 1e-4 else 1
    df.to_csv(resFileName)
def loading_yeast_gecko(prot_measure_fractions=None, prot_measure_ggdw=None):
    """
    Loads the provided yeast gecko

    Without measurements the "single-pool" model is returned. Otherwise the
    "multi-pool" model is loaded and limited with the measurements; fractions
    take precedence over ggdw when both are given.

    :param pd.Series prot_measure_fractions: measured fraction of proteins
    :param pd.Series prot_measure_ggdw: measured ggdw of proteins
    :return GeckoModel:
    """
    if prot_measure_fractions is None and prot_measure_ggdw is None:
        return GeckoModel("single-pool")

    model = GeckoModel("multi-pool")
    # Explicit None check: the truth value of a pandas Series is ambiguous and
    # raises ValueError, so `if prot_measure_fractions:` cannot be used here.
    if prot_measure_fractions is not None:
        model.limit_proteins(fractions=prot_measure_fractions)
    else:
        model.limit_proteins(ggdw=prot_measure_ggdw)
    return model
def simulate_wt_multi():
    """
    Optimize the multi-pool model limited by three measurements and print all fluxes.

    The objective value is printed first, followed by one line per reaction
    with its id and flux.
    """
    import pandas

    some_measurements = pandas.Series({
        'P00549': 0.1,
        'P31373': 0.1,
        'P31382': 0.1
    })
    # The model is built once; a first instance used to be created and then
    # immediately replaced by this one.
    model = GeckoModel('multi-pool')
    model.limit_proteins(some_measurements)
    res = model.optimize()
    print(" wt simulation1 ", res.objective_value)
    for r in model.reactions:
        print(r.id, " --> ", res.fluxes[r.id])
def loading_any_gecko(path, biomass, protein=None, carbs=None):
    """
    Default template function to load geckos

    The SBML file is read and wrapped in a GeckoModel. When no protein or
    carbohydrate reaction id is given, the biomass id is used in its place.

    :param str path: string path to the sbml file
    :param str biomass: biomass function id
    :param str protein: protein reaction id
    :param str carbs: carbohydrate function id
    :return GeckoModel:
    """
    # Fall back to the biomass id for every id that was left out.
    protein_id = protein or biomass
    carbs_id = carbs or biomass
    sbml_model = read_sbml_model(path)
    return GeckoModel(model=sbml_model,
                      biomass_reaction_id=biomass,
                      protein_reaction_id=protein_id,
                      carbohydrate_reaction_id=carbs_id)
pool.join() except (OSError, RuntimeError) as e: logger.error('failed parallel_evaluation_mp: {0}'.format(str(e))) raise else: end = time.time() print('completed parallel_evaluation_mp in {0} seconds'.format(end - start)) logger.debug( 'completed parallel_evaluation_mp in {0} seconds'.format(end - start)) # print("--- %s seconds ---" % (time.time() - start), 'end_pop') return [r.get()[0] for r in results] model = GeckoModel('single-pool') const = {'r_1714_REV': (0, 1)} simulProblem = GeckoSimulationProblem(model, const) ids = [ x for x in simulProblem.model.proteins if x not in simulProblem.objective.keys() ] decoder = DecoderProtKnockouts(ids) evalFunc = build_evaluation_function("targetFlux", ["r_2056"]) eaConfig = EAConfigurations() optimProbConf = OptimProblemConfiguration( simulProblem, type=optimType.PROTEIN_KO, decoder=decoder, evaluationFunc=evalFunc,