def create_model(model_name, thermo_data=None, lexicon=None, compartment_data=None) -> ThermoModel:
    """
    Build a thermodynamics-constrained model that is ready for tFBA.

    The three data arguments go together: either pass all of them, or pass
    none and let ``load_data(model_name)`` provide them.

    Parameters
    ----------
    model_name : str
        The name of a model.
    thermo_data : dict, optional
        A thermodynamic database. If specified, ``lexicon`` and
        ``compartment_data`` are required.
    lexicon : pandas.DataFrame, optional
        A dataframe linking metabolite IDs to SEED compound IDs. If
        specified, ``thermo_data`` and ``compartment_data`` are required.
    compartment_data : dict, optional
        A dictionary containing information about each compartment of the
        model. If specified, ``thermo_data`` and ``lexicon`` are required.

    Returns
    -------
    pytfa.ThermoModel
        The annotated model, after ``prepare()`` and ``convert()``.

    Raises
    ------
    ValueError
        If some (but not all) of ``thermo_data``, ``lexicon``, and
        ``compartment_data`` are None.
    """
    supplied = (thermo_data, lexicon, compartment_data)
    n_missing = sum(1 for item in supplied if item is None)

    if n_missing == len(supplied):
        # Nothing was passed in: fall back to the data bundled for the model.
        thermo_data, lexicon, compartment_data = load_data(model_name)
    elif n_missing > 0:
        raise ValueError("Not all required data supplied.")

    # Two loggers are silenced:
    #  * due to a bug on pytfa, the logger is created with "None" as name;
    #  * if the model ends up being copied the correct name will be used,
    #    so that logger should be silenced too.
    for logger_suffix in (None, model_name):
        _silence_pytfa(f"thermomodel_{logger_suffix}")

    base_model = load_cbm(model_name)
    tmodel = ThermoModel(thermo_data, base_model)
    tmodel.name = model_name

    annotate_from_lexicon(tmodel, lexicon)
    apply_compartment_data(tmodel, compartment_data)

    # NumericFocus is a Gurobi parameter, so it is only set for that backend.
    uses_gurobi = tmodel.solver.interface.__name__ == "optlang.gurobi_interface"
    if uses_gurobi:
        tmodel.solver.problem.Params.NumericFocus = 3
    tmodel.solver.configuration.tolerances.feasibility = 1e-9
    tmodel.solver.configuration.presolve = True

    tmodel.prepare()
    tmodel.convert(verbose=False)
    return tmodel
def convert2thermo(model, name):
    """
    Wrap ``model`` in a ``pytfa.ThermoModel`` and run the TFA conversion.

    ``thermo_data``, ``lexicon``, ``compartment_data`` and ``solver`` are not
    parameters: they are read from the enclosing (module) scope.

    :param model: the model handed to ``pytfa.ThermoModel``
    :param name: name assigned to the resulting thermo model
    :return: the converted thermo model, with its solver set to ``solver``
    """
    # Initialize the model
    thermo_model = pytfa.ThermoModel(thermo_data, model)
    thermo_model.name = name

    # Annotate the model
    annotate_from_lexicon(thermo_model, lexicon)
    apply_compartment_data(thermo_model, compartment_data)

    # TFA conversion
    thermo_model.prepare()
    thermo_model.convert()

    # The solver is assigned only after the conversion, as in the original.
    thermo_model.solver = solver
    return thermo_model
def create_etfl_model(
        has_thermo,
        has_neidhardt,
        n_mu_bins=64,
        mu_max=3,
        optimize=True,
):
    """
    Build the small E. coli core expression model, optionally with
    thermodynamics and growth-dependent mass requirements.

    Relies on names defined outside this function: ``solver``,
    ``essentials``, ``aa_dict``, ``rna_nucleotides``, ``rna_nucleotides_mp``
    and ``dna_nucleotides``.

    :param has_thermo: if truthy, build a ``ThermoMEModel``, annotate it and
        run ``prepare()``/``convert()``; otherwise build a ``MEModel``
    :param has_neidhardt: if truthy, add interpolation variables, dummies and
        the protein/RNA/DNA mass requirements from ``get_neidhardt_data()``
    :param n_mu_bins: number of growth-rate bins passed to the model
    :param mu_max: upper end of the growth-rate range ``[0, mu_max]``
    :param optimize: if truthy, solve the model once and print a summary
    :return: the assembled model
    """
    # ------------------------------------------------------------
    # Initialisation
    # ------------------------------------------------------------

    # this hack works because we are using the solver switch to update the var
    # names in the solver but really we should not do this
    # TODO: clean up model.sanitize_varnames

    growth_reaction_id = 'Biomass_Ecoli_core'

    vanilla_model = create_fba_model()
    vanilla_model.objective = growth_reaction_id
    # The plain FBA problem is still solved once, as before; its objective
    # value is not used afterwards, so it is no longer stored.
    vanilla_model.slim_optimize()

    mu_range = [0, mu_max]

    coupling_dict = get_coupling_dict(vanilla_model,
                                      mode='kmax',
                                      atps_name='ATPS4r')

    # Initialize the model
    name = 'small_model_T{:1}E{:1}N{:1}_{}_enz_{}_bins.json'.format(
        has_thermo,
        True,
        has_neidhardt,
        len(coupling_dict),
        n_mu_bins)

    if has_thermo:
        thermo_data, lexicon, compartment_data = get_thermo_data()

        ecoli = ThermoMEModel(
            thermo_data,
            model=vanilla_model,
            growth_reaction=growth_reaction_id,
            mu_range=mu_range,
            n_mu_bins=n_mu_bins,
            name=name,
        )
    else:
        ecoli = MEModel(
            model=vanilla_model,
            growth_reaction=growth_reaction_id,
            mu_range=mu_range,
            n_mu_bins=n_mu_bins,
            name=name,
        )

    ecoli.name = name
    ecoli.logger.setLevel(logging.WARNING)
    ecoli.sloppy = True

    # Solver settings
    ecoli.solver = solver
    standard_solver_config(ecoli)

    if has_thermo:
        # Annotate the cobra_model
        annotate_from_lexicon(ecoli, lexicon)
        apply_compartment_data(ecoli, compartment_data)

        # TFA conversion
        ecoli.prepare()
        ecoli.convert()

    nt_sequences = get_nt_sequences()
    mrna_dict = get_mrna_dict(ecoli)
    rnap = get_rnap()
    rib = get_rib()

    all_peptides = {
        x
        for enzymes in coupling_dict.values()
        for enz in enzymes
        for x in enz.composition
    }

    def prune_to_genes(the_dict):
        # Keep only entries whose key is a model gene, or part of the
        # ribosome, the RNA polymerase, or one of the coupled enzymes.
        return {
            k: v
            for k, v in the_dict.items()
            if k in vanilla_model.genes
            or k in rib.rrna_composition
            or k in rib.composition
            or k in rnap.composition
            or k in all_peptides
        }

    nt_sequences = prune_to_genes(nt_sequences)
    mrna_dict = prune_to_genes(mrna_dict)

    # Remove nucleotides and amino acids from biomass reaction as they will be
    # taken into account by the expression
    remove_from_biomass_equation(
        model=ecoli,
        nt_dict=rna_nucleotides,
        aa_dict=aa_dict,
        atp_id=essentials['atp'],
        adp_id=essentials['adp'],
        pi_id=essentials['pi'],
        h2o_id=essentials['h2o'],
        h_id=essentials['h'],
    )

    ##########################
    ##    MODEL CREATION    ##
    ##########################

    ecoli.add_nucleotide_sequences(nt_sequences)
    ecoli.add_essentials(essentials=essentials,
                         aa_dict=aa_dict,
                         rna_nucleotides=rna_nucleotides,
                         rna_nucleotides_mp=rna_nucleotides_mp)
    ecoli.add_mrnas(mrna_dict.values())
    ecoli.add_ribosome(rib, free_ratio=0.2)
    # http://bionumbers.hms.harvard.edu/bionumber.aspx?id=102348&ver=1&trm=rna%20polymerase%20half%20life&org=
    # Name          Fraction of active RNA Polymerase
    # Bionumber ID  102348
    # Value         0.17-0.3 unitless
    # Source        Bremer, H., Dennis, P. P. (1996) Modulation of chemical composition and other parameters of the cell by growth rate.
    #               Neidhardt, et al. eds. Escherichia coli and Salmonella typhimurium: Cellular
    #                       and Molecular Biology, 2nd ed. chapter 97 Table 1
    ecoli.add_rnap(rnap, free_ratio=0.75)

    ecoli.build_expression()
    ecoli.add_enzymatic_coupling(coupling_dict)

    if has_neidhardt:
        nt_ratios, aa_ratios = get_ratios()
        chromosome_len, gc_ratio = get_ecoli_gen_stats()
        kdeg_mrna, mrna_length_avg = get_mrna_metrics()
        kdeg_enz, peptide_length_avg = get_enz_metrics()
        (neidhardt_mu, neidhardt_rrel,
         neidhardt_prel, neidhardt_drel) = get_neidhardt_data()

        ecoli.add_interpolation_variables()
        ecoli.add_dummies(nt_ratios=nt_ratios,
                          mrna_kdeg=kdeg_mrna,
                          mrna_length=mrna_length_avg,
                          aa_ratios=aa_ratios,
                          enzyme_kdeg=kdeg_enz,
                          peptide_length=peptide_length_avg)
        ecoli.add_protein_mass_requirement(neidhardt_mu, neidhardt_prel)
        ecoli.add_rna_mass_requirement(neidhardt_mu, neidhardt_rrel)
        ecoli.add_dna_mass_requirement(mu_values=neidhardt_mu,
                                       dna_rel=neidhardt_drel,
                                       gc_ratio=gc_ratio,
                                       chromosome_len=chromosome_len,
                                       dna_dict=dna_nucleotides)

    # Need to put after, because dummy has to be taken into account if used.
    ecoli.populate_expression()
    ecoli.add_trna_mass_balances()

    ecoli.print_info()

    ecoli.repair()

    if optimize:
        try:
            ecoli.optimize()

            print('Objective            : {}'.format(
                ecoli.solution.objective_value))
            print(' - Glucose uptake    : {}'.format(
                ecoli.reactions.EX_glc__D_e.flux))
            print(' - Growth            : {}'.format(
                ecoli.growth_reaction.flux))
            print(' - Ribosomes produced: {}'.format(ecoli.ribosome.X))
            print(' - RNAP produced: {}'.format(ecoli.rnap.X))
        except (AttributeError, SolverError) as exc:
            # Still best-effort (the model is returned either way), but the
            # failure is reported instead of being silently discarded.
            ecoli.logger.warning(
                'Initial optimization of %s failed: %r', ecoli.name, exc)

    return ecoli
print("Done !") #biomass_rxn = 'BIOMASS_Ec_iJO1366_WT_53p95M' biomass_rxn = 'Ec_biomass_iJO1366_WT_53p95M' # We import pre-compiled data as it is faster for bigger models model_path = '/projectnb2/bioinfor/SEGRE/goldford/CoenzymeSpecificity/pytfa/models' cobra_model = load_json_model(model_path + '/iJO1366_NAD_ratio_1.fromTFA.json') lexicon = read_lexicon(model_path + '/iJO1366/lexicon.csv') compartment_data = read_compartment_data(model_path + '/iJO1366/compartment_data.json') # Initialize the cobra_model mytfa = pytfa.ThermoModel(thermo_data, cobra_model) # Annotate the cobra_model annotate_from_lexicon(mytfa, lexicon) apply_compartment_data(mytfa, compartment_data) mytfa.name = 'iJO1366[NAD]' mytfa.solver = solver mytfa.objective = biomass_rxn # Solver settings def apply_solver_settings(model, solver=solver): model.solver = solver # model.solver.configuration.verbosity = 1 model.solver.configuration.tolerances.feasibility = 1e-9 if solver == 'optlang_gurobi': model.solver.problem.Params.NumericFocus = 3
def create_etfl_model(
        has_thermo,
        has_neidhardt,
        n_mu_bins=64,
        mu_max=3,
        optimize=True,
        solver=DEFAULT_SOLVER,
):
    """
    Build the small E. coli core expression model, optionally with
    thermodynamics and growth-dependent mass requirements.

    Relies on names defined outside this function: ``essentials``,
    ``aa_dict``, ``rna_nucleotides``, ``rna_nucleotides_mp`` and
    ``dna_nucleotides``.

    :param has_thermo: if truthy, build a ``ThermoMEModel``, annotate it and
        run ``prepare()``/``convert()``; otherwise build a ``MEModel``
    :param has_neidhardt: if truthy, add the protein/RNA/DNA mass
        requirements from ``get_neidhardt_data()`` (dummies are always added)
    :param n_mu_bins: number of growth-rate bins passed to the model
    :param mu_max: upper end of the growth-rate range ``[0, mu_max]``
    :param optimize: if truthy, solve the model once and print a summary
    :param solver: value assigned to ``ecoli.solver``
    :return: the assembled model
    """
    # ------------------------------------------------------------
    # Initialisation
    # ------------------------------------------------------------

    # this hack works because we are using the solver switch to update the var
    # names in the solver but really we should not do this
    # TODO: clean up model.sanitize_varnames

    growth_reaction_id = 'Biomass_Ecoli_core'

    vanilla_model = create_fba_model()
    vanilla_model.objective = growth_reaction_id
    # The plain FBA problem is still solved once, as before; its objective
    # value is not used afterwards, so it is no longer stored.
    vanilla_model.slim_optimize()

    mu_range = [0, mu_max]

    coupling_dict = get_coupling_dict(vanilla_model,
                                      mode='kmax',
                                      atps_name='ATPS4r')

    # Initialize the model
    name = 'small_model_T{:1}E{:1}N{:1}_{}_enz_{}_bins.json'.format(
        has_thermo,
        True,
        has_neidhardt,
        len(coupling_dict),
        n_mu_bins)

    if has_thermo:
        thermo_data, lexicon, compartment_data = get_thermo_data()

        ecoli = ThermoMEModel(
            thermo_data,
            model=vanilla_model,
            growth_reaction=growth_reaction_id,
            mu_range=mu_range,
            n_mu_bins=n_mu_bins,
            name=name,
        )
    else:
        ecoli = MEModel(
            model=vanilla_model,
            growth_reaction=growth_reaction_id,
            mu_range=mu_range,
            n_mu_bins=n_mu_bins,
            name=name,
        )

    ecoli.name = name
    ecoli.logger.setLevel(logging.WARNING)
    ecoli.sloppy = True

    # Solver settings
    ecoli.solver = solver
    standard_solver_config(ecoli)

    if has_thermo:
        # Annotate the cobra_model
        annotate_from_lexicon(ecoli, lexicon)
        apply_compartment_data(ecoli, compartment_data)

        # TFA conversion
        ecoli.prepare()
        ecoli.convert()

    nt_sequences = get_nt_sequences()
    mrna_dict = get_mrna_dict(ecoli)
    rnap = get_rnap()
    rib = get_rib()

    all_peptides = {
        x
        for enzymes in coupling_dict.values()
        for enz in enzymes
        for x in enz.composition
    }

    def prune_to_genes(the_dict):
        # Keep only entries whose key is a model gene, or part of the
        # ribosome, the RNA polymerase, or one of the coupled enzymes.
        return {
            k: v
            for k, v in the_dict.items()
            if k in vanilla_model.genes
            or k in rib.rrna_composition
            or k in rib.composition
            or k in rnap.composition
            or k in all_peptides
        }

    nt_sequences = prune_to_genes(nt_sequences)
    mrna_dict = prune_to_genes(mrna_dict)

    # Remove nucleotides and amino acids from biomass reaction as they will be
    # taken into account by the expression
    remove_from_biomass_equation(model=ecoli,
                                 nt_dict=rna_nucleotides,
                                 aa_dict=aa_dict,
                                 essentials_dict=essentials)

    ##########################
    ##    MODEL CREATION    ##
    ##########################

    ecoli.add_nucleotide_sequences(nt_sequences)
    ecoli.add_essentials(essentials=essentials,
                         aa_dict=aa_dict,
                         rna_nucleotides=rna_nucleotides,
                         rna_nucleotides_mp=rna_nucleotides_mp)
    ecoli.add_mrnas(mrna_dict.values())
    ecoli.add_ribosome(rib, 0.2)
    ecoli.add_rnap(rnap, 0.75)

    ecoli.build_expression()
    ecoli.add_enzymatic_coupling(coupling_dict)

    nt_ratios, aa_ratios = get_ratios()
    chromosome_len, gc_ratio = get_ecoli_gen_stats()
    kdeg_mrna, mrna_length_avg = get_mrna_metrics()
    kdeg_enz, peptide_length_avg = get_enz_metrics()
    ecoli.add_dummies(nt_ratios=nt_ratios,
                      mrna_kdeg=kdeg_mrna,
                      mrna_length=mrna_length_avg,
                      aa_ratios=aa_ratios,
                      enzyme_kdeg=kdeg_enz,
                      peptide_length=peptide_length_avg)

    if has_neidhardt:
        (neidhardt_mu, neidhardt_rrel,
         neidhardt_prel, neidhardt_drel) = get_neidhardt_data()

        add_protein_mass_requirement(ecoli, neidhardt_mu, neidhardt_prel)
        add_rna_mass_requirement(ecoli, neidhardt_mu, neidhardt_rrel)
        add_dna_mass_requirement(ecoli,
                                 mu_values=neidhardt_mu,
                                 dna_rel=neidhardt_drel,
                                 gc_ratio=gc_ratio,
                                 chromosome_len=chromosome_len,
                                 dna_dict=dna_nucleotides)

    # Need to put after, because dummy has to be taken into account if used.
    ecoli.populate_expression()
    ecoli.add_trna_mass_balances()

    ecoli.print_info()

    ### a problem with the solver, which can be temporarily solved
    ecoli.constraints.MB_b3855.set_linear_coefficients(
        {ecoli.variables.b3855_degradation: -1e-5})

    ecoli.repair()

    if optimize:
        try:
            ecoli.optimize()

            print('Objective            : {}'.format(
                ecoli.solution.objective_value))
            print(' - Glucose uptake    : {}'.format(
                ecoli.reactions.EX_glc__D_e.flux))
            print(' - Growth            : {}'.format(
                ecoli.growth_reaction.flux))
            print(' - Ribosomes produced: {}'.format(ecoli.ribosome.X))
            print(' - RNAP produced: {}'.format(ecoli.rnap.X))
        except (AttributeError, SolverError) as exc:
            # Still best-effort (the model is returned either way), but the
            # failure is reported instead of being silently discarded.
            ecoli.logger.warning(
                'Initial optimization of %s failed: %r', ecoli.name, exc)

    return ecoli
def apply_annotation_data(self):
    """Annotate ``self.model`` from the lexicon at ``thermo/lexicon.csv``
    under ``data_dir``."""
    # Disabled alternative that derived the annotation from 'seed.compound':
    # for met in self.model.metabolites:
    #     if 'seed.compound' in met.annotation:
    #         met.annotation = {'seed_id': met.annotation['seed.compound'][0]}
    target_model = self.model
    lexicon_path = join(data_dir, 'thermo/lexicon.csv')
    lexicon = read_lexicon(lexicon_path)
    annotate_from_lexicon(target_model, lexicon)
def create_model(has_thermo,
                 has_expression,
                 has_allocation,
                 kcat_mode='kmax',
                 infer_missing_enz=False,
                 additional_enz=None,
                 free_rib_ratio=0.2,
                 free_rnap_ratio=0.75,
                 add_displacement=False,
                 n_mu_bins=128,
                 name_suffix='',
                 kcat_overrides=None):
    """
    Build an iJO1366 expression model, solve it, save it as JSON and return it.

    Besides its parameters the function reads these names from the enclosing
    scope: ``glc_uptake``, ``glc_uptake_std``, ``growth_reaction_id``,
    ``observed_growth``, ``observed_growth_std`` and ``solver``.

    :param has_thermo: if truthy, build a ``ThermoMEModel``, annotate it and
        run ``prepare()``/``convert()``; otherwise build a ``MEModel``. Also
        selects 'ETFL' vs 'EFL' in the model name.
    :param has_expression: must equal ``True`` (checked with ``assert``)
    :param has_allocation: if truthy, add dummies, the protein/RNA/DNA mass
        requirements and the DNA polymerase coupling; prefixes the model name
        with 'v'
    :param kcat_mode: forwarded as ``mode`` to ``get_coupling_dict``
    :param infer_missing_enz: forwarded to ``get_coupling_dict``; appends
        '_infer' to the model name when truthy
    :param additional_enz: if not None, forwarded to
        ``get_transporters_coupling``; entries of the regular coupling dict
        win on key collisions
    :param free_rib_ratio: second argument of ``add_ribosome``
    :param free_rnap_ratio: second argument of ``add_rnap``
    :param add_displacement: forwarded to ``convert`` (thermo models only)
    :param n_mu_bins: number of growth-rate bins passed to the model
    :param name_suffix: optional suffix appended to the model name
    :param kcat_overrides: optional mapping of enzyme id to a new kcat, used
        for both ``kcat_fwd`` and ``kcat_bwd``
    :return: the final model (the relaxed one if relaxation was needed)
    """
    #------------------------------------------------------------
    # Initialisation
    #------------------------------------------------------------

    assert has_expression == True

    # this hack works because we are using the solver switch to update the var
    # names in the solver but really we should not do this
    # TODO: clean up model.sanitize_varnames
    vanilla_model = get_model('optlang-glpk')
    # Constrain glucose exchange to the window -glc_uptake +/- glc_uptake_std.
    vanilla_model.reactions.EX_glc__D_e.lower_bound = -1 * glc_uptake - glc_uptake_std
    vanilla_model.reactions.EX_glc__D_e.upper_bound = -1 * glc_uptake + glc_uptake_std

    vanilla_model.objective = growth_reaction_id
    fba_sol = vanilla_model.slim_optimize()
    # vanilla_model.reactions.get_by_id(growth_reaction_id).lower_bound = observed_growth
    # fva = flux_variability_analysis(vanilla_model)
    # vanilla_model.reactions.get_by_id(growth_reaction_id).lower_bound = 0

    # original_bounds = pd.DataFrame.from_dict(
    #     {r.id:(r.lower_bound, r.upper_bound)
    #      for r in vanilla_model.reactions}, orient = 'index')
    # original_bounds.columns = ['lb','ub']

    # NOTE(review): mu_0 is not read again in this function, and the
    # n_mu_bins self-assignment is a no-op.
    mu_0 = fba_sol
    mu_range = [0, 3.5]
    n_mu_bins = n_mu_bins

    time_str = get_timestr()

    coupling_dict = get_coupling_dict(vanilla_model,
                                      mode=kcat_mode,
                                      atps_name='ATPS4rpp',
                                      infer_missing_enz=infer_missing_enz)

    if additional_enz is not None:
        additional_dict = get_transporters_coupling(
            model=vanilla_model, additional_enz=additional_enz)

        # Updating the additional dict with the regular one means the regular
        # coupling wins whenever both define the same key.
        additional_dict.update(coupling_dict)

        coupling_dict = additional_dict

    if kcat_overrides is not None:
        for rxn, enz_list in coupling_dict.items():
            for e in enz_list:
                if e.id in kcat_overrides:
                    prev_kcat = e.kcat_fwd
                    new_kcat = kcat_overrides[e.id]
                    e.kcat_fwd = new_kcat
                    e.kcat_bwd = new_kcat
                    # presumably kcats are stored per hour, hence / 3600 for
                    # the "s-1" printout — TODO confirm
                    print('Replaced kcat for {}: {} <-- {} s-1'.format(
                        e.id, prev_kcat / 3600, new_kcat / 3600))

    aa_dict, rna_nucleotides, rna_nucleotides_mp, dna_nucleotides = get_monomers_dict(
    )
    essentials = get_essentials()

    # Initialize the model
    # The name encodes the configuration: [v]ETFL/EFL[_suffix][_infer].
    model_name = 'ETFL' if has_thermo else 'EFL'
    model_name = ('v' + model_name) if has_allocation else model_name
    model_name = (model_name +
                  '_{}'.format(name_suffix)) if name_suffix else model_name
    model_name = (model_name +
                  '_infer') if bool(infer_missing_enz) else model_name

    name = 'iJO1366_{}_{}_enz_{}_bins_{}.json'.format(model_name,
                                                      len(coupling_dict),
                                                      n_mu_bins, time_str)

    if has_thermo:

        thermo_data, lexicon, compartment_data = get_thermo_data()

        ecoli = ThermoMEModel(
            thermo_data,
            model=vanilla_model,
            growth_reaction=growth_reaction_id,
            mu_range=mu_range,
            n_mu_bins=n_mu_bins,
            name=name,
        )
    else:
        ecoli = MEModel(
            model=vanilla_model,
            growth_reaction=growth_reaction_id,
            mu_range=mu_range,
            n_mu_bins=n_mu_bins,
            name=name,
        )

    ecoli.name = name
    ecoli.logger.setLevel(logging.WARNING)
    ecoli.sloppy = True
    # apply_bounds(ecoli,fva)

    ecoli.solver = solver
    standard_solver_config(ecoli, verbose=False)

    if has_thermo:
        # Annotate the cobra_model
        annotate_from_lexicon(ecoli, lexicon)
        apply_compartment_data(ecoli, compartment_data)

        # TFA conversion
        ecoli.prepare()

        ecoli.convert(add_displacement=add_displacement)

    mrna_dict = get_mrna_dict(ecoli)
    nt_sequences = get_nt_sequences()
    rnap = get_rnap()
    # rnap.kcat_fwd *= 0.5
    rib = get_rib()

    # Remove nucleotides and amino acids from biomass reaction as they will be
    # taken into account by the expression

    remove_from_biomass_equation(model=ecoli,
                                 nt_dict=rna_nucleotides,
                                 aa_dict=aa_dict,
                                 essentials_dict=essentials)

    ##########################
    ##    MODEL CREATION    ##
    ##########################

    ecoli.add_nucleotide_sequences(nt_sequences)
    ecoli.add_essentials(essentials=essentials,
                         aa_dict=aa_dict,
                         rna_nucleotides=rna_nucleotides,
                         rna_nucleotides_mp=rna_nucleotides_mp)
    ecoli.add_mrnas(mrna_dict.values())
    ecoli.add_ribosome(rib, free_rib_ratio)
    ecoli.add_rnap(rnap, free_rnap_ratio)

    ecoli.build_expression()
    ecoli.add_enzymatic_coupling(coupling_dict)

    if has_allocation:

        nt_ratios, aa_ratios = get_ratios()
        chromosome_len, gc_ratio = get_ecoli_gen_stats()
        kdeg_mrna, mrna_length_avg = get_mrna_metrics()
        kdeg_enz, peptide_length_avg = get_enz_metrics()
        neidhardt_mu, neidhardt_rrel, neidhardt_prel, neidhardt_drel = get_neidhardt_data(
        )

        ecoli.add_dummies(nt_ratios=nt_ratios,
                          mrna_kdeg=kdeg_mrna,
                          mrna_length=mrna_length_avg,
                          aa_ratios=aa_ratios,
                          enzyme_kdeg=kdeg_enz,
                          peptide_length=peptide_length_avg)
        add_protein_mass_requirement(ecoli, neidhardt_mu, neidhardt_prel)
        add_rna_mass_requirement(ecoli, neidhardt_mu, neidhardt_rrel)
        add_dna_mass_requirement(ecoli,
                                 mu_values=neidhardt_mu,
                                 dna_rel=neidhardt_drel,
                                 gc_ratio=gc_ratio,
                                 chromosome_len=chromosome_len,
                                 dna_dict=dna_nucleotides)

        dna_pol = get_dna_polymerase()
        ecoli.add_enzymatic_coupling({'DNA_formation': [
            dna_pol,
        ]})

    # Need to put after, because dummy has to be taken into account if used.
    ecoli.populate_expression()
    ecoli.add_trna_mass_balances()

    ecoli.print_info()
    # Temporarily force growth to at least one standard deviation below the
    # observed value; the bound is reset to 0 further down.
    ecoli.growth_reaction.lower_bound = observed_growth - 1 * observed_growth_std

    need_relax = False

    ecoli.repair()

    try:
        ecoli.optimize()
    except (AttributeError, SolverError):
        # A failed solve is only recorded here; it is acted upon below.
        need_relax = True

    if has_thermo and need_relax:
        # final_model, slack_model, relax_table = relax_dgo(ecoli)
        final_model, slack_model, relax_table = relax_dgo(ecoli, in_place=True)
    else:
        # NOTE(review): without thermo, a failed solve above is not handled
        # any further before the second optimize() call.
        final_model = ecoli

    final_model.growth_reaction.lower_bound = 0
    # apply_bounds(ecoli, original_bounds)
    solution = final_model.optimize()
    print_standard_sol(final_model)

    # The model name already ends in '.json' and is used as the file name.
    filepath = 'models/{}'.format(final_model.name)
    save_json_model(final_model, filepath)

    final_model.logger.info('Build complete for model {}'.format(
        final_model.name))

    return final_model
def __init__(self,
             model_code='ecoli:iJO1366',
             solver='gurobi',
             min_biomass=0.55):
    """
    Build the thermodynamic expression model and store it in ``self.model``.

    The three arguments are forwarded unchanged to the parent ``__init__``.
    ``mu_range`` and ``n_mu_bins`` are not parameters or locals: they are
    read from the enclosing scope. ``self.species``, ``self.model``,
    ``self.model_name`` and ``self.biomass_reaction`` are read here but not
    assigned before use — presumably the parent ``__init__`` sets them;
    verify against the base class.

    Prints wall-clock timings for the build and, if the first solve fails,
    for the failed solve and the relaxation.
    """
    start_time = time.time()
    super().__init__(model_code, solver, min_biomass)
    # NOTE(review): the original indentation was lost in this excerpt; only
    # the selenocysteine pseudo-reaction is assumed to be E. coli specific.
    if self.species == 'ecoli':
        # Add cystein -> selenocystein transformation for convenience
        selcys = Metabolite(id='selcys__L_c',
                            compartment='c',
                            formula='C3H7NO2Se')
        selcys_rxn = Reaction(id='PSEUDO_selenocystein_synthase',
                              name='PSEUDO Selenocystein_Synthase')
        selcys_rxn.add_metabolites({
            self.model.metabolites.cys__L_c: -1,
            selcys: +1
        })
        self.model.add_reactions([selcys_rxn])

    self._sanitize_varnames()
    # self.model.reactions.EX_glc__D_e.lower_bound = -1 * glc_uptake - glc_uptake_std
    # self.model.reactions.EX_glc__D_e.upper_bound = -1 * glc_uptake + glc_uptake_std

    # time_str = get_timestr()

    coupling_dict = get_coupling_dict(self.model,
                                      mode='kmax',
                                      atps_name='ATPS4rpp',
                                      infer_missing_enz=True)

    aa_dict, rna_nucleotides, rna_nucleotides_mp, dna_nucleotides = get_monomers_dict(
    )
    essentials = get_essentials()

    # if has_thermo:
    # The thermo variant is always built; the non-thermo branch is disabled.
    thermo_db = load_thermoDB(
        join(data_dir, 'thermo/thermo_data.thermodb'))
    self.model = ThermoMEModel(thermo_db,
                               model=self.model,
                               growth_reaction=self.biomass_reaction,
                               mu_range=mu_range,
                               n_mu_bins=n_mu_bins)
    self.model.name = self.model_name

    # annotate_from_lexicon(self.model, read_lexicon(dir_path + '/data/thermo/lexicon.csv'))
    # compartment_data = read_compartment_data(dir_path + '/data/thermo/compartment_data.json')
    # apply_compartment_data(self.model, compartment_data)
    apply_compartment_data(
        self.model,
        read_compartment_data(
            join(data_dir, 'thermo/compartment_data.json')))
    annotate_from_lexicon(
        self.model, read_lexicon(join(data_dir, 'thermo/lexicon.csv')))

    self.model.prepare()

    # self.model.reactions.MECDPS.thermo['computed'] = False
    # self.model.reactions.NDPK4.thermo['computed'] = False
    # self.model.reactions.TMDPP.thermo['computed'] = False
    # self.model.reactions.ARGAGMt7pp.thermo['computed'] = False

    self.model.convert()
    # else:
    #     self.model = MEModel(model=self.model, growth_reaction=growth_reaction_id, mu_range=mu_range,
    #                          n_mu_bins=n_mu_bins, name=name)

    # mrna_dict = get_mrna_dict(self.model)
    # nt_sequences = get_nt_sequences()
    # Sequences come from a headerless CSV named after the species and model;
    # the first column is the index, the next column holds the values.
    nt_sequences = pd.read_csv(join(
        data_dir, f'{self.species}/{self.model_name}_nt_seq_kegg.csv'),
                               index_col=0,
                               header=None).iloc[:, 0]
    mrna_dict = self.get_mrna_dict(nt_sequences)
    rnap = get_rnap()
    rib = get_rib()

    # Remove nucleotides and amino acids from biomass reaction as they will be
    # taken into account by the expression
    remove_from_biomass_equation(model=self.model,
                                 nt_dict=rna_nucleotides,
                                 aa_dict=aa_dict,
                                 essentials_dict=essentials)

    self.model.add_nucleotide_sequences(nt_sequences)
    self.model.add_essentials(essentials=essentials,
                              aa_dict=aa_dict,
                              rna_nucleotides=rna_nucleotides,
                              rna_nucleotides_mp=rna_nucleotides_mp)
    self.model.add_mrnas(mrna_dict.values())
    self.model.add_ribosome(rib, free_ratio=0.2)
    # http://bionumbers.hms.harvard.edu/bionumber.aspx?id=102348&ver=1&trm=rna%20polymerase%20half%20life&org=
    # Name          Fraction of active RNA Polymerase
    # Bionumber ID  102348
    # Value         0.17-0.3 unitless
    # Source        Bremer, H., Dennis, P. P. (1996) Modulation of chemical composition and other parameters of the cell by growth rate.
    #               Neidhardt, et al. eds. Escherichia coli and Salmonella typhimurium: Cellular
    #                       and Molecular Biology, 2nd ed. chapter 97 Table 1
    self.model.add_rnap(rnap, free_ratio=0.75)

    self.model.build_expression()
    self.model.add_enzymatic_coupling(coupling_dict)

    # if has_neidhardt:
    #     nt_ratios, aa_ratios = get_ratios()
    #     chromosome_len, gc_ratio = get_ecoli_gen_stats()
    #     kdeg_mrna, mrna_length_avg = get_mrna_metrics()
    #     kdeg_enz, peptide_length_avg = get_enz_metrics()
    #     neidhardt_mu, neidhardt_rrel, neidhardt_prel, neidhardt_drel = get_neidhardt_data()
    #
    #     add_interpolation_variables(self.model)
    #     self.model.add_dummies(nt_ratios=nt_ratios, mrna_kdeg=kdeg_mrna, mrna_length=mrna_length_avg,
    #                            aa_ratios=aa_ratios, enzyme_kdeg=kdeg_enz, peptide_length=peptide_length_avg)
    #     add_protein_mass_requirement(self.model, neidhardt_mu, neidhardt_prel)
    #     add_rna_mass_requirement(self.model, neidhardt_mu, neidhardt_rrel)
    #     add_dna_mass_requirement(self.model, mu_values=neidhardt_mu, dna_rel=neidhardt_drel, gc_ratio=gc_ratio,
    #                              chromosome_len=chromosome_len, dna_dict=dna_nucleotides)

    # Need to put after, because dummy has to be taken into account if used.
    self.model.populate_expression()
    self.model.add_trna_mass_balances()

    # self.model.growth_reaction.lower_bound = objective_lb

    self.model.repair()
    print(
        f"Building ETFL model costs {time.time() - start_time:.2f} seconds!"
    )
    try:
        # The timer is restarted so the messages below time the solve only.
        start_time = time.time()
        self.model.optimize()
    except (AttributeError, SolverError):
        # Reached only when the unrelaxed solve fails: report how long the
        # failed attempt took, then relax the model in place and time that.
        print(
            f"Solving no relaxed model costs {time.time() - start_time:.2f} seconds!"
        )
        start_time = time.time()
        self.model, _, _ = relax_dgo(self.model, in_place=True)
        print(
            f"Relaxing model costs {time.time() - start_time:.2f} seconds!"
        )

    # self.model.growth_reaction.lower_bound = 0
    # print(f"Build ETFL model for {time.time() - start_time:.2f} seconds!")
    self.model.print_info()
path_to_compartment_data = join(this_directory, '..',
                                'models/iJO1366/compartment_data.json')

# Scaling to avoid numerical errors with bad lumps
# Each coefficient v of a lumped ('LMPD_') reaction gets v * (0.1 - 1) added.
# Assuming add_metabolites combines with the existing coefficients (the cobra
# default), this leaves 0.1 * v — TODO confirm.
for rxn in model.reactions:
    if rxn.id.startswith('LMPD_'):
        rxn.add_metabolites(
            {x: v * (0.1 - 1) for x, v in rxn.metabolites.items()})

thermo_data = load_thermoDB(thermoDB)
lexicon = read_lexicon(path_to_lexicon)
compartment_data = read_compartment_data(path_to_compartment_data)

tfa_model = ThermoModel(thermo_data, model)
annotate_from_lexicon(tfa_model, lexicon)
apply_compartment_data(tfa_model, compartment_data)

tfa_model.name = 'Lumped Model'
tfa_model.prepare()
tfa_model.convert()

# tfa_model.solver.configuration.verbosity = True
# Fixed: the original assigned 30 to the `setLevel` attribute, which replaced
# the method and left the log level unchanged. 30 is logging.WARNING.
tfa_model.logger.setLevel(30)


def test_redgem():
    """Run RedGEM on the lumped TFA model and solve the reduced model.

    Only checks that the pipeline runs without raising; the objective-value
    assertion is disabled.
    """
    redgem = RedGEM(tfa_model, path_to_params, False)
    rgem = redgem.run()
    obj_val = rgem.slim_optimize()
    # assert(obj_val > 0)
def create_model(has_thermo, has_expression, has_neidhardt, n_mu_bins=128):
    """
    Build an iJO1366 expression model, solve it and return it.

    Besides its parameters the function reads these names from the enclosing
    scope: ``glc_uptake``, ``glc_uptake_std``, ``growth_reaction_id``,
    ``observed_growth``, ``observed_growth_std`` and ``solver``.

    :param has_thermo: if truthy, build a ``ThermoMEModel``, annotate it and
        run ``prepare()``/``convert()``; otherwise build a ``MEModel``
    :param has_expression: must equal ``True`` (checked with ``assert``)
    :param has_neidhardt: if truthy, add interpolation variables, dummies and
        the protein/RNA/DNA mass requirements
    :param n_mu_bins: number of growth-rate bins passed to the model
    :return: the final model (the relaxed one if relaxation was needed)
    """
    #------------------------------------------------------------
    # Initialisation
    #------------------------------------------------------------

    assert has_expression == True

    # this hack works because we are using the solver switch to update the var
    # names in the solver but really we should not do this
    # TODO: clean up model.sanitize_varnames
    vanilla_model = get_model('optlang-glpk')
    # Constrain glucose exchange to the window -glc_uptake +/- glc_uptake_std.
    vanilla_model.reactions.EX_glc__D_e.lower_bound = -1 * glc_uptake - glc_uptake_std
    vanilla_model.reactions.EX_glc__D_e.upper_bound = -1 * glc_uptake + glc_uptake_std

    vanilla_model.objective = growth_reaction_id
    fba_sol = vanilla_model.slim_optimize()

    # NOTE(review): mu_0 is not read again in this function, and the
    # n_mu_bins self-assignment is a no-op.
    mu_0 = fba_sol
    mu_range = [0, 3.5]
    n_mu_bins = n_mu_bins

    time_str = get_timestr()

    coupling_dict = get_coupling_dict(
        vanilla_model,
        mode='kmax',
        # mode = 'kcat',
        atps_name='ATPS4rpp',
        infer_missing_enz=True)
    # coupling_dict = get_lloyd_coupling_dict(vanilla_model)

    aa_dict, rna_nucleotides, rna_nucleotides_mp, dna_nucleotides = get_monomers_dict(
    )
    essentials = get_essentials()

    # Initialize the model

    # The name encodes the three flags, the number of coupled entries, the
    # bin count and a timestamp.
    name = 'iJO1366_T{:1}E{:1}N{:1}_{}_enz_{}_bins_{}.json'.format(
        has_thermo, has_expression, has_neidhardt, len(coupling_dict),
        n_mu_bins, time_str)

    if has_thermo:

        thermo_data, lexicon, compartment_data = get_thermo_data()

        ecoli = ThermoMEModel(
            thermo_data,
            model=vanilla_model,
            growth_reaction=growth_reaction_id,
            mu_range=mu_range,
            n_mu_bins=n_mu_bins,
            name=name,
        )
    else:
        ecoli = MEModel(
            model=vanilla_model,
            growth_reaction=growth_reaction_id,
            mu_range=mu_range,
            n_mu_bins=n_mu_bins,
            name=name,
        )

    ecoli.name = name
    ecoli.logger.setLevel(logging.WARNING)

    ecoli.solver = solver
    standard_solver_config(ecoli)

    if has_thermo:
        # Annotate the cobra_model
        annotate_from_lexicon(ecoli, lexicon)
        apply_compartment_data(ecoli, compartment_data)

        # TFA conversion
        ecoli.prepare()

        # ecoli.reactions.GLUDy.thermo['computed'] = False
        # ecoli.reactions.DHAtpp.thermo['computed'] = False
        # ecoli.reactions.MLTP2.thermo['computed'] = False
        # ecoli.reactions.G3PD2.thermo['computed'] = False
        # The four reactions below are flagged as not computed between
        # prepare() and convert().
        ecoli.reactions.MECDPS.thermo['computed'] = False
        ecoli.reactions.NDPK4.thermo['computed'] = False
        ecoli.reactions.TMDPP.thermo['computed'] = False
        ecoli.reactions.ARGAGMt7pp.thermo['computed'] = False

        ecoli.convert()  #add_displacement = True)

    mrna_dict = get_mrna_dict(ecoli)
    nt_sequences = get_nt_sequences()
    rnap = get_rnap()
    rib = get_rib()

    # Remove nucleotides and amino acids from biomass reaction as they will be
    # taken into account by the expression

    remove_from_biomass_equation(
        model=ecoli,
        nt_dict=rna_nucleotides,
        aa_dict=aa_dict,
        atp_id=essentials['atp'],
        adp_id=essentials['adp'],
        pi_id=essentials['pi'],
        h2o_id=essentials['h2o'],
        h_id=essentials['h'],
    )

    ##########################
    ##    MODEL CREATION    ##
    ##########################

    ecoli.add_nucleotide_sequences(nt_sequences)
    ecoli.add_essentials(essentials=essentials,
                         aa_dict=aa_dict,
                         rna_nucleotides=rna_nucleotides,
                         rna_nucleotides_mp=rna_nucleotides_mp)
    ecoli.add_mrnas(mrna_dict.values())
    ecoli.add_ribosome(rib, free_ratio=0.2)
    # http://bionumbers.hms.harvard.edu/bionumber.aspx?id=102348&ver=1&trm=rna%20polymerase%20half%20life&org=
    # Name          Fraction of active RNA Polymerase
    # Bionumber ID  102348
    # Value         0.17-0.3 unitless
    # Source        Bremer, H., Dennis, P. P. (1996) Modulation of chemical composition and other parameters of the cell by growth rate.
    #               Neidhardt, et al. eds. Escherichia coli and Salmonella typhimurium: Cellular
    #                       and Molecular Biology, 2nd ed. chapter 97 Table 1
    ecoli.add_rnap(rnap, free_ratio=0.75)

    ecoli.build_expression()
    ecoli.add_enzymatic_coupling(coupling_dict)

    if has_neidhardt:

        nt_ratios, aa_ratios = get_ratios()
        chromosome_len, gc_ratio = get_ecoli_gen_stats()
        kdeg_mrna, mrna_length_avg = get_mrna_metrics()
        kdeg_enz, peptide_length_avg = get_enz_metrics()
        neidhardt_mu, neidhardt_rrel, neidhardt_prel, neidhardt_drel = get_neidhardt_data(
        )

        ecoli.add_interpolation_variables()
        ecoli.add_dummies(nt_ratios=nt_ratios,
                          mrna_kdeg=kdeg_mrna,
                          mrna_length=mrna_length_avg,
                          aa_ratios=aa_ratios,
                          enzyme_kdeg=kdeg_enz,
                          peptide_length=peptide_length_avg)
        ecoli.add_protein_mass_requirement(neidhardt_mu, neidhardt_prel)
        ecoli.add_rna_mass_requirement(neidhardt_mu, neidhardt_rrel)
        ecoli.add_dna_mass_requirement(mu_values=neidhardt_mu,
                                       dna_rel=neidhardt_drel,
                                       gc_ratio=gc_ratio,
                                       chromosome_len=chromosome_len,
                                       dna_dict=dna_nucleotides)

    # Need to put after, because dummy has to be taken into account if used.
    ecoli.populate_expression()
    ecoli.add_trna_mass_balances()

    ecoli.print_info()
    # Temporarily force growth to at least three standard deviations below
    # the observed value; the bound is reset to 0 further down.
    ecoli.growth_reaction.lower_bound = observed_growth - 3 * observed_growth_std

    need_relax = False

    ecoli.repair()

    try:
        ecoli.optimize()
    except (AttributeError, SolverError):
        # A failed solve is only recorded here; it is acted upon below.
        need_relax = True

    # from ipdb import set_trace; set_trace()

    if has_thermo and need_relax:
        final_model, slack_model, relax_table = relax_dgo(ecoli)
        # final_model, slack_model, relax_table = relax_dgo(ecoli, in_place = True)
    else:
        final_model = ecoli

    final_model.growth_reaction.lower_bound = 0
    solution = final_model.optimize()
    print('Objective            : {}'.format(
        final_model.solution.objective_value))
    print(' - Glucose uptake    : {}'.format(
        final_model.reactions.EX_glc__D_e.flux))
    print(' - Growth            : {}'.format(final_model.growth_reaction.flux))
    print(' - Ribosomes produced: {}'.format(final_model.ribosome.X))
    print(' - RNAP produced: {}'.format(final_model.rnap.X))
    try:
        # The DN_DNA entry is printed only when the raw solution exposes it.
        print(' - DNA produced: {}'.format(final_model.solution.raw.DN_DNA))
    except AttributeError:
        pass

    # NOTE(review): filepath is computed but unused while saving is disabled.
    filepath = 'models/{}'.format(final_model.name)
    # save_json_model(final_model, filepath)

    final_model.logger.info('Build complete for model {}'.format(
        final_model.name))

    return final_model
# ------------------------------------------------------------ # Thermo # ------------------------------------------------------------ def curate_lexicon(lexicon): ix = pd.Series(lexicon.index) ix = ix.apply(lambda s: str.replace(s,'-','__')) ix = ix.apply(lambda s: '_'+s if s[0].isdigit() else s) lexicon.index = ix return lexicon lexicon = curate_lexicon(read_lexicon('thermo_data/iJO1366_lexicon.csv')) # Annotate the cobra_model annotate_from_lexicon(ecoli, lexicon) apply_compartment_data(ecoli, compartment_data) # TFA conversion ecoli.prepare() ecoli.convert()#add_displacement = True) #------------------------------------------------------------ # Data #------------------------7.54------------------------------------- # Growth-related abundances neidhardt_data = pd.read_excel(pjoin(data_dir,'neidhardt_tab2.xlsx'), skiprows=range(0,6),
def create_tfa_model(add_displacement=False):
    """
    Build a TFA version of iJO1366, solve it, save it as JSON and return it.

    Reads these names from the enclosing scope: ``glc_uptake``,
    ``glc_uptake_std``, ``growth_reaction_id``, ``observed_growth``,
    ``observed_growth_std`` and ``solver``.

    :param add_displacement: forwarded to ``ThermoModel.convert``
    :return: the final model (the relaxed one if relaxation was needed)
    """
    #------------------------------------------------------------
    # Initialisation
    #------------------------------------------------------------

    time_str = get_timestr()
    name = 'iJO1366_TFA_{}.json'.format(time_str)

    # this hack works because we are using the solver switch to update the var
    # names in the solver but really we should not do this
    # TODO: clean up model.sanitize_varnames
    vanilla_model = get_model('optlang-glpk')
    # Constrain glucose exchange to the window -glc_uptake +/- glc_uptake_std.
    vanilla_model.reactions.EX_glc__D_e.lower_bound = -1 * glc_uptake - glc_uptake_std
    vanilla_model.reactions.EX_glc__D_e.upper_bound = -1 * glc_uptake + glc_uptake_std

    vanilla_model.objective = growth_reaction_id
    # NOTE(review): fba_sol is not read again in this function.
    fba_sol = vanilla_model.slim_optimize()

    thermo_data, lexicon, compartment_data = get_thermo_data()

    ecoli = ThermoModel(
        thermo_data=thermo_data,
        model=vanilla_model,
        name=name,
    )

    ecoli.name = name
    ecoli.logger.setLevel(logging.WARNING)
    ecoli.sloppy = True
    # apply_bounds(ecoli,fva)

    ecoli.solver = solver

    annotate_from_lexicon(ecoli, lexicon)
    apply_compartment_data(ecoli, compartment_data)

    # TFA conversion
    ecoli.prepare()

    ecoli.convert(add_displacement=add_displacement)

    ecoli.print_info()
    # Temporarily force growth to at least one standard deviation below the
    # observed value; the bound is reset to 0 further down.
    ecoli.reactions.get_by_id(growth_reaction_id).lower_bound = observed_growth - \
                                                                1*observed_growth_std

    need_relax = False

    ecoli.repair()

    try:
        ecoli.optimize()
    except (AttributeError, SolverError):
        # A failed solve is only recorded here; it is acted upon below.
        need_relax = True

    if need_relax:
        # final_model, slack_model, relax_table = relax_dgo(ecoli)
        final_model, slack_model, relax_table = relax_dgo(ecoli, in_place=True)
    else:
        final_model = ecoli

    final_model.reactions.get_by_id(growth_reaction_id).lower_bound = 0
    # apply_bounds(ecoli, original_bounds)
    solution = final_model.optimize()
    print('Objective            : {}'.format(
        final_model.solution.objective_value))
    print(' - Glucose uptake    : {}'.format(
        final_model.reactions.EX_glc__D_e.flux))

    # The model name already ends in '.json' and is used as the file name.
    filepath = 'models/{}'.format(final_model.name)
    save_json_model(final_model, filepath)

    final_model.logger.info('Build complete for model {}'.format(
        final_model.name))

    return final_model
# thermo_data = load_thermoDB('../data/thermo_data.thermodb') # lexicon = read_lexicon('../models/small_ecoli/lexicon.csv') # compartment_data = read_compartment_data('../models/small_ecoli/compartment_data.json') cobra_model = load_json_model('../models/iJO1366.json') thermo_data = load_thermoDB('../data/thermo_data.thermodb') lexicon = read_lexicon('../models/iJO1366/lexicon.csv') compartment_data = read_compartment_data( '../models/iJO1366/compartment_data.json') # Initialize the cobra_model mytfa = pytfa.ThermoModel(thermo_data, cobra_model) # Annotate the cobra_model annotate_from_lexicon(mytfa, lexicon) apply_compartment_data(mytfa, compartment_data) # Initialize the cobra_model tmodel = pytfa.ThermoModel(thermo_data, cobra_model) tmodel.name = 'tutorial' # Annotate the cobra_model annotate_from_lexicon(tmodel, lexicon) apply_compartment_data(tmodel, compartment_data) # Set the solver tmodel.solver = GUROBI ## TFA conversion tmodel.prepare()