def load_ensemble(inputfile, **kwargs):
    """ Load ensemble model from SBML file.

    Args:
        inputfile (str): input file
        **kwargs (dict): additional arguments to *load_cbmodel* method

    Returns:
        EnsembleModel: ensemble model, or None (after printing an error message)
            when no reaction carries ensemble states or when the reactions
            disagree on the ensemble size
    """

    model = load_cbmodel(inputfile, **kwargs)

    # The 'ENSEMBLE_STATE' metadata entry is a whitespace-separated string of
    # integers; each one is converted to a bool.
    reaction_states = {}
    for r_id, rxn in model.reactions.items():
        if 'ENSEMBLE_STATE' in rxn.metadata:
            state_as_str = rxn.metadata['ENSEMBLE_STATE']
            reaction_states[r_id] = [bool(int(x)) for x in state_as_str.split()]

    # Distinct state-vector lengths found across the annotated reactions
    sizes = {len(states) for states in reaction_states.values()}

    if not sizes:
        # Without this guard the size lookup below fails with an IndexError
        print('Error: no reaction has ensemble state information')
        return

    if len(sizes) > 1:
        print('Error: reactions have different ensemble size')
        return

    return EnsembleModel(model, sizes.pop(), reaction_states)
def compute_bigg_gibbs_energy(modelfile, equi_cmpds_file, outputfile=None):
    """ Calculate standard Gibbs Energy for reactions in a model (as many as possible) using eQuilibrator.

    Args:
        modelfile (str): SBML file
        equi_cmpds_file (str): file containing KEGG compounds accepted by eQuilibrator
            (tab-separated, with a 'compound_id' column)
        outputfile (str): output CSV file (optional)

    Returns:
        dict: standard Gibbs Energies indexed by reaction ids
        dict: estimation error indexed by reaction ids
    """

    model = load_cbmodel(modelfile)

    compounds_table = pd.read_csv(equi_cmpds_file, sep='\t')
    kegg_compounds = set(compounds_table['compound_id'])

    dG0, sdG0 = calculate_deltaG0s(model, kegg_compounds, pH=default_pH, I=default_I, T=default_T)

    if outputfile:
        data = pd.DataFrame({'dG0': dG0, 'sdG0': sdG0})
        data.to_csv(outputfile)

    # Always hand the results back, as documented; previously they were
    # dropped whenever an output file was requested.
    return dG0, sdG0
def testRun(self):
    """Write the small test model with write_model_to_file, read it back, and compare ids."""
    model = load_cbmodel(SMALL_TEST_MODEL)
    write_model_to_file(model, PLAIN_TEXT_COPY)
    model_copy = read_model_from_file(PLAIN_TEXT_COPY, kind='cb')

    # Metabolites are compared regardless of order
    self.assertListEqual(sorted(model.metabolites.keys()),
                         sorted(model_copy.metabolites.keys()))

    # Reactions are compared in order. keys() is a view (not a list) on
    # Python 3 and assertListEqual rejects non-list arguments, so convert first.
    self.assertListEqual(list(model.reactions.keys()),
                         list(model_copy.reactions.keys()))
def carveme(open_bounds, fungi_id, universal_model_path, reaction_scores):
    """
    Run the CarveMe reconstruction on the universal model and save every resulting model.

    :param open_bounds: True/False depending on the exchange constraints of the universal model.
    :param fungi_id: Fungi id; used as the prefix of the saved file names.
    :param universal_model_path: Path to the universal model file (read with load_cbmodel, flavor 'fbc2').
    :param reaction_scores: Scores for the reactions.
    :return: The number of models saved and the objective returned by CarveMeFuncPool.carve_model.
    """
    # Load the universal model with framed
    universe = load_cbmodel(universal_model_path, exchange_detection_mode='unbalanced', flavor='fbc2',
                            load_gprs=False, load_metadata=False)

    # Any bound that is missing is replaced by -100 / 100
    for r_id in universe.reactions:
        rxn = universe.reactions[r_id]
        if rxn.lb is None:
            rxn.lb = -100
        if rxn.ub is None:
            rxn.ub = 100

    if open_bounds:
        # Open every reaction whose id ends in '_E' in both directions
        for r_id in universe.reactions:
            if r_id.endswith('_E'):
                universe.reactions[r_id].lb = -100
                universe.reactions[r_id].ub = 100
    else:
        # Glucose, ammonium, water, O2,...
        uptake_ids = (
            'R_UF03376_E',
            'R_UF02549_E',
            'R_UF03382_E',
            'R_UF03474_E',
            'R_UF02765_E',
            'R_UF03268_E',
            'R_UF03456_E',
            'R_UF03314_E',
            'R_UF03288_E',
        )
        for r_id in uptake_ids:
            universe.reactions[r_id].lb = -100

    # Run CarveMe
    objective, reconstructed_models = CarveMeFuncPool.carve_model(
        universe, reaction_scores, eps=1e-3, min_growth=0.1, min_atpm=0.1, feast=1e-7, opti=1e-7)

    # Save the models into files (to be able to work with cobra and update the sbml files).
    # The file name ends in 'O.sbml' for open bounds and in 'M.sbml' otherwise.
    suffix = 'O.sbml' if open_bounds else 'M.sbml'
    reconstructed_counter = 0
    for carved in reconstructed_models:
        target = 'results/' + fungi_id + str(reconstructed_counter) + suffix
        save_sbml_model(carved, target, flavor='cobra')
        reconstructed_counter += 1

    return reconstructed_counter, objective
def testRun(self):
    """Load the test model as 'cobra', save and reload it as 'fbc2', and compare both models."""
    model = load_cbmodel(SMALL_TEST_MODEL, flavor='cobra')
    save_sbml_model(model, TEST_MODEL_COPY3, flavor='fbc2')
    model_copy = load_cbmodel(TEST_MODEL_COPY3, flavor='fbc2')

    self.assertEqual(model.id, model_copy.id)

    # keys() is a view (not a list) on Python 3 and assertListEqual rejects
    # non-list arguments, so every key collection is converted explicitly.
    self.assertListEqual(list(model.compartments.keys()), list(model_copy.compartments.keys()))
    self.assertListEqual(list(model.metabolites.keys()), list(model_copy.metabolites.keys()))
    self.assertListEqual(list(model.reactions.keys()), list(model_copy.reactions.keys()))

    # Reactions are paired by position and compared attribute by attribute
    for r1, r2 in zip(model.reactions.values(), model_copy.reactions.values()):
        self.assertEqual(r1.name, r2.name)
        self.assertEqual(r1.reversible, r2.reversible)
        self.assertDictEqual(r1.stoichiometry, r2.stoichiometry)
        self.assertEqual(r1.lb, r2.lb)
        self.assertEqual(r1.ub, r2.ub)
        self.assertEqual(str(r1.gpr), str(r2.gpr))

    self.assertListEqual(list(model.genes.keys()), list(model_copy.genes.keys()))
def main(inputfiles, flavor=None, split_pool=False, no_biomass=False, init=None, mediadb=None,
         ext_comp_id=None, outputfile=None):
    """ Merge several models into one community model and save it.

    Args:
        inputfiles (list): paths of the models to merge
        flavor (str): SBML flavor used for loading and saving (default: configured default flavor)
        split_pool (bool): when True the extracellular compartments are not merged
        no_biomass (bool): when True no community biomass is created
        init (str): id of a medium in the media library to apply to the merged model (optional)
        mediadb (str): media library file (default: configured media library)
        ext_comp_id (str): extracellular compartment id (default: 'C_e')
        outputfile (str): output file (default: 'community.xml')

    Raises:
        IOError: if the media library cannot be loaded
    """

    flavor = flavor or config.get('sbml', 'default_flavor')

    if not outputfile:
        model_id, outputfile = 'community', 'community.xml'
    else:
        # The community id is the output file name without folder and extension
        model_id = os.path.splitext(os.path.basename(outputfile))[0]

    if ext_comp_id is None:
        ext_comp_id = 'C_e'

    models = []
    for path in inputfiles:
        models.append(load_cbmodel(path, flavor=flavor))

    community = Community(model_id, models,
                          extracellular_compartment_id=ext_comp_id,
                          merge_extracellular_compartments=(not split_pool),
                          create_biomass=(not no_biomass))

    merged = community.generate_merged_model()

    if init:
        if not mediadb:
            mediadb = project_dir + config.get('input', 'media_library')

        try:
            media_db = load_media_db(mediadb)
        except IOError:
            raise IOError('Failed to load media library:' + mediadb)

        # The format strings carry their own quotes; they are handed to
        # Environment.from_compounds exactly in this form.
        exchange_format = "'R_EX_M_{}_e_pool'" if split_pool else "'R_EX_{}_e'"

        init_env = Environment.from_compounds(media_db[init], exchange_format=exchange_format)
        init_env.apply(merged, inplace=True)

    save_cbmodel(merged, outputfile, flavor=flavor)
def main(inputfile, media, mediadb=None, universe=None, universe_file=None, outputfile=None, flavor=None,
         exchange_format=None, verbose=False):
    """ Gap-fill a model for the given media and save the result.

    Args:
        inputfile (str): model file to gap-fill
        media (list): media ids, passed on to multiGapFill
        mediadb (str): media database file (default: configured media library)
        universe (str): universe name used to build the universe file name (optional)
        universe_file (str): universe model file (overrides *universe*)
        outputfile (str): output file (default: input file name with a '_gapfill.xml' ending)
        flavor (str): SBML flavor (default for saving: configured default flavor)
        exchange_format (str): passed on to multiGapFill
        verbose (bool): print progress messages

    Raises:
        IOError: if the model, the universe or the media database cannot be loaded
    """

    def report(message):
        # Fixed progress messages are only shown in verbose mode
        if verbose:
            print(message)

    report('Loading model...')

    try:
        model = load_cbmodel(inputfile, flavor=flavor)
    except IOError:
        raise IOError('Failed to load model:' + inputfile)

    report('Loading reaction universe...')

    if not universe_file:
        if universe:
            generated_folder = config.get('generated', 'folder')
            universe_file = "{}{}universe_{}.xml".format(project_dir, generated_folder, universe)
        else:
            universe_file = project_dir + config.get('generated', 'default_universe')

    try:
        universe_model = load_cbmodel(universe_file)
    except IOError:
        if not universe:
            raise IOError('Failed to load universe model:' + universe_file)
        raise IOError('Failed to load universe "{0}". Please run build_universe.py --{0}.'.format(universe))

    report('Loading media...')

    if not mediadb:
        mediadb = project_dir + config.get('input', 'media_library')

    try:
        media_db = load_media_db(mediadb)
    except IOError:
        raise IOError('Failed to load media database:' + mediadb)

    if verbose:
        # Model size before gap-filling, used for the summary message below
        mets_before = len(model.metabolites)
        rxns_before = len(model.reactions)
        print('Gap filling for {}...'.format(', '.join(media)))

    max_uptake = config.getint('gapfill', 'max_uptake')
    multiGapFill(model, universe_model, media, media_db, max_uptake=max_uptake, inplace=True,
                 exchange_format=exchange_format)

    if verbose:
        mets_after = len(model.metabolites)
        rxns_after = len(model.reactions)
        print('Added {} reactions and {} metabolites'.format((rxns_after - rxns_before),
                                                             (mets_after - mets_before)))
        print('Saving SBML file...')

    if not outputfile:
        outputfile = os.path.splitext(inputfile)[0] + '_gapfill.xml'

    if not flavor:
        flavor = config.get('sbml', 'default_flavor')

    save_cbmodel(model, outputfile, flavor=flavor)

    report('Done.')
# Difference between the 'UOK262' and 'UOK262pFH' flux columns
fluxes['delta'] = fluxes['UOK262'] - fluxes['UOK262pFH']
# Rename the columns that have an entry in conditions_map; keep the others as they are
fluxes.columns = [conditions_map[c] if c in conditions_map else c for c in fluxes]

# Regulatory p-sites
r_sites = read_csv('./files/Regulatory_sites.txt', sep='\t')
# Keep human entries only
r_sites = r_sites[r_sites['ORGANISM'] == 'human']
# Keep the rows whose MOD_RSD value ends with '-p'
r_sites = r_sites[[i.endswith('-p') for i in r_sites['MOD_RSD']]]
# Site id: '<GENE>_<part of MOD_RSD before the first dash>'
r_sites['res'] = ['%s_%s' % (g, p.split('-')[0]) for g, p in r_sites[['GENE', 'MOD_RSD']].values]

# -- Import metabolic model
gmap = read_csv('./files/non_alt_loci_set.txt', sep='\t')
# 'G_' + hgnc_id with ':' replaced by '_' (presumably the gene id format of the model -- confirm)
gmap['hgsn'] = ['G_' + i.replace(':', '_') for i in gmap['hgnc_id']]
# One symbol per 'hgsn' id: the first one of each group
gmap = gmap.groupby('hgsn')['symbol'].agg(lambda x: list(x)[0])

model = load_cbmodel('./files/recon2.2.xml', flavor='cobra')
model.detect_biomass_reaction()
model.remove_metabolite('M_biomass_c')
model.add_reaction_from_str('R_ATPM: M_h2o_c + M_atp_c --> M_adp_c + M_pi_c + M_h_c')

# Symbols of the model genes that have an entry in gmap
g_enzymes = {gmap[g] for g in model.genes if g in gmap}
# Gene-to-reaction lookup re-keyed by symbol; genes without an entry in gmap are dropped
g_to_r = model.gene_to_reaction_lookup()
g_to_r = {gmap[g]: g_to_r[g] for g in g_to_r if g in gmap}

# -- Correlation of phosphoproteomics with proteomics
# One row per phosphoproteomics index entry whose prefix (text before the first '_')
# is present in the proteomics index; the two columns hold the respective 'fc' values.
plot_df = DataFrame({p: {
    'proteomics': proteomics.loc[p.split('_')[0], 'fc'],
    'phosphoproteomics': phosphoproteomics.loc[p, 'fc']
} for p in phosphoproteomics.index if p.split('_')[0] in proteomics.index}).T
def main(inputfile, input_type='protein', outputfile=None, diamond_args=None, universe=None, universe_file=None,
         ensemble_size=None, verbose=False, debug=False, flavor=None, gapfill=None, blind_gapfill=False, init=None,
         mediadb=None, default_score=None, uptake_score=None, soft_score=None, soft=None, hard=None,
         reference=None, ref_score=None, recursive_mode=False, specified_solver=None, feas_tol=None,
         opt_tol=None, int_feas_tol=None):
    """ Reconstruct a model (or an ensemble of models) from a genome and save it.

    Steps: resolve output names, load optional constraints, obtain gene annotations
    (diamond, eggnog or precomputed diamond results), load the universe model, score
    reactions, carve the model, and optionally gap-fill it for the requested media.

    Args:
        inputfile (str): input file; for 'refseq'/'genbank' it is handed to download_ncbi_genome
        input_type (str): 'protein', 'dna', 'refseq', 'genbank', 'eggnog' or 'diamond'
        outputfile (str): output file; in recursive mode it is used as the output folder
        diamond_args (str): extra arguments passed on to run_blast
        universe (str): universe name used to build the universe file name
        universe_file (str): universe model file (overrides *universe*)
        ensemble_size (int): when larger than 1 an ensemble is built instead of a single model
        verbose (bool): print progress messages
        debug (bool): when True the model id is passed as debug output prefix
        flavor (str): SBML flavor for the output (default: configured default flavor)
        gapfill (str): comma-separated media ids to gap-fill for (single-model mode only)
        blind_gapfill (bool): gap-fill without reaction scores
        init (str): id of the medium used to initialize the environment
        mediadb (str): media library file (default: configured media library)
        default_score, uptake_score, soft_score, ref_score: passed on to carve_model
        soft (str): soft-constraints file
        hard (str): hard-constraints file
        reference (str): reference model file
        recursive_mode (bool): derive the model id from the input file name
        specified_solver (str): solver to set as default when it differs from the configured one
        feas_tol, opt_tol, int_feas_tol: solver tolerances; only those that are not None are set

    Returns:
        None. Several failure modes print a message and return early.

    Raises:
        IOError: if a constraints file, the universe, the reference model or the media
            library cannot be loaded
        ValueError: if *input_type* is not one of the supported values
    """

    # --- Output file and model id ---------------------------------------
    if recursive_mode:
        model_id = os.path.splitext(os.path.basename(inputfile))[0]

        if outputfile:
            outputfile = '{}/{}.xml'.format(outputfile, model_id)
        else:
            outputfile = os.path.splitext(inputfile)[0] + '.xml'
    else:
        if outputfile:
            model_id = os.path.splitext(os.path.basename(outputfile))[0]
        else:
            model_id = os.path.splitext(os.path.basename(inputfile))[0]
            outputfile = os.path.splitext(inputfile)[0] + '.xml'

    model_id = build_model_id(model_id)

    outputfolder = os.path.abspath(os.path.dirname(outputfile))

    if not os.path.exists(outputfolder):
        try:
            os.makedirs(outputfolder)
        except OSError:
            print('Unable to create output folder:', outputfolder)
            return

    # --- Optional constraints -------------------------------------------
    if soft:
        try:
            soft_constraints = load_soft_constraints(soft)
        except IOError:
            raise IOError('Failed to load soft-constraints file:' + soft)
    else:
        soft_constraints = None

    if hard:
        try:
            hard_constraints = load_hard_constraints(hard)
        except IOError:
            raise IOError('Failed to load hard-constraints file:' + hard)
    else:
        hard_constraints = None

    # --- Gene annotations -----------------------------------------------
    if input_type == 'refseq' or input_type == 'genbank':

        if verbose:
            print('Downloading genome {} from NCBI...'.format(inputfile))

        ncbi_table = load_ncbi_table(project_dir + config.get('ncbi', input_type))
        inputfile = download_ncbi_genome(inputfile, ncbi_table)

        if not inputfile:
            print('Failed to download genome from NCBI.')
            return

        # The downloaded file then goes through the diamond branch below
        input_type = 'protein' if inputfile.endswith('.faa.gz') else 'dna'

    if input_type == 'protein' or input_type == 'dna':
        if verbose:
            print('Running diamond...')
        diamond_db = project_dir + config.get('input', 'diamond_db')
        blast_output = os.path.splitext(inputfile)[0] + '.tsv'
        exit_code = run_blast(inputfile, input_type, blast_output, diamond_db, diamond_args, verbose)

        if exit_code is None:
            print('Unable to run diamond (make sure diamond is available in your PATH).')
            return

        if exit_code != 0:
            print('Failed to run diamond.')
            if diamond_args is not None:
                print('Incorrect diamond args? Please check documentation or use default args.')
            return

        annotations = load_diamond_results(blast_output)
    elif input_type == 'eggnog':
        annotations = load_eggnog_data(inputfile)
    elif input_type == 'diamond':
        annotations = load_diamond_results(inputfile)
    else:
        raise ValueError('Invalid input type: ' + input_type)

    # --- Universe model and solver settings -----------------------------
    if verbose:
        print('Loading universe model...')

    if not universe_file:
        if universe:
            universe_file = "{}{}universe_{}.xml.gz".format(project_dir, config.get('generated', 'folder'), universe)
        else:
            universe_file = project_dir + config.get('generated', 'default_universe')

    # change default solver if a solver is specified in the input
    if specified_solver is not None:
        if specified_solver != config.get('solver', 'default_solver'):
            set_default_solver(specified_solver)

    params_to_set = {'FEASIBILITY_TOL': feas_tol,
                     'OPTIMALITY_TOL': opt_tol,
                     'INT_FEASIBILITY_TOL': int_feas_tol}
    for key, value in params_to_set.items():
        if value is not None:
            set_default_parameter(getattr(Parameter, key), value)

    try:
        universe_model = load_cbmodel(universe_file, flavor=config.get('sbml', 'default_flavor'))
        universe_model.id = model_id
    except IOError:
        available = '\n'.join(glob("{}{}universe_*.xml.gz".format(project_dir, config.get('generated', 'folder'))))
        raise IOError('Failed to load universe model: {}\nAvailable universe files:\n{}'.format(universe_file, available))

    if reference:
        if verbose:
            print('Loading reference model...')

        try:
            ref_model = load_cbmodel(reference)
        except Exception:
            # Any loading failure is reported as IOError; interpreter-exit
            # signals (KeyboardInterrupt, SystemExit) are no longer masked.
            raise IOError('Failed to load reference model.')
    else:
        ref_model = None

    # --- Media library --------------------------------------------------
    if gapfill or init:

        if verbose:
            print('Loading media library...')

        if not mediadb:
            mediadb = project_dir + config.get('input', 'media_library')

        try:
            media_db = load_media_db(mediadb)
        except IOError:
            raise IOError('Failed to load media library:' + mediadb)

    # --- Reaction scoring -----------------------------------------------
    if verbose:
        print('Scoring reactions...')

    bigg_gprs = project_dir + config.get('generated', 'bigg_gprs')
    gprs = pd.read_csv(bigg_gprs)
    # Only GPR rows whose reaction is part of the universe are scored
    gprs = gprs[gprs.reaction.isin(universe_model.reactions)]

    debug_output = model_id if debug else None
    scores = reaction_scoring(annotations, gprs, debug_output=debug_output)

    if scores is None:
        print('The input genome did not match sufficient genes/reactions in the database.')
        return

    if not flavor:
        flavor = config.get('sbml', 'default_flavor')

    init_env = None

    if init:
        if init in media_db:
            init_env = Environment.from_compounds(media_db[init])
        else:
            print('Error: medium {} not in media database.'.format(init))

    universe_model.metadata['Description'] = 'This model was built with CarveMe version ' + version

    # --- Reconstruction -------------------------------------------------
    # `model` is only produced by the single-model + gap-fill path. It must
    # exist on every path because the gap-fill check below reads it;
    # previously requesting an ensemble together with gap-filling raised a
    # NameError there.
    model = None

    if ensemble_size is None or ensemble_size <= 1:
        if verbose:
            print('Reconstructing a single model')

        if not gapfill:
            # No gap-filling requested: carve_model writes the output file itself
            carve_model(universe_model, scores,
                        outputfile=outputfile, flavor=flavor,
                        default_score=default_score, uptake_score=uptake_score,
                        soft_score=soft_score, soft_constraints=soft_constraints,
                        hard_constraints=hard_constraints, ref_model=ref_model,
                        ref_score=ref_score, init_env=init_env,
                        debug_output=debug_output)
        else:
            # Keep the universe intact; it is needed again for gap-filling
            model = carve_model(universe_model, scores, inplace=False,
                                default_score=default_score, uptake_score=uptake_score,
                                soft_score=soft_score, soft_constraints=soft_constraints,
                                hard_constraints=hard_constraints, ref_model=ref_model,
                                ref_score=ref_score, init_env=init_env,
                                debug_output=debug_output)
    else:
        if verbose:
            print('Building an ensemble of', ensemble_size, 'models')

        build_ensemble(universe_model, scores, ensemble_size, outputfile, flavor, init_env=init_env)

    # --- Gap-filling (single-model mode only) ---------------------------
    if gapfill and model is not None:

        media = gapfill.split(',')

        if verbose:
            m1, n1 = len(model.metabolites), len(model.reactions)
            print('Gap filling for {}...'.format(', '.join(media)))

        max_uptake = config.getint('gapfill', 'max_uptake')

        if blind_gapfill:
            scores = None
        else:
            scores = dict(scores[['reaction', 'normalized_score']].values)

        multiGapFill(model, universe_model, media, media_db, scores=scores, max_uptake=max_uptake, inplace=True)

        if verbose:
            m2, n2 = len(model.metabolites), len(model.reactions)
            print('Added {} reactions and {} metabolites'.format((n2 - n1), (m2 - m1)))

        if init_env:
            # Should initialize environment again as new exchange reactions
            # can be acquired during gap-filling
            init_env.apply(model, inplace=True, warning=False)

        save_cbmodel(model, outputfile, flavor=flavor)

    if verbose:
        print('Done.')
type=types_dict[optim_type], criticalReacs=critical_proteins, isMultiProc=multi_thread, candidateSize=cand_size, resultFile=output_file) return res if __name__ == "__main__": # First Step # Load the Model: Cobra or Framed optimmodels_path = os.path.dirname(optimModels.__file__) ecoli_model = os.path.join(optimmodels_path, "examples", "models", "Ec_iAF1260.xml") # path to the model file framed_model = load_cbmodel(filename=ecoli_model, flavor="cobra") # Second Step # Evaluating Function # different functions require different arguments ec_stoic_biomass = "R_Ec_biomass_iAF1260_core_59p81M" ec_stoic_succinate = "R_EX_succ_e" stoic_eval_function = build_evaluation_function( "WYIELD", # evaluating function id ec_stoic_biomass, # biomass reaction id ec_stoic_succinate, # target reaction id alpha=0.3, # percentage of maximum target considered minBiomassValue=0.03135 # minimum biomass for viable solution ) # Third Step # Run Optimization
optimmodels_path = os.path.dirname(optimModels.__file__) models_path = os.path.abspath(os.path.join(optimmodels_path, "examples", "models")) model1 = "Yokenella_regensburgei_ATCC_43003.xml" model2 = "Acinetobacter_junii_SH205.xml" model3 = "Clostridiales_sp_1_7_47FAA.xml" model4 = "Achromobacter_xylosoxidans_A8.xml" model5 = "Achromobacter_xylosoxidans_NBRC_15126.xml" model6 = "Acidaminococcus_intestini_RyC_MR95.xml" model7 = "Acidaminococcus_sp_D21.xml" model8 = "Acinetobacter_calcoaceticus_PHEA_2.xml" model9 = "Acinetobacter_lwoffii_WJ10621.xml" model10 = "Actinobacillus_pleuropneumoniae_L20.xml" model_list = [model1, model2, model3, model4, model5, model6, model7, model8, model9, model10] list_models = [load_cbmodel(filename = str(os.path.join(models_path, model)), flavor = "cobra:other") for model in model_list] # Step 2 # Create the Community Model comm_model = CModel( community_id = "model_id", models = list_models, empty_flag = False # This creates a complete medium, to crete an empty medium input True ) # That way you can open any medium through constraints # Step 3 # Run the EA Optimization # 3.1 Configure the optimization parameters change_config( # Leave parameters as None to use defaults pop_size = None, # changes the size of the populations of candidates max_gen = None, # changes the max number of generations