def build_bigg_universe_model(outputfile=None):
    """ Download the whole BiGG universe database as a CBModel and (optionally) store in SBML.

    The list of reactions is requested from *reactions_url* and *build_reaction*
    is called once per returned entry (by its 'bigg_id'), reporting progress
    after each one.

    Args:
        outputfile (str): SBML output file (optional)

    Returns:
        CBModel: universe model
    """

    # print() calls (rather than print statements) keep this function valid on Python 3
    # while producing the same output on Python 2 for a single string argument.
    print('Downloading universal data from BiGG...')
    model = CBModel('bigg_universe')
    bigg_rxns = get_request(reactions_url)

    results = bigg_rxns['results']
    n = len(results)

    for i, entry in enumerate(results):
        build_reaction(model, entry['bigg_id'])
        progress(i, n)

    # terminate the progress output line
    print('\n')

    if outputfile:
        save_cbmodel(model, outputfile)

    return model
def save_ensemble(ensemble, outputfile, **kwargs):
    """ Write an ensemble model to an SBML file.

    Before saving, the states of every reaction are converted to integers,
    joined into a space-separated string and stored in the reaction metadata
    under the 'ENSEMBLE_STATE' key.

    Args:
        ensemble (EnsembleModel): model ensemble
        outputfile (str): output file
        **kwargs (dict): additional arguments forwarded to *save_cbmodel*
    """

    for rxn_id, rxn_states in ensemble.reaction_states.items():
        # encode the state vector, e.g. "1 0 1"
        encoded = ' '.join(str(int(state)) for state in rxn_states)
        reaction = ensemble.model.reactions[rxn_id]
        reaction.metadata['ENSEMBLE_STATE'] = encoded

    save_cbmodel(ensemble.model, outputfile, **kwargs)
def main(inputfiles, flavor=None, split_pool=False, no_biomass=False, init=None, mediadb=None, ext_comp_id=None, outputfile=None):
    """ Merge several models into a single community model and save it as SBML.

    Args:
        inputfiles (list): paths of the models to load and merge
        flavor (str): SBML flavor used for loading and saving
            (default: 'default_flavor' entry of the 'sbml' config section)
        split_pool (bool): when True, extracellular compartments are not merged (default: False)
        no_biomass (bool): when True, the community is built with create_biomass=False (default: False)
        init (str): key of the medium (in the media library) used to initialize the environment (optional)
        mediadb (str): media library file
            (default: project_dir + 'media_library' entry of the 'input' config section)
        ext_comp_id (str): extracellular compartment id (default: 'C_e')
        outputfile (str): output SBML file; its base name (without extension) is used as model id
            (default: 'community.xml' with model id 'community')

    Raises:
        IOError: if the media library cannot be loaded
    """

    flavor = flavor or config.get('sbml', 'default_flavor')

    if not outputfile:
        model_id, outputfile = 'community', 'community.xml'
    else:
        file_name = os.path.basename(outputfile)
        model_id = os.path.splitext(file_name)[0]

    if ext_comp_id is None:
        ext_comp_id = 'C_e'

    models = []
    for path in inputfiles:
        models.append(load_cbmodel(path, flavor=flavor))

    community = Community(
        model_id,
        models,
        extracellular_compartment_id=ext_comp_id,
        merge_extracellular_compartments=(not split_pool),
        create_biomass=(not no_biomass),
    )

    merged = community.generate_merged_model()

    if init:
        if not mediadb:
            mediadb = project_dir + config.get('input', 'media_library')

        try:
            media_db = load_media_db(mediadb)
        except IOError:
            raise IOError('Failed to load media library:' + mediadb)

        # the inner quotes are part of the format string and must be preserved
        exchange_format = "'R_EX_M_{}_e_pool'" if split_pool else "'R_EX_{}_e'"

        compounds = media_db[init]
        init_env = Environment.from_compounds(compounds, exchange_format=exchange_format)
        init_env.apply(merged, inplace=True)

    save_cbmodel(merged, outputfile, flavor=flavor)
def main(inputfile, media, mediadb=None, universe=None, universe_file=None, outputfile=None, flavor=None, exchange_format=None, verbose=False):
    """ Gap fill a model for growth on the given media and save the result as SBML.

    Args:
        inputfile (str): model file to gap fill
        media (list): identifiers of the media to gap fill for
        mediadb (str): media database file
            (default: project_dir + 'media_library' entry of the 'input' config section)
        universe (str): name of a pre-built universe; used to derive the universe file name (optional)
        universe_file (str): reaction universe file; takes precedence over *universe* (optional)
        outputfile (str): output SBML file (default: input file name with a '_gapfill.xml' suffix)
        flavor (str): SBML flavor used to load the model and to save the result
            (when saving, defaults to the 'default_flavor' entry of the 'sbml' config section)
        exchange_format (str): forwarded to *multiGapFill* (optional)
        verbose (bool): print progress messages (default: False)

    Raises:
        IOError: if the model, the universe or the media database cannot be loaded
    """

    def report(message):
        # progress messages are only shown in verbose mode
        if verbose:
            print(message)

    report('Loading model...')

    try:
        model = load_cbmodel(inputfile, flavor=flavor)
    except IOError:
        raise IOError('Failed to load model:' + inputfile)

    report('Loading reaction universe...')

    if not universe_file:
        if not universe:
            universe_file = project_dir + config.get('generated', 'default_universe')
        else:
            universe_file = "{}{}universe_{}.xml".format(project_dir, config.get('generated', 'folder'), universe)

    try:
        universe_model = load_cbmodel(universe_file)
    except IOError:
        if universe:
            failure = 'Failed to load universe "{0}". Please run build_universe.py --{0}.'.format(universe)
        else:
            failure = 'Failed to load universe model:' + universe_file
        raise IOError(failure)

    report('Loading media...')

    if not mediadb:
        mediadb = project_dir + config.get('input', 'media_library')

    try:
        media_db = load_media_db(mediadb)
    except IOError:
        raise IOError('Failed to load media database:' + mediadb)

    if verbose:
        # remember the model size so the number of additions can be reported afterwards
        m1 = len(model.metabolites)
        n1 = len(model.reactions)
        print('Gap filling for {}...'.format(', '.join(media)))

    max_uptake = config.getint('gapfill', 'max_uptake')

    multiGapFill(model, universe_model, media, media_db, max_uptake=max_uptake, inplace=True,
                 exchange_format=exchange_format)

    if verbose:
        m2 = len(model.metabolites)
        n2 = len(model.reactions)
        print('Added {} reactions and {} metabolites'.format((n2 - n1), (m2 - m1)))
        print('Saving SBML file...')

    if not outputfile:
        base_name = os.path.splitext(inputfile)[0]
        outputfile = base_name + '_gapfill.xml'

    if not flavor:
        flavor = config.get('sbml', 'default_flavor')

    save_cbmodel(model, outputfile, flavor=flavor)

    report('Done.')
def curate_universe(model, model_specific_data, bigg_models, biomass_eq, taxa=None,
                    thermodynamics_data=None, metabolomics_data=None, thermodynamics_method=None,
                    manually_curated=None, unbalanced_metabolites=None, use_heuristics=True,
                    remove_unbalanced=True, remove_blocked=True, outputfile=None):
    """ Curate universal reaction database from initial database dump.

    The model passed in is modified in place and also returned.

    Args:
        model (CBModel): universal model
        model_specific_data (pandas.DataFrame): model specific data downloaded from BiGG
        bigg_models (pandas.DataFrame): Additional information on BiGG models
        biomass_eq (str): default biomass equation
        taxa (str): filter by taxa: 'bacteria', 'cyanobacteria' or 'archaea' (optional)
        thermodynamics_data (pandas.DataFrame): used for reversibility estimation (optional)
        metabolomics_data (pandas.DataFrame): used for reversibility estimation (optional)
        thermodynamics_method (str): thermodynamics method to use (optional)
        manually_curated (pandas.DataFrame): manually curated reaction bounds (optional)
        unbalanced_metabolites (list): unbalanced metabolites that require sink reactions (optional)
        use_heuristics (bool): apply heuristic reversibility rules; they are currently called with
            no_reverse_atp=True and no_proton_pumps=False (default: True)
        remove_unbalanced (bool): remove unbalanced reactions from model (default: True)
        remove_blocked (bool): remove blocked reactions and dead-end metabolites (default: True)
        outputfile (str): output SBML file (optional)

    Returns:
        CBModel: curated universal model

    Raises:
        ValueError: if *taxa* is given but is not one of the supported values

    Notes:
        Combines thermodynamics and heuristic rules to determine reaction reversibility.
        Adds exchange reactions for all extracellular metabolites.
        Adds sink reactions for a list of known unbalanced compounds.
        Adds biomass equations from local biomass database (avoids discarding biomass precursors and other
        essential reactions/metabolites that would otherwise be structurally blocked).
    """

    print('Starting universe curation...')
    print('(initial size: {} x {})\n'.format(len(model.metabolites), len(model.reactions)))

    trusted_models = bigg_models.query('trusted == True').index.tolist()

    add_bounds_from_extracted_data(model, model_specific_data, trusted_models)

    if taxa:
        print('Filtering by taxa:', taxa)
        kingdom_map = bigg_models['domain'].to_dict()

        if taxa in {'cyanobacteria', 'bacteria'}:
            kingdoms = {'Bacteria'}
        elif taxa == 'archaea':
            kingdoms = {'Archaea', 'Bacteria'}
        else:
            raise ValueError('Unsupported taxa:' + taxa)

        filter_reactions_by_kingdoms(model, kingdoms, kingdom_map, inplace=True)

        # every supported taxa is covered here; unsupported values were rejected above
        if taxa in {'bacteria', 'archaea'}:
            valid_compartments = {'C_c', 'C_p', 'C_e'}
        elif taxa == 'cyanobacteria':
            valid_compartments = {'C_c', 'C_p', 'C_e', 'C_u'}

        other_compartments = set(model.compartments.keys()) - valid_compartments
        model.remove_compartments(other_compartments, delete_metabolites=True, delete_reactions=True)

        print('(size: {} x {})\n'.format(len(model.metabolites), len(model.reactions)))

    if thermodynamics_data is not None:
        print('Computing thermodynamics...', end=' ')

        dG0 = thermodynamics_data['dG0'].to_dict()
        sdG0 = thermodynamics_data['sdG0'].to_dict()

        if metabolomics_data is not None:
            x0 = metabolomics_data.median(axis=1).to_dict()
        else:
            x0 = None

        compute_flux_bounds(model, dG0, sdG0, x0, method=thermodynamics_method, inplace=True,
                            override_trusted=False)
        print('done\n')

    print('Applying manual curation rules...', end=' ')

    if use_heuristics:
        reversibility_heuristics(model, no_reverse_atp=True, no_proton_pumps=False, override_trusted=False)

    # manually curated reactions
    if manually_curated is not None:
        for r_id, (lb, ub) in manually_curated.iterrows():
            if r_id in model.reactions:
                model.set_flux_bounds(r_id, lb, ub)

    print('done\n')

    if remove_unbalanced:

        # remove arbitrary 'Z' formula from photons
        if taxa == 'cyanobacteria':
            for m_id in ['M_photon_e', 'M_photon_p', 'M_photon_c']:
                model.metabolites[m_id].metadata['FORMULA'] = ''

        print('Removing unbalanced reactions...')
        remove_unbalanced_reactions(model)
        print('(size: {} x {})\n'.format(len(model.metabolites), len(model.reactions)))

    print('Creating pseudo-reactions...')

    create_exchange_reactions(model, default_lb=-1000, default_ub=1000)

    if unbalanced_metabolites:
        create_sink_reactions(model, unbalanced_metabolites)

    add_biomass_equation(model, biomass_eq)

    add_maintenance_atp(model)

    print('(size: {} x {})\n'.format(len(model.metabolites), len(model.reactions)))

    if remove_blocked:
        print('Removing blocked reactions and dead-end metabolites...')
        simplify(model)
        print('(size: {} x {})\n'.format(len(model.metabolites), len(model.reactions)))

    if outputfile:
        save_cbmodel(model, outputfile)

    print('Done.')

    # the docstring promises the curated model; previously the function returned None
    return model