def __getitem__(self, item):
    """Look up a bundled GECKO model by name, loading it on first access.

    Parameters
    ----------
    item : str
        Either 'single-pool' for the single-protein pool ecYeastGEM model
        or 'multi-pool' for individually modeled protein pools.

    Returns
    -------
    The model stored in ``self.models`` for the file name that
    ``self.model_files`` associates with ``item``.

    Raises
    ------
    KeyError
        If ``item`` is not a key of ``self.model_files``.
    """
    try:
        file_name = self.model_files[item]
    except KeyError:
        known_names = ', '.join(self.model_files)
        raise KeyError('model name must be one of {}'.format(known_names))

    # Already loaded: hand back the cached model without touching the disk.
    if file_name in self.models:
        return self.models[file_name]

    def _strip_bracket_codes(sid):
        """Convert compartment info in metabolite id to compliant notation."""
        # presumably '__91__' / '__93__' are escaped '[' / ']' -- confirm
        opened = sid.replace('__91__', '_')
        return opened.replace('__93__', '')

    # Work on a copy so the shared F_REPLACE mapping is left untouched.
    replacements = F_REPLACE.copy()
    replacements["F_SPECIE"] = _strip_bracket_codes

    package_dir = os.path.dirname(__file__)
    sbml_path = os.path.join(package_dir, 'data_files/{}'.format(file_name))
    loaded = read_sbml_model(sbml_path, f_replace=replacements)

    # Replace infinite upper bounds with the finite value 1000.
    for reaction in loaded.reactions:
        if isinf(reaction.upper_bound):
            reaction.upper_bound = 1000

    self.models[file_name] = loaded
    return self.models[file_name]
def optimize_models_cameo(model_paths: List[Path]) -> pd.DataFrame:
    """Run an FBA optimization on every given model and time each step.

    Parameters
    ----------
    model_paths : List[Path]
        Paths of the SBML files to load and optimize.

    Returns
    -------
    pd.DataFrame
        One row per model with the columns ``model`` (file name up to the
        first '.'), ``objective_value``, ``load_time`` and
        ``simulate_time`` (both in seconds).
    """
    results = []
    n_models = len(model_paths)
    # Count from 1 so the progress output runs from [1/n] to [n/n].
    for k, path in enumerate(model_paths, start=1):
        # load model
        # perf_counter is monotonic, so the measured interval cannot be
        # distorted by system clock adjustments.
        start_time = time.perf_counter()
        model = read_sbml_model(str(path))
        load_time = time.perf_counter() - start_time  # [s]

        # run optimization
        start_time = time.perf_counter()
        result = fba(model)
        simulate_time = time.perf_counter() - start_time  # [s]

        # Keep the model object and its display name in separate variables.
        model_id = path.name.split(".")[0]
        res = (model_id, result.objective_value, load_time, simulate_time)
        results.append(res)
        print("[{}/{}]".format(k, n_models), res)

    return pd.DataFrame(
        data=results,
        columns=("model", "objective_value", "load_time", "simulate_time"),
    )
def parse_compounds_sbml(sbml_file, hide_metabolites):
    """
    Parse sbml files to extract compounds to create edges and nodes for igraph.

    For every reaction, each (reactant, product) pair whose two compounds
    are not hidden yields one edge reactant -> product; if the reaction is
    reversible the opposite edge product -> reactant is added as well.

    Parameters
    ----------
    sbml_file: str
        pathname of the sbml file
    hide_metabolites: list
        list of metabolites to hide

    Returns
    -------
    edges: list
        edges between two compounds (symbolizing the reaction), as
        (source index, target index) tuples
    edges_label: list
        for each edge the name of the reaction
    weights: list
        the weight associated to each edge (always 1)
    nodes: dict
        for each compound name, its index in nodes_label
    nodes_label: list
        for each node the name of the compound
    """
    sbml_model = read_sbml_model(sbml_file)
    edges = []
    edges_label = []
    weights = []
    nodes = {}
    nodes_label = []

    def _node_index(compound):
        """Return the index of compound, registering it on first use."""
        if compound not in nodes:
            nodes[compound] = len(nodes_label)
            nodes_label.append(compound)
        return nodes[compound]

    def _add_edge(source, target, label):
        """Append one edge together with its weight and label."""
        edges.append((source, target))
        weights.append(1)
        edges_label.append(label)

    for reaction in sbml_model.reactions:
        for reactant in reaction.reactants:
            reactant_name = convert_from_coded_id(reactant.id)[0]
            if reactant_name in hide_metabolites:
                continue
            for product in reaction.products:
                product_name = convert_from_coded_id(product.id)[0]
                if product_name in hide_metabolites:
                    continue
                # Register the reactant before the product so node indices
                # follow the order in which compounds are first met.
                source = _node_index(reactant_name)
                target = _node_index(product_name)
                _add_edge(source, target, reaction.id)
                if reaction.reversibility:
                    _add_edge(target, source, reaction.id)

    return edges, edges_label, weights, nodes, nodes_label
def setUp(self):
    """Prepare the fixture shared by the tests.

    Reads the SBML model located at ``EC_CORE_MODEL``, wraps it in a
    simulator stored as ``self.simul``, stores the first key of the
    simulator's objective as ``self.BIOMASS_ID`` and sets ``self.SUCC``
    to 'EX_succ_e'.
    """
    # Imports stay local so they are only resolved when the fixture runs.
    from cobra.io.sbml import read_sbml_model
    core_model = read_sbml_model(EC_CORE_MODEL)

    from mewpy.simulation import get_simulator
    self.simul = get_simulator(core_model)

    # The first objective key is taken as the biomass identifier.
    self.BIOMASS_ID = list(self.simul.objective.keys())[0]
    self.SUCC = 'EX_succ_e'
def flux_analysis(sbml_file, seeds_file=None, targets_file=None, all_species=False):
    """
    1./ Run flux balance analysis with the cobra package on an already
    defined reaction. Need to set in the sbml the value
    'objective_coefficient' to 1.
    If the reaction is reachable by flux: return the flux value and the flux
    value for each reactant of the reaction.
    If not: only return the flux value for each reactant of the reaction.
    If a reactant has a flux of '0' this means that it is not reachable by
    flux (and maybe topologically). To unblock the reaction it is required to
    fix the metabolic network by adding/removing reactions until all
    reactants are reachable.

    2./ If seeds and targets are given as sbml files with only compounds,
    will also try to use the Menetools library to make a topological
    analysis: topological reachability of the target compounds from the seed
    compounds.

    3./ If --all_species: will test flux reachability of all the compounds in
    the metabolic network (may take several minutes)

    All results are printed; nothing is returned.

    Parameters
    ----------
    sbml_file: str
        path to sbml file to analyse
    seeds_file: str
        path to sbml file with only compounds representing the seeds/growth
        medium
    targets_file: str
        path to sbml file with only compounds representing the targets to
        reach
    all_species: bool
        if True will try to create obj function for each compound and return
        which are reachable by flux.

    Raises
    ------
    FileNotFoundError
        If sbml_file, or a given seeds_file / targets_file, does not exist.
    SystemExit
        If no reaction of the model has an objective coefficient of 1.0.
    """
    if targets_file:
        if not os.path.exists(targets_file):
            raise FileNotFoundError("No target SBML file accessible at " + targets_file)
        targets = read_sbml_model(targets_file).metabolites
    if seeds_file and not os.path.exists(seeds_file):
        raise FileNotFoundError("No seeds SBML file accessible at " + seeds_file)
    if not os.path.exists(sbml_file):
        # This is the model itself, not the targets file.
        raise FileNotFoundError("No SBML file accessible at " + sbml_file)
    model = read_sbml_model(sbml_file)

    # nb metabolites: drop the "_<compartment>" part of each id; the set
    # collapses ids that become identical.
    real_metabolites = {
        i.id.replace("_" + i.compartment, "") for i in model.metabolites
    }
    rxn_with_ga = [i for i in model.reactions if i.gene_reaction_rule]
    nb_reactions = len(model.reactions)
    # A model without reactions would otherwise divide by zero.
    gene_ratio = 100 * len(rxn_with_ga) / nb_reactions if nb_reactions else 0
    print("#############")
    print("Model summary")
    print("Number of compounds: %s" % len(real_metabolites))
    print("Number of reactions: %s" % nb_reactions)
    print("Number of genes: %s" % len(model.genes))
    print("Ratio rxn with genes/rxns: %s%%" % gene_ratio)

    # Launch a topological analysis if menetools is installed.
    if seeds_file and targets_file:
        print("#############")
        print("Analyzing targets")
        print("#Topological analysis")
        try:
            from menetools import run_menecheck
            menetools_result = run_menecheck(draft_sbml=sbml_file,
                                             seeds_sbml=seeds_file,
                                             targets_sbml=targets_file)
            print("Number of targets: %s" % (len(targets)))
            print("Unproductible targets: " + ",".join(menetools_result[0]))
            print("Productible targets: " + ",".join(menetools_result[1]))
        except ImportError:
            print("Menetools is not installed. Can't run topological analysis.")
        # `targets` is only bound when targets_file was given, which this
        # branch guarantees.
        print("#Flux Balance Analysis")
        fba_on_targets(targets, model)

    if all_species:
        targets = model.metabolites
        print("#Flux Balance Analysis on all model metabolites (long process...)")
        fba_on_targets(targets, model)
        return

    # The reaction under test is the first one whose objective coefficient
    # is exactly 1.0.
    biomassrxn = next(
        (rxn for rxn in model.reactions if rxn.objective_coefficient == 1.0),
        None)
    if biomassrxn is None:
        print("Need to set OBJECTIVE COEFFICIENT to '1.0' for the reaction to test")
        # exit() is a helper injected by the site module for interactive
        # use; raising SystemExit directly keeps the same exit status.
        raise SystemExit
    biomassname = biomassrxn.id

    print("#############")
    print("Computing optimization")
    solution = model.optimize()
    print("Testing reaction %s" % biomassname)
    print("Growth rate: %s" % solution.objective_value)
    print("Status: %s" % solution.status)
    model.summary()

    # Objective values at or below 1e-5 are treated as "no flux".
    if solution.objective_value > 1e-5:
        blocked = cobra_flux_analysis.find_blocked_reactions(
            model, model.reactions)
        essRxns = cobra_flux_analysis.find_essential_reactions(model)
        essGenes = cobra_flux_analysis.find_essential_genes(model)
        print('FVA analysis:')
        print('\tBlocked reactions: %s' % len(blocked))
        print('\tEssential reactions: %s' % len(essRxns))
        for rxn in essRxns:
            print(rxn.id)
        print('\tEssential genes: %s' % len(essGenes))

    # get biomass rxn reactants (negative coefficient) and products
    # (positive coefficient)
    bms_reactants = {k: v for k, v in biomassrxn.metabolites.items() if v < 0}
    bms_products = {k: v for k, v in biomassrxn.metabolites.items() if v > 0}
    dict_output = {"positive": {}, "negative": {}}

    # for each metabolite in reactant, create a biomass rxn with only this
    # metabolite in reactants
    biomassrxn.objective_coefficient = 0.0
    for reactant, stoich in bms_reactants.items():
        test_rxn = Reaction("test_rxn")
        test_rxn.lower_bound = 0
        test_rxn.upper_bound = 1000
        metabolitedict = dict(bms_products)
        metabolitedict[reactant] = stoich
        model.add_reactions([test_rxn])
        test_rxn.add_metabolites(metabolitedict)
        test_rxn.objective_coefficient = 1.0
        solution = model.optimize()
        if solution.objective_value > 1e-5:
            dict_output["positive"][reactant] = solution.objective_value
        else:
            dict_output["negative"][reactant] = solution.objective_value
        # Remove the temporary reaction before testing the next reactant.
        model.remove_reactions([test_rxn])

    print("%s/%s compounds with positive flux" %
          (len(dict_output["positive"]), len(bms_reactants)))
    print("%s/%s compounds without flux" %
          (len(dict_output["negative"]), len(bms_reactants)))

    for k, v in dict_output["positive"].items():
        decoded = convert_from_coded_id(k.id)
        print("%s // %s %s positive" % (k, decoded[0] + "_" + decoded[2], v))
    for k, v in dict_output["negative"].items():
        decoded = convert_from_coded_id(k.id)
        print("%s // %s %s NULL" % (k, decoded[0] + "_" + decoded[2], v))
#cobrapytest.py from cobra.io.sbml import read_sbml_model from cobra.io.sbml import write_sbml_model from cobra.test import test_all #test_all() #Load models ECME = read_sbml_model('../SBML/ECME.xml') SCHUETZR = read_sbml_model('../SBML/SCHUETZR.xml') iJO1366b = read_sbml_model('../SBML/iJO1366b.xml') #Perform FBA: ECMEsolution = ECME.optimize() print('ECME status:', ECMEsolution.status) print('ECME:', ECMEsolution.objective_value) SCHUETZRsolution = SCHUETZR.optimize() print('SCHUETZR status:', SCHUETZRsolution.status) print('SCHUETZR objective value:', SCHUETZRsolution.objective_value) iJO1366bsolution = iJO1366b.optimize() print('iJO1366b status:', iJO1366bsolution.status)
else: print( 'Unexpected reaction coefficient in reaction map. Skipping reaction.' ) continue if verbose: print('New upper bound:', modelreaction.upper_bound) print('New lower bound:', modelreaction.lower_bound) return model if __name__ == "__main__": from cobra.io.sbml import read_sbml_model import loadData as load model = read_sbml_model('../SBML/SCHUETZR.xml') rmap = load.ReactionMapfromXML('reactionmaps.xml', 'Perrenoud', 'SCHUETZR') expfluxdict = load.ExpFluxesfromXML('expdata.xml', 'Perrenoud', 'Batch', 'aerobe') experrdict = load.ExpErrorsfromXML('expdata.xml', 'Perrenoud', 'Batch', 'aerobe') constrainedmodel = constrainfluxes(model, expfluxdict, experrdict, rmap, debug=True)
gxFBAsolution = gxFBA(cobramodel) #Does this ,make sense? Z_gxFBA.append(gxFBAsolution.solution.f) if biomassreaction is not None: bmFVA = cobra.flux_analysis.variability.flux_variability_analysis( cobramodel, the_reactions=[biomassreaction]) BMmax.append(bmFVA[biomassreaction]['maximum']) BMmin.append(bmFVA[biomassreaction]['minimum']) return R, FVAres if __name__ == "__main__": model = read_sbml_model('../SBML/SCHUETZR.xml') model.optimize() #fva = gxFBA(model,[],[]) example_a = read_sbml_model('../SBML/gx-fba.xml') ## gx-FBA example model 1: ## Parameters: ## Objective: Biomass ## ATP maintenance: 12 ## Max glucose import (glc_e -> glc_c): 10 ## Max puruvate export (pyr_c -> pyr_e): 5 example_a.optimize() #print 'Example A FBA result:' #print example_a.solution #print example_a.solution.x_dict
def _genes_from_association(gene_association):
    """Return the ';'-joined gene ids found in a GENE_ASSOCIATION note."""
    expression = re.sub(r" or ", "|", gene_association)
    expression = re.sub(r" and ", "&", expression)
    expression = re.sub(r"\s", "", expression)
    if re.search(r"[|&]", expression):
        # Boolean expression: flatten every subset yielded by compile_input.
        genes = [gene
                 for subset in compile_input(expression)
                 for gene in subset]
    else:
        # Single gene: only the surrounding parentheses have to go.
        genes = [expression.replace('(', '').replace(')', '')]
    return ';'.join(genes)


def compare_multiple_sbml(sbml_path, output_folder):
    """
    Compare 1-n sbml, create two output files reactions.tsv and
    metabolites.tsv with the reactions/metabolites in each sbml

    reactions.tsv holds one row per reaction id: a 0/1 presence flag for
    each sbml, the genes associated to the reaction in each sbml and the
    reaction formula taken from the first sbml in which the reaction was
    met. metabolites.tsv holds one row per metabolite id (sorted) with a
    0/1 presence flag for each sbml. Columns follow the sorted sbml paths.

    Parameters
    ----------
    sbml_path: str
        path to a folder containing sbmls or multiple sbml paths separated
        by a ','
    output_folder: str
        path to the output folder

    Raises
    ------
    FileNotFoundError
        If one of the sbml files does not exist.
    """
    if not os.path.exists(output_folder):
        print("Creating %s" % output_folder)
        os.makedirs(output_folder)
    else:
        print(
            "%s already exist, old comparison output folders will be overwritten"
            % output_folder)

    if os.path.isdir(sbml_path):
        # Only the files directly inside the folder are used.
        all_files = [
            os.path.join(sbml_path, f) for f in next(os.walk(sbml_path))[2]
        ]
    else:
        all_files = sbml_path.split(",")
    for sbml_file in all_files:
        if not os.path.exists(sbml_file):
            raise FileNotFoundError(
                "No SBML file (--sbml/sbml_path) accessible at " + sbml_file)

    # Column order is the sorted path order; compute it once.
    sorted_files = sorted(all_files)
    species_columns = [
        os.path.splitext(os.path.basename(path))[0] for path in sorted_files
    ]
    gene_columns = [name + '_genes_assoc (sep=;)' for name in species_columns]

    all_reactions = {}
    all_compounds = set()
    reactions = {}
    reaction_ids = {}
    compounds = {}
    for sbml_file in all_files:
        sbml_model = read_sbml_model(sbml_file)
        reactions[sbml_file] = sbml_model.reactions
        # Id sets give constant-time presence checks when building rows.
        reaction_ids[sbml_file] = {rxn.id for rxn in sbml_model.reactions}
        for rxn in sbml_model.reactions:
            # The first model providing a reaction id supplies its formula.
            all_reactions.setdefault(rxn.id, rxn)
        compounds[sbml_file] = {
            metabolite.id for metabolite in sbml_model.metabolites
        }
        all_compounds.update(compounds[sbml_file])

    reaction_file_rows = []
    for reaction_id, reaction in all_reactions.items():
        reaction_presents = []
        reaction_genes = []
        for sbml_file in sorted_files:
            if reaction_id not in reaction_ids[sbml_file]:
                reaction_presents.append(0)
                reaction_genes.append('')
                continue
            reaction_presents.append(1)
            species_reaction = reactions[sbml_file].get_by_id(reaction_id)
            if 'GENE_ASSOCIATION' in species_reaction.notes:
                reaction_genes.append(_genes_from_association(
                    species_reaction.notes['GENE_ASSOCIATION']))
            else:
                reaction_genes.append('')
        reaction_file_rows.append(
            [reaction_id] + reaction_presents + reaction_genes
            + [reaction.reaction])

    # newline='' lets the csv module control line endings itself.
    reaction_file = os.path.join(output_folder, 'reactions.tsv')
    with open(reaction_file, 'w', newline='') as output_reaction:
        csvwriter = csv.writer(output_reaction, delimiter='\t')
        csvwriter.writerow(
            ['reaction', *species_columns, *gene_columns, '_formula'])
        csvwriter.writerows(reaction_file_rows)

    # Sorted so the row order is reproducible from one run to the next.
    compounds_rows = [
        [compound_id] + [
            1 if compound_id in compounds[sbml_file] else 0
            for sbml_file in sorted_files
        ]
        for compound_id in sorted(all_compounds)
    ]
    compounds_file = os.path.join(output_folder, 'metabolites.tsv')
    with open(compounds_file, 'w', newline='') as output_compound:
        csvwriter = csv.writer(output_compound, delimiter='\t')
        csvwriter.writerow(['metabolite', *sorted_files])
        csvwriter.writerows(compounds_rows)
def compare_sbml(sbml1_path, sbml2_path):
    """
    Compare 2 sbml, print nb of metabolites and reactions.
    If reaction missing print reaction id, and reaction formula.
    For each reaction present in both sbml, also print whether it uses
    different species or a different reversibility.

    Parameters
    ----------
    sbml1_path: str
        path to the first sbml file to compare
    sbml2_path: str
        path to the second sbml file to compare

    Raises
    ------
    FileNotFoundError
        If one of the two paths does not exist.
    """
    if not os.path.exists(sbml1_path):
        raise FileNotFoundError(
            "No SBML file or directory (sbml1_path) accessible at " + sbml1_path)
    if not os.path.exists(sbml2_path):
        raise FileNotFoundError(
            "No SBML file or directory (sbml2_path) accessible at " + sbml2_path)

    sbml_1 = read_sbml_model(sbml1_path)
    sbml_2 = read_sbml_model(sbml2_path)

    for label, model in (("sbml1", sbml_1), ("sbml2", sbml_2)):
        print("%s:" % label)
        print("metabolites: %s" % (len(model.metabolites)))
        print("reactions: %s" % (len(model.reactions)))

    not_in1 = [rxn for rxn in sbml_2.reactions if rxn not in sbml_1.reactions]
    print("reactions not in sbml1: %s" % len(not_in1))
    for rxn in not_in1:
        print("\t%s" % rxn)

    not_in2 = [rxn for rxn in sbml_1.reactions if rxn not in sbml_2.reactions]
    print("reactions not in sbml2: %s" % len(not_in2))
    for rxn in not_in2:
        print("\t%s" % rxn)

    # A reaction shared by both models is reached when walking sbml_1, so a
    # single pass reports every shared reaction exactly once.
    for rxn1 in sbml_1.reactions:
        rxn_id = rxn1.id
        try:
            rxn2 = sbml_2.reactions.get_by_id(rxn_id)
        except KeyError:
            # Not in sbml_2: already listed among the missing reactions.
            continue
        same_cpd, same_rev = compare_rxn(rxn1, rxn2)
        if not same_cpd:
            print("%s use different species" % rxn_id)
        if not same_rev:
            print("%s use different reversibility" % rxn_id)