def test_gpr():
    """Exercise gene_reaction_rule assignment and its propagation to a model."""
    initial_rule = "(g1 or g2) and g3"
    mdl = Model()
    rxn = Reaction("test")

    # A rule can be assigned while the reaction belongs to no model.
    rxn.gene_reaction_rule = initial_rule
    assert rxn.gene_reaction_rule == initial_rule
    assert len(rxn.genes) == 3

    # Adding the reaction is expected to register its genes on the model.
    mdl.add_reactions([rxn])
    assert len(mdl.genes) == 3

    # The model and the reaction must share the very same gene objects.
    gene_from_rxn = next(iter(rxn.genes))
    gene_from_model = mdl.genes.get_by_id(gene_from_rxn.id)
    assert gene_from_rxn is gene_from_model

    # Upper-case AND/OR are expected to be normalised to lower case.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        rxn.gene_reaction_rule = "(b1 AND b2) OR (b3 and b4)"
    assert rxn.gene_reaction_rule == "(b1 and b2) or (b3 and b4)"
    assert len(rxn.genes) == 4

    # Malformed rules: gene ids must still be extracted from the string.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        for malformed_rule, gene_count in (("(a1 or a2", 2), ("(forT or ", 1)):
            rxn.gene_reaction_rule = malformed_rule
            assert len(rxn.genes) == gene_count
def test__has_gene_reaction_rule():
    """_has_gene_reaction_rule is True only for a non-blank rule."""
    rxn = Reaction('rxn')

    # No rule has been assigned yet.
    assert _has_gene_reaction_rule(rxn) is False

    # A gene id counts as a rule; a whitespace-only string does not.
    for rule, expected in (('b1779', True), (' ', False)):
        rxn.gene_reaction_rule = rule
        assert _has_gene_reaction_rule(rxn) is expected
def test_gpr(self):
    """Exercise gene_reaction_rule assignment and its propagation to a model."""
    initial_rule = "(g1 or g2) and g3"
    mdl = Model()
    rxn = Reaction("test")

    # Assign a GPR to a reaction that is not part of a model.
    rxn.gene_reaction_rule = initial_rule
    assert rxn.gene_reaction_rule == initial_rule
    assert len(rxn.genes) == 3

    # Adding the reaction is expected to register its genes on the model.
    mdl.add_reaction(rxn)
    assert len(mdl.genes) == 3

    # Same gene objects on both sides, not merely equal ids.
    gene_from_rxn = next(iter(rxn.genes))
    gene_from_model = mdl.genes.get_by_id(gene_from_rxn.id)
    assert gene_from_rxn is gene_from_model

    # Upper-case AND/OR are expected to be normalised to lower case.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        rxn.gene_reaction_rule = "(b1 AND b2) OR (b3 and b4)"
    assert rxn.gene_reaction_rule == "(b1 and b2) or (b3 and b4)"
    assert len(rxn.genes) == 4

    # Malformed rules: gene ids must still be extracted from the string.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        for malformed_rule, gene_count in (("(a1 or a2", 2), ("(forT or ", 1)):
            rxn.gene_reaction_rule = malformed_rule
            assert len(rxn.genes) == gene_count
def test_gene_knockout(salmonella: Model) -> None:
    """Test gene knockout."""
    gene_list = ["STM1067", "STM0227"]
    dependent_reactions = {
        "3HAD121",
        "3HAD160",
        "3HAD80",
        "3HAD140",
        "3HAD180",
        "3HAD100",
        "3HAD181",
        "3HAD120",
        "3HAD60",
        "3HAD141",
        "3HAD161",
        "T2DECAI",
        "3HAD40",
    }
    _gene_knockout_computation(salmonella, gene_list, dependent_reactions)
    _gene_knockout_computation(salmonella, ["STM4221"], {"PGI"})
    _gene_knockout_computation(salmonella, ["STM1746.S"], {"4PEPTabcpp"})

    # Cumulative behaviour: three successive deletions add up.
    first_gene, remaining_genes = gene_list[:1], gene_list[1:]
    delete_model_genes(salmonella, first_gene)
    delete_model_genes(salmonella, remaining_genes, cumulative_deletions=True)
    delete_model_genes(salmonella, ["STM4221"], cumulative_deletions=True)
    dependent_reactions.add("PGI")
    assert _get_removed(salmonella) == dependent_reactions

    # A non-cumulative deletion after cumulative ones leaves only its own
    # reactions removed.
    delete_model_genes(salmonella, ["STM4221"], cumulative_deletions=False)
    assert _get_removed(salmonella) == {"PGI"}

    # After the reset, the bound must be back at its expected value.
    reset_bound = salmonella.reactions.get_by_id("T2DECAI").upper_bound
    assert reset_bound == 1000.0

    test_model = Model()
    test_reaction_1 = Reaction("test1")

    # Gene name that is a substring of another gene name.
    test_reaction_1.gene_reaction_rule = "eggs or (spam and eggspam)"
    test_model.add_reactions([test_reaction_1])
    for knocked, expected in (
        (["eggs"], set()),
        (["eggs", "spam"], {"test1"}),
    ):
        _gene_knockout_computation(test_model, knocked, expected)

    # Nested boolean expression.
    test_reaction_1.gene_reaction_rule = "g1 and g2 and (g3 or g4 or (g5 and g6))"
    for knocked, expected in (
        (["g3"], set()),
        (["g1"], {"test1"}),
        (["g5"], set()),
        (["g3", "g4", "g5"], {"test1"}),
    ):
        _gene_knockout_computation(test_model, knocked, expected)

    # Gene names that collide with Python keywords or syntax.
    test_reaction_1.gene_reaction_rule = "g1 and (for or in)"
    for knocked, expected in (
        (["for", "in"], {"test1"}),
        (["for"], set()),
    ):
        _gene_knockout_computation(test_model, knocked, expected)

    test_reaction_1.gene_reaction_rule = "g1 and g2 and g2.conjugate"
    for knocked in (["g2"], ["g2.conjugate"]):
        _gene_knockout_computation(test_model, knocked, {"test1"})

    test_reaction_1.gene_reaction_rule = "g1 and (try:' or 'except:1)"
    for knocked, expected in (
        (["try:'"], set()),
        (["try:'", "'except:1"], {"test1"}),
    ):
        _gene_knockout_computation(test_model, knocked, expected)
def test__normalize_pseudoreaction_biomass_has_gpr():
    """A biomass-named reaction with a GPR must raise ConflictingPseudoreaction."""
    original_id = 'my_biomass_2'
    rxn = Reaction(original_id)
    rxn.gene_reaction_rule = 'b1779'

    with pytest.raises(ConflictingPseudoreaction) as excinfo:
        _ = _normalize_pseudoreaction(rxn.id, rxn)

    message = str(excinfo.value)
    assert 'has a gene_reaction_rule' in message
    # The failed normalisation must leave the id untouched.
    assert rxn.id == original_id
def test_add_reaction_context(model):
    """Adding a reaction inside ``with model`` must be undone on exit."""
    n_reactions_before = len(model.reactions)
    n_metabolites_before = len(model.metabolites)

    # Two brand-new metabolites, one already in the model, and one copy.
    new_met_a = Metabolite("test_foo_1")
    new_met_b = Metabolite("test_foo_2")
    existing_met = model.metabolites[0]
    copied_met = model.metabolites[1].copy()

    rxn = Reaction("test_foo_reaction")
    stoichiometry = {
        new_met_a: -1,
        new_met_b: 1,
        copied_met: -2,
        existing_met: 1,
    }
    rxn.add_metabolites(stoichiometry)
    rxn.gene_reaction_rule = 'dummy_gene'

    with model:
        model.add_reaction(rxn)
        looked_up = model.reactions.get_by_id(rxn.id)
        assert looked_up == rxn
        assert len(model.reactions) == n_reactions_before + 1
        # Only the two new metabolites are expected to be added.
        assert len(model.metabolites) == n_metabolites_before + 2
        assert new_met_a._model == model
        assert 'dummy_gene' in model.genes

    # Outside the context the reaction, metabolites and gene must be gone.
    assert len(model.reactions) == n_reactions_before
    assert len(model.metabolites) == n_metabolites_before
    with pytest.raises(KeyError):
        model.reactions.get_by_id(rxn.id)
    assert new_met_a._model is None
    assert 'dummy_gene' not in model.genes
def test_add_reaction_context(self, model):
    """Adding a reaction inside ``with model`` must be undone on exit."""
    n_reactions_before = len(model.reactions)
    n_metabolites_before = len(model.metabolites)

    # Two brand-new metabolites, one already in the model, and one copy.
    new_met_a = Metabolite("test_foo_1")
    new_met_b = Metabolite("test_foo_2")
    existing_met = model.metabolites[0]
    copied_met = model.metabolites[1].copy()

    rxn = Reaction("test_foo_reaction")
    stoichiometry = {
        new_met_a: -1,
        new_met_b: 1,
        copied_met: -2,
        existing_met: 1,
    }
    rxn.add_metabolites(stoichiometry)
    rxn.gene_reaction_rule = 'dummy_gene'

    with model:
        model.add_reaction(rxn)
        looked_up = model.reactions.get_by_id(rxn.id)
        assert looked_up == rxn
        assert len(model.reactions) == n_reactions_before + 1
        # Only the two new metabolites are expected to be added.
        assert len(model.metabolites) == n_metabolites_before + 2
        assert new_met_a._model == model
        assert 'dummy_gene' in model.genes

    # Outside the context the reaction, metabolites and gene must be gone.
    assert len(model.reactions) == n_reactions_before
    assert len(model.metabolites) == n_metabolites_before
    with pytest.raises(KeyError):
        model.reactions.get_by_id(rxn.id)
    assert new_met_a._model is None
    assert 'dummy_gene' not in model.genes
def convert_modelreaction(self, reaction, bigg=False):
    """Build a cobra ``Reaction`` from a model reaction object.

    Parameters
    ----------
    reaction
        Source reaction; must provide ``id``, ``name``, ``annotation``,
        ``gene_reaction_rule``, ``get_reaction_constraints()`` and
        ``get_gpr()``.
    bigg : bool
        When True and the annotation has a ``"bigg.reaction"`` entry, that
        entry is used as the reaction id instead of the built one.

    Returns
    -------
    Reaction
        The converted reaction. Every gene found in the GPR is also recorded
        in ``self.genes`` if not already present.
    """
    source_id = reaction.id
    display_name = reaction.name
    annotation = reaction.annotation
    lower_bound, upper_bound = reaction.get_reaction_constraints()

    rxn_id = build_rxn_id(source_id)
    if bigg and "bigg.reaction" in annotation:
        rxn_id = annotation["bigg.reaction"]

    gpr = reaction.get_gpr()

    cobra_reaction = Reaction(
        rxn_id,
        name=display_name,
        lower_bound=lower_bound,
        upper_bound=upper_bound,
    )
    # SBO:0000176 = biochemical reaction. Set before the source annotation is
    # merged in, so a source entry under the same key takes precedence.
    cobra_reaction.annotation[self.SBO_ANNOTATION] = "SBO:0000176"
    cobra_reaction.annotation.update(annotation)

    if rxn_id.startswith('rxn'):
        # Part of the id before the first underscore.
        cobra_reaction.annotation["seed.reaction"] = rxn_id.split("_")[0]

    stoichiometry = self.convert_modelreaction_stoichiometry(reaction)
    cobra_reaction.add_metabolites(stoichiometry)
    cobra_reaction.gene_reaction_rule = reaction.gene_reaction_rule

    # The GPR is iterated as groups of genes; register each unseen gene.
    for gene in (g for gene_group in gpr for g in gene_group):
        if gene not in self.genes:
            self.genes[gene] = gene

    return cobra_reaction
def test__normalize_pseudoreaction_exchange_error_has_gpr():
    """An exchange-named reaction with a GPR must raise ConflictingPseudoreaction."""
    original_id = 'EX_gone'
    rxn = Reaction(original_id)
    rxn.add_metabolites({Metabolite('glu__L_e'): -1})
    rxn.gene_reaction_rule = 'b1779'

    with pytest.raises(ConflictingPseudoreaction) as excinfo:
        _ = _normalize_pseudoreaction(rxn.id, rxn)

    message = str(excinfo.value)
    assert 'has a gene_reaction_rule' in message
    # The failed normalisation must leave the id untouched.
    assert rxn.id == original_id
def test__normalize_pseudoreaction_atpm_has_gpr():
    """An ATP-hydrolysis reaction that carries a GPR must keep its id."""
    original_id = 'NPT1'
    stoichiometry = {
        Metabolite('atp_c'): -1,
        Metabolite('h2o_c'): -1,
        Metabolite('pi_c'): 1,
        Metabolite('h_c'): 1,
        Metabolite('adp_c'): 1,
    }
    rxn = Reaction(original_id)
    rxn.add_metabolites(stoichiometry)
    rxn.gene_reaction_rule = 'b1779'

    # NOTE(review): other tests in this file call
    # _normalize_pseudoreaction(reaction.id, reaction); confirm that the
    # single-argument form used here matches the current signature.
    _normalize_pseudoreaction(rxn)

    # should not change
    assert rxn.id == original_id
def test_gene_knock_out(self, model):
    """Gene knock-outs must affect reaction functionality and revert on context exit."""
    rxn = Reaction('rxn')
    rxn.add_metabolites({Metabolite('A'): -1, Metabolite('B'): 1})
    rxn.gene_reaction_rule = 'A2B1 or A2B2 and A2B3'
    any_gene = list(rxn.genes)[0]
    assert hasattr(any_gene, 'knock_out')
    model.add_reaction(rxn)

    # Inside the context: knocking out A2B1 and A2B3 disables the reaction.
    with model:
        model.genes.A2B1.knock_out()
        assert not model.genes.A2B1.functional
        model.genes.A2B3.knock_out()
        assert not rxn.functional

    # Leaving the context is expected to restore gene and reaction.
    assert model.genes.A2B3.functional
    assert rxn.functional

    # Outside a context: A2B1 alone leaves the reaction functional,
    # additionally knocking out A2B3 does not.
    model.genes.A2B1.knock_out()
    assert not model.genes.A2B1.functional
    assert model.reactions.rxn.functional
    model.genes.A2B3.knock_out()
    assert not model.reactions.rxn.functional
def test_gene_knock_out(model):
    """Gene knock-outs must affect reaction functionality and revert on context exit."""
    rxn = Reaction('rxn')
    rxn.add_metabolites({Metabolite('A'): -1, Metabolite('B'): 1})
    rxn.gene_reaction_rule = 'A2B1 or A2B2 and A2B3'
    any_gene = list(rxn.genes)[0]
    assert hasattr(any_gene, 'knock_out')
    model.add_reaction(rxn)

    # Inside the context: knocking out A2B1 and A2B3 disables the reaction.
    with model:
        model.genes.A2B1.knock_out()
        assert not model.genes.A2B1.functional
        model.genes.A2B3.knock_out()
        assert not rxn.functional

    # Leaving the context is expected to restore gene and reaction.
    assert model.genes.A2B3.functional
    assert rxn.functional

    # Outside a context: A2B1 alone leaves the reaction functional,
    # additionally knocking out A2B3 does not.
    model.genes.A2B1.knock_out()
    assert not model.genes.A2B1.functional
    assert model.reactions.rxn.functional
    model.genes.A2B3.knock_out()
    assert not model.reactions.rxn.functional
def test_gene_knock_out(model: Model) -> None:
    """Test gene knockout effect on reaction."""
    rxn = Reaction("rxn")
    rxn.add_metabolites({Metabolite("A"): -1, Metabolite("B"): 1})
    rxn.gene_reaction_rule = "A2B1 or A2B2 and A2B3"
    any_gene = list(rxn.genes)[0]
    assert hasattr(any_gene, "knock_out")
    model.add_reaction(rxn)

    # Inside the context: knocking out A2B1 and A2B3 disables the reaction.
    with model:
        model.genes.A2B1.knock_out()
        assert not model.genes.A2B1.functional
        model.genes.A2B3.knock_out()
        assert not rxn.functional

    # Leaving the context is expected to restore gene and reaction.
    assert model.genes.A2B3.functional
    assert rxn.functional

    # Outside a context: A2B1 alone leaves the reaction functional,
    # additionally knocking out A2B3 does not.
    model.genes.A2B1.knock_out()
    assert not model.genes.A2B1.functional
    assert model.reactions.rxn.functional
    model.genes.A2B3.knock_out()
    assert not model.reactions.rxn.functional
def convert_kmodel(kmodel, media=None, exchanges=True, model_id="kbase"):
    """Convert a KBase-style model dictionary into a cobra model.

    Parameters
    ----------
    kmodel : dict
        Must provide the keys ``'modelcompartments'``, ``'modelcompounds'``,
        ``'modelreactions'`` and ``'biomasses'``.
    media : mapping or None
        When given, maps a compound id (the part of the model-compound id
        before the first underscore) to a ``(lower_bound, upper_bound)``
        pair for its drain reaction. Drains of compounds missing from the
        mapping get a lower bound of 0.
    exchanges : bool
        When True, a drain reaction is created for every extracellular or
        sink compound.
    model_id : str
        Identifier of the created model.

    Returns
    -------
    cobra.Model
    """
    model_test = cobra.Model(model_id)

    comps = {mcomp['id']: mcomp['label']
             for mcomp in kmodel['modelcompartments']}

    mets = {}
    sink = set()    # compounds listed in SINK (drained with a "DM_" prefix)
    extra = set()   # every compound that gets a drain reaction
    reactions = []

    for mc in kmodel["modelcompounds"]:
        # The literal string 'null' marks a missing formula.
        formula = mc['formula'] if mc['formula'] != 'null' else None
        name = mc['name']
        charge = get_int('charge', 0, mc)
        mc_id = mc['id']
        annotation = get_cpd_annotation(mc['dblinks']) if 'dblinks' in mc \
            else {}
        compartment = get_compartment_id(mc, simple=True)

        cpd_id = build_cpd_id(mc_id)
        # NOTE(review): `bigg` is not a parameter of this function;
        # presumably a module-level flag - confirm it is defined.
        if bigg and "bigg.metabolite" in annotation:
            cpd_id = annotation["bigg.metabolite"] + "_" + compartment

        if mc_id in SINK:
            logger.info('Add Sink: [%s]', mc_id)
            extra.add(mc_id)
            sink.add(mc_id)
        if compartment.startswith("e"):
            extra.add(mc_id)

        met = Metabolite(id=cpd_id, formula=formula, name=name,
                         charge=charge, compartment=compartment)
        # SBO:0000247 = simple chemical
        met.annotation[SBO_ANNOTATION] = "SBO:0000247"
        if cpd_id.startswith('cpd'):
            met.annotation["seed.compound"] = cpd_id.split("_")[0]
        met.annotation.update(annotation)
        mets[mc_id] = met

    genes = set()
    for mr in kmodel["modelreactions"]:
        mr_id = mr['id']
        name = mr['name']
        lower_bound, upper_bound = get_reaction_constraints(mr)
        annotation = get_rxn_annotation(mr['dblinks']) if 'dblinks' in mr \
            else {}

        rxn_id = build_rxn_id(mr_id)
        if bigg and "bigg.reaction" in annotation:
            rxn_id = annotation["bigg.reaction"]

        reaction = Reaction(id=rxn_id, name=name,
                            lower_bound=lower_bound, upper_bound=upper_bound)
        # SBO:0000176 = biochemical reaction
        reaction.annotation[SBO_ANNOTATION] = "SBO:0000176"
        if rxn_id.startswith('rxn'):
            reaction.annotation["seed.reaction"] = rxn_id.split("_")[0]
        reaction.annotation.update(annotation)
        # Re-assert the SBO term in case the dblinks annotation replaced it.
        reaction.annotation[SBO_ANNOTATION] = "SBO:0000176"

        object_stoichiometry = {}
        for mrr in mr['modelReactionReagents']:
            mc_id = get_id_from_ref(mrr['modelcompound_ref'])
            object_stoichiometry[mets[mc_id]] = mrr['coefficient']
        reaction.add_metabolites(object_stoichiometry)

        gpr = get_gpr(mr)
        reaction.gene_reaction_rule = get_gpr_string(gpr)
        genes |= get_genes(gpr)
        reactions.append(reaction)

    # The last biomass reaction becomes the objective.
    objective_id = None
    for biomass in kmodel['biomasses']:
        reaction = convert_biomass_to_reaction(biomass, mets)
        reactions.append(reaction)
        objective_id = reaction.id

    if exchanges:
        logger.info('Setup Drains. EX: %d SK: %d', len(extra), len(sink))
        for e in extra:
            met = mets[e]
            prefix = "DM_" if e in sink else "EX_"
            drain_id = prefix + met.id

            lower_bound = COBRA_DEFAULT_LB
            upper_bound = COBRA_DEFAULT_UB
            if media is not None:
                # With a medium, compounds it does not list get no uptake.
                lower_bound = 0
                media_key = e.split("_")[0]
                if media_key in media:
                    ct = media[media_key]
                    lower_bound = ct[0]
                    upper_bound = ct[1]

            reaction = Reaction(id=drain_id,
                                name="Exchange for " + met.name,
                                lower_bound=lower_bound,
                                upper_bound=upper_bound)
            reaction.add_metabolites({met: -1})
            # SBO:0000627 = exchange reaction
            reaction.annotation[SBO_ANNOTATION] = "SBO:0000627"
            reactions.append(reaction)

    for g in genes:
        gene = Gene(id=build_gene_id(g), name=g)
        gene.annotation[SBO_ANNOTATION] = "SBO:0000243"
        model_test.genes.append(gene)

    model_test.compartments = comps

    try:
        model_test.add_reactions(reactions)
    except ValueError as err:
        # Best effort: report the problem and return what was built so far.
        warn(str(err))

    if objective_id is not None:
        model_test.objective = model_test.reactions.get_by_id(id=objective_id)
        linear_reaction_coefficients(model_test)

    return model_test
def from_mat_struct(mat_struct, model_id=None, inf=inf):
    """Create a model from the COBRA toolbox struct.

    The struct will be a dict read in by scipy.io.loadmat.

    Parameters
    ----------
    mat_struct
        Structured array; must have the fields "rxns", "mets", "S", "lb"
        and "ub".
    model_id : optional
        Id for the model. When None, the "description" field is used if
        present, otherwise "imported_model".
    inf
        Value substituted for infinite bounds (negated for lower bounds).

    Raises
    ------
    ValueError
        If the struct has no named fields or lacks a required field.
    """
    m = mat_struct
    field_names = m.dtype.names
    if field_names is None:
        raise ValueError("not a valid mat struct")
    if not {"rxns", "mets", "S", "lb", "ub"} <= set(field_names):
        raise ValueError("not a valid mat struct")

    c_vec = None
    if "c" in field_names:
        c_vec = m["c"][0, 0]
    else:
        warn("objective vector 'c' not found")

    model = Model()
    if model_id is not None:
        model.id = model_id
    elif "description" in field_names:
        description = m["description"][0, 0][0]
        several_ids = (not isinstance(description, string_types)
                       and len(description) > 1)
        if several_ids:
            model.id = description[0]
            warn("Several IDs detected, only using the first.")
        else:
            model.id = description
    else:
        model.id = "imported_model"

    # The set of fields is fixed, so check once whether explicit compartment
    # information is available.
    has_compartment_fields = all(
        var in field_names for var in ['metComps', 'comps', 'compNames'])

    for i, name in enumerate(m["mets"][0, 0]):
        new_metabolite = Metabolite()
        new_metabolite.id = str(name[0][0])

        if has_compartment_fields:
            # metComps is shifted down by one before indexing comps.
            comp_index = m["metComps"][0, 0][i][0] - 1
            new_metabolite.compartment = m['comps'][0, 0][comp_index][0][0]
            if new_metabolite.compartment not in model.compartments:
                comp_name = m['compNames'][0, 0][comp_index][0][0]
                model.compartments[new_metabolite.compartment] = comp_name
        else:
            new_metabolite.compartment = _get_id_compartment(
                new_metabolite.id)
            if new_metabolite.compartment not in model.compartments:
                model.compartments[new_metabolite.compartment] = \
                    new_metabolite.compartment

        # Optional fields: skipped when absent or too short.
        try:
            new_metabolite.name = str(m["metNames"][0, 0][i][0][0])
        except (IndexError, ValueError):
            pass
        try:
            new_metabolite.formula = str(m["metFormulas"][0][0][i][0][0])
        except (IndexError, ValueError):
            pass
        try:
            new_metabolite.charge = float(m["metCharge"][0, 0][i][0])
            # Store whole-number charges as int.
            int_charge = int(new_metabolite.charge)
            if new_metabolite.charge == int_charge:
                new_metabolite.charge = int_charge
        except (IndexError, ValueError):
            pass

        model.add_metabolites([new_metabolite])

    new_reactions = []
    coefficients = {}
    for i, name in enumerate(m["rxns"][0, 0]):
        new_reaction = Reaction()
        new_reaction.id = str(name[0][0])
        new_reaction.lower_bound = float(m["lb"][0, 0][i][0])
        new_reaction.upper_bound = float(m["ub"][0, 0][i][0])

        # Replace infinite bounds by the caller-supplied `inf`.
        if isinf(new_reaction.lower_bound) and new_reaction.lower_bound < 0:
            new_reaction.lower_bound = -inf
        if isinf(new_reaction.upper_bound) and new_reaction.upper_bound > 0:
            new_reaction.upper_bound = inf

        if c_vec is not None:
            coefficients[new_reaction] = float(c_vec[i][0])

        # Optional fields: skipped when absent or too short.
        try:
            new_reaction.gene_reaction_rule = str(m['grRules'][0, 0][i][0][0])
        except (IndexError, ValueError):
            pass
        try:
            new_reaction.name = str(m["rxnNames"][0, 0][i][0][0])
        except (IndexError, ValueError):
            pass
        try:
            new_reaction.subsystem = str(m['subSystems'][0, 0][i][0][0])
        except (IndexError, ValueError):
            pass

        new_reactions.append(new_reaction)

    model.add_reactions(new_reactions)
    set_objective(model, coefficients)

    # Fill in stoichiometry from the non-zero entries of S
    # (row -> metabolite, column -> reaction).
    coo = scipy_sparse.coo_matrix(m["S"][0, 0])
    for met_index, rxn_index, value in zip(coo.row, coo.col, coo.data):
        model.reactions[rxn_index].add_metabolites(
            {model.metabolites[met_index]: value})

    return model
def test_gene_knockout_computation(self, salmonella):
    """Check which reactions are removed by single, multiple and cumulative gene deletions."""

    def knockout_with_compiled_rules(cobra_model, gene_list):
        # Same query as find_gene_knockout_reactions, with precompiled rules.
        compiled = get_compiled_gene_reaction_rules(cobra_model)
        return find_gene_knockout_reactions(
            cobra_model, gene_list, compiled_gene_reaction_rules=compiled)

    def removed_ids(m):
        return {rxn.id for rxn in m._trimmed_reactions}

    def check(m, gene_ids, expected_reaction_ids):
        genes = [m.genes.get_by_id(gid) for gid in gene_ids]
        expected_reactions = {m.reactions.get_by_id(rid)
                              for rid in expected_reaction_ids}
        plain_result = set(find_gene_knockout_reactions(m, genes))
        compiled_result = set(knockout_with_compiled_rules(m, genes))
        assert plain_result == expected_reactions
        assert compiled_result == expected_reactions
        delete_model_genes(m, gene_ids, cumulative_deletions=False)
        assert removed_ids(m) == expected_reaction_ids
        undelete_model_genes(m)

    gene_list = ['STM1067', 'STM0227']
    dependent_reactions = {
        '3HAD121', '3HAD160', '3HAD80', '3HAD140', '3HAD180', '3HAD100',
        '3HAD181', '3HAD120', '3HAD60', '3HAD141', '3HAD161', 'T2DECAI',
        '3HAD40',
    }
    check(salmonella, gene_list, dependent_reactions)
    check(salmonella, ['STM4221'], {'PGI'})
    check(salmonella, ['STM1746.S'], {'4PEPTabcpp'})

    # Cumulative behaviour: three successive deletions add up.
    first_gene, remaining_genes = gene_list[:1], gene_list[1:]
    delete_model_genes(salmonella, first_gene)
    delete_model_genes(salmonella, remaining_genes,
                       cumulative_deletions=True)
    delete_model_genes(salmonella, ["STM4221"], cumulative_deletions=True)
    dependent_reactions.add('PGI')
    assert removed_ids(salmonella) == dependent_reactions

    # A non-cumulative deletion after cumulative ones leaves only its own
    # reactions removed.
    delete_model_genes(salmonella, ["STM4221"], cumulative_deletions=False)
    assert removed_ids(salmonella) == {'PGI'}

    # After the reset, the bound must be back at its expected value.
    reset_bound = salmonella.reactions.get_by_id("T2DECAI").upper_bound
    assert reset_bound == 1000.

    test_model = Model()
    test_reaction_1 = Reaction("test1")

    # Gene name that is a substring of another gene name.
    test_reaction_1.gene_reaction_rule = "eggs or (spam and eggspam)"
    test_model.add_reaction(test_reaction_1)
    for knocked, expected in (
            (["eggs"], set()),
            (["eggs", "spam"], {'test1'})):
        check(test_model, knocked, expected)

    # Nested boolean expression.
    test_reaction_1.gene_reaction_rule = \
        "g1 and g2 and (g3 or g4 or (g5 and g6))"
    for knocked, expected in (
            (["g3"], set()),
            (["g1"], {'test1'}),
            (["g5"], set()),
            (["g3", "g4", "g5"], {'test1'})):
        check(test_model, knocked, expected)

    # Gene names that collide with Python keywords or syntax.
    test_reaction_1.gene_reaction_rule = "g1 and (for or in)"
    for knocked, expected in (
            (["for", "in"], {'test1'}),
            (["for"], set())):
        check(test_model, knocked, expected)

    test_reaction_1.gene_reaction_rule = "g1 and g2 and g2.conjugate"
    for knocked in (["g2"], ["g2.conjugate"]):
        check(test_model, knocked, {"test1"})

    test_reaction_1.gene_reaction_rule = "g1 and (try:' or 'except:1)"
    for knocked, expected in (
            (["try:'"], set()),
            (["try:'", "'except:1"], {"test1"})):
        check(test_model, knocked, expected)
def create_cobra_model_from_sbml_file(sbml_filename, old_sbml=False,
                                      legacy_metabolite=False,
                                      print_time=False, use_hyphens=False):
    """Convert an SBML XML file into a cobra.Model object.

    Supports SBML Level 2 Versions 1 and 4. If the document uses the SBML
    fbc package it is first run through libsbml's "convert fbc to cobra"
    converter.

    Parameters
    ----------
    sbml_filename: string
    old_sbml: bool
        Set to True if the XML file has metabolite formula appended to
        metabolite names. This was a poorly designed artifact that persists
        in some models.
    legacy_metabolite: bool
        If True then assume that the metabolite id has the compartment id
        appended after an underscore (e.g. _c for cytosol).
    print_time: bool
        deprecated
    use_hyphens: bool
        If True, double underscores (__) in an SBML ID will be converted to
        hyphens; otherwise hyphens are converted to double underscores.

    Returns
    -------
    Model : The parsed cobra model

    Raises
    ------
    ImportError
        If python-libsbml is not available.
    IOError
        If ``sbml_filename`` does not exist.
    Exception
        If the fbc-to-cobra conversion fails.
    """
    if not libsbml:
        raise ImportError('create_cobra_model_from_sbml_file '
                          'requires python-libsbml')

    default_lower_bound = -1000
    default_upper_bound = 1000
    default_objective_coefficient = 0

    # Ensure that the file exists
    if not isfile(sbml_filename):
        raise IOError('Your SBML file is not found: %s' % sbml_filename)

    # Expressions to change SBML Ids to Palsson Lab Ids
    metabolite_re = re.compile('^M_')
    reaction_re = re.compile('^R_')
    compartment_re = re.compile('^C_')

    if print_time:
        warn("print_time is deprecated", DeprecationWarning)

    model_doc = libsbml.readSBML(sbml_filename)
    if model_doc.getPlugin("fbc") is not None:
        from libsbml import ConversionProperties, LIBSBML_OPERATION_SUCCESS
        conversion_properties = ConversionProperties()
        conversion_properties.addOption(
            "convert fbc to cobra", True, "Convert FBC model to Cobra model")
        result = model_doc.convert(conversion_properties)
        if result != LIBSBML_OPERATION_SUCCESS:
            raise Exception("Conversion of SBML+fbc to COBRA failed")

    sbml_model = model_doc.getModel()
    sbml_model_id = sbml_model.getId()
    sbml_species = sbml_model.getListOfSpecies()
    sbml_reactions = sbml_model.getListOfReactions()
    sbml_compartments = sbml_model.getListOfCompartments()

    compartment_dict = {compartment_re.split(x.getId())[-1]: x.getName()
                        for x in sbml_compartments}
    if legacy_metabolite:
        # Deal with the palsson lab appending the compartment id to the
        # metabolite id
        new_dict = {}
        for the_id, the_name in compartment_dict.items():
            if the_name == '':
                new_dict[the_id[0].lower()] = the_id
            else:
                new_dict[the_id] = the_name
        compartment_dict = new_dict
        legacy_compartment_converter = {
            v: k for k, v in compartment_dict.items()}

    cobra_model = Model(sbml_model_id)
    metabolites = []
    metabolite_dict = {}  # SBML species id -> cobra Metabolite

    # Convert sbml_metabolites to cobra.Metabolites
    for sbml_metabolite in sbml_species:
        # Skip sbml boundary species
        if sbml_metabolite.getBoundaryCondition():
            continue
        if (old_sbml or legacy_metabolite) and \
                sbml_metabolite.getId().endswith('_b'):
            # Deal with incorrect sbml from bigg.ucsd.edu
            continue

        tmp_metabolite = Metabolite()
        metabolite_id = tmp_metabolite.id = sbml_metabolite.getId()
        tmp_metabolite.compartment = compartment_re.split(
            sbml_metabolite.getCompartment())[-1]
        if legacy_metabolite:
            if tmp_metabolite.compartment not in compartment_dict:
                tmp_metabolite.compartment = legacy_compartment_converter[
                    tmp_metabolite.compartment]
            tmp_metabolite.id = parse_legacy_id(
                tmp_metabolite.id, tmp_metabolite.compartment,
                use_hyphens=use_hyphens)
        if use_hyphens:
            tmp_metabolite.id = metabolite_re.split(
                tmp_metabolite.id)[-1].replace('__', '-')
        else:
            # Just in case the SBML ids are ill-formed and use -
            tmp_metabolite.id = metabolite_re.split(
                tmp_metabolite.id)[-1].replace('-', '__')
        tmp_metabolite.name = sbml_metabolite.getName()
        tmp_formula = ''
        tmp_metabolite.notes = parse_legacy_sbml_notes(
            sbml_metabolite.getNotesString())
        if sbml_metabolite.isSetCharge():
            tmp_metabolite.charge = sbml_metabolite.getCharge()

        if "CHARGE" in tmp_metabolite.notes:
            note_charge = tmp_metabolite.notes["CHARGE"][0]
            try:
                note_charge = float(note_charge)
                if note_charge == int(note_charge):
                    note_charge = int(note_charge)
            except (TypeError, ValueError, OverflowError):
                # Only conversion failures are expected here; anything else
                # (e.g. KeyboardInterrupt) must propagate.
                warn("charge of %s is not a number (%s)" %
                     (tmp_metabolite.id, str(note_charge)))
            else:
                if ((tmp_metabolite.charge is None) or
                        (tmp_metabolite.charge == note_charge)):
                    tmp_metabolite.notes.pop("CHARGE")
                    # set charge to the one from notes if not assigned
                    # before, or if it is the same
                    tmp_metabolite.charge = note_charge
                else:  # tmp_metabolite.charge != note_charge
                    msg = "different charges specified for %s (%d and %d)"
                    msg = msg % (tmp_metabolite.id,
                                 tmp_metabolite.charge, note_charge)
                    warn(msg)
                    # Chances are a 0 note charge was written by mistake. We
                    # will default to the note_charge in this case.
                    if tmp_metabolite.charge == 0:
                        tmp_metabolite.charge = note_charge

        # Take the formula from the notes, whatever the case of its key.
        for the_key in list(tmp_metabolite.notes):
            if the_key.lower() == 'formula':
                tmp_formula = tmp_metabolite.notes.pop(the_key)[0]
                break
        if tmp_formula == '' and old_sbml:
            # Old files append "_<formula>" to the metabolite name.
            tmp_formula = tmp_metabolite.name.split('_')[-1]
            tmp_metabolite.name = tmp_metabolite.name[:-len(tmp_formula) - 1]
        tmp_metabolite.formula = tmp_formula
        metabolite_dict[metabolite_id] = tmp_metabolite
        metabolites.append(tmp_metabolite)
    cobra_model.add_metabolites(metabolites)

    # Construct the vectors and matrices for holding connectivity and
    # numerical info to feed to the cobra toolbox.
    # Always assume steady state simulations so b is set to 0
    cobra_reaction_list = []
    coefficients = {}
    for sbml_reaction in sbml_reactions:
        if use_hyphens:
            # Change the ids to match conventions used by the Palsson lab.
            reaction = Reaction(reaction_re.split(
                sbml_reaction.getId())[-1].replace('__', '-'))
        else:
            # Just in case the SBML ids are ill-formed and use -
            reaction = Reaction(reaction_re.split(
                sbml_reaction.getId())[-1].replace('-', '__'))
        cobra_reaction_list.append(reaction)
        reaction.name = sbml_reaction.getName()

        cobra_metabolites = {}
        # Use the cobra.Metabolite class here
        for species_ref in sbml_reaction.getListOfReactants():
            tmp_metabolite_id = species_ref.getSpecies()
            # This deals with boundary metabolites
            if tmp_metabolite_id in metabolite_dict:
                tmp_metabolite = metabolite_dict[tmp_metabolite_id]
                cobra_metabolites[tmp_metabolite] = \
                    -species_ref.getStoichiometry()
        for species_ref in sbml_reaction.getListOfProducts():
            tmp_metabolite_id = species_ref.getSpecies()
            # This deals with boundary metabolites
            if tmp_metabolite_id in metabolite_dict:
                tmp_metabolite = metabolite_dict[tmp_metabolite_id]
                # Handle the case where the metabolite was specified both
                # as a reactant and as a product.
                if tmp_metabolite in cobra_metabolites:
                    warn("%s appears as a reactant and product %s" %
                         (tmp_metabolite_id, reaction.id))
                    cobra_metabolites[
                        tmp_metabolite] += species_ref.getStoichiometry()
                    # if the combined stoichiometry is 0, remove the
                    # metabolite
                    if cobra_metabolites[tmp_metabolite] == 0:
                        cobra_metabolites.pop(tmp_metabolite)
                else:
                    cobra_metabolites[
                        tmp_metabolite] = species_ref.getStoichiometry()

        # check for nan
        for met, v in cobra_metabolites.items():
            if isnan(v) or isinf(v):
                warn("invalid value %s for metabolite '%s' in reaction '%s'" %
                     (str(v), met.id, reaction.id))
        reaction.add_metabolites(cobra_metabolites)

        # Parse the kinetic law info here.
        parameter_dict = {}
        # If lower and upper bounds are specified in the Kinetic Law then
        # they override the sbml reversible attribute. If they are not
        # specified then the bounds are determined by getReversible.
        if not sbml_reaction.getKineticLaw():
            if sbml_reaction.getReversible():
                parameter_dict['lower_bound'] = default_lower_bound
                parameter_dict['upper_bound'] = default_upper_bound
            else:
                # Assume that irreversible reactions only proceed from left
                # to right.
                parameter_dict['lower_bound'] = 0
                parameter_dict['upper_bound'] = default_upper_bound
            parameter_dict[
                'objective_coefficient'] = default_objective_coefficient
        else:
            for sbml_parameter in \
                    sbml_reaction.getKineticLaw().getListOfParameters():
                parameter_dict[
                    sbml_parameter.getId().lower()] = sbml_parameter.getValue()

        # Parameter ids may use an underscore or a space as separator.
        if 'lower_bound' in parameter_dict:
            reaction.lower_bound = parameter_dict['lower_bound']
        elif 'lower bound' in parameter_dict:
            reaction.lower_bound = parameter_dict['lower bound']
        elif sbml_reaction.getReversible():
            reaction.lower_bound = default_lower_bound
        else:
            reaction.lower_bound = 0

        if 'upper_bound' in parameter_dict:
            reaction.upper_bound = parameter_dict['upper_bound']
        elif 'upper bound' in parameter_dict:
            reaction.upper_bound = parameter_dict['upper bound']
        else:
            reaction.upper_bound = default_upper_bound

        # Fall back to the space-separated spelling, as for the bounds
        # (the original looked up the underscore key twice).
        objective_coefficient = parameter_dict.get(
            'objective_coefficient', parameter_dict.get(
                'objective coefficient', default_objective_coefficient))
        if objective_coefficient != 0:
            coefficients[reaction] = objective_coefficient

        # ensure values are not set to nan or inf
        if isnan(reaction.lower_bound) or isinf(reaction.lower_bound):
            reaction.lower_bound = default_lower_bound
        if isnan(reaction.upper_bound) or isinf(reaction.upper_bound):
            reaction.upper_bound = default_upper_bound

        reaction_note_dict = parse_legacy_sbml_notes(
            sbml_reaction.getNotesString())
        # Parse the reaction notes.
        # POTENTIAL BUG: DEALING WITH LEGACY 'SBML' THAT IS NOT IN A
        # STANDARD FORMAT
        # TODO: READ IN OTHER NOTES AND GIVE THEM A reaction_ prefix.
        # TODO: Make sure genes get added as objects
        if 'GENE ASSOCIATION' in reaction_note_dict:
            rule = reaction_note_dict['GENE ASSOCIATION'][0]
            try:
                rule.encode('ascii')
            except (UnicodeEncodeError, UnicodeDecodeError):
                warn("gene_reaction_rule '%s' is not ascii compliant" % rule)
            # Strip a surrounding pair of escaped quotes; each "&quot;"
            # entity is 6 characters long, hence the slice.
            if rule.startswith("&quot;") and rule.endswith("&quot;"):
                rule = rule[6:-6]
            reaction.gene_reaction_rule = rule
            if 'GENE LIST' in reaction_note_dict:
                reaction.systematic_names = reaction_note_dict['GENE LIST'][0]
            elif ('GENES' in reaction_note_dict and
                  reaction_note_dict['GENES'] != ['']):
                reaction.systematic_names = reaction_note_dict['GENES'][0]
            elif 'LOCUS' in reaction_note_dict:
                gene_id_to_object = {x.id: x for x in reaction._genes}
                for the_row in reaction_note_dict['LOCUS']:
                    # Rows are '#'-separated "KEY:value" items; the first
                    # item is the locus id itself.
                    tmp_row_dict = {}
                    the_row = 'LOCUS:' + the_row.lstrip('_').rstrip('#')
                    for the_item in the_row.split('#'):
                        k, v = the_item.split(':')
                        tmp_row_dict[k] = v
                    tmp_locus_id = tmp_row_dict['LOCUS']
                    if 'TRANSCRIPT' in tmp_row_dict:
                        tmp_locus_id = tmp_locus_id + \
                            '.' + tmp_row_dict['TRANSCRIPT']
                    if 'ABBREVIATION' in tmp_row_dict:
                        gene_id_to_object[tmp_locus_id].name = tmp_row_dict[
                            'ABBREVIATION']

        if 'SUBSYSTEM' in reaction_note_dict:
            reaction.subsystem = reaction_note_dict.pop('SUBSYSTEM')[0]

        reaction.notes = reaction_note_dict

    # Now, add all of the reactions to the model.
    cobra_model.id = sbml_model.getId()
    # Populate the compartment list - This will be done based on
    # cobra.Metabolites in cobra.Reactions in the future.
    cobra_model.compartments = compartment_dict

    cobra_model.add_reactions(cobra_reaction_list)
    set_objective(cobra_model, coefficients)
    return cobra_model
def parse_xml_into_model(xml, number=float):
    """Build a cobra ``Model`` from a parsed SBML (fbc) XML tree.

    Reads, in order: model id/name, compartments, non-boundary species
    (as metabolites), gene products, flux-bound parameters, reactions
    (bounds, stoichiometry, gene-reaction rule) and finally the active
    objective.

    Parameters
    ----------
    xml : element-like
        Object exposing ``find`` from which the ``sbml:model`` element is
        looked up.
    number : callable, optional
        Converter applied to numeric attribute strings (bound values,
        stoichiometries, objective coefficients). Defaults to ``float``.

    Returns
    -------
    Model
        The populated model. When no ``fbc:listOfObjectives`` element
        exists, a warning is issued and the model is returned without an
        objective being set.

    Raises
    ------
    CobraSBMLError
        If a reaction refers to a flux bound id that was not found, if the
        active objective is not present in the list of objectives, or if
        an objective refers to an unknown reaction.
        NOTE(review): ``get_attrib(..., require=True)`` is presumed to
        raise (likely this same error type) when the attribute is
        missing - confirm against its definition.
    Exception
        If a gene association contains a tag other than or/and/geneRef.
    """
    xml_model = xml.find(ns("sbml:model"))
    if get_attrib(xml_model, "fbc:strict") != "true":
        warn('loading SBML model without fbc:strict="true"')

    model_id = get_attrib(xml_model, "id")
    model = Model(model_id)
    model.name = xml_model.get("name")

    model.compartments = {c.get("id"): c.get("name") for c in
                          xml_model.findall(COMPARTMENT_XPATH)}

    # add metabolites
    for species in xml_model.findall(SPECIES_XPATH % 'false'):
        met = get_attrib(species, "id", require=True)
        met = Metabolite(clip(met, "M_"))
        met.name = species.get("name")
        annotate_cobra_from_sbml(met, species)
        met.compartment = species.get("compartment")
        met.charge = get_attrib(species, "fbc:charge", int)
        met.formula = get_attrib(species, "fbc:chemicalFormula")
        model.add_metabolites([met])

    # Detect boundary metabolites - In case they have been mistakenly
    # added. They should not actually appear in a model
    boundary_metabolites = {clip(i.get("id"), "M_")
                            for i in xml_model.findall(SPECIES_XPATH % 'true')}

    # add genes
    for sbml_gene in xml_model.iterfind(GENES_XPATH):
        # The id is required: it is transformed right away, so a missing
        # attribute must be reported as such rather than failing on None.
        gene_id = get_attrib(sbml_gene, "fbc:id", require=True)
        gene_id = gene_id.replace(SBML_DOT, ".")
        gene = Gene(clip(gene_id, "G_"))
        gene.name = get_attrib(sbml_gene, "fbc:name")
        if gene.name is None:
            gene.name = get_attrib(sbml_gene, "fbc:label")
        annotate_cobra_from_sbml(gene, sbml_gene)
        model.genes.append(gene)

    def process_gpr(sub_xml):
        """Recursively convert a gene association element to a GPR string.

        Every or/and node is wrapped in "( ... )", so the top-level result
        is either a bare gene id or a fully parenthesized expression.
        """
        if sub_xml.tag == OR_TAG:
            return "( " + ' or '.join(process_gpr(i) for i in sub_xml) + " )"
        elif sub_xml.tag == AND_TAG:
            return "( " + ' and '.join(process_gpr(i) for i in sub_xml) + " )"
        elif sub_xml.tag == GENEREF_TAG:
            gene_id = get_attrib(sub_xml, "fbc:geneProduct", require=True)
            return clip(gene_id, "G_")
        else:
            raise Exception("unsupported tag " + sub_xml.tag)

    # flux bound parameters, looked up by id from each reaction
    bounds = {bound.get("id"): get_attrib(bound, "value", type=number)
              for bound in xml_model.iterfind(BOUND_XPATH)}

    # add reactions
    reactions = []
    for sbml_reaction in xml_model.iterfind(
            ns("sbml:listOfReactions/sbml:reaction")):
        reaction = get_attrib(sbml_reaction, "id", require=True)
        reaction = Reaction(clip(reaction, "R_"))
        reaction.name = sbml_reaction.get("name")
        annotate_cobra_from_sbml(reaction, sbml_reaction)
        lb_id = get_attrib(sbml_reaction, "fbc:lowerFluxBound", require=True)
        ub_id = get_attrib(sbml_reaction, "fbc:upperFluxBound", require=True)
        try:
            reaction.upper_bound = bounds[ub_id]
            reaction.lower_bound = bounds[lb_id]
        except KeyError as e:
            # str(KeyError) is the repr of the key and would add a second
            # pair of quotes to the message; format the raw key instead.
            raise CobraSBMLError(
                "No constant bound with id '%s'" % (e.args[0],))
        reactions.append(reaction)

        # Net coefficient per metabolite id: reactants subtract, products
        # add. Both sides require the stoichiometry attribute.
        stoichiometry = defaultdict(int)
        for species_reference in sbml_reaction.findall(
                ns("sbml:listOfReactants/sbml:speciesReference")):
            met_name = clip(species_reference.get("species"), "M_")
            stoichiometry[met_name] -= \
                get_attrib(species_reference, "stoichiometry",
                           type=number, require=True)
        for species_reference in sbml_reaction.findall(
                ns("sbml:listOfProducts/sbml:speciesReference")):
            met_name = clip(species_reference.get("species"), "M_")
            stoichiometry[met_name] += \
                get_attrib(species_reference, "stoichiometry",
                           type=number, require=True)

        # needs to have keys of metabolite objects, not ids
        object_stoichiometry = {}
        for met_id in stoichiometry:
            if met_id in boundary_metabolites:
                warn("Boundary metabolite '%s' used in reaction '%s'" %
                     (met_id, reaction.id))
                continue
            try:
                metabolite = model.metabolites.get_by_id(met_id)
            except KeyError:
                warn("ignoring unknown metabolite '%s' in reaction %s" %
                     (met_id, reaction.id))
                continue
            object_stoichiometry[metabolite] = stoichiometry[met_id]
        reaction.add_metabolites(object_stoichiometry)

        # set gene reaction rule
        gpr_xml = sbml_reaction.find(GPR_TAG)
        # a valid association has exactly one child expression
        if gpr_xml is not None and len(gpr_xml) != 1:
            warn("ignoring invalid geneAssociation for " + repr(reaction))
            gpr_xml = None
        gpr = process_gpr(gpr_xml[0]) if gpr_xml is not None else ''
        # remove outside parenthesis, if any
        if gpr.startswith("(") and gpr.endswith(")"):
            gpr = gpr[1:-1].strip()
        gpr = gpr.replace(SBML_DOT, ".")
        reaction.gene_reaction_rule = gpr

    try:
        model.add_reactions(reactions)
    except ValueError as e:
        warn(str(e))

    # objective coefficients are handled after all reactions are added
    obj_list = xml_model.find(ns("fbc:listOfObjectives"))
    if obj_list is None:
        warn("listOfObjectives element not found")
        return model
    target_objective_id = get_attrib(obj_list, "fbc:activeObjective")
    target_objective = obj_list.find(
        ns("fbc:objective[@fbc:id='{}']".format(target_objective_id)))
    if target_objective is None:
        # find() yields None when no objective carries the active id;
        # report that instead of failing on the None element below.
        raise CobraSBMLError(
            "Active objective '%s' not found in listOfObjectives" %
            (target_objective_id,))
    obj_direction_long = get_attrib(target_objective, "fbc:type")
    obj_direction = LONG_SHORT_DIRECTION[obj_direction_long]

    obj_query = OBJECTIVES_XPATH % target_objective_id
    coefficients = {}
    for sbml_objective in obj_list.findall(obj_query):
        rxn_id = clip(get_attrib(sbml_objective, "fbc:reaction"), "R_")
        try:
            objective_reaction = model.reactions.get_by_id(rxn_id)
        except KeyError:
            raise CobraSBMLError("Objective reaction '%s' not found" % rxn_id)
        try:
            coefficients[objective_reaction] = get_attrib(
                sbml_objective, "fbc:coefficient", type=number)
        except ValueError as e:
            warn(str(e))
    set_objective(model, coefficients)
    model.solver.objective.direction = obj_direction
    return model
def test_gene_knockout_computation(self, salmonella):
    """Check knockout computation and gene deletion on several models.

    Covers the plain and pre-compiled-rule code paths, cumulative versus
    non-cumulative deletions, bound restoration, and gene ids that are
    substrings of each other or look like Python syntax.
    """

    def knocked_out_with_compiled_rules(cobra_model, gene_list):
        # same query, but with the gene reaction rules compiled up front
        return find_gene_knockout_reactions(
            cobra_model,
            gene_list,
            compiled_gene_reaction_rules=get_compiled_gene_reaction_rules(
                cobra_model),
        )

    def trimmed_ids(m):
        return set(rxn.id for rxn in m._trimmed_reactions)

    def check_knockout(m, gene_ids, expected_reaction_ids):
        gene_objects = [m.genes.get_by_id(gid) for gid in gene_ids]
        expected = set(
            m.reactions.get_by_id(rid) for rid in expected_reaction_ids)
        plain_result = set(find_gene_knockout_reactions(m, gene_objects))
        compiled_result = set(
            knocked_out_with_compiled_rules(m, gene_objects))
        assert plain_result == expected
        assert compiled_result == expected
        delete_model_genes(m, gene_ids, cumulative_deletions=False)
        assert trimmed_ids(m) == expected_reaction_ids
        undelete_model_genes(m)

    gene_list = ['STM1067', 'STM0227']
    dependent_reactions = {
        '3HAD40', '3HAD60', '3HAD80', '3HAD100', '3HAD120', '3HAD121',
        '3HAD140', '3HAD141', '3HAD160', '3HAD161', '3HAD180', '3HAD181',
        'T2DECAI',
    }
    for knocked, expected in (
            (gene_list, dependent_reactions),
            (['STM4221'], {'PGI'}),
            (['STM1746.S'], {'4PEPTabcpp'}),
    ):
        check_knockout(salmonella, knocked, expected)

    # test cumulative behavior
    first_gene, second_gene = gene_list
    delete_model_genes(salmonella, [first_gene])
    delete_model_genes(salmonella, [second_gene],
                       cumulative_deletions=True)
    delete_model_genes(salmonella, ["STM4221"], cumulative_deletions=True)
    dependent_reactions.add('PGI')
    assert trimmed_ids(salmonella) == dependent_reactions

    # non-cumulative following cumulative
    delete_model_genes(salmonella, ["STM4221"], cumulative_deletions=False)
    assert trimmed_ids(salmonella) == {'PGI'}

    # make sure on reset that the bounds are correct
    t2decai = salmonella.reactions.get_by_id("T2DECAI")
    reset_bound = t2decai.upper_bound
    assert reset_bound == 1000.

    # test computation when gene name is a subset of another
    test_model = Model()
    test_reaction_1 = Reaction("test1")
    test_reaction_1.gene_reaction_rule = "eggs or (spam and eggspam)"
    test_model.add_reaction(test_reaction_1)
    check_knockout(test_model, ["eggs"], set())
    check_knockout(test_model, ["eggs", "spam"], {'test1'})

    # Remaining scenarios reuse the same reaction: assign a new rule,
    # then run its knockout cases in order.
    rule_scenarios = (
        # nested boolean expression
        ("g1 and g2 and (g3 or g4 or (g5 and g6))", (
            (["g3"], set()),
            (["g1"], {'test1'}),
            (["g5"], set()),
            (["g3", "g4", "g5"], {'test1'}),
        )),
        # gene names that are python expressions
        ("g1 and (for or in)", (
            (["for", "in"], {'test1'}),
            (["for"], set()),
        )),
        ("g1 and g2 and g2.conjugate", (
            (["g2"], {"test1"}),
            (["g2.conjugate"], {"test1"}),
        )),
        ("g1 and (try:' or 'except:1)", (
            (["try:'"], set()),
            (["try:'", "'except:1"], {"test1"}),
        )),
    )
    for rule, cases in rule_scenarios:
        test_reaction_1.gene_reaction_rule = rule
        for knocked, expected in cases:
            check_knockout(test_model, knocked, expected)