def find_gene_knockout_reactions(cobra_model, gene_list,
                                 compiled_gene_reaction_rules=None):
    """Identify reactions which will be disabled when the genes are knocked out.

    Parameters
    ----------
    cobra_model : :class:`~cobra.core.Model.Model`
        Model used to resolve entries of ``gene_list`` given as id strings.
    gene_list : iterable of :class:`~cobra.core.Gene.Gene` or str
        Genes to knock out. Any iterable is accepted, including one-shot
        iterators such as generators.
    compiled_gene_reaction_rules : dict, optional
        Pre-parsed rules, looked up with the reaction object as key; each
        value is the first element returned by ``parse_gpr`` for that
        reaction's rule. Reactions without an entry have their rule parsed
        on demand.

    Returns
    -------
    list
        The reactions linked to the given genes for which ``eval_gpr``
        returns a falsy value given the set of knocked-out gene ids.
    """
    # gene_list is walked twice below; materialise it so that a generator
    # does not come back empty on the second pass.
    genes = list(gene_list)

    potential_reactions = set()
    for gene in genes:
        if isinstance(gene, string_types):
            gene = cobra_model.genes.get_by_id(gene)
        potential_reactions.update(gene._reaction)
    gene_set = {str(i) for i in genes}

    if compiled_gene_reaction_rules is None:
        compiled_gene_reaction_rules = {}

    def rule_for(reaction):
        """Return the pre-parsed rule, parsing it when none was supplied."""
        try:
            return compiled_gene_reaction_rules[reaction]
        except KeyError:
            return parse_gpr(reaction.gene_reaction_rule)[0]

    return [r for r in potential_reactions
            if not eval_gpr(rule_for(r), gene_set)]
def find_gene_knockout_reactions(cobra_model, gene_list,
                                 compiled_gene_reaction_rules=None):
    """Identify reactions which will be disabled when the genes are knocked out.

    Parameters
    ----------
    cobra_model : :class:`~cobra.core.Model.Model`
        Model used to resolve entries of ``gene_list`` given as id strings.
    gene_list : iterable of :class:`~cobra.core.Gene.Gene` or str
        Genes to knock out. Any iterable is accepted, including one-shot
        iterators such as generators.
    compiled_gene_reaction_rules : dict, optional
        Pre-parsed rules, looked up with the reaction object as key; each
        value is the first element returned by ``parse_gpr`` for that
        reaction's rule. Reactions without an entry have their rule parsed
        on demand.

    Returns
    -------
    list
        The reactions linked to the given genes for which ``eval_gpr``
        returns a falsy value given the set of knocked-out gene ids.
    """
    # gene_list is walked twice below; materialise it so that a generator
    # does not come back empty on the second pass.
    genes = list(gene_list)

    potential_reactions = set()
    for gene in genes:
        if isinstance(gene, string_types):
            gene = cobra_model.genes.get_by_id(gene)
        potential_reactions.update(gene._reaction)
    gene_set = {str(i) for i in genes}

    if compiled_gene_reaction_rules is None:
        compiled_gene_reaction_rules = {}

    def rule_for(reaction):
        """Return the pre-parsed rule, parsing it when none was supplied."""
        try:
            return compiled_gene_reaction_rules[reaction]
        except KeyError:
            return parse_gpr(reaction.gene_reaction_rule)[0]

    return [
        r for r in potential_reactions
        if not eval_gpr(rule_for(r), gene_set)
    ]
def validate_model(model):
    """Run consistency checks on a model and collect the findings.

    The checks are, in order: reaction bounds, metabolite
    compartment/formula, parseability of every gene reaction rule, mass
    balance, and finally an optimization of the model.

    Returns a dict with the keys ``'errors'`` and ``'warnings'`` (lists of
    message strings). When the optimization status is ``'optimal'`` the
    dict also carries ``'objective'`` holding ``solution.f``.
    """
    problems = list(check_reaction_bounds(model))
    problems.extend(check_metabolite_compartment_formula(model))
    notices = []

    # every gene reaction rule has to be parseable
    for rxn in model.reactions:
        try:
            parse_gpr(rxn.gene_reaction_rule)
        except SyntaxError:
            problems.append("reaction '%s' has invalid gpr '%s'" %
                            (rxn.id, rxn.gene_reaction_rule))

    # unbalanced reactions only produce warnings; the wording depends on
    # what kind of reaction it appears to be
    for rxn, imbalance in iteritems(check_mass_balance(model)):
        if len(rxn.metabolites) == 1:
            # looks like a demand or exchange reaction
            text = ("reaction '%s' is not balanced. Should it "
                    "be annotated as a demand or exchange "
                    "reaction?" % rxn.id)
        elif "biomass" in rxn.id.lower():
            text = ("reaction '%s' is not balanced. Should it "
                    "be annotated as a biomass reaction?" % rxn.id)
        else:
            text = ("reaction '%s' is not balanced for %s" %
                    (rxn.id, ", ".join(sorted(imbalance))))
        notices.append(text)

    # try solving
    solution = model.optimize(solver='esolver')
    report = {'errors': problems, 'warnings': notices}
    if solution.status != 'optimal':
        problems.append('model can not be solved (status "%s")' %
                        solution.status)
        return report

    # if there is no objective, then we know why the objective was low
    if len(model.objective.variables) == 0:
        notices.append('Model has no objective function')
    elif solution.f <= 0:
        notices.append('Model can not produce nonzero biomass')
    elif solution.f <= 1e-3:
        notices.append('Biomass flux %s too low' % str(solution.f))

    # Length is 2 for a single objective in cobrapy 0.6.x
    if len(model.objective.variables) > 2:
        notices.append('Model should only have one reaction as the objective')

    report['objective'] = solution.f
    return report
def build_model_reaction_proteins(cobra_gpr_str):
    """Return one single-element list per distinct entry of the parsed rule.

    This is the deliberately simple version: only the second element of
    the ``parse_gpr`` result is used, and the and/or structure of the rule
    is ignored. Every distinct entry of that second element is wrapped in
    its own one-item list.
    """
    parsed = parse_gpr(cobra_gpr_str)
    return [[gene] for gene in set(parsed[1])]
def gene_name_reaction_rule(self):
    """Display gene_reaction_rule with gene names instead of ids.

    Do NOT use this string for computation. It is intended to give a
    representation of the rule using more familiar gene names instead of
    the often cryptic ids.
    """
    # id -> name lookup handed to ast2str for rendering
    id_to_name = {}
    for gene in self._genes:
        id_to_name[gene.id] = gene.name
    tree = parse_gpr(self._gene_reaction_rule)[0]
    return ast2str(tree, names=id_to_name)
def get_compiled_gene_reaction_rules(cobra_model):
    """Build a lookup of parsed gene_reaction_rules for a model.

    Every reaction in ``cobra_model.reactions`` is mapped to the first
    element returned by ``parse_gpr`` for its ``gene_reaction_rule``.

    The result can be used in the find_gene_knockout_reactions function
    to speed up evaluation of these rules.

    NOTE(review): nothing is filtered out here - an exception raised by
    ``parse_gpr`` for a rule propagates to the caller.
    """
    compiled = {}
    for reaction in cobra_model.reactions:
        compiled[reaction] = parse_gpr(reaction.gene_reaction_rule)[0]
    return compiled
def reaction_confidence(rule, conf_genes):
    """Calculate the confidence for a reaction from its gene-reaction rule.

    Args:
        rule (str): A gene-reaction rule. For instance "A and B".
        conf_genes (dict): A str->int map denoting the mapping of gene IDs
            to expression confidence values. Allowed confidence values are
            -1 (absent/do not include), 0 (unknown), 1 (low confidence),
            2 (medium confidence) and 3 (high confidence).

    Returns:
        Whatever ``safe_eval_gpr`` yields for the parsed rule and
        ``conf_genes``.
    """
    # parse_gpr returns a pair; only the first element is needed here
    tree, _unused = parse_gpr(rule)
    confidence = safe_eval_gpr(tree, conf_genes)
    return confidence
def get_compiled_gene_reaction_rules(cobra_model):
    """Build a lookup of parsed gene_reaction_rules for a model.

    Every reaction in ``cobra_model.reactions`` is mapped to the first
    element returned by ``parse_gpr`` for its ``gene_reaction_rule``.

    The result can be used in the find_gene_knockout_reactions function
    to speed up evaluation of these rules.

    NOTE(review): nothing is filtered out here - an exception raised by
    ``parse_gpr`` for a rule propagates to the caller.
    """
    compiled = {}
    for reaction in cobra_model.reactions:
        compiled[reaction] = parse_gpr(reaction.gene_reaction_rule)[0]
    return compiled
def gene_reaction_rule(self, new_rule):
    """Set the gene reaction rule and keep gene/reaction links in sync.

    The stripped rule is stored on ``self._gene_reaction_rule`` and parsed
    with ``parse_gpr`` to obtain the gene ids it mentions. If parsing
    fails, uppercase AND/OR operators are rewritten to lowercase and the
    rule is assigned again; if that rewrite changes nothing the rule is
    reported as malformed and the gene ids are extracted with the
    module's regular expressions instead.

    Afterwards ``self._genes`` is rebuilt (reusing genes of the attached
    model where they exist, creating and registering new ones otherwise),
    every gene in the new set records this reaction, and genes that
    dropped out of the rule forget it.

    Parameters
    ----------
    new_rule : str
        The rule text.

    NOTE(review): presumably this is the property setter for
    ``gene_reaction_rule`` - the retry below relies on assignment
    re-entering this function.
    """
    # TODO: Do this :)
    if get_context(self):
        warn("Context management not implemented for "
             "gene reaction rules")

    self._gene_reaction_rule = new_rule.strip()
    try:
        _, gene_names = parse_gpr(self._gene_reaction_rule)
    except (SyntaxError, TypeError):
        if "AND" in new_rule or "OR" in new_rule:
            fixed_rule = uppercase_AND.sub("and", new_rule)
            fixed_rule = uppercase_OR.sub("or", fixed_rule)
            # Retry only when the substitution changed something. The
            # substring test above also fires for ids that merely contain
            # "AND"/"OR"; re-assigning an unchanged rule would land here
            # again and never terminate.
            if fixed_rule != new_rule:
                warn("uppercase AND/OR found in rule '%s' for '%s'" %
                     (new_rule, repr(self)))
                self.gene_reaction_rule = fixed_rule
                return
        warn("malformed gene_reaction_rule '%s' for %s" %
             (new_rule, repr(self)))
        # best-effort extraction of the ids from the unparsable rule
        tmp_str = and_or_search.sub("", self._gene_reaction_rule)
        gene_names = set((gpr_clean.sub(" ", tmp_str).split(" ")))
    if "" in gene_names:
        gene_names.remove("")

    old_genes = self._genes
    if self._model is None:
        self._genes = {Gene(i) for i in gene_names}
    else:
        model_genes = self._model.genes
        self._genes = set()
        for gene_id in gene_names:
            if model_genes.has_id(gene_id):
                self._genes.add(model_genes.get_by_id(gene_id))
            else:
                new_gene = Gene(gene_id)
                new_gene._model = self._model
                self._genes.add(new_gene)
                model_genes.append(new_gene)

    # Make the genes aware that it is involved in this reaction
    for g in self._genes:
        g._reaction.add(self)

    # make the old genes aware they are no longer involved in this reaction
    for g in old_genes:
        if g not in self._genes:  # if an old gene is not a new gene
            try:
                g._reaction.remove(self)
            except KeyError:
                warn("could not remove old gene %s from reaction %s" %
                     (g.id, self.id))
def gene_reaction_rule(self, new_rule):
    """Set the gene reaction rule and keep gene/reaction links in sync.

    The stripped rule is stored on ``self._gene_reaction_rule`` and parsed
    with ``parse_gpr`` to obtain the gene ids it mentions. If parsing
    fails, uppercase AND/OR operators are rewritten to lowercase and the
    rule is assigned again; if that rewrite changes nothing the rule is
    reported as malformed and the gene ids are extracted with the
    module's regular expressions instead.

    Afterwards ``self._genes`` is rebuilt (reusing genes of the attached
    model where they exist, creating and registering new ones otherwise),
    every gene in the new set records this reaction, and genes that
    dropped out of the rule forget it.

    Parameters
    ----------
    new_rule : str
        The rule text.

    NOTE(review): presumably this is the property setter for
    ``gene_reaction_rule`` - the retry below relies on assignment
    re-entering this function.
    """
    # TODO: Do this :)
    if get_context(self):
        warn("Context management not implemented for "
             "gene reaction rules")

    self._gene_reaction_rule = new_rule.strip()
    try:
        _, gene_names = parse_gpr(self._gene_reaction_rule)
    except (SyntaxError, TypeError):
        if "AND" in new_rule or "OR" in new_rule:
            fixed_rule = uppercase_AND.sub("and", new_rule)
            fixed_rule = uppercase_OR.sub("or", fixed_rule)
            # Retry only when the substitution changed something. The
            # substring test above also fires for ids that merely contain
            # "AND"/"OR"; re-assigning an unchanged rule would land here
            # again and never terminate.
            if fixed_rule != new_rule:
                warn("uppercase AND/OR found in rule '%s' for '%s'" %
                     (new_rule, repr(self)))
                self.gene_reaction_rule = fixed_rule
                return
        warn("malformed gene_reaction_rule '%s' for %s" %
             (new_rule, repr(self)))
        # best-effort extraction of the ids from the unparsable rule
        tmp_str = and_or_search.sub('', self._gene_reaction_rule)
        gene_names = set((gpr_clean.sub(' ', tmp_str).split(' ')))
    if '' in gene_names:
        gene_names.remove('')

    old_genes = self._genes
    if self._model is None:
        self._genes = {Gene(i) for i in gene_names}
    else:
        model_genes = self._model.genes
        self._genes = set()
        for gene_id in gene_names:
            if model_genes.has_id(gene_id):
                self._genes.add(model_genes.get_by_id(gene_id))
            else:
                new_gene = Gene(gene_id)
                new_gene._model = self._model
                self._genes.add(new_gene)
                model_genes.append(new_gene)

    # Make the genes aware that it is involved in this reaction
    for g in self._genes:
        g._reaction.add(self)

    # make the old genes aware they are no longer involved in this reaction
    for g in old_genes:
        if g not in self._genes:  # if an old gene is not a new gene
            try:
                g._reaction.remove(self)
            except KeyError:
                # only a missing entry is expected here; anything else
                # should surface instead of being swallowed
                warn("could not remove old gene %s from reaction %s" %
                     (g.id, self.id))
def functional(self):
    """All required enzymes for reaction are functional.

    Returns
    -------
    bool
        True if the gene-protein-reaction (GPR) rule is fulfilled for
        this reaction, or if reaction is not associated to a model,
        otherwise False.
    """
    # without a model there is nothing to evaluate against
    if not self._model:
        return True

    tree, _ = parse_gpr(self.gene_reaction_rule)
    # ids of the genes that are currently flagged as not functional
    knocked_out = {gene.id for gene in self.genes if not gene.functional}
    return eval_gpr(tree, knocked_out)
def model_to_xml(cobra_model, units=True):
    """Build an SBML (level 3 version 1, fbc) element tree for a model.

    Parameters
    ----------
    cobra_model
        The model to serialise. Its id, name, objective direction,
        compartments, metabolites, genes and reactions are read.
    units : bool
        When true, a ``mmol_per_gDW_per_hr`` unit definition is written and
        attached to every flux-bound parameter.

    Returns
    -------
    The root ``sbml`` element.

    Notes
    -----
    Elements are appended in the order the statements run, so the order
    of the sections below determines the order of the children of the
    ``model`` element.
    """
    xml = Element("sbml", xmlns=namespaces["sbml"], level="3", version="1",
                  sboTerm="SBO:0000624")
    set_attrib(xml, "fbc:required", "false")
    xml_model = SubElement(xml, "model")
    set_attrib(xml_model, "fbc:strict", "true")
    if cobra_model.id is not None:
        xml_model.set("id", cobra_model.id)
    if cobra_model.name is not None:
        xml_model.set("name", cobra_model.name)

    # if using units, add in mmol/gdw/hr
    if units:
        unit_def = SubElement(
            SubElement(xml_model, "listOfUnitDefinitions"),
            "unitDefinition", id="mmol_per_gDW_per_hr")
        list_of_units = SubElement(unit_def, "listOfUnits")
        SubElement(list_of_units, "unit", kind="mole", scale="-3",
                   multiplier="1", exponent="1")
        SubElement(list_of_units, "unit", kind="gram", scale="0",
                   multiplier="1", exponent="-1")
        SubElement(list_of_units, "unit", kind="second", scale="0",
                   multiplier="3600", exponent="-1")

    # create the element for the flux objective
    # (obj_list_tmp is rebound from the list element to the single
    # objective element it contains)
    obj_list_tmp = SubElement(xml_model, ns("fbc:listOfObjectives"))
    set_attrib(obj_list_tmp, "fbc:activeObjective", "obj")
    obj_list_tmp = SubElement(obj_list_tmp, ns("fbc:objective"))
    set_attrib(obj_list_tmp, "fbc:id", "obj")
    set_attrib(obj_list_tmp, "fbc:type",
               SHORT_LONG_DIRECTION[cobra_model.objective.direction])
    flux_objectives_list = SubElement(obj_list_tmp,
                                      ns("fbc:listOfFluxObjectives"))

    # create the element for the flux bound parameters
    parameter_list = SubElement(xml_model, "listOfParameters")
    param_attr = {"constant": "true"}
    if units:
        param_attr["units"] = "mmol_per_gDW_per_hr"
    # the most common bounds are the minimum, maximum, and 0
    if len(cobra_model.reactions) > 0:
        min_value = min(cobra_model.reactions.list_attr("lower_bound"))
        max_value = max(cobra_model.reactions.list_attr("upper_bound"))
    else:
        # no reactions to derive the defaults from
        min_value = -1000
        max_value = 1000

    # the three shared parameters that create_bound() hands out by id
    SubElement(parameter_list, "parameter", value=strnum(min_value),
               id="cobra_default_lb", sboTerm="SBO:0000626", **param_attr)
    SubElement(parameter_list, "parameter", value=strnum(max_value),
               id="cobra_default_ub", sboTerm="SBO:0000626", **param_attr)
    SubElement(parameter_list, "parameter", value="0",
               id="cobra_0_bound", sboTerm="SBO:0000626", **param_attr)

    def create_bound(reaction, bound_type):
        """returns the str id of the appropriate bound for the reaction

        The bound will also be created if necessary"""
        value = getattr(reaction, bound_type)
        # the comparisons run in this order, so a value equal to min_value
        # gets the default-lb id even when it is requested as upper bound
        if value == min_value:
            return "cobra_default_lb"
        elif value == 0:
            return "cobra_0_bound"
        elif value == max_value:
            return "cobra_default_ub"
        else:
            # reaction-specific parameter, appended to the shared list
            param_id = "R_" + reaction.id + "_" + bound_type
            SubElement(parameter_list, "parameter", id=param_id,
                       value=strnum(value), sboTerm="SBO:0000625",
                       **param_attr)
            return param_id

    # add in compartments
    compartments_list = SubElement(xml_model, "listOfCompartments")
    compartments = cobra_model.compartments
    for compartment, name in iteritems(compartments):
        SubElement(compartments_list, "compartment", id=compartment,
                   name=name, constant="true")

    # add in metabolites (species ids get the "M_" prefix)
    species_list = SubElement(xml_model, "listOfSpecies")
    for met in cobra_model.metabolites:
        species = SubElement(species_list, "species",
                             id="M_" + met.id,
                             # Useless required SBML parameters
                             constant="false",
                             boundaryCondition="false",
                             hasOnlySubstanceUnits="false")
        set_attrib(species, "name", met.name)
        annotate_sbml_from_cobra(species, met)
        set_attrib(species, "compartment", met.compartment)
        set_attrib(species, "fbc:charge", met.charge)
        set_attrib(species, "fbc:chemicalFormula", met.formula)

    # add in genes
    # (the list element is only created when there is at least one gene;
    # the loop below does not run otherwise)
    if len(cobra_model.genes) > 0:
        genes_list = SubElement(xml_model, GENELIST_TAG)
    for gene in cobra_model.genes:
        # "." in the gene id is replaced by SBML_DOT
        gene_id = gene.id.replace(".", SBML_DOT)
        sbml_gene = SubElement(genes_list, GENE_TAG)
        set_attrib(sbml_gene, "fbc:id", "G_" + gene_id)
        name = gene.name
        if name is None or len(name) == 0:
            # fall back to the id when the gene has no name
            name = gene.id
        set_attrib(sbml_gene, "fbc:label", gene_id)
        set_attrib(sbml_gene, "fbc:name", name)
        annotate_sbml_from_cobra(sbml_gene, gene)

    # add in reactions (reaction ids get the "R_" prefix)
    reactions_list = SubElement(xml_model, "listOfReactions")
    for reaction in cobra_model.reactions:
        id = "R_" + reaction.id
        sbml_reaction = SubElement(
            reactions_list, "reaction",
            id=id,
            # Useless required SBML parameters
            fast="false",
            reversible=str(reaction.lower_bound < 0).lower())
        set_attrib(sbml_reaction, "name", reaction.name)
        annotate_sbml_from_cobra(sbml_reaction, reaction)
        # add in bounds
        set_attrib(sbml_reaction, "fbc:upperFluxBound",
                   create_bound(reaction, "upper_bound"))
        set_attrib(sbml_reaction, "fbc:lowerFluxBound",
                   create_bound(reaction, "lower_bound"))

        # objective coefficient
        if reaction.objective_coefficient != 0:
            objective = SubElement(flux_objectives_list,
                                   ns("fbc:fluxObjective"))
            set_attrib(objective, "fbc:reaction", id)
            set_attrib(objective, "fbc:coefficient",
                       strnum(reaction.objective_coefficient))

        # stoichiometry
        # positive coefficients are products; everything else is written
        # as a reactant with the sign flipped
        reactants = {}
        products = {}
        for metabolite, stoichiomety in iteritems(reaction._metabolites):
            met_id = "M_" + metabolite.id
            if stoichiomety > 0:
                products[met_id] = strnum(stoichiomety)
            else:
                reactants[met_id] = strnum(-stoichiomety)
        if len(reactants) > 0:
            reactant_list = SubElement(sbml_reaction, "listOfReactants")
            # sorted so the species references come out in a stable order
            for met_id, stoichiomety in sorted(iteritems(reactants)):
                SubElement(reactant_list, "speciesReference", species=met_id,
                           stoichiometry=stoichiomety, constant="true")
        if len(products) > 0:
            product_list = SubElement(sbml_reaction, "listOfProducts")
            for met_id, stoichiomety in sorted(iteritems(products)):
                SubElement(product_list, "speciesReference", species=met_id,
                           stoichiometry=stoichiomety, constant="true")

        # gene reaction rule
        gpr = reaction.gene_reaction_rule
        if gpr is not None and len(gpr) > 0:
            # same "." replacement as for the gene ids above
            gpr = gpr.replace(".", SBML_DOT)
            gpr_xml = SubElement(sbml_reaction, GPR_TAG)
            try:
                parsed, _ = parse_gpr(gpr)
                construct_gpr_xml(gpr_xml, parsed.body)
            except Exception as e:
                # report which rule failed, then let the error propagate
                print("failed on '%s' in %s" %
                      (reaction.gene_reaction_rule, repr(reaction)))
                raise e

    return xml
def read_sbml_ec_model(
    filename: str,
    number: float = float,
    # re.Pattern does not exist in py36 so this type hint cannot be added now
    f_replace=F_REPLACE,
    set_missing_bounds: bool = False,
    hardcoded_rev_reactions: bool = True,
    **kwargs,
) -> Model:
    """Create `geckopy.Model` from SBMLDocument.

    Species whose id matches ``PROT_PATTERN`` are added as proteins (with
    the species' initial amount as concentration); all other species are
    added as metabolites. Reactions whose id matches ``PROT_EX_PATTERN``
    are skipped. After the groups are parsed, members of a group named
    ``Protein`` that are not yet proteins are converted to proteins as
    well.

    Parameters
    ----------
    filename: str
        Passed to ``_get_doc_from_filename``; a ``Path`` is converted to
        ``str`` first.
    number: data type of stoichiometry: {float, int}
        In which data type should the stoichiometry be parsed.
    f_replace : dict of replacement functions for id replacement
        ``None`` is treated as an empty dict.
    set_missing_bounds : flag to set missing bounds
        NOTE(review): not referenced in the body of this function.
    hardcoded_rev_reactions: bool
        if reversible reaction to account for proteins being consumed on
        both directions are written explicitly for in the SBML
    **kwargs
        NOTE(review): accepted but not referenced in the body.

    Returns
    -------
    cobra.core.Model

    Raises
    ------
    IOError
        Re-raised unchanged when reading the file fails with it.
    CobraSBMLError
        When reading fails for another reason, when the document has no
        model, when a flux bound parameter is missing or not constant, or
        when an objective refers to a reaction that was not loaded.
    Exception
        When libsbml cannot convert an fbc-v1 document to fbc-v2.
    """
    try:
        fsanitized = str(filename) if isinstance(filename, Path) else filename
        doc = _get_doc_from_filename(fsanitized)
    except IOError as e:
        raise e
    except Exception as original_error:
        # keep the original exception attached as the cause
        raise CobraSBMLError(
            "Something went wrong reading the SBML model. Most likely the SBML"
            " model is not valid. Please check that your model is valid using "
            "the `cobra.io.sbml.validate_sbml_model` function or via the "
            "online validator at http://sbml.org/validator .\n"
            "\t`(model, errors) = validate_sbml_model(filename)`"
            "\nIf the model is valid and cannot be read please open an issue "
            f"at https://github.com/opencobra/cobrapy/issues: {original_error}"
        ) from original_error

    if f_replace is None:
        f_replace = {}

    # SBML model
    model: libsbml.Model = doc.getModel()
    if model is None:
        raise CobraSBMLError("No SBML model detected in file.")
    model_fbc: libsbml.FbcModelPlugin = model.getPlugin("fbc")

    if not model_fbc:
        LOGGER.warning("Model does not contain SBML fbc package information.")
    else:
        if not model_fbc.isSetStrict():
            LOGGER.warning('Loading SBML model without fbc:strict="true"')

        # fbc-v1 (legacy)
        doc_fbc = doc.getPlugin("fbc")  # type: libsbml.FbcSBMLDocumentPlugin
        fbc_version = doc_fbc.getPackageVersion()

        if fbc_version == 1:
            LOGGER.warning("Loading SBML with fbc-v1 (models should be encoded"
                           " using fbc-v2)")
            conversion_properties = libsbml.ConversionProperties()
            conversion_properties.addOption("convert fbc v1 to fbc v2", True,
                                            "Convert FBC-v1 model to FBC-v2")
            result = doc.convert(conversion_properties)
            if result != libsbml.LIBSBML_OPERATION_SUCCESS:
                raise Exception("Conversion of SBML fbc v1 to fbc v2 failed")

    # Model
    model_id = model.getIdAttribute()
    if not libsbml.SyntaxChecker.isValidSBMLSId(model_id):
        LOGGER.error("'%s' is not a valid SBML 'SId'." % model_id)
    geckopy_model = Model(model_id,
                          hardcoded_rev_reactions=hardcoded_rev_reactions)
    geckopy_model.name = model.getName()

    # meta information
    meta = {
        "model.id": model_id,
        "level": model.getLevel(),
        "version": model.getVersion(),
        "packages": [],
    }
    # History
    creators = []
    created = None
    if model.isSetModelHistory():
        history = model.getModelHistory()  # type: libsbml.ModelHistory

        if history.isSetCreatedDate():
            created = history.getCreatedDate()

        for c in history.getListCreators():  # type: libsbml.ModelCreator
            creators.append({
                "familyName":
                c.getFamilyName() if c.isSetFamilyName() else None,
                "givenName":
                c.getGivenName() if c.isSetGivenName() else None,
                "organisation":
                c.getOrganisation() if c.isSetOrganisation() else None,
                "email":
                c.getEmail() if c.isSetEmail() else None,
            })

    meta["creators"] = creators
    meta["created"] = created
    meta["notes"] = _parse_notes_dict(doc)
    meta["annotation"] = _parse_annotations(doc)

    info = "<{}> SBML L{}V{}".format(model_id, model.getLevel(),
                                     model.getVersion())
    packages = {}
    for k in range(doc.getNumPlugins()):
        plugin = doc.getPlugin(k)  # type:libsbml.SBasePlugin
        key, value = plugin.getPackageName(), plugin.getPackageVersion()
        packages[key] = value
        info += ", {}-v{}".format(key, value)
        if key not in ["fbc", "groups", "l3v2extendedmath"]:
            LOGGER.warning(
                "SBML package '%s' not supported by cobrapy, "
                "information is not parsed",
                key,
            )
    meta["info"] = info
    meta["packages"] = packages
    geckopy_model._sbml = meta

    # notes and annotations
    geckopy_model.notes = _parse_notes_dict(model)
    geckopy_model.annotation = _parse_annotations(model)

    # Compartments
    # FIXME: update with new compartments
    compartments = {}
    for compartment in model.getListOfCompartments():
        # compartment is a libsbml.Compartment
        cid = _check_required(compartment, compartment.getIdAttribute(), "id")
        compartments[cid] = compartment.getName()
    geckopy_model.compartments = compartments

    # Species
    metabolites = []
    # proteins that rely on the naming convention "prot_UNIPROT_ID" will be
    # caught here. Those that are annotated by groups membership will be
    # parsed after the groups are processed.
    proteins = []
    boundary_metabolites = []
    if model.getNumSpecies() == 0:
        LOGGER.warning("No metabolites in model")

    for specie in model.getListOfSpecies():  # type: libsbml.Species
        sid = _check_required(specie, specie.getIdAttribute(), "id")
        if f_replace and F_SPECIE in f_replace:
            sid = f_replace[F_SPECIE](sid)

        met = Metabolite(sid)
        met.name = specie.getName()
        met.notes = _parse_notes_dict(specie)
        met.annotation = _parse_annotations(specie)
        met.compartment = specie.getCompartment()
        initial_amount = specie.getInitialAmount()

        specie_fbc = specie.getPlugin("fbc")  # type: libsbml.FbcSpeciesPlugin
        if specie_fbc:
            met.charge = specie_fbc.getCharge()
            met.formula = specie_fbc.getChemicalFormula()
        else:
            # legacy encodings: charge attribute, then notes
            if specie.isSetCharge():
                LOGGER.warning(
                    "Use of the species charge attribute is "
                    "discouraged, use fbc:charge "
                    "instead: %s",
                    specie,
                )
                met.charge = specie.getCharge()
            else:
                if "CHARGE" in met.notes:
                    LOGGER.warning(
                        "Use of CHARGE in the notes element is "
                        "discouraged, use fbc:charge "
                        "instead: %s",
                        specie,
                    )
                    try:
                        met.charge = int(met.notes["CHARGE"])
                    except ValueError:
                        # handle nan, na, NA, ...
                        pass

            if "FORMULA" in met.notes:
                LOGGER.warning(
                    "Use of FORMULA in the notes element is "
                    "discouraged, use fbc:chemicalFormula "
                    "instead: %s",
                    specie,
                )
                met.formula = met.notes["FORMULA"]

        # Detect boundary metabolites
        if specie.getBoundaryCondition() is True:
            boundary_metabolites.append(met)

        # the id decides whether this species is a metabolite or a protein
        if not PROT_PATTERN.match(met.id):
            metabolites.append(met)
        else:
            proteins.append(Protein(met, concentration=initial_amount))

    geckopy_model.add_metabolites(metabolites)
    geckopy_model.add_proteins(proteins)

    # Add exchange reactions for boundary metabolites
    ex_reactions = []
    for met in boundary_metabolites:
        ex_rid = "EX_{}".format(met.id)
        ex_reaction = Reaction(ex_rid)
        ex_reaction.name = ex_rid
        ex_reaction.annotation = {"sbo": SBO_EXCHANGE_REACTION}
        ex_reaction.lower_bound = config.lower_bound
        ex_reaction.upper_bound = config.upper_bound
        LOGGER.warning("Adding exchange reaction %s with default bounds "
                       "for boundary metabolite: %s." %
                       (ex_reaction.id, met.id))
        # species is reactant
        ex_reaction.add_metabolites({met: -1})
        ex_reactions.append(ex_reaction)
    geckopy_model.add_reactions(ex_reactions)

    # Genes
    if model_fbc:
        for gp in model_fbc.getListOfGeneProducts():
            # gp is a libsbml.GeneProduct
            gid = _check_required(gp, gp.getIdAttribute(), "id")
            if f_replace and F_GENE in f_replace:
                gid = f_replace[F_GENE](gid)
            cobra_gene = Gene(gid)
            cobra_gene.name = gp.getName()
            if cobra_gene.name is None:
                cobra_gene.name = gid
            cobra_gene.annotation = _parse_annotations(gp)
            cobra_gene.notes = _parse_notes_dict(gp)

            geckopy_model.genes.append(cobra_gene)
    else:
        for cobra_reaction in model.getListOfReactions():
            # cobra_reaction is a libsbml.Reaction here
            # fallback to notes information
            notes = _parse_notes_dict(cobra_reaction)
            if "GENE ASSOCIATION" in notes:
                gpr = notes["GENE ASSOCIATION"]
            elif "GENE_ASSOCIATION" in notes:
                gpr = notes["GENE_ASSOCIATION"]
            else:
                gpr = ""

            if len(gpr) > 0:
                # NOTE(review): these are plain substring replacements, so
                # an id that contains "or"/"and" is split as well - confirm
                # whether that is acceptable for the legacy models targeted
                gpr = gpr.replace("(", ";")
                gpr = gpr.replace(")", ";")
                gpr = gpr.replace("or", ";")
                gpr = gpr.replace("and", ";")
                # Interaction of the above replacements can lead to multiple
                # ;, which results in empty gids
                gids = [t.strip() for t in gpr.split(";")]
                gids = set(gids).difference({""})

                # create missing genes
                for gid in gids:
                    if f_replace and F_GENE in f_replace:
                        gid = f_replace[F_GENE](gid)

                    if gid not in geckopy_model.genes:
                        cobra_gene = Gene(gid)
                        cobra_gene.name = gid
                        geckopy_model.genes.append(cobra_gene)

    # GPR rules
    def process_association(ass):
        """Recursively convert gpr association to a gpr string.

        Defined as inline functions to not pass the replacement dict around.
        """
        if ass.isFbcOr():
            return " ".join([
                "(",
                " or ".join(
                    process_association(c)
                    for c in ass.getListOfAssociations()),
                ")",
            ])
        elif ass.isFbcAnd():
            return " ".join([
                "(",
                " and ".join(
                    process_association(c)
                    for c in ass.getListOfAssociations()),
                ")",
            ])
        elif ass.isGeneProductRef():
            gid = ass.getGeneProduct()
            if f_replace and F_GENE in f_replace:
                return f_replace[F_GENE](gid)
            else:
                return gid

    # Reactions
    missing_bounds = False
    reactions = []
    if model.getNumReactions() == 0:
        LOGGER.warning("No reactions in model")

    for reaction in model.getListOfReactions():  # type: libsbml.Reaction
        rid = _check_required(reaction, reaction.getIdAttribute(), "id")
        # proteins are parsed based on Species, prot exchanges are ignored
        if PROT_EX_PATTERN.search(rid):
            continue
        if f_replace and F_REACTION in f_replace:
            rid = f_replace[F_REACTION](rid)
        cobra_reaction = Reaction(rid)
        cobra_reaction.name = reaction.getName()
        cobra_reaction.annotation = _parse_annotations(reaction)
        cobra_reaction.notes = _parse_notes_dict(reaction)

        # set bounds
        p_ub, p_lb = None, None
        r_fbc = reaction.getPlugin("fbc")  # type: libsbml.FbcReactionPlugin
        if r_fbc:
            # bounds in fbc
            lb_id = r_fbc.getLowerFluxBound()
            if lb_id:
                p_lb = model.getParameter(lb_id)  # type: libsbml.Parameter
                if p_lb and p_lb.getConstant() and (p_lb.getValue()
                                                    is not None):
                    cobra_reaction.lower_bound = p_lb.getValue()
                else:
                    raise CobraSBMLError("No constant bound '%s' for "
                                         "reaction: %s" % (p_lb, reaction))

            ub_id = r_fbc.getUpperFluxBound()
            if ub_id:
                p_ub = model.getParameter(ub_id)  # type: libsbml.Parameter
                if p_ub and p_ub.getConstant() and (p_ub.getValue()
                                                    is not None):
                    cobra_reaction.upper_bound = p_ub.getValue()
                else:
                    raise CobraSBMLError("No constant bound '%s' for "
                                         "reaction: %s" % (p_ub, reaction))

        elif reaction.isSetKineticLaw():
            # some legacy models encode bounds in kinetic laws
            klaw = reaction.getKineticLaw()  # type: libsbml.KineticLaw
            p_lb = klaw.getParameter("LOWER_BOUND")
            if p_lb:
                cobra_reaction.lower_bound = p_lb.getValue()
            p_ub = klaw.getParameter("UPPER_BOUND")
            if p_ub:
                cobra_reaction.upper_bound = p_ub.getValue()

            if p_ub is not None or p_lb is not None:
                LOGGER.warning(
                    "Encoding LOWER_BOUND and UPPER_BOUND in "
                    "KineticLaw is discouraged, "
                    "use fbc:fluxBounds instead: %s",
                    reaction,
                )

        if p_lb is None:
            missing_bounds = True
            lower_bound = config.lower_bound
            cobra_reaction.lower_bound = lower_bound
            LOGGER.warning(
                "Missing lower flux bound set to '%s' for "
                " reaction: '%s'",
                lower_bound,
                reaction,
            )

        if p_ub is None:
            missing_bounds = True
            upper_bound = config.upper_bound
            cobra_reaction.upper_bound = upper_bound
            LOGGER.warning(
                "Missing upper flux bound set to '%s' for "
                " reaction: '%s'",
                upper_bound,
                reaction,
            )

        # add reaction
        reactions.append(cobra_reaction)

        # parse equation: reactants are subtracted, products are added
        stoichiometry = defaultdict(lambda: 0)
        for sref in reaction.getListOfReactants():
            # sref is a libsbml.SpeciesReference
            sid = _check_required(sref, sref.getSpecies(), "species")

            if f_replace and F_SPECIE in f_replace:
                sid = f_replace[F_SPECIE](sid)
            stoichiometry[sid] -= number(
                _check_required(sref, sref.getStoichiometry(),
                                "stoichiometry"))

        for sref in reaction.getListOfProducts():
            # sref is a libsbml.SpeciesReference
            sid = _check_required(sref, sref.getSpecies(), "species")

            if f_replace and F_SPECIE in f_replace:
                sid = f_replace[F_SPECIE](sid)
            stoichiometry[sid] += number(
                _check_required(sref, sref.getStoichiometry(),
                                "stoichiometry"))

        # convert to metabolite objects
        object_stoichiometry = {}
        for met_id in stoichiometry:
            # a species id is either a protein or a metabolite of the model
            target_set = (geckopy_model.proteins
                          if met_id in geckopy_model.proteins else
                          geckopy_model.metabolites)
            metabolite = target_set.get_by_id(met_id)
            object_stoichiometry[metabolite] = stoichiometry[met_id]
        cobra_reaction.add_metabolites(object_stoichiometry)

        # GPR
        if r_fbc:
            gpr = ""
            gpa = r_fbc.getGeneProductAssociation()
            if gpa is not None:
                association = gpa.getAssociation()
                gpr = process_association(association)
        else:
            # fallback to notes information
            notes = cobra_reaction.notes
            if "GENE ASSOCIATION" in notes:
                gpr = notes["GENE ASSOCIATION"]
            elif "GENE_ASSOCIATION" in notes:
                gpr = notes["GENE_ASSOCIATION"]
            else:
                gpr = ""

            if len(gpr) > 0:
                LOGGER.warning(
                    "Use of GENE ASSOCIATION or GENE_ASSOCIATION "
                    "in the notes element is discouraged, use "
                    "fbc:gpr instead: %s",
                    reaction,
                )
            if f_replace and F_GENE in f_replace:
                gpr = " ".join(f_replace[F_GENE](t) for t in gpr.split(" "))

        # remove outside parenthesis, if any
        if gpr.startswith("(") and gpr.endswith(")"):
            try:
                # only strip when the inner part still parses
                parse_gpr(gpr[1:-1].strip())
                gpr = gpr[1:-1].strip()
            except (SyntaxError, TypeError) as e:
                LOGGER.warning(
                    f"Removing parenthesis from gpr {gpr} leads to "
                    f"an error, so keeping parenthesis, error: {e}",
                )

        cobra_reaction.gene_reaction_rule = gpr

    geckopy_model.add_reactions(reactions)

    # Objective
    obj_direction = "max"
    coefficients = {}
    if model_fbc:
        obj_list = model_fbc.getListOfObjectives()
        if obj_list is None:
            LOGGER.warning("listOfObjectives element not found")
        elif obj_list.size() == 0:
            LOGGER.warning("No objective in listOfObjectives")
        elif not obj_list.getActiveObjective():
            LOGGER.warning("No active objective in listOfObjectives")
        else:
            obj_id = obj_list.getActiveObjective()
            obj = model_fbc.getObjective(obj_id)  # type: libsbml.Objective
            obj_direction = LONG_SHORT_DIRECTION[obj.getType()]

            for flux_obj in obj.getListOfFluxObjectives():
                # flux_obj is a libsbml.FluxObjective
                rid = flux_obj.getReaction()
                if f_replace and F_REACTION in f_replace:
                    rid = f_replace[F_REACTION](rid)
                try:
                    objective_reaction = geckopy_model.reactions.get_by_id(
                        rid)
                except KeyError:
                    raise CobraSBMLError("Objective reaction '%s' "
                                         "not found" % rid)
                try:
                    coefficients[objective_reaction] = number(
                        flux_obj.getCoefficient())
                except ValueError as e:
                    LOGGER.warning(str(e))
    else:
        # some legacy models encode objective coefficients in kinetic laws
        for reaction in model.getListOfReactions():  # type: libsbml.Reaction
            if reaction.isSetKineticLaw():
                klaw = reaction.getKineticLaw()  # type: libsbml.KineticLaw
                p_oc = klaw.getParameter("OBJECTIVE_COEFFICIENT")
                if p_oc:
                    rid = _check_required(reaction, reaction.getIdAttribute(),
                                          "id")
                    if f_replace and F_REACTION in f_replace:
                        rid = f_replace[F_REACTION](rid)
                    try:
                        objective_reaction = geckopy_model.reactions.get_by_id(
                            rid)
                    except KeyError:
                        # format the id into the message (it used to be
                        # passed as a second exception argument)
                        raise CobraSBMLError("Objective reaction '%s' "
                                             "not found" % rid)
                    try:
                        coefficients[objective_reaction] = number(
                            p_oc.getValue())
                    except ValueError as e:
                        LOGGER.warning(str(e))

                    LOGGER.warning(
                        "Encoding OBJECTIVE_COEFFICIENT in "
                        "KineticLaw is discouraged, "
                        "use fbc:fluxObjective "
                        "instead: %s",
                        reaction,
                    )

    if len(coefficients) == 0:
        LOGGER.error("No objective coefficients in model. Unclear what should "
                     "be optimized")
    set_objective(geckopy_model, coefficients)
    geckopy_model.solver.objective.direction = obj_direction

    # parse groups
    model_groups = model.getPlugin("groups")  # type: libsbml.GroupsModelPlugin
    groups = []
    if model_groups:
        # calculate hashmaps to lookup objects in O(1)
        sid_map = {}
        metaid_map = {}
        for obj_list in [
                model.getListOfCompartments(),
                model.getListOfSpecies(),
                model.getListOfReactions(),
                model_groups.getListOfGroups(),
        ]:
            for sbase in obj_list:  # type: libsbml.SBase
                if sbase.isSetId():
                    sid_map[sbase.getIdAttribute()] = sbase
                if sbase.isSetMetaId():
                    metaid_map[sbase.getMetaId()] = sbase

        # create groups
        for group in model_groups.getListOfGroups():  # type: libsbml.Group
            gid = _check_required(group, group.getIdAttribute(), "id")
            if f_replace and F_GROUP in f_replace:
                gid = f_replace[F_GROUP](gid)
            cobra_group = Group(gid)
            cobra_group.name = group.getName()
            if group.isSetKind():
                cobra_group.kind = group.getKindAsString()
            cobra_group.annotation = _parse_annotations(group)
            cobra_group.notes = _parse_notes_dict(group)

            cobra_members = []
            for member in group.getListOfMembers():  # type: libsbml.Member
                if member.isSetIdRef():
                    obj = sid_map[member.getIdRef()]
                elif member.isSetMetaIdRef():
                    obj = metaid_map[member.getMetaIdRef()]
                else:
                    # nothing to resolve; without this branch `obj` would
                    # be unbound or left over from the previous member
                    LOGGER.warning(
                        "Member %s of group %s has neither idRef nor "
                        "metaIdRef, skipping it",
                        member,
                        group,
                    )
                    continue

                typecode = obj.getTypeCode()
                obj_id = _check_required(obj, obj.getIdAttribute(), "id")

                # id replacements
                cobra_member = None
                if typecode == libsbml.SBML_SPECIES:
                    if f_replace and F_SPECIE in f_replace:
                        obj_id = f_replace[F_SPECIE](obj_id)
                    try:
                        cobra_member = geckopy_model.metabolites.get_by_id(
                            obj_id)
                    except KeyError:
                        # not a metabolite, so look it up as a protein
                        cobra_member = geckopy_model.proteins.get_by_id(obj_id)
                elif typecode == libsbml.SBML_REACTION:
                    if f_replace and F_REACTION in f_replace:
                        obj_id = f_replace[F_REACTION](obj_id)
                    cobra_member = geckopy_model.reactions.get_by_id(obj_id)
                elif typecode == libsbml.SBML_FBC_GENEPRODUCT:
                    if f_replace and F_GENE in f_replace:
                        obj_id = f_replace[F_GENE](obj_id)
                    cobra_member = geckopy_model.genes.get_by_id(obj_id)
                else:
                    LOGGER.warning("Member %s could not be added to group %s. "
                                   "unsupported type code: "
                                   "%s" % (member, group, typecode))

                if cobra_member:
                    cobra_members.append(cobra_member)

            cobra_group.add_members(cobra_members)
            groups.append(cobra_group)
    else:
        # parse deprecated subsystems on reactions
        groups_dict = {}
        for cobra_reaction in geckopy_model.reactions:
            if "SUBSYSTEM" in cobra_reaction.notes:
                g_name = cobra_reaction.notes["SUBSYSTEM"]
                if g_name in groups_dict:
                    groups_dict[g_name].append(cobra_reaction)
                else:
                    groups_dict[g_name] = [cobra_reaction]

        for gid, cobra_members in groups_dict.items():
            if f_replace and F_GROUP in f_replace:
                gid = f_replace[F_GROUP](gid)
            cobra_group = Group(gid, name=gid, kind="collection")
            cobra_group.add_members(cobra_members)
            groups.append(cobra_group)

    geckopy_model.add_groups(groups)

    # now add everything under group Proteins to model.proteins if it was not
    # already added based on naming conventions
    if geckopy_model.groups.query("Protein"):
        g_proteins = geckopy_model.groups.Protein.members.copy()
        # remember, per protein, its coefficient in every reaction it is in
        g_proteins = {
            prot: {reac: reac.metabolites[prot]
                   for reac in prot.reactions}
            for prot in g_proteins if prot not in geckopy_model.proteins
        }

        if g_proteins:
            geckopy_model.remove_metabolites(g_proteins.keys())
            geckopy_model.add_proteins([Protein(prot) for prot in g_proteins])
            for prot, reactions in g_proteins.items():
                for reac, stoich in reactions.items():
                    # reverse the (negative) stoichiometry coefficient to kcat
                    # we expect that Proteins that are identified only by their
                    # group are respecting the specification and do form part
                    # of reactions
                    # TODO: provide options to tune this behavior
                    reac.add_protein(prot.id, -1 / (stoich * 3600))

    # general hint for missing flux bounds
    if missing_bounds:
        LOGGER.warning(
            "Missing flux bounds on reactions set to default bounds. "
            "As best practise and to avoid confusion flux bounds "
            "should be set explicitly on all reactions.")

    return geckopy_model