Example #1
0
def find_gene_knockout_reactions(cobra_model, gene_list,
                                 compiled_gene_reaction_rules=None):
    """identify reactions which will be disabled when the genes are knocked out

    cobra_model: :class:`~cobra.core.Model.Model`

    gene_list: iterable of :class:`~cobra.core.Gene.Gene`

    compiled_gene_reaction_rules: dict of {reaction_id: compiled_string}
        If provided, this gives pre-compiled gene_reaction_rule strings.
        The compiled rule strings can be evaluated much faster. If a rule
        is not provided, the regular expression evaluation will be used.
        Because not all gene_reaction_rule strings can be evaluated, this
        dict must exclude any rules which can not be used with eval.

    """
    potential_reactions = set()
    for gene in gene_list:
        if isinstance(gene, string_types):
            gene = cobra_model.genes.get_by_id(gene)
        potential_reactions.update(gene._reaction)
    gene_set = {str(i) for i in gene_list}
    if compiled_gene_reaction_rules is None:
        compiled_gene_reaction_rules = {r: parse_gpr(r.gene_reaction_rule)[0]
                                        for r in potential_reactions}

    return [r for r in potential_reactions
            if not eval_gpr(compiled_gene_reaction_rules[r], gene_set)]
Example #2
0
def find_gene_knockout_reactions(cobra_model,
                                 gene_list,
                                 compiled_gene_reaction_rules=None):
    """identify reactions which will be disabled when the genes are knocked out

    cobra_model: :class:`~cobra.core.Model.Model`

    gene_list: iterable of :class:`~cobra.core.Gene.Gene`

    compiled_gene_reaction_rules: dict of {reaction_id: compiled_string}
        If provided, this gives pre-compiled gene_reaction_rule strings.
        The compiled rule strings can be evaluated much faster. If a rule
        is not provided, the regular expression evaluation will be used.
        Because not all gene_reaction_rule strings can be evaluated, this
        dict must exclude any rules which can not be used with eval.

    """
    potential_reactions = set()
    for gene in gene_list:
        if isinstance(gene, string_types):
            gene = cobra_model.genes.get_by_id(gene)
        potential_reactions.update(gene._reaction)
    gene_set = {str(i) for i in gene_list}
    if compiled_gene_reaction_rules is None:
        compiled_gene_reaction_rules = {
            r: parse_gpr(r.gene_reaction_rule)[0]
            for r in potential_reactions
        }

    return [
        r for r in potential_reactions
        if not eval_gpr(compiled_gene_reaction_rules[r], gene_set)
    ]
def validate_model(model):
    errors = []
    warnings = []
    errors.extend(check_reaction_bounds(model))
    errors.extend(check_metabolite_compartment_formula(model))
    # test gpr
    for reaction in model.reactions:
        try:
            parse_gpr(reaction.gene_reaction_rule)
        except SyntaxError:
            errors.append("reaction '%s' has invalid gpr '%s'" %
                          (reaction.id, reaction.gene_reaction_rule))
    # test mass balance
    for reaction, balance in iteritems(check_mass_balance(model)):
        # check if it's a demand or exchange reaction
        if len(reaction.metabolites) == 1:
            warnings.append("reaction '%s' is not balanced. Should it "
                            "be annotated as a demand or exchange "
                            "reaction?" % reaction.id)
        elif "biomass" in reaction.id.lower():
            warnings.append("reaction '%s' is not balanced. Should it "
                            "be annotated as a biomass reaction?" %
                            reaction.id)
        else:
            warnings.append("reaction '%s' is not balanced for %s" %
                            (reaction.id, ", ".join(sorted(balance))))

    # try solving
    solution = model.optimize(solver='esolver')
    if solution.status != 'optimal':
        errors.append('model can not be solved (status "%s")' %
                      solution.status)
        return {'errors': errors, 'warnings': warnings}

    # if there is no objective, then we know why the objective was low
    if len(model.objective.variables) == 0:
        warnings.append('Model has no objective function')
    elif solution.f <= 0:
        warnings.append('Model can not produce nonzero biomass')
    elif solution.f <= 1e-3:
        warnings.append('Biomass flux %s too low' % str(solution.f))

    # Length is 2 for a single objective in cobrapy 0.6.x
    if len(model.objective.variables) > 2:
        warnings.append('Model should only have one reaction as the objective')

    return {'errors': errors, 'warnings': warnings, 'objective': solution.f}
Example #4
0
def build_model_reaction_proteins(cobra_gpr_str):
    #DUMB VERSION
    ast = parse_gpr(cobra_gpr_str)
    #print(ast)
    #print(len(ast))
    #print(ast[0].body)
    proteins = []
    for a in set(ast[1]):
        proteins.append([a])
    return proteins
Example #5
0
    def gene_name_reaction_rule(self):
        """Display gene_reaction_rule with names intead.

        Do NOT use this string for computation. It is intended to give a
        representation of the rule using more familiar gene names instead of
        the often cryptic ids.

        """
        names = {i.id: i.name for i in self._genes}
        ast = parse_gpr(self._gene_reaction_rule)[0]
        return ast2str(ast, names=names)
Example #6
0
    def gene_name_reaction_rule(self):
        """Display gene_reaction_rule with names intead.

        Do NOT use this string for computation. It is intended to give a
        representation of the rule using more familiar gene names instead of
        the often cryptic ids.

        """
        names = {i.id: i.name for i in self._genes}
        ast = parse_gpr(self._gene_reaction_rule)[0]
        return ast2str(ast, names=names)
Example #7
0
def get_compiled_gene_reaction_rules(cobra_model):
    """Generates a dict of compiled gene_reaction_rules

    Any gene_reaction_rule expressions which cannot be compiled or do not
    evaluate after compiling will be excluded. The result can be used in the
    find_gene_knockout_reactions function to speed up evaluation of these
    rules.

    """
    return {r: parse_gpr(r.gene_reaction_rule)[0]
            for r in cobra_model.reactions}
Example #8
0
def reaction_confidence(rule, conf_genes):
    """Calculates the confidence for the reaction based on a gene-reaction
    rule.

    Args:
        rule (str): A gene-reaction rule. For instance "A and B".
        conf_genes (dict): A str->int map denoting the mapping of gene IDs
            to expression confidence values. Allowed confidence values are -1
            (absent/do not include), 0 (unknown), 1 (low confidence),
            2 (medium confidence) and 3 (high confidence).
    """
    ast_rule, _ = parse_gpr(rule)
    return safe_eval_gpr(ast_rule, conf_genes)
Example #9
0
def get_compiled_gene_reaction_rules(cobra_model):
    """Generates a dict of compiled gene_reaction_rules

    Any gene_reaction_rule expressions which cannot be compiled or do not
    evaluate after compiling will be excluded. The result can be used in the
    find_gene_knockout_reactions function to speed up evaluation of these
    rules.

    """
    return {
        r: parse_gpr(r.gene_reaction_rule)[0]
        for r in cobra_model.reactions
    }
Example #10
0
    def gene_reaction_rule(self, new_rule):

        # TODO: Do this :)
        if get_context(self):
            warn("Context management not implemented for "
                 "gene reaction rules")

        self._gene_reaction_rule = new_rule.strip()
        try:
            _, gene_names = parse_gpr(self._gene_reaction_rule)
        except (SyntaxError, TypeError) as e:
            if "AND" in new_rule or "OR" in new_rule:
                warn("uppercase AND/OR found in rule '%s' for '%s'" %
                     (new_rule, repr(self)))
                new_rule = uppercase_AND.sub("and", new_rule)
                new_rule = uppercase_OR.sub("or", new_rule)
                self.gene_reaction_rule = new_rule
                return
            warn("malformed gene_reaction_rule '%s' for %s" %
                 (new_rule, repr(self)))
            tmp_str = and_or_search.sub("", self._gene_reaction_rule)
            gene_names = set((gpr_clean.sub(" ", tmp_str).split(" ")))
        if "" in gene_names:
            gene_names.remove("")
        old_genes = self._genes
        if self._model is None:
            self._genes = {Gene(i) for i in gene_names}
        else:
            model_genes = self._model.genes
            self._genes = set()
            for id in gene_names:
                if model_genes.has_id(id):
                    self._genes.add(model_genes.get_by_id(id))
                else:
                    new_gene = Gene(id)
                    new_gene._model = self._model
                    self._genes.add(new_gene)
                    model_genes.append(new_gene)

        # Make the genes aware that it is involved in this reaction
        for g in self._genes:
            g._reaction.add(self)

        # make the old genes aware they are no longer involved in this reaction
        for g in old_genes:
            if g not in self._genes:  # if an old gene is not a new gene
                try:
                    g._reaction.remove(self)
                except KeyError:
                    warn("could not remove old gene %s from reaction %s" %
                         (g.id, self.id))
Example #11
0
    def gene_reaction_rule(self, new_rule):

        # TODO: Do this :)
        if get_context(self):
            warn("Context management not implemented for "
                 "gene reaction rules")

        self._gene_reaction_rule = new_rule.strip()
        try:
            _, gene_names = parse_gpr(self._gene_reaction_rule)
        except (SyntaxError, TypeError) as e:
            if "AND" in new_rule or "OR" in new_rule:
                warn("uppercase AND/OR found in rule '%s' for '%s'" %
                     (new_rule, repr(self)))
                new_rule = uppercase_AND.sub("and", new_rule)
                new_rule = uppercase_OR.sub("or", new_rule)
                self.gene_reaction_rule = new_rule
                return
            warn("malformed gene_reaction_rule '%s' for %s" %
                 (new_rule, repr(self)))
            tmp_str = and_or_search.sub('', self._gene_reaction_rule)
            gene_names = set((gpr_clean.sub(' ', tmp_str).split(' ')))
        if '' in gene_names:
            gene_names.remove('')
        old_genes = self._genes
        if self._model is None:
            self._genes = {Gene(i) for i in gene_names}
        else:
            model_genes = self._model.genes
            self._genes = set()
            for id in gene_names:
                if model_genes.has_id(id):
                    self._genes.add(model_genes.get_by_id(id))
                else:
                    new_gene = Gene(id)
                    new_gene._model = self._model
                    self._genes.add(new_gene)
                    model_genes.append(new_gene)

        # Make the genes aware that it is involved in this reaction
        for g in self._genes:
            g._reaction.add(self)

        # make the old genes aware they are no longer involved in this reaction
        for g in old_genes:
            if g not in self._genes:  # if an old gene is not a new gene
                try:
                    g._reaction.remove(self)
                except:
                    warn("could not remove old gene %s from reaction %s" %
                         (g.id, self.id))
Example #12
0
    def functional(self):
        """All required enzymes for reaction are functional.

        Returns
        -------
        bool
            True if the gene-protein-reaction (GPR) rule is fulfilled for
            this reaction, or if reaction is not associated to a model,
            otherwise False.
        """
        if self._model:
            tree, _ = parse_gpr(self.gene_reaction_rule)
            return eval_gpr(tree, {gene.id for gene in self.genes if
                                   not gene.functional})
        return True
Example #13
0
    def functional(self):
        """All required enzymes for reaction are functional.

        Returns
        -------
        bool
            True if the gene-protein-reaction (GPR) rule is fulfilled for
            this reaction, or if reaction is not associated to a model,
            otherwise False.
        """
        if self._model:
            tree, _ = parse_gpr(self.gene_reaction_rule)
            return eval_gpr(tree, {gene.id for gene in self.genes if
                                   not gene.functional})
        return True
Example #14
0
def model_to_xml(cobra_model, units=True):
    xml = Element("sbml", xmlns=namespaces["sbml"], level="3", version="1",
                  sboTerm="SBO:0000624")
    set_attrib(xml, "fbc:required", "false")
    xml_model = SubElement(xml, "model")
    set_attrib(xml_model, "fbc:strict", "true")
    if cobra_model.id is not None:
        xml_model.set("id", cobra_model.id)
    if cobra_model.name is not None:
        xml_model.set("name", cobra_model.name)

    # if using units, add in mmol/gdw/hr
    if units:
        unit_def = SubElement(
            SubElement(xml_model, "listOfUnitDefinitions"),
            "unitDefinition", id="mmol_per_gDW_per_hr")
        list_of_units = SubElement(unit_def, "listOfUnits")
        SubElement(list_of_units, "unit", kind="mole", scale="-3",
                   multiplier="1", exponent="1")
        SubElement(list_of_units, "unit", kind="gram", scale="0",
                   multiplier="1", exponent="-1")
        SubElement(list_of_units, "unit", kind="second", scale="0",
                   multiplier="3600", exponent="-1")

    # create the element for the flux objective
    obj_list_tmp = SubElement(xml_model, ns("fbc:listOfObjectives"))
    set_attrib(obj_list_tmp, "fbc:activeObjective", "obj")
    obj_list_tmp = SubElement(obj_list_tmp, ns("fbc:objective"))
    set_attrib(obj_list_tmp, "fbc:id", "obj")
    set_attrib(obj_list_tmp, "fbc:type",
               SHORT_LONG_DIRECTION[cobra_model.objective.direction])
    flux_objectives_list = SubElement(obj_list_tmp,
                                      ns("fbc:listOfFluxObjectives"))

    # create the element for the flux bound parameters
    parameter_list = SubElement(xml_model, "listOfParameters")
    param_attr = {"constant": "true"}
    if units:
        param_attr["units"] = "mmol_per_gDW_per_hr"
    # the most common bounds are the minimum, maximum, and 0
    if len(cobra_model.reactions) > 0:
        min_value = min(cobra_model.reactions.list_attr("lower_bound"))
        max_value = max(cobra_model.reactions.list_attr("upper_bound"))
    else:
        min_value = -1000
        max_value = 1000

    SubElement(parameter_list, "parameter", value=strnum(min_value),
               id="cobra_default_lb", sboTerm="SBO:0000626", **param_attr)
    SubElement(parameter_list, "parameter", value=strnum(max_value),
               id="cobra_default_ub", sboTerm="SBO:0000626", **param_attr)
    SubElement(parameter_list, "parameter", value="0",
               id="cobra_0_bound", sboTerm="SBO:0000626", **param_attr)

    def create_bound(reaction, bound_type):
        """returns the str id of the appropriate bound for the reaction

        The bound will also be created if necessary"""
        value = getattr(reaction, bound_type)
        if value == min_value:
            return "cobra_default_lb"
        elif value == 0:
            return "cobra_0_bound"
        elif value == max_value:
            return "cobra_default_ub"
        else:
            param_id = "R_" + reaction.id + "_" + bound_type
            SubElement(parameter_list, "parameter", id=param_id,
                       value=strnum(value), sboTerm="SBO:0000625",
                       **param_attr)
            return param_id

    # add in compartments
    compartments_list = SubElement(xml_model, "listOfCompartments")
    compartments = cobra_model.compartments
    for compartment, name in iteritems(compartments):
        SubElement(compartments_list, "compartment", id=compartment, name=name,
                   constant="true")

    # add in metabolites
    species_list = SubElement(xml_model, "listOfSpecies")
    for met in cobra_model.metabolites:
        species = SubElement(species_list, "species",
                             id="M_" + met.id,
                             # Useless required SBML parameters
                             constant="false",
                             boundaryCondition="false",
                             hasOnlySubstanceUnits="false")
        set_attrib(species, "name", met.name)
        annotate_sbml_from_cobra(species, met)
        set_attrib(species, "compartment", met.compartment)
        set_attrib(species, "fbc:charge", met.charge)
        set_attrib(species, "fbc:chemicalFormula", met.formula)

    # add in genes
    if len(cobra_model.genes) > 0:
        genes_list = SubElement(xml_model, GENELIST_TAG)
        for gene in cobra_model.genes:
            gene_id = gene.id.replace(".", SBML_DOT)
            sbml_gene = SubElement(genes_list, GENE_TAG)
            set_attrib(sbml_gene, "fbc:id", "G_" + gene_id)
            name = gene.name
            if name is None or len(name) == 0:
                name = gene.id
            set_attrib(sbml_gene, "fbc:label", gene_id)
            set_attrib(sbml_gene, "fbc:name", name)
            annotate_sbml_from_cobra(sbml_gene, gene)

    # add in reactions
    reactions_list = SubElement(xml_model, "listOfReactions")
    for reaction in cobra_model.reactions:
        id = "R_" + reaction.id
        sbml_reaction = SubElement(
            reactions_list, "reaction",
            id=id,
            # Useless required SBML parameters
            fast="false",
            reversible=str(reaction.lower_bound < 0).lower())
        set_attrib(sbml_reaction, "name", reaction.name)
        annotate_sbml_from_cobra(sbml_reaction, reaction)
        # add in bounds
        set_attrib(sbml_reaction, "fbc:upperFluxBound",
                   create_bound(reaction, "upper_bound"))
        set_attrib(sbml_reaction, "fbc:lowerFluxBound",
                   create_bound(reaction, "lower_bound"))

        # objective coefficient
        if reaction.objective_coefficient != 0:
            objective = SubElement(flux_objectives_list,
                                   ns("fbc:fluxObjective"))
            set_attrib(objective, "fbc:reaction", id)
            set_attrib(objective, "fbc:coefficient",
                       strnum(reaction.objective_coefficient))

        # stoichiometry
        reactants = {}
        products = {}
        for metabolite, stoichiomety in iteritems(reaction._metabolites):
            met_id = "M_" + metabolite.id
            if stoichiomety > 0:
                products[met_id] = strnum(stoichiomety)
            else:
                reactants[met_id] = strnum(-stoichiomety)
        if len(reactants) > 0:
            reactant_list = SubElement(sbml_reaction, "listOfReactants")
            for met_id, stoichiomety in sorted(iteritems(reactants)):
                SubElement(reactant_list, "speciesReference", species=met_id,
                           stoichiometry=stoichiomety, constant="true")
        if len(products) > 0:
            product_list = SubElement(sbml_reaction, "listOfProducts")
            for met_id, stoichiomety in sorted(iteritems(products)):
                SubElement(product_list, "speciesReference", species=met_id,
                           stoichiometry=stoichiomety, constant="true")

        # gene reaction rule
        gpr = reaction.gene_reaction_rule
        if gpr is not None and len(gpr) > 0:
            gpr = gpr.replace(".", SBML_DOT)
            gpr_xml = SubElement(sbml_reaction, GPR_TAG)
            try:
                parsed, _ = parse_gpr(gpr)
                construct_gpr_xml(gpr_xml, parsed.body)
            except Exception as e:
                print("failed on '%s' in %s" %
                      (reaction.gene_reaction_rule, repr(reaction)))
                raise e

    return xml
Example #15
0
def read_sbml_ec_model(
    filename: str,
    number: float = float,
    # re.Pattern does not exist in py36 so this type hint cannot be added now
    f_replace=F_REPLACE,
    set_missing_bounds: bool = False,
    hardcoded_rev_reactions: bool = True,
    **kwargs,
) -> Model:
    """Create `geckopy.Model` from SBMLDocument.

    Parameters
    ----------
    filename: str
    number: data type of stoichiometry: {float, int}
        In which data type should the stoichiometry be parsed.
    f_replace : dict of replacement functions for id replacement
    set_missing_bounds : flag to set missing bounds
    hardcoded_rev_reactions: bool
        if reversible reaction to account for proteins being consumed on both
        directions are written explicitly for in the SBML

    Returns
    -------
    cobra.core.Model

    """
    try:
        fsanitized = str(filename) if isinstance(filename, Path) else filename
        doc = _get_doc_from_filename(fsanitized)
    except IOError as e:
        raise e
    except Exception as original_error:
        raise CobraSBMLError(
            "Something went wrong reading the SBML model. Most likely the SBML"
            " model is not valid. Please check that your model is valid using "
            "the `cobra.io.sbml.validate_sbml_model` function or via the "
            "online validator at http://sbml.org/validator .\n"
            "\t`(model, errors) = validate_sbml_model(filename)`"
            "\nIf the model is valid and cannot be read please open an issue "
            f"at https://github.com/opencobra/cobrapy/issues: {original_error}"
        )

    if f_replace is None:
        f_replace = {}

    # SBML model
    model: libsbml.Model = doc.getModel()
    if model is None:
        raise CobraSBMLError("No SBML model detected in file.")
    model_fbc: libsbml.FbcModelPlugin = model.getPlugin("fbc")

    if not model_fbc:
        LOGGER.warning("Model does not contain SBML fbc package information.")
    else:
        if not model_fbc.isSetStrict():
            LOGGER.warning('Loading SBML model without fbc:strict="true"')

        # fbc-v1 (legacy)
        doc_fbc = doc.getPlugin("fbc")  # type: libsbml.FbcSBMLDocumentPlugin
        fbc_version = doc_fbc.getPackageVersion()

        if fbc_version == 1:
            LOGGER.warning("Loading SBML with fbc-v1 (models should be encoded"
                           " using fbc-v2)")
            conversion_properties = libsbml.ConversionProperties()
            conversion_properties.addOption("convert fbc v1 to fbc v2", True,
                                            "Convert FBC-v1 model to FBC-v2")
            result = doc.convert(conversion_properties)
            if result != libsbml.LIBSBML_OPERATION_SUCCESS:
                raise Exception("Conversion of SBML fbc v1 to fbc v2 failed")

    # Model
    model_id = model.getIdAttribute()
    if not libsbml.SyntaxChecker.isValidSBMLSId(model_id):
        LOGGER.error("'%s' is not a valid SBML 'SId'." % model_id)
    geckopy_model = Model(model_id,
                          hardcoded_rev_reactions=hardcoded_rev_reactions)
    geckopy_model.name = model.getName()

    # meta information
    meta = {
        "model.id": model_id,
        "level": model.getLevel(),
        "version": model.getVersion(),
        "packages": [],
    }
    # History
    creators = []
    created = None
    if model.isSetModelHistory():
        history = model.getModelHistory()  # type: libsbml.ModelHistory

        if history.isSetCreatedDate():
            created = history.getCreatedDate()

        for c in history.getListCreators():  # type: libsbml.ModelCreator
            creators.append({
                "familyName":
                c.getFamilyName() if c.isSetFamilyName() else None,
                "givenName":
                c.getGivenName() if c.isSetGivenName() else None,
                "organisation":
                c.getOrganisation() if c.isSetOrganisation() else None,
                "email":
                c.getEmail() if c.isSetEmail() else None,
            })

    meta["creators"] = creators
    meta["created"] = created
    meta["notes"] = _parse_notes_dict(doc)
    meta["annotation"] = _parse_annotations(doc)

    info = "<{}> SBML L{}V{}".format(model_id, model.getLevel(),
                                     model.getVersion())
    packages = {}
    for k in range(doc.getNumPlugins()):
        plugin = doc.getPlugin(k)  # type:libsbml.SBasePlugin
        key, value = plugin.getPackageName(), plugin.getPackageVersion()
        packages[key] = value
        info += ", {}-v{}".format(key, value)
        if key not in ["fbc", "groups", "l3v2extendedmath"]:
            LOGGER.warning(
                "SBML package '%s' not supported by cobrapy, "
                "information is not parsed",
                key,
            )
    meta["info"] = info
    meta["packages"] = packages
    geckopy_model._sbml = meta

    # notes and annotations
    geckopy_model.notes = _parse_notes_dict(model)
    geckopy_model.annotation = _parse_annotations(model)

    # Compartments
    # FIXME: update with new compartments
    compartments = {}
    for (compartment) in model.getListOfCompartments(
    ):  # noqa: E501 type: libsbml.Compartment
        cid = _check_required(compartment, compartment.getIdAttribute(), "id")
        compartments[cid] = compartment.getName()
    geckopy_model.compartments = compartments

    # Species
    metabolites = []
    # proteins that rely on the naming convention "prot_UNIPROT_ID" will be
    # catched here. Those who are annotated by groups membership will be parsed
    # after the groups are processed.
    proteins = []
    boundary_metabolites = []
    if model.getNumSpecies() == 0:
        LOGGER.warning("No metabolites in model")

    for specie in model.getListOfSpecies():  # type: libsbml.Species
        sid = _check_required(specie, specie.getIdAttribute(), "id")
        if f_replace and F_SPECIE in f_replace:
            sid = f_replace[F_SPECIE](sid)

        met = Metabolite(sid)
        met.name = specie.getName()
        met.notes = _parse_notes_dict(specie)
        met.annotation = _parse_annotations(specie)
        met.compartment = specie.getCompartment()
        initial_amount = specie.getInitialAmount()

        specie_fbc = specie.getPlugin("fbc")  # type: libsbml.FbcSpeciesPlugin
        if specie_fbc:
            met.charge = specie_fbc.getCharge()
            met.formula = specie_fbc.getChemicalFormula()
        else:
            if specie.isSetCharge():
                LOGGER.warning(
                    "Use of the species charge attribute is "
                    "discouraged, use fbc:charge "
                    "instead: %s",
                    specie,
                )
                met.charge = specie.getCharge()
            else:
                if "CHARGE" in met.notes:
                    LOGGER.warning(
                        "Use of CHARGE in the notes element is "
                        "discouraged, use fbc:charge "
                        "instead: %s",
                        specie,
                    )
                    try:
                        met.charge = int(met.notes["CHARGE"])
                    except ValueError:
                        # handle nan, na, NA, ...
                        pass

            if "FORMULA" in met.notes:
                LOGGER.warning(
                    "Use of FORMULA in the notes element is "
                    "discouraged, use fbc:chemicalFormula "
                    "instead: %s",
                    specie,
                )
                met.formula = met.notes["FORMULA"]

        # Detect boundary metabolites
        if specie.getBoundaryCondition() is True:
            boundary_metabolites.append(met)

        if not PROT_PATTERN.match(met.id):
            metabolites.append(met)
        else:
            proteins.append(Protein(met, concentration=initial_amount))

    geckopy_model.add_metabolites(metabolites)
    geckopy_model.add_proteins(proteins)

    # Add exchange reactions for boundary metabolites
    ex_reactions = []
    for met in boundary_metabolites:
        ex_rid = "EX_{}".format(met.id)
        ex_reaction = Reaction(ex_rid)
        ex_reaction.name = ex_rid
        ex_reaction.annotation = {"sbo": SBO_EXCHANGE_REACTION}
        ex_reaction.lower_bound = config.lower_bound
        ex_reaction.upper_bound = config.upper_bound
        LOGGER.warning("Adding exchange reaction %s with default bounds "
                       "for boundary metabolite: %s." %
                       (ex_reaction.id, met.id))
        # species is reactant
        ex_reaction.add_metabolites({met: -1})
        ex_reactions.append(ex_reaction)
    geckopy_model.add_reactions(ex_reactions)

    # Genes
    if model_fbc:
        for (gp) in model_fbc.getListOfGeneProducts(
        ):  # noqa: E501 type: libsbml.GeneProduct
            gid = _check_required(gp, gp.getIdAttribute(), "id")
            if f_replace and F_GENE in f_replace:
                gid = f_replace[F_GENE](gid)
            cobra_gene = Gene(gid)
            cobra_gene.name = gp.getName()
            if cobra_gene.name is None:
                cobra_gene.name = gid
            cobra_gene.annotation = _parse_annotations(gp)
            cobra_gene.notes = _parse_notes_dict(gp)

            geckopy_model.genes.append(cobra_gene)
    else:
        for (cobra_reaction) in model.getListOfReactions(
        ):  # noqa: E501 type: libsbml.Reaction
            # fallback to notes information
            notes = _parse_notes_dict(cobra_reaction)
            if "GENE ASSOCIATION" in notes:
                gpr = notes["GENE ASSOCIATION"]
            elif "GENE_ASSOCIATION" in notes:
                gpr = notes["GENE_ASSOCIATION"]
            else:
                gpr = ""

            if len(gpr) > 0:
                gpr = gpr.replace("(", ";")
                gpr = gpr.replace(")", ";")
                gpr = gpr.replace("or", ";")
                gpr = gpr.replace("and", ";")
                # Interaction of the above replacements can lead to multiple
                # ;, which results in empty gids
                gids = [t.strip() for t in gpr.split(";")]
                gids = set(gids).difference({""})

                # create missing genes
                for gid in gids:
                    if f_replace and F_GENE in f_replace:
                        gid = f_replace[F_GENE](gid)

                    if gid not in geckopy_model.genes:
                        cobra_gene = Gene(gid)
                        cobra_gene.name = gid
                        geckopy_model.genes.append(cobra_gene)

    # GPR rules
    def process_association(ass):
        """Recursively convert gpr association to a gpr string.

        Defined as inline functions to not pass the replacement dict around.
        """
        if ass.isFbcOr():
            return " ".join([
                "(",
                " or ".join(
                    process_association(c)
                    for c in ass.getListOfAssociations()),
                ")",
            ])
        elif ass.isFbcAnd():
            return " ".join([
                "(",
                " and ".join(
                    process_association(c)
                    for c in ass.getListOfAssociations()),
                ")",
            ])
        elif ass.isGeneProductRef():
            gid = ass.getGeneProduct()
            if f_replace and F_GENE in f_replace:
                return f_replace[F_GENE](gid)
            else:
                return gid

    # Reactions
    missing_bounds = False
    reactions = []
    if model.getNumReactions() == 0:
        LOGGER.warning("No reactions in model")

    for reaction in model.getListOfReactions():  # type: libsbml.Reaction
        rid = _check_required(reaction, reaction.getIdAttribute(), "id")
        # proteins are parsed based on Species, prot exchanges are ignored
        if PROT_EX_PATTERN.search(rid):
            continue
        if f_replace and F_REACTION in f_replace:
            rid = f_replace[F_REACTION](rid)
        cobra_reaction = Reaction(rid)
        cobra_reaction.name = reaction.getName()
        cobra_reaction.annotation = _parse_annotations(reaction)
        cobra_reaction.notes = _parse_notes_dict(reaction)

        # set bounds
        p_ub, p_lb = None, None
        r_fbc = reaction.getPlugin("fbc")  # type: libsbml.FbcReactionPlugin
        if r_fbc:
            # bounds in fbc
            lb_id = r_fbc.getLowerFluxBound()
            if lb_id:
                p_lb = model.getParameter(lb_id)  # type: libsbml.Parameter
                if p_lb and p_lb.getConstant() and (p_lb.getValue()
                                                    is not None):
                    cobra_reaction.lower_bound = p_lb.getValue()
                else:
                    raise CobraSBMLError("No constant bound '%s' for "
                                         "reaction: %s" % (p_lb, reaction))

            ub_id = r_fbc.getUpperFluxBound()
            if ub_id:
                p_ub = model.getParameter(ub_id)  # type: libsbml.Parameter
                if p_ub and p_ub.getConstant() and (p_ub.getValue()
                                                    is not None):
                    cobra_reaction.upper_bound = p_ub.getValue()
                else:
                    raise CobraSBMLError("No constant bound '%s' for "
                                         "reaction: %s" % (p_ub, reaction))

        elif reaction.isSetKineticLaw():
            # some legacy models encode bounds in kinetic laws
            klaw = reaction.getKineticLaw()  # type: libsbml.KineticLaw
            p_lb = klaw.getParameter(
                "LOWER_BOUND")  # noqa: E501 type: libsbml.LocalParameter
            if p_lb:
                cobra_reaction.lower_bound = p_lb.getValue()
            p_ub = klaw.getParameter(
                "UPPER_BOUND")  # noqa: E501 type: libsbml.LocalParameter
            if p_ub:
                cobra_reaction.upper_bound = p_ub.getValue()

            if p_ub is not None or p_lb is not None:
                LOGGER.warning(
                    "Encoding LOWER_BOUND and UPPER_BOUND in "
                    "KineticLaw is discouraged, "
                    "use fbc:fluxBounds instead: %s",
                    reaction,
                )

        if p_lb is None:
            missing_bounds = True
            lower_bound = config.lower_bound
            cobra_reaction.lower_bound = lower_bound
            LOGGER.warning(
                "Missing lower flux bound set to '%s' for "
                " reaction: '%s'",
                lower_bound,
                reaction,
            )

        if p_ub is None:
            missing_bounds = True
            upper_bound = config.upper_bound
            cobra_reaction.upper_bound = upper_bound
            LOGGER.warning(
                "Missing upper flux bound set to '%s' for "
                " reaction: '%s'",
                upper_bound,
                reaction,
            )

        # add reaction
        reactions.append(cobra_reaction)

        # parse equation
        stoichiometry = defaultdict(lambda: 0)
        for (sref) in reaction.getListOfReactants(
        ):  # noqa: E501 type: libsbml.SpeciesReference
            sid = _check_required(sref, sref.getSpecies(), "species")

            if f_replace and F_SPECIE in f_replace:
                sid = f_replace[F_SPECIE](sid)
            stoichiometry[sid] -= number(
                _check_required(sref, sref.getStoichiometry(),
                                "stoichiometry"))

        for (sref) in reaction.getListOfProducts(
        ):  # noqa: E501 type: libsbml.SpeciesReference
            sid = _check_required(sref, sref.getSpecies(), "species")

            if f_replace and F_SPECIE in f_replace:
                sid = f_replace[F_SPECIE](sid)
            stoichiometry[sid] += number(
                _check_required(sref, sref.getStoichiometry(),
                                "stoichiometry"))

        # convert to metabolite objects
        object_stoichiometry = {}
        for met_id in stoichiometry:
            target_set = (geckopy_model.proteins
                          if met_id in geckopy_model.proteins else
                          geckopy_model.metabolites)
            metabolite = target_set.get_by_id(met_id)
            object_stoichiometry[metabolite] = stoichiometry[met_id]
        cobra_reaction.add_metabolites(object_stoichiometry)

        # GPR
        if r_fbc:
            gpr = ""
            gpa = (r_fbc.getGeneProductAssociation()
                   )  # noqa: E501 type: libsbml.GeneProductAssociation
            if gpa is not None:
                association = (gpa.getAssociation()
                               )  # noqa: E501 type: libsbml.FbcAssociation
                gpr = process_association(association)
        else:
            # fallback to notes information
            notes = cobra_reaction.notes
            if "GENE ASSOCIATION" in notes:
                gpr = notes["GENE ASSOCIATION"]
            elif "GENE_ASSOCIATION" in notes:
                gpr = notes["GENE_ASSOCIATION"]
            else:
                gpr = ""

            if len(gpr) > 0:
                LOGGER.warning(
                    "Use of GENE ASSOCIATION or GENE_ASSOCIATION "
                    "in the notes element is discouraged, use "
                    "fbc:gpr instead: %s",
                    reaction,
                )
                if f_replace and F_GENE in f_replace:
                    gpr = " ".join(f_replace[F_GENE](t)
                                   for t in gpr.split(" "))

        # remove outside parenthesis, if any
        if gpr.startswith("(") and gpr.endswith(")"):
            try:
                parse_gpr(gpr[1:-1].strip())
                gpr = gpr[1:-1].strip()
            except (SyntaxError, TypeError) as e:
                LOGGER.warning(
                    f"Removing parenthesis from gpr {gpr} leads to "
                    f"an error, so keeping parenthesis, error: {e}", )

        cobra_reaction.gene_reaction_rule = gpr

    geckopy_model.add_reactions(reactions)

    # Objective
    obj_direction = "max"
    coefficients = {}
    if model_fbc:
        obj_list = (model_fbc.getListOfObjectives()
                    )  # noqa: E501 type: libsbml.ListOfObjectives
        if obj_list is None:
            LOGGER.warning("listOfObjectives element not found")
        elif obj_list.size() == 0:
            LOGGER.warning("No objective in listOfObjectives")
        elif not obj_list.getActiveObjective():
            LOGGER.warning("No active objective in listOfObjectives")
        else:
            obj_id = obj_list.getActiveObjective()
            obj = model_fbc.getObjective(obj_id)  # type: libsbml.Objective
            obj_direction = LONG_SHORT_DIRECTION[obj.getType()]

            for (flux_obj) in (obj.getListOfFluxObjectives()
                               ):  # noqa: E501 type: libsbml.FluxObjective
                rid = flux_obj.getReaction()
                if f_replace and F_REACTION in f_replace:
                    rid = f_replace[F_REACTION](rid)
                try:
                    objective_reaction = geckopy_model.reactions.get_by_id(rid)
                except KeyError:
                    raise CobraSBMLError("Objective reaction '%s' "
                                         "not found" % rid)
                try:
                    coefficients[objective_reaction] = number(
                        flux_obj.getCoefficient())
                except ValueError as e:
                    LOGGER.warning(str(e))
    else:
        # some legacy models encode objective coefficients in kinetic laws
        for reaction in model.getListOfReactions():  # type: libsbml.Reaction
            if reaction.isSetKineticLaw():
                klaw = reaction.getKineticLaw()  # type: libsbml.KineticLaw
                p_oc = klaw.getParameter(
                    "OBJECTIVE_COEFFICIENT")  # type: libsbml.LocalParameter
                if p_oc:
                    rid = _check_required(reaction, reaction.getIdAttribute(),
                                          "id")
                    if f_replace and F_REACTION in f_replace:
                        rid = f_replace[F_REACTION](rid)
                    try:
                        objective_reaction = geckopy_model.reactions.get_by_id(
                            rid)
                    except KeyError:
                        raise CobraSBMLError(
                            "Objective reaction '%s' "
                            "not found", rid)
                    try:
                        coefficients[objective_reaction] = number(
                            p_oc.getValue())
                    except ValueError as e:
                        LOGGER.warning(str(e))

                    LOGGER.warning(
                        "Encoding OBJECTIVE_COEFFICIENT in "
                        "KineticLaw is discouraged, "
                        "use fbc:fluxObjective "
                        "instead: %s",
                        reaction,
                    )

    if len(coefficients) == 0:
        LOGGER.error("No objective coefficients in model. Unclear what should "
                     "be optimized")
    set_objective(geckopy_model, coefficients)
    geckopy_model.solver.objective.direction = obj_direction

    # parse groups
    model_groups = model.getPlugin("groups")  # type: libsbml.GroupsModelPlugin
    groups = []
    if model_groups:
        # calculate hashmaps to lookup objects in O(1)
        sid_map = {}
        metaid_map = {}
        for obj_list in [
                model.getListOfCompartments(),
                model.getListOfSpecies(),
                model.getListOfReactions(),
                model_groups.getListOfGroups(),
        ]:

            for sbase in obj_list:  # type: libsbml.SBase
                if sbase.isSetId():
                    sid_map[sbase.getIdAttribute()] = sbase
                if sbase.isSetMetaId():
                    metaid_map[sbase.getMetaId()] = sbase

        # create groups
        for group in model_groups.getListOfGroups():  # type: libsbml.Group
            gid = _check_required(group, group.getIdAttribute(), "id")
            if f_replace and F_GROUP in f_replace:
                gid = f_replace[F_GROUP](gid)
            cobra_group = Group(gid)
            cobra_group.name = group.getName()
            if group.isSetKind():
                cobra_group.kind = group.getKindAsString()
            cobra_group.annotation = _parse_annotations(group)
            cobra_group.notes = _parse_notes_dict(group)

            cobra_members = []
            for member in group.getListOfMembers():  # type: libsbml.Member
                if member.isSetIdRef():
                    obj = sid_map[member.getIdRef()]
                elif member.isSetMetaIdRef():
                    obj = metaid_map[member.getMetaIdRef()]

                typecode = obj.getTypeCode()
                obj_id = _check_required(obj, obj.getIdAttribute(), "id")

                # id replacements
                cobra_member = None
                if typecode == libsbml.SBML_SPECIES:
                    if f_replace and F_SPECIE in f_replace:
                        obj_id = f_replace[F_SPECIE](obj_id)
                    try:
                        cobra_member = geckopy_model.metabolites.get_by_id(
                            obj_id)
                    except KeyError:
                        cobra_member = geckopy_model.proteins.get_by_id(obj_id)
                elif typecode == libsbml.SBML_REACTION:
                    if f_replace and F_REACTION in f_replace:
                        obj_id = f_replace[F_REACTION](obj_id)
                    cobra_member = geckopy_model.reactions.get_by_id(obj_id)
                elif typecode == libsbml.SBML_FBC_GENEPRODUCT:
                    if f_replace and F_GENE in f_replace:
                        obj_id = f_replace[F_GENE](obj_id)
                    cobra_member = geckopy_model.genes.get_by_id(obj_id)
                else:
                    LOGGER.warning("Member %s could not be added to group %s."
                                   "unsupported type code: "
                                   "%s" % (member, group, typecode))

                if cobra_member:
                    cobra_members.append(cobra_member)

            cobra_group.add_members(cobra_members)
            groups.append(cobra_group)
    else:
        # parse deprecated subsystems on reactions
        groups_dict = {}
        for cobra_reaction in geckopy_model.reactions:
            if "SUBSYSTEM" in cobra_reaction.notes:
                g_name = cobra_reaction.notes["SUBSYSTEM"]
                if g_name in groups_dict:
                    groups_dict[g_name].append(cobra_reaction)
                else:
                    groups_dict[g_name] = [cobra_reaction]

        for gid, cobra_members in groups_dict.items():
            if f_replace and F_GROUP in f_replace:
                gid = f_replace[F_GROUP](gid)
            cobra_group = Group(gid, name=gid, kind="collection")
            cobra_group.add_members(cobra_members)
            groups.append(cobra_group)

    geckopy_model.add_groups(groups)

    # now add everything under group Proteins to model.proteins if it was not
    # already added based on naming conventions
    if geckopy_model.groups.query("Protein"):
        g_proteins = geckopy_model.groups.Protein.members.copy()
        g_proteins = {
            prot: {reac: reac.metabolites[prot]
                   for reac in prot.reactions}
            for prot in g_proteins if prot not in geckopy_model.proteins
        }

        if g_proteins:
            geckopy_model.remove_metabolites(g_proteins.keys())
            geckopy_model.add_proteins([Protein(prot) for prot in g_proteins])
            for prot, reactions in g_proteins.items():
                for reac, stoich in reactions.items():
                    # reverse the (negative) stoichiometry coefficient to kcat
                    # we expect that Proteins that are identified only by their
                    # group are respecting the specification and do form part
                    # of reactions
                    # TODO: provide options to tune this behvior
                    reac.add_protein(prot.id, -1 / (stoich * 3600))

    # general hint for missing flux bounds
    if missing_bounds:
        LOGGER.warning(
            "Missing flux bounds on reactions set to default bounds."
            "As best practise and to avoid confusion flux bounds "
            "should be set explicitly on all reactions.")

    return geckopy_model