Ejemplo n.º 1
0
def test_gene_sbo_presence(model):
    """Expect all genes to have some form of SBO-Term annotation.

    The Systems Biology Ontology (SBO) allows researchers to annotate a model
    with terms which indicate the intended function of its individual
    components. The available terms are controlled and relational and can be
    viewed here http://www.ebi.ac.uk/sbo/main/tree.

    Implementation:
    Store the identifiers of the genes reported by
    ``sbo.find_components_without_sbo_terms`` and their share of all genes on
    the test annotation. The test is skipped when the model has no genes and
    passes only when no gene is reported.

    """
    ann = test_gene_sbo_presence.annotation
    ann["data"] = get_ids(sbo.find_components_without_sbo_terms(
        model, "genes"))
    try:
        ann["metric"] = len(ann["data"]) / len(model.genes)
        ann["message"] = wrapper.fill(
            """A total of {} genes ({:.2%}) lack annotation with any type of
            SBO term: {}""".format(len(ann["data"]), ann["metric"],
                                   truncate(ann["data"])))
    except ZeroDivisionError:
        # A model without genes cannot be checked: pin the metric to 1.0 and
        # skip instead of failing.
        ann["metric"] = 1.0
        ann["message"] = "The model has no genes."
        pytest.skip(ann["message"])
    # ``data`` holds the genes *lacking* SBO terms (see the message above),
    # so the expectation is met only when that collection is empty.
    assert len(ann["data"]) == 0, ann["message"]
Ejemplo n.º 2
0
def test_reaction_charge_balance(read_only_model):
    """
    Expect every internal reaction to be charge balanced.

    Biomass, exchange and demand reactions are unbalanced by definition and
    are not meant to be part of this check; the reactions tested are those
    returned by ``con_helpers.get_internals``.

    In steady state, for each metabolite the sum of influx equals the sum
    of outflux. Hence the net charges of both sides of any model reaction have
    to be equal. Reactions where at least one metabolite does not have a
    charge are not considered to be balanced, even though the remaining
    metabolites participating in the reaction might be.
    """
    record = test_reaction_charge_balance.annotation
    internal = con_helpers.get_internals(read_only_model)
    offenders = get_ids(
        consistency.find_charge_unbalanced_reactions(internal))
    record["data"] = offenders
    num_offenders = len(offenders)
    record["metric"] = num_offenders / len(internal)
    template = (
        """A total of {} ({:.2%}) reactions are charge unbalanced with at
        least one of the metabolites not having a charge or the overall
        charge not equal to 0: {}""")
    record["message"] = wrapper.fill(
        template.format(num_offenders, record["metric"], truncate(offenders)))
    # Passing requires that no reaction was reported as unbalanced.
    assert num_offenders == 0, record["message"]
Ejemplo n.º 3
0
def test_reaction_mass_balance(model):
    """
    Expect every internal reaction to be mass balanced.

    Biomass, exchange and demand reactions are unbalanced by definition and
    are not meant to be part of this check. A reaction in which at least one
    metabolite lacks a formula is not considered balanced, even though the
    remaining metabolites participating in the reaction might be.

    In steady state, for each metabolite the sum of influx equals the sum
    of efflux. Hence the net masses of both sides of any model reaction have
    to be equal.

    Implementation:
    Pass the reactions returned by ``con_helpers.get_internals`` to
    ``consistency.find_mass_unbalanced_reactions`` and store the identifiers
    of the reported reactions together with their share of all internal
    reactions. The test passes when nothing is reported.

    """
    record = test_reaction_mass_balance.annotation
    internal = con_helpers.get_internals(model)
    offenders = get_ids(consistency.find_mass_unbalanced_reactions(internal))
    record["data"] = offenders
    num_offenders = len(offenders)
    record["metric"] = num_offenders / len(internal)
    template = (
        """A total of {} ({:.2%}) reactions are mass unbalanced with at least
        one of the metabolites not having a formula or the overall mass not
        equal to 0: {}""")
    record["message"] = wrapper.fill(
        template.format(num_offenders, record["metric"], truncate(offenders)))
    assert num_offenders == 0, record["message"]
Ejemplo n.º 4
0
def test_sink_specific_sbo_presence(read_only_model):
    """Expect all sink reactions to be annotated with SBO:0000632.

    SBO:0000632 represents the term 'sink reaction'. The Systems Biology
    Ontology defines a sink reaction as follows: 'A modeling process to
    provide matter influx or efflux to a model, for example to replenish a
    metabolic network with raw materials (eg carbon / energy sources). Such
    reactions are conceptual, created solely for modeling purposes, and do not
    have a physical correspondence. Unlike the analogous demand (SBO:0000628)
    reactions, which are usually designated as irreversible, sink reactions
    always represent a reversible uptake/secretion processes, and act as a
    metabolite source with no cost to the cell. Sink reactions, also referred
    to as R_SINK_, are generally used for compounds that are metabolized by
    the cell but are produced by non-metabolic, un-modeled cellular processes.'
    Every sink reaction should be annotated with
    this. Sink reactions differ from exchange reactions in that the metabolites
    are not removed from the extracellular environment, but from any of the
    organism's compartments.

    Implementation:
    Store the identifiers of the sink reactions reported by
    ``sbo.check_component_for_specific_sbo_term`` and their share of all sink
    reactions on the test annotation. The test is skipped when no sink
    reactions are found and passes only when none is reported.
    """
    ann = test_sink_specific_sbo_presence.annotation
    sinks = helpers.find_sink_reactions(read_only_model)
    ann["data"] = get_ids(
        sbo.check_component_for_specific_sbo_term(sinks, "SBO:0000632"))
    try:
        ann["metric"] = len(ann["data"]) / len(sinks)
    except ZeroDivisionError:
        # Nothing to check without sink reactions: pin the metric and skip.
        ann["metric"] = 1.0
        ann["message"] = "No sink reactions found."
        pytest.skip(ann["message"])
    ann["message"] = wrapper.fill(
        """A total of {} sink reactions ({:.2%} of all sink reactions) lack
        annotation with the SBO term "SBO:0000632" for
        'sink reaction': {}""".format(len(ann["data"]), ann["metric"],
                                      truncate(ann["data"])))
    # ``data`` holds the sink reactions *lacking* the term, so the
    # expectation is met only when that collection is empty.
    assert len(ann["data"]) == 0, ann["message"]
Ejemplo n.º 5
0
def test_find_reactions_unbounded_flux_default_condition(read_only_model):
    """
    Expect the fraction of unbounded reactions to be low.

    A large fraction of model reactions able to carry unlimited flux under
    default conditions indicates problems with reaction directionality,
    missing cofactors, incorrectly defined transport reactions and more.

    Implementation:
    Take the unbounded reactions and their fraction from
    ``consistency.find_reactions_with_unbounded_flux_default_condition``
    (the third returned value is ignored), store them on the test annotation
    and require the fraction to be at most 0.1.
    """
    # TODO: Arbitrary threshold right now! Update after meta study!
    ann = test_find_reactions_unbounded_flux_default_condition.annotation
    unbounded_rxns, fraction, _ = \
        consistency.find_reactions_with_unbounded_flux_default_condition(
            read_only_model
        )
    ann["data"] = get_ids(unbounded_rxns)
    ann["metric"] = fraction
    # The first placeholder is the percentage and the second the count, so
    # the fraction has to be passed before the number of reactions.
    ann["message"] = wrapper.fill(
        """ A fraction of {:.2%} of the non-blocked reactions (in total {}
        reactions) can carry unbounded flux in the default model
        condition. Unbounded reactions may be involved in
        thermodynamically infeasible cycles: {}""".format(
            ann["metric"], len(ann["data"]), truncate(ann["data"])
        )
    )
    assert ann["metric"] <= 0.1, ann["message"]
Ejemplo n.º 6
0
def test_gene_specific_sbo_presence(model):
    """Expect all genes to be annotated with SBO:0000243.

    SBO:0000243 represents the term 'gene'. Every gene should
    be annotated with this.

    Implementation:
    Store the identifiers of the genes reported by
    ``sbo.check_component_for_specific_sbo_term`` for "SBO:0000243" and their
    share of all genes on the test annotation. The test is skipped when the
    model has no genes and passes only when no gene is reported.

    """
    ann = test_gene_specific_sbo_presence.annotation
    ann["data"] = get_ids(sbo.check_component_for_specific_sbo_term(
        model.genes, "SBO:0000243"))
    try:
        ann["metric"] = len(ann["data"]) / len(model.genes)
        ann["message"] = wrapper.fill(
            """A total of {} genes ({:.2%} of all genes) lack
            annotation with the SBO term "SBO:0000243" for
            'gene': {}""".format(
                len(ann["data"]), ann["metric"], truncate(ann["data"])))
    except ZeroDivisionError:
        # Nothing to check without genes: pin the metric and skip.
        ann["metric"] = 1.0
        ann["message"] = "The model has no genes."
        pytest.skip(ann["message"])
    # ``data`` holds the genes *lacking* the term (see the message above),
    # so the expectation is met only when that collection is empty.
    assert len(ann["data"]) == 0, ann["message"]
Ejemplo n.º 7
0
def test_stoichiometric_consistency(read_only_model):
    """
    Expect the model's stoichiometry to be consistent.

    Stoichiometric inconsistency violates universal constraints:
    1. Molecular masses are always positive, and
    2. On each side of a reaction the mass is conserved.
    A single incorrectly defined reaction can lead to stoichiometric
    inconsistency in the model, and consequently to unconserved metabolites.
    Similar to insufficient constraints, this may give rise to cycles which
    either produce mass from nothing or consume mass from the model.

    This test uses an implementation of the algorithm presented by
    Gevorgyan, A., M. G Poolman, and D. A Fell.
    "Detection of Stoichiometric Inconsistencies in Biomolecular Models."
    Bioinformatics 24, no. 19 (2008): 2245.
    doi: 10.1093/bioinformatics/btn425

    The unconserved metabolites are only looked up when the consistency check
    fails; otherwise an empty list is recorded.
    """
    record = test_stoichiometric_consistency.annotation
    consistent = consistency.check_stoichiometric_consistency(read_only_model)
    if consistent:
        unconserved = []
    else:
        unconserved = get_ids(
            consistency.find_unconserved_metabolites(read_only_model))
    record["data"] = unconserved
    num_unconserved = len(unconserved)
    record["metric"] = num_unconserved / len(read_only_model.metabolites)
    template = (
        """This model contains {} ({:.2%}) unconserved
        metabolites: {}""")
    record["message"] = wrapper.fill(template.format(
        num_unconserved, record["metric"], truncate(unconserved)))
    assert consistent, record["message"]
Ejemplo n.º 8
0
def test_transport_reaction_specific_sbo_presence(model):
    """Expect all transport reactions to be annotated properly.

    'SBO:0000185', 'SBO:0000588', 'SBO:0000587', 'SBO:0000655', 'SBO:0000654',
    'SBO:0000660', 'SBO:0000659', 'SBO:0000657', and 'SBO:0000658' represent
    the terms 'transport reaction' and 'translocation reaction', in addition
    to their children (more specific transport reaction labels). Every
    transport reaction that is not a pure metabolic or boundary reaction should
    be annotated with one of these terms. The results shown are relative to the
    total of all transport reactions.

    Implementation:
    Store the identifiers of the transport reactions reported by
    ``sbo.check_component_for_specific_sbo_term`` for the terms in
    ``helpers.TRANSPORT_RXN_SBO_TERMS`` and their share of all transport
    reactions on the test annotation. The test is skipped when the model has
    no transport reactions and passes only when none is reported.

    """
    sbo_transport_terms = helpers.TRANSPORT_RXN_SBO_TERMS
    ann = test_transport_reaction_specific_sbo_presence.annotation
    transports = helpers.find_transport_reactions(model)
    ann["data"] = get_ids(sbo.check_component_for_specific_sbo_term(
        transports, sbo_transport_terms))
    try:
        ann["metric"] = len(ann["data"]) / len(transports)
        # The message names transport reactions; the earlier wording was
        # carried over from the metabolic reaction test.
        ann["message"] = wrapper.fill(
            """A total of {} transport reactions ({:.2%} of all transport
            reactions) lack annotation with one of the SBO terms: {} for
            'transport reaction': {}""".format(
                len(ann["data"]), ann["metric"], sbo_transport_terms,
                truncate(ann["data"])))
    except ZeroDivisionError:
        # Nothing to check without transport reactions: pin the metric and
        # skip.
        ann["metric"] = 1.0
        ann["message"] = "The model has no transport reactions."
        pytest.skip(ann["message"])
    # ``data`` holds the reactions *lacking* the terms, so the expectation is
    # met only when that collection is empty.
    assert len(ann["data"]) == 0, ann["message"]
Ejemplo n.º 9
0
def test_gene_specific_sbo_presence(model):
    """Expect all genes to be annotated with SBO:0000243.

    SBO:0000243 represents the term 'gene'. Every gene should
    be annotated with this.

    Implementation:
    Record the identifiers of the genes that
    ``sbo.check_component_for_specific_sbo_term`` reports for "SBO:0000243",
    along with their share of all genes. Skip when the model has no genes;
    pass only when no gene is reported.

    """
    ann = test_gene_specific_sbo_presence.annotation
    ann["data"] = get_ids(
        sbo.check_component_for_specific_sbo_term(model.genes, "SBO:0000243"))
    try:
        ann["metric"] = len(ann["data"]) / len(model.genes)
        ann["message"] = wrapper.fill(
            """A total of {} genes ({:.2%} of all genes) lack
            annotation with the SBO term "SBO:0000243" for
            'gene': {}""".format(len(ann["data"]), ann["metric"],
                                 truncate(ann["data"])))
    except ZeroDivisionError:
        # A gene-free model cannot be assessed; mark the metric and skip.
        ann["metric"] = 1.0
        ann["message"] = "The model has no genes."
        pytest.skip(ann["message"])
    # The recorded genes are the ones *lacking* the term, hence the test
    # succeeds only if none were recorded.
    assert len(ann["data"]) == 0, ann["message"]
Ejemplo n.º 10
0
def test_metabolic_reaction_specific_sbo_presence(read_only_model):
    """Expect all metabolic reactions to be annotated with SBO:0000176.

    SBO:0000176 represents the term 'biochemical reaction'. Every metabolic
    reaction that is not a transport or boundary reaction should be annotated
    with this. The results shown are relative to the total amount of pure
    metabolic reactions.

    Implementation:
    Store the identifiers of the reactions from
    ``basic.find_pure_metabolic_reactions`` that
    ``sbo.check_component_for_specific_sbo_term`` reports for "SBO:0000176",
    together with their share of all pure metabolic reactions. The test is
    skipped when there are no such reactions and passes only when none is
    reported.

    """
    ann = test_metabolic_reaction_specific_sbo_presence.annotation
    pure = basic.find_pure_metabolic_reactions(read_only_model)
    ann["data"] = get_ids(sbo.check_component_for_specific_sbo_term(
        pure, "SBO:0000176"))
    try:
        ann["metric"] = len(ann["data"]) / len(pure)
        ann["message"] = wrapper.fill(
            """A total of {} metabolic reactions ({:.2%} of all purely
            metabolic reactions) lack annotation with the SBO term
            "SBO:0000176" for 'biochemical reaction': {}""".format(
                len(ann["data"]), ann["metric"], truncate(ann["data"])))
    except ZeroDivisionError:
        # Nothing to check without metabolic reactions: pin the metric and
        # skip.
        ann["metric"] = 1.0
        ann["message"] = "The model has no metabolic reactions."
        pytest.skip(ann["message"])
    # ``data`` holds the reactions *lacking* the term, so the expectation is
    # met only when that collection is empty.
    assert len(ann["data"]) == 0, ann["message"]
Ejemplo n.º 11
0
def test_reaction_mass_balance(model):
    """
    Expect all internal reactions to be mass balanced.

    Biomass, exchange and demand reactions are left out because they are
    unbalanced by definition. Any reaction with at least one metabolite
    lacking a formula is treated as unbalanced, even though the remaining
    metabolites participating in the reaction might balance.

    In steady state, for each metabolite the sum of influx equals the sum
    of efflux. Hence the net masses of both sides of any model reaction have
    to be equal.

    Implementation:
    The reactions returned by ``con_helpers.get_internals`` are handed to
    ``consistency.find_mass_unbalanced_reactions``; the identifiers of the
    reported reactions and their share of the internal reactions are stored
    on the test annotation. The test passes when nothing is reported.

    """
    result = test_reaction_mass_balance.annotation
    inner_reactions = con_helpers.get_internals(model)
    unbalanced = get_ids(
        consistency.find_mass_unbalanced_reactions(inner_reactions))
    result["data"] = unbalanced
    count = len(unbalanced)
    result["metric"] = count / len(inner_reactions)
    text = (
        """A total of {} ({:.2%}) reactions are mass unbalanced with at least
        one of the metabolites not having a formula or the overall mass not
        equal to 0: {}""")
    result["message"] = wrapper.fill(
        text.format(count, result["metric"], truncate(unbalanced)))
    assert count == 0, result["message"]
Ejemplo n.º 12
0
def test_biomass_precursors_open_production(model, reaction_id):
    """
    Expect precursor production in complete medium.

    Using flux balance analysis this test optimizes for the production of each
    metabolite that is a substrate of the biomass reaction with the exception
    of atp and h2o. Optimizations are carried out using a complete
    medium i.e. unconstrained boundary reactions. This is useful when
    reconstructing the precursor biosynthesis pathways of a metabolic model.
    To pass this test, the model should be able to synthesize all the
    precursors.

    Implementation:
    Call ``helpers.open_boundaries`` on the model, look up the biomass
    reaction by ``reaction_id`` and store, keyed by that identifier, the
    precursors reported by ``biomass.find_blocked_biomass_precursors`` along
    with their share of the precursors returned by
    ``biomass.find_biomass_precursors``.

    """
    record = test_biomass_precursors_open_production.annotation
    helpers.open_boundaries(model)
    biomass_rxn = model.reactions.get_by_id(reaction_id)
    blocked = get_ids(
        biomass.find_blocked_biomass_precursors(biomass_rxn, model))
    record["data"][reaction_id] = blocked
    num_blocked = len(blocked)
    precursors = biomass.find_biomass_precursors(model, biomass_rxn)
    record["metric"][reaction_id] = num_blocked / len(precursors)
    template = (
        """Using the biomass reaction {} and when the model is simulated in
        complete medium a total of {} precursors
        ({:.2%} of all precursors except h2o and atp) cannot be produced: {}
        """)
    record["message"][reaction_id] = wrapper.fill(template.format(
        reaction_id, num_blocked, record["metric"][reaction_id], blocked))
    assert num_blocked == 0, record["message"][reaction_id]
Ejemplo n.º 13
0
def test_gene_sbo_presence(model):
    """Expect all genes to have some form of SBO-Term annotation.

    The Systems Biology Ontology (SBO) allows researchers to annotate a model
    with terms which indicate the intended function of its individual
    components. The available terms are controlled and relational and can be
    viewed here http://www.ebi.ac.uk/sbo/main/tree.

    Implementation:
    Record the identifiers of the genes that
    ``sbo.find_components_without_sbo_terms`` reports, along with their share
    of all genes. Skip when the model has no genes; pass only when no gene is
    reported.

    """
    ann = test_gene_sbo_presence.annotation
    ann["data"] = get_ids(sbo.find_components_without_sbo_terms(
        model, "genes"))
    try:
        ann["metric"] = len(ann["data"]) / len(model.genes)
        ann["message"] = wrapper.fill(
            """A total of {} genes ({:.2%}) lack annotation with any type of
            SBO term: {}""".format(
                len(ann["data"]), ann["metric"], truncate(ann["data"])))
    except ZeroDivisionError:
        # A gene-free model cannot be assessed; mark the metric and skip.
        ann["metric"] = 1.0
        ann["message"] = "The model has no genes."
        pytest.skip(ann["message"])
    # The recorded genes are the ones *without* SBO terms, hence the test
    # succeeds only if none were recorded.
    assert len(ann["data"]) == 0, ann["message"]
Ejemplo n.º 14
0
def test_transport_reaction_gpr_presence(model):
    """
    Expect only a small fraction of transport reactions to lack a GPR rule.

    As it is hard to identify the exact transport processes within a cell,
    transport reactions are often added purely for modeling purposes.
    Highlighting where assumptions have been made versus where
    there is proof may help direct the efforts to improve transport and
    transport energetics of the tested metabolic model.
    However, transport reactions without GPR may also be valid:
    Diffusion, or known reactions with yet undiscovered genes likely lack GPR.

    Implementation:
    Store the identifiers of the reactions reported by
    ``basic.check_transport_reaction_gpr_presence`` and relate their number
    to the number of reactions returned by
    ``helpers.find_transport_reactions``. The test passes while that fraction
    stays below 0.2.

    """
    # TODO: Update threshold with improved insight from meta study.
    record = test_transport_reaction_gpr_presence.annotation
    without_gpr = get_ids(basic.check_transport_reaction_gpr_presence(model))
    record["data"] = without_gpr
    num_without_gpr = len(without_gpr)
    all_transporters = helpers.find_transport_reactions(model)
    record["metric"] = num_without_gpr / len(all_transporters)
    template = (
        """There are a total of {} transport reactions ({:.2%} of all
        transport reactions) without GPR:
        {}""")
    record["message"] = wrapper.fill(template.format(
        num_without_gpr, record["metric"], truncate(without_gpr)))
    assert record["metric"] < 0.2, record["message"]
Ejemplo n.º 15
0
def test_gene_protein_reaction_rule_presence(model):
    """
    Expect all non-boundary reactions to have a GPR rule.

    Gene-Protein-Reaction rules express which gene has what function.
    The presence of this annotation is important to justify the existence
    of reactions in the model, and is required to conduct in silico gene
    deletion studies. However, reactions without GPR may also be valid:
    Spontaneous reactions, or known reactions with yet undiscovered genes
    likely lack GPR.

    Implementation:
    Take the reactions reported by
    ``basic.check_gene_protein_reaction_rule_presence``, drop those that are
    part of ``model.boundary`` and store the identifiers of the remainder.
    The metric relates their number to the number of all model reactions.

    """
    record = test_gene_protein_reaction_rule_presence.annotation
    reported = set(basic.check_gene_protein_reaction_rule_presence(model))
    boundary = set(model.boundary)
    # Boundary reactions are not expected to carry a GPR rule.
    missing_gpr = reported.difference(boundary)
    missing_ids = get_ids(missing_gpr)
    record["data"] = missing_ids
    num_missing = len(missing_ids)
    record["metric"] = num_missing / len(model.reactions)
    template = (
        """There are a total of {} reactions ({:.2%}) without GPR:
        {}""")
    record["message"] = wrapper.fill(template.format(
        num_missing, record["metric"], truncate(missing_ids)))
    assert num_missing == 0, record["message"]
Ejemplo n.º 16
0
def test_biomass_specific_sbo_presence(read_only_model):
    """Expect all biomass reactions to be annotated with SBO:0000629.

    SBO:0000629 represents the term 'biomass production'. The Systems Biology
    Ontology defines biomass production as follows: 'Biomass production,
    often represented 'R_BIOMASS_', is usually the optimization target reaction
    of constraint-based models, and can consume multiple reactants to produce
    multiple products. It is also assumed that parts of the reactants are also
    consumed in unrepresented processes and hence products do not have to
    reflect all the atom composition of the reactants. Formulation of a
    biomass production process entails definition of the macromolecular
    content (eg. cellular protein fraction), metabolic constitution of
    each fraction (eg. amino acids), and subsequently the atomic composition
    (eg. nitrogen atoms). More complex biomass functions can additionally
    incorporate details of essential vitamins and cofactors required for
    growth.'
    Every reaction representing the biomass production should be annotated with
    this.

    Implementation:
    Store the identifiers of the biomass reactions reported by
    ``sbo.check_component_for_specific_sbo_term`` for "SBO:0000629" and their
    share of all biomass reactions on the test annotation. The test is
    skipped when no biomass reactions are found and passes only when none is
    reported.
    """
    ann = test_biomass_specific_sbo_presence.annotation
    # Named ``biomass_rxns`` so a module-level ``biomass`` name, if one is in
    # scope, is not shadowed.
    biomass_rxns = helpers.find_biomass_reaction(read_only_model)
    ann["data"] = get_ids(
        sbo.check_component_for_specific_sbo_term(
            biomass_rxns, "SBO:0000629"))
    try:
        ann["metric"] = len(ann["data"]) / len(biomass_rxns)
    except ZeroDivisionError:
        # Nothing to check without biomass reactions: pin the metric and
        # skip.
        ann["metric"] = 1.0
        ann["message"] = "No biomass reactions found."
        pytest.skip(ann["message"])
    ann["message"] = wrapper.fill(
        """A total of {} biomass reactions ({:.2%} of all biomass reactions)
        lack annotation with the SBO term "SBO:0000629" for
        'biomass production': {}""".format(len(ann["data"]), ann["metric"],
                                           truncate(ann["data"])))
    # ``data`` holds the biomass reactions *lacking* the term, so the
    # expectation is met only when that collection is empty.
    assert len(ann["data"]) == 0, ann["message"]
Ejemplo n.º 17
0
def test_metabolic_reaction_specific_sbo_presence(model):
    """Expect all metabolic reactions to be annotated with SBO:0000176.

    SBO:0000176 represents the term 'biochemical reaction'. Every metabolic
    reaction that is not a transport or boundary reaction should be annotated
    with this. The results shown are relative to the total amount of pure
    metabolic reactions.

    Implementation:
    Record the identifiers of the reactions from
    ``basic.find_pure_metabolic_reactions`` that
    ``sbo.check_component_for_specific_sbo_term`` reports for "SBO:0000176",
    along with their share of all pure metabolic reactions. Skip when there
    are no such reactions; pass only when none is reported.

    """
    ann = test_metabolic_reaction_specific_sbo_presence.annotation
    pure = basic.find_pure_metabolic_reactions(model)
    ann["data"] = get_ids(sbo.check_component_for_specific_sbo_term(
        pure, "SBO:0000176"))
    try:
        ann["metric"] = len(ann["data"]) / len(pure)
        ann["message"] = wrapper.fill(
            """A total of {} metabolic reactions ({:.2%} of all purely
            metabolic reactions) lack annotation with the SBO term
            "SBO:0000176" for 'biochemical reaction': {}""".format(
                len(ann["data"]), ann["metric"], truncate(ann["data"])))
    except ZeroDivisionError:
        # Without metabolic reactions there is nothing to assess; mark the
        # metric and skip.
        ann["metric"] = 1.0
        ann["message"] = "The model has no metabolic reactions."
        pytest.skip(ann["message"])
    # The recorded reactions are the ones *lacking* the term, hence the test
    # succeeds only if none were recorded.
    assert len(ann["data"]) == 0, ann["message"]
Ejemplo n.º 18
0
def test_transport_reaction_specific_sbo_presence(read_only_model):
    """Expect all transport reactions to be annotated properly.

    'SBO:0000185', 'SBO:0000588', 'SBO:0000587', 'SBO:0000655', 'SBO:0000654',
    'SBO:0000660', 'SBO:0000659', 'SBO:0000657', and 'SBO:0000658' represent
    the terms 'transport reaction' and 'translocation reaction', in addition
    to their children (more specific transport reaction labels). Every
    transport reaction that is not a pure metabolic or boundary reaction should
    be annotated with one of these terms. The results shown are relative to the
    total of all transport reactions.

    Implementation:
    Record the identifiers of the transport reactions that
    ``sbo.check_component_for_specific_sbo_term`` reports for the terms in
    ``helpers.TRANSPORT_RXN_SBO_TERMS``, along with their share of all
    transport reactions. Skip when the model has no transport reactions; pass
    only when none is reported.

    """
    sbo_transport_terms = helpers.TRANSPORT_RXN_SBO_TERMS
    ann = test_transport_reaction_specific_sbo_presence.annotation
    transports = helpers.find_transport_reactions(read_only_model)
    ann["data"] = get_ids(sbo.check_component_for_specific_sbo_term(
        transports, sbo_transport_terms))
    try:
        ann["metric"] = len(ann["data"]) / len(transports)
        # The message names transport reactions; the earlier wording was
        # carried over from the metabolic reaction test.
        ann["message"] = wrapper.fill(
            """A total of {} transport reactions ({:.2%} of all transport
            reactions) lack annotation with one of the SBO terms: {} for
            'transport reaction': {}""".format(
                len(ann["data"]), ann["metric"], sbo_transport_terms,
                truncate(ann["data"])))
    except ZeroDivisionError:
        # Without transport reactions there is nothing to assess; mark the
        # metric and skip.
        ann["metric"] = 1.0
        ann["message"] = "The model has no transport reactions."
        pytest.skip(ann["message"])
    # The recorded reactions are the ones *lacking* the terms, hence the test
    # succeeds only if none were recorded.
    assert len(ann["data"]) == 0, ann["message"]
Ejemplo n.º 19
0
def test_demand_specific_sbo_presence(read_only_model):
    """Expect all demand reactions to be annotated with SBO:0000628.

    SBO:0000628 represents the term 'demand reaction'. The Systems Biology
    Ontology defines a demand reaction as follows: 'A modeling process
    analogous to exchange reaction, but which operates upon "internal"
    metabolites. Metabolites that are consumed by these reactions are assumed
    to be used in intra-cellular processes that are not part of the model.
    Demand reactions, often represented 'R_DM_', can also deliver metabolites
    (from intra-cellular processes that are not considered in the model).'
    Every demand reaction should be annotated with
    this. Demand reactions differ from exchange reactions in that the
    metabolites are not removed from the extracellular environment, but from
    any of the organism's compartments. Demand reactions differ from sink
    reactions in that they are designated as irreversible.

    Implementation:
    Store the identifiers of the demand reactions reported by
    ``sbo.check_component_for_specific_sbo_term`` for "SBO:0000628" and their
    share of all demand reactions on the test annotation. The test is skipped
    when the model has no demand reactions and passes only when none is
    reported.

    """
    ann = test_demand_specific_sbo_presence.annotation
    demands = helpers.find_demand_reactions(read_only_model)
    ann["data"] = get_ids(sbo.check_component_for_specific_sbo_term(
        demands, "SBO:0000628"))
    try:
        ann["metric"] = len(ann["data"]) / len(demands)
        # The counted components are demand reactions, not genes.
        ann["message"] = wrapper.fill(
            """A total of {} demand reactions ({:.2%} of all demand
            reactions) lack annotation with the SBO term "SBO:0000628" for
            'demand reaction': {}""".format(
                len(ann["data"]), ann["metric"], truncate(ann["data"])))
    except ZeroDivisionError:
        # Nothing to check without demand reactions: pin the metric and skip.
        ann["metric"] = 1.0
        ann["message"] = "The model has no demand reactions."
        pytest.skip(ann["message"])
    # ``data`` holds the demand reactions *lacking* the term, so the
    # expectation is met only when that collection is empty.
    assert len(ann["data"]) == 0, ann["message"]
Ejemplo n.º 20
0
def test_find_metabolites_not_consumed_with_open_bounds(model):
    """
    Expect every metabolite to be consumable in complete medium.

    In complete medium, a model should be able to divert flux from every
    metabolite. This test opens all the boundary reactions i.e. simulates a
    complete medium and checks if any metabolite cannot be consumed
    individually using flux balance analysis. Metabolites that cannot be
    consumed this way are likely dead-end metabolites or upstream of reactions
    with fixed constraints. To pass this test all metabolites should be
    consumable.

    Implementation:
    Store the identifiers of the metabolites reported by
    ``consistency.find_metabolites_not_consumed_with_open_bounds`` together
    with their share of all model metabolites. The test passes when nothing
    is reported.

    """
    record = test_find_metabolites_not_consumed_with_open_bounds.annotation
    stuck = get_ids(
        consistency.find_metabolites_not_consumed_with_open_bounds(model))
    record["data"] = stuck
    num_stuck = len(stuck)
    record["metric"] = num_stuck / len(model.metabolites)
    template = (
        """A total of {} ({:.2%}) metabolites cannot be consumed in complete
        medium: {}""")
    record["message"] = wrapper.fill(
        template.format(num_stuck, record["metric"], truncate(stuck)))
    assert num_stuck == 0, record["message"]
Ejemplo n.º 21
0
def test_find_pure_metabolic_reactions(model):
    """
    Expect the model to define at least one pure metabolic reaction.

    If a reaction is neither a transport reaction, a biomass reaction nor a
    boundary reaction, it is counted as a purely metabolic reaction. This test
    requires the presence of metabolite formula to be able to identify
    transport reactions. This test is passed when the model contains at least
    one purely metabolic reaction i.e. a conversion of one metabolite into
    another.

    Implementation:
    Store the identifiers of the reactions returned by
    ``basic.find_pure_metabolic_reactions`` together with their share of all
    model reactions, and require at least one such reaction. Please read the
    description of "Transport Reactions" and "Biomass Reaction Identified" to
    learn how those reaction classes are identified.

    """
    record = test_find_pure_metabolic_reactions.annotation
    pure_ids = get_ids(basic.find_pure_metabolic_reactions(model))
    record["data"] = pure_ids
    num_pure = len(pure_ids)
    record["metric"] = num_pure / len(model.reactions)
    template = (
        """A total of {:d} ({:.2%}) purely metabolic reactions are defined in
        the model, this excludes transporters, exchanges, or pseudo-reactions:
        {}""")
    record["message"] = wrapper.fill(
        template.format(num_pure, record["metric"], truncate(pure_ids)))
    assert num_pure >= 1, record["message"]
Ejemplo n.º 22
0
def test_stoichiometric_consistency(model):
    """
    Expect the model's stoichiometry to be consistent.

    Stoichiometric inconsistency violates universal constraints:
    1. Molecular masses are always positive, and
    2. On each side of a reaction the mass is conserved.
    A single incorrectly defined reaction can lead to stoichiometric
    inconsistency in the model, and consequently to unconserved metabolites.
    Similar to insufficient constraints, this may give rise to cycles which
    either produce mass from nothing or consume mass from the model.

    Implementation:
    This test first uses an implementation of the algorithm presented in
    section 3.1 by Gevorgyan, A., M. G Poolman, and D. A Fell.
    "Detection of Stoichiometric Inconsistencies in Biomolecular Models."
    Bioinformatics 24, no. 19 (2008): 2245.
    doi: 10.1093/bioinformatics/btn425
    Should the model be inconsistent, then the list of unconserved metabolites
    is computed using the algorithm described in section 3.2 of the same
    publication. For a consistent model an empty list is recorded instead.

    """
    result = test_stoichiometric_consistency.annotation
    consistent = consistency.check_stoichiometric_consistency(model)
    if consistent:
        leftover = []
    else:
        leftover = get_ids(consistency.find_unconserved_metabolites(model))
    result["data"] = leftover
    count = len(leftover)
    result["metric"] = count / len(model.metabolites)
    text = (
        """This model contains {} ({:.2%}) unconserved
        metabolites: {}""")
    result["message"] = wrapper.fill(
        text.format(count, result["metric"], truncate(leftover)))
    assert consistent, result["message"]
Ejemplo n.º 23
0
def test_gene_product_annotation_presence(model):
    """
    Expect every gene to carry a non-empty annotation attribute.

    The test looks for any annotation at all in the SBML annotations field
    (extended by the FBC package) of each gene product. The kind of
    annotation is irrelevant: specific database cross-references, ontology
    terms or additional information all count. To pass, the model is
    expected to have genes and each of them should have some form of
    annotation.

    Implementation:
    Collect the cobra.Gene objects of the model whose annotation attribute
    is unset or empty and expect that collection to be empty.

    """
    ann = test_gene_product_annotation_presence.annotation
    unannotated = annotation.find_components_without_annotation(
        model, "genes")
    gene_ids = get_ids(unannotated)
    ann["data"] = gene_ids
    n_missing = len(gene_ids)
    ann["metric"] = n_missing / len(model.genes)
    template = """A total of {} genes ({:.2%}) lack any form of
        annotation: {}"""
    ann["message"] = wrapper.fill(
        template.format(n_missing, ann["metric"], truncate(gene_ids)))
    assert n_missing == 0, ann["message"]
Ejemplo n.º 24
0
def test_transport_reaction_gpr_presence(model):
    """
    Expect only a small fraction of transport reactions to lack a GPR rule.

    The exact transport processes of a cell are hard to identify, so
    transport reactions are often added purely for modeling purposes.
    Showing where assumptions were made and where there is proof may help
    direct efforts to improve transport and transport energetics of the
    tested metabolic model.
    Transport reactions without GPR can nevertheless be valid: diffusion, or
    known reactions with yet undiscovered genes likely lack GPR.

    Implementation:
    Determine which cobra.Reactions classified as transport reactions have
    no "gene_reaction_rule" and relate their number to the number of all
    transport reactions. The test passes while that fraction is below 20%.

    """
    # TODO: Update threshold with improved insight from meta study.
    ann = test_transport_reaction_gpr_presence.annotation
    without_gpr = get_ids(basic.check_transport_reaction_gpr_presence(model))
    ann["data"] = without_gpr
    transporters = helpers.find_transport_reactions(model)
    fraction = len(without_gpr) / len(transporters)
    ann["metric"] = fraction
    template = """There are a total of {} transport reactions ({:.2%} of all
        transport reactions) without GPR:
        {}"""
    ann["message"] = wrapper.fill(
        template.format(len(without_gpr), fraction, truncate(without_gpr)))
    assert fraction < 0.2, ann["message"]
Ejemplo n.º 25
0
def test_protein_complex_presence(model):
    """
    Expect at least one enzyme complex to be present in the model.

    The gene-protein-reaction (GPR) rules make it possible to infer whether
    a reaction is catalyzed by a single gene product, by isozymes or by a
    heteromeric protein complex. This test checks that at least one such
    heteromeric protein complex is defined in any GPR of the model. For
    S. cerevisiae it could be shown that "essential proteins tend to
    [cluster] together in essential complexes"
    (https://doi.org/10.1074%2Fmcp.M800490-MCP200).

    This might also be a relevant metric for other organisms.

    Implementation:
    Identify GPRs which contain at least one logical AND that combines two
    different gene products.

    """
    ann = test_protein_complex_presence.annotation
    complex_ids = get_ids(basic.find_protein_complexes(model))
    ann["data"] = complex_ids
    n_complexes = len(complex_ids)
    ann["metric"] = n_complexes / len(model.reactions)
    template = (
        """A total of {:d} reactions are catalyzed by complexes defined
        through GPR rules in the model.""")
    ann["message"] = wrapper.fill(template.format(n_complexes))
    assert n_complexes >= 1, ann["message"]
Ejemplo n.º 26
0
def test_metabolites_charge_presence(model):
    """
    Expect every metabolite to have charge information.

    Reactions can only be verified to be charge-balanced when all model
    metabolites are provided with a charge. Charges may be difficult to
    obtain for certain metabolites, so this test serves as a mere report.
    A model can still be stoichiometrically consistent when charge
    information is not defined for each metabolite.

    Implementation:
    Check if each cobra.Metabolite has a non-empty "charge" attribute. This
    attribute is set by the parser if there is an fbc:charge attribute for
    the corresponding species in the SBML.

    """
    ann = test_metabolites_charge_presence.annotation
    uncharged = get_ids(basic.check_metabolites_charge_presence(model))
    ann["data"] = uncharged
    n_uncharged = len(uncharged)
    ann["metric"] = n_uncharged / len(model.metabolites)
    template = """There are a total of {}
        metabolites ({:.2%}) without a charge: {}"""
    ann["message"] = wrapper.fill(
        template.format(n_uncharged, ann["metric"], truncate(uncharged)))
    assert n_uncharged == 0, ann["message"]
Ejemplo n.º 27
0
def test_ngam_presence(model):
    """
    Expect exactly one non growth-associated maintenance reaction.

    The Non-Growth Associated Maintenance reaction (NGAM) is an
    ATP-hydrolysis reaction added to metabolic models to represent energy
    expenses that the cell invests in continuous processes independent of
    the growth rate. Memote tries to infer this reaction from a list of
    buzzwords, and the stoichiometry and components of a simple
    ATP-hydrolysis reaction.

    Implementation:
    From the list of all reactions that convert ATP to ADP select the
    reactions that match the irreversible reaction
    "ATP + H2O -> ADP + HO4P + H+", whose metabolites are situated within
    the main model compartment. The main model compartment is assumed to be
    the cytosol, yet, if that cannot be identified, it is assumed to be the
    compartment with the most metabolites. The resulting list of reactions
    is then filtered further by attempting to match the reaction name with
    any of the following buzzwords ('maintenance', 'atpm', 'requirement',
    'ngam', 'non-growth', 'associated'). If this is possible only the
    filtered reactions are returned, if not the list is returned as is.

    """
    ann = test_ngam_presence.annotation
    candidates = get_ids(basic.find_ngam(model))
    ann["data"] = candidates
    exactly_one = len(candidates) == 1
    # Binary metric: 0.0 when a single NGAM reaction was found, 1.0 otherwise.
    ann["metric"] = 0.0 if exactly_one else 1.0
    template = """A total of {} NGAM reactions could be identified:
        {}"""
    ann["message"] = wrapper.fill(
        template.format(len(candidates), truncate(candidates)))
    assert exactly_one, ann["message"]
Ejemplo n.º 28
0
def test_find_metabolites_not_consumed_with_open_bounds(read_only_model):
    """
    Expect every metabolite to be consumable in complete medium.

    In complete medium a model should be able to divert flux from every
    metabolite. This test opens all the boundary reactions, i.e. simulates
    a complete medium, and checks with flux balance analysis whether any
    metabolite cannot be consumed individually. Metabolites that cannot be
    consumed this way are likely dead-end metabolites or upstream of
    reactions with fixed constraints. The test passes only when all
    metabolites are consumable.

    """
    ann = test_find_metabolites_not_consumed_with_open_bounds.annotation
    not_consumed = consistency.find_metabolites_not_consumed_with_open_bounds(
        read_only_model)
    met_ids = get_ids(not_consumed)
    ann["data"] = met_ids
    n_blocked = len(met_ids)
    ann["metric"] = n_blocked / len(read_only_model.metabolites)
    template = (
        """A total of {} ({:.2%}) metabolites cannot be consumed in complete
        medium: {}""")
    ann["message"] = wrapper.fill(
        template.format(n_blocked, ann["metric"], truncate(met_ids)))
    assert n_blocked == 0, ann["message"]
Ejemplo n.º 29
0
def test_protein_complex_presence(model):
    """
    Expect the model to define at least one enzyme complex.

    From the gene-protein-reaction (GPR) rules one can infer whether a
    reaction is catalyzed by a single gene product, by isozymes or by a
    heteromeric protein complex. The test checks that at least one such
    heteromeric protein complex is defined in any GPR of the model. For
    S. cerevisiae it could be shown that "essential proteins tend to
    [cluster] together in essential complexes"
    (https://doi.org/10.1074%2Fmcp.M800490-MCP200).

    This might also be a relevant metric for other organisms.

    Implementation:
    Identify GPRs which contain at least one logical AND that combines two
    different gene products.

    """
    ann = test_protein_complex_presence.annotation
    found = get_ids(basic.find_protein_complexes(model))
    ann["data"] = found
    total_reactions = len(model.reactions)
    ann["metric"] = len(found) / total_reactions
    message = (
        """A total of {:d} reactions are catalyzed by complexes defined
        through GPR rules in the model.""").format(len(found))
    ann["message"] = wrapper.fill(message)
    assert len(found) >= 1, ann["message"]
Ejemplo n.º 30
0
def test_find_pure_metabolic_reactions(model):
    """
    Expect the model to define at least one purely metabolic reaction.

    A reaction that is neither a transport reaction, a biomass reaction nor
    a boundary reaction is counted as a purely metabolic reaction. The test
    needs metabolite formulae to be present in order to identify transport
    reactions. It passes when the model contains at least one purely
    metabolic reaction, i.e. a conversion of one metabolite into another.

    Implementation:
    From the list of all reactions, those that are boundary, transport and
    biomass reactions are removed and the remainder assumed to be pure
    metabolic reactions. Boundary reactions are identified using the
    attribute cobra.Model.boundary. Please read the description of
    "Transport Reactions" and "Biomass Reaction Identified" to learn how
    they are identified.

    """
    ann = test_find_pure_metabolic_reactions.annotation
    pure_ids = get_ids(basic.find_pure_metabolic_reactions(model))
    ann["data"] = pure_ids
    n_pure = len(pure_ids)
    ann["metric"] = n_pure / len(model.reactions)
    template = (
        """A total of {:d} ({:.2%}) purely metabolic reactions are defined in
        the model, this excludes transporters, exchanges, or pseudo-reactions:
        {}""")
    ann["message"] = wrapper.fill(
        template.format(n_pure, ann["metric"], truncate(pure_ids)))
    assert n_pure >= 1, ann["message"]
Ejemplo n.º 31
0
def test_ngam_presence(model):
    """
    Expect a single non growth-associated maintenance (NGAM) reaction.

    The NGAM reaction is an ATP-hydrolysis reaction added to metabolic
    models to represent energy expenses that the cell invests in continuous
    processes independent of the growth rate. Memote tries to infer this
    reaction from a list of buzzwords, and the stoichiometry and components
    of a simple ATP-hydrolysis reaction.

    Implementation:
    From the list of all reactions that convert ATP to ADP select the
    reactions that match the irreversible reaction
    "ATP + H2O -> ADP + HO4P + H+", whose metabolites are situated within
    the main model compartment. The main model compartment is assumed to be
    the cytosol, yet, if that cannot be identified, it is assumed to be the
    compartment with the most metabolites. The resulting list of reactions
    is then filtered further by attempting to match the reaction name with
    any of the following buzzwords ('maintenance', 'atpm', 'requirement',
    'ngam', 'non-growth', 'associated'). If this is possible only the
    filtered reactions are returned, if not the list is returned as is.

    """
    ann = test_ngam_presence.annotation
    ngam_ids = get_ids(basic.find_ngam(model))
    ann["data"] = ngam_ids
    n_found = len(ngam_ids)
    # The metric is 0.0 for exactly one hit and 1.0 for any other count.
    if n_found == 1:
        ann["metric"] = 0.0
    else:
        ann["metric"] = 1.0
    ann["message"] = wrapper.fill(
        """A total of {} NGAM reactions could be identified:
        {}""".format(n_found, truncate(ngam_ids)))
    assert n_found == 1, ann["message"]
Ejemplo n.º 32
0
def test_gene_protein_reaction_rule_presence(model):
    """
    Expect every non-boundary reaction to have a GPR rule.

    Gene-Protein-Reaction rules express which gene has what function. This
    annotation is important to justify the existence of reactions in the
    model, and is required to conduct in silico gene deletion studies.
    Reactions without GPR may nevertheless be valid: spontaneous reactions,
    or known reactions with yet undiscovered genes likely lack GPR.

    Implementation:
    Check if each cobra.Reaction has a non-empty "gene_reaction_rule"
    attribute, which is set by the parser if there is an
    fbc:geneProductAssociation defined for the corresponding reaction in the
    SBML. Reactions listed in ``model.boundary`` are excluded from the
    result; the reported fraction is relative to all reactions of the model.

    """
    ann = test_gene_protein_reaction_rule_presence.annotation
    lacking_gpr = set(basic.check_gene_protein_reaction_rule_presence(model))
    boundary_rxns = set(model.boundary)
    # Boundary reactions are not held to the GPR expectation.
    rxn_ids = get_ids(lacking_gpr - boundary_rxns)
    ann["data"] = rxn_ids
    n_lacking = len(rxn_ids)
    ann["metric"] = n_lacking / len(model.reactions)
    template = """There are a total of {} reactions ({:.2%}) without GPR:
        {}"""
    ann["message"] = wrapper.fill(
        template.format(n_lacking, ann["metric"], truncate(rxn_ids)))
    assert n_lacking == 0, ann["message"]
Ejemplo n.º 33
0
def test_biomass_precursors_open_production(model, reaction_id):
    """
    Expect all biomass precursors to be producible in complete medium.

    Using flux balance analysis this test optimizes for the production of
    each metabolite that is a substrate of the biomass reaction with the
    exception of atp and h2o. Optimizations are carried out in complete
    medium, i.e. with unconstrained boundary reactions. This is useful when
    reconstructing the precursor biosynthesis pathways of a metabolic model.
    To pass this test, the model should be able to synthesize all the
    precursors.

    Results are stored per biomass reaction under the key ``reaction_id``.
    """
    ann = test_biomass_precursors_open_production.annotation
    open_bounds = (-1000, 1000)
    # Bounds are changed inside the model context only.
    with model:
        for boundary_rxn in model.exchanges:
            boundary_rxn.bounds = open_bounds
        biomass_rxn = model.reactions.get_by_id(reaction_id)
        blocked = get_ids(
            biomass.find_blocked_biomass_precursors(biomass_rxn, model))
        ann["data"][reaction_id] = blocked
    ann["message"][reaction_id] = wrapper.fill(
        """Using the biomass reaction {} and when the model is simulated in
        complete medium a total of {} precursors cannot be produced: {}
        """.format(reaction_id, len(blocked), blocked))
    assert len(blocked) == 0, ann["message"][reaction_id]
Ejemplo n.º 34
0
def test_reaction_annotation_wrong_ids(read_only_model, db):
    """
    Expect all reaction annotations to be in the correct format.

    Computational tools identify databases and the identifiers belonging to
    them by the presence of specific patterns. An ID is only truly
    machine-readable when these patterns can be identified consistently.
    This test checks if the database cross-references in reaction
    annotations conform to patterns defined according to the MIRIAM
    guidelines, i.e. matching those that are defined at
    https://identifiers.org/.

    The required formats, i.e., regex patterns are further outlined in
    `annotation.py`. This test does not carry out a web query for the
    composed URI, it merely controls that the regex patterns match the
    identifiers.

    Results are stored per database under the key ``db``.
    """
    ann = test_reaction_annotation_wrong_ids.annotation
    reactions = read_only_model.reactions
    mismatches = get_ids(
        annotation.generate_component_annotation_miriam_match(
            reactions, "reactions", db))
    ann["data"][db] = mismatches
    n_wrong = len(mismatches)
    ann["metric"][db] = n_wrong / len(reactions)
    template = (
        """The provided reaction annotations for the {} database do not match
        the regular expression patterns defined on identifiers.org. A total of
        {} reaction annotations ({:.2%}) needs to be fixed: {}""")
    ann["message"][db] = wrapper.fill(template.format(
        db, n_wrong, ann["metric"][db], truncate(mismatches)))
    assert n_wrong == 0, ann["message"][db]
Ejemplo n.º 35
0
def test_gene_product_annotation_presence(model):
    """
    Expect all genes to have a non-empty annotation attribute.

    Any annotation present in the SBML annotations field (extended by the
    FBC package) of a gene product counts, irrespective of its type, i.e.
    specific database cross-references, ontology terms or additional
    information. For this test to pass the model is expected to have genes
    and each of them should have some form of annotation.

    Implementation:
    Check if the annotation attribute of each cobra.Gene object of the
    model is unset or empty.

    """
    ann = test_gene_product_annotation_presence.annotation
    without_annotation = get_ids(
        annotation.find_components_without_annotation(model, "genes"))
    ann["data"] = without_annotation
    gene_total = len(model.genes)
    ann["metric"] = len(without_annotation) / gene_total
    message = """A total of {} genes ({:.2%}) lack any form of
        annotation: {}""".format(
        len(without_annotation), ann["metric"], truncate(without_annotation))
    ann["message"] = wrapper.fill(message)
    assert len(without_annotation) == 0, ann["message"]
Ejemplo n.º 36
0
def test_metabolite_annotation_overview(read_only_model, db):
    """
    Expect all metabolites to have annotations from common databases.

    Specific database cross-references are paramount to mapping information.
    Providing references to as many databases as possible helps to make the
    metabolic model more accessible to other researchers. This not only
    facilitates the use of a model in a broad array of computational
    pipelines, it also promotes the metabolic model itself to become an
    organism-specific knowledge base.

    For this test to pass, each metabolite annotation should contain
    cross-references to a number of databases (listed in `annotation.py`).
    For each database this test checks for the presence of its corresponding
    namespace ID to comply with the MIRIAM guidelines, i.e. they have to
    match those defined on https://identifiers.org/.

    Since each database is quite different and some potentially incomplete,
    it may not be feasible to achieve 100% coverage for each of them.
    Generally it should be possible, however, to obtain cross-references to
    at least one of the databases for all metabolites consistently.

    Data, metric and message are all stored per database under ``db``.
    """
    ann = test_metabolite_annotation_overview.annotation
    metabolites = read_only_model.metabolites
    lacking = get_ids(
        annotation.generate_component_annotation_overview(metabolites, db))
    ann["data"][db] = lacking
    n_lacking = len(lacking)
    ann["metric"][db] = n_lacking / len(metabolites)
    template = """The following {} metabolites ({:.2%}) lack annotation for {}:
        {}"""
    ann["message"][db] = wrapper.fill(
        template.format(n_lacking, ann["metric"][db], db, truncate(lacking)))
    assert n_lacking == 0, ann["message"][db]
Ejemplo n.º 37
0
def test_exchange_specific_sbo_presence(read_only_model):
    """Expect all exchange reactions to be annotated with SBO:0000627.

    SBO:0000627 represents the term 'exchange reaction'. The Systems Biology
    Ontology defines an exchange reaction as follows: 'A modeling process to
    provide matter influx or efflux to a model, for example to replenish a
    metabolic network with raw materials (eg carbon / energy sources). Such
    reactions are conceptual, created solely for modeling purposes, and do not
    have a physical correspondence. Exchange reactions, often represented as
    'R_EX_', can operate in the negative (uptake) direction or positive
    (secretion) direction. By convention, a negative flux through an exchange
    reaction represents uptake of the corresponding metabolite, and a positive
    flux represent discharge.' Every exchange reaction should be annotated with
    this. Exchange reactions differ from demand reactions in that the
    metabolites are removed from or added to the extracellular
    environment only.

    Implementation:
    The exchange reactions lacking the term are stored in ``ann["data"]`` and
    their share of all exchange reactions in ``ann["metric"]``. The test is
    skipped when the model has no exchange reactions and passes only when no
    exchange reaction lacks the term.

    """
    ann = test_exchange_specific_sbo_presence.annotation
    exchanges = helpers.find_exchange_rxns(read_only_model)
    ann["data"] = get_ids(sbo.check_component_for_specific_sbo_term(
        exchanges, "SBO:0000627"))
    try:
        ann["metric"] = len(ann["data"]) / len(exchanges)
        ann["message"] = wrapper.fill(
            """A total of {} exchange reactions ({:.2%} of all exchange
            reactions) lack annotation with the SBO term "SBO:0000627" for
            'exchange reaction': {}""".format(
                len(ann["data"]), ann["metric"], truncate(ann["data"])))
    except ZeroDivisionError:
        # Without exchange reactions there is nothing to check.
        ann["metric"] = 1.0
        ann["message"] = "The model has no exchange reactions."
        pytest.skip(ann["message"])
    # ``ann["data"]`` lists the exchanges that LACK the term (see the message
    # above), so the expectation is met only when that list is empty. The
    # previous comparison against ``len(exchanges)`` passed exactly when
    # every exchange reaction was missing the annotation.
    assert len(ann["data"]) == 0, ann["message"]
Ejemplo n.º 38
0
def test_stoichiometric_consistency(model):
    """
    Expect the stoichiometry of the model to be consistent.

    A stoichiometrically inconsistent model violates universal constraints:
    1. Molecular masses are always positive, and
    2. On each side of a reaction the mass is conserved.
    One wrongly defined reaction is enough to make the whole model
    inconsistent and to leave metabolites unconserved. Much like
    insufficient constraints, this can create cycles that either produce
    mass from nothing or consume mass from the model.

    Implementation:
    Consistency is checked first, using an implementation of the algorithm
    presented in section 3.1 by Gevorgyan, A., M. G Poolman, and D. A Fell.
    "Detection of Stoichiometric Inconsistencies in Biomolecular Models."
    Bioinformatics 24, no. 19 (2008): 2245.
    doi: 10.1093/bioinformatics/btn425
    Only when the model is inconsistent is the list of unconserved
    metabolites computed, using the algorithm described in section 3.2 of
    the same publication. In addition, the list of minimal unconservable
    sets is computed using the algorithm described in section 3.3.

    """
    ann = test_stoichiometric_consistency.annotation
    is_consistent = consistency.check_stoichiometric_consistency(model)
    if is_consistent:
        # Nothing to report; both searches are skipped.
        unconserved = []
        min_sets = []
    else:
        unconserved = get_ids(consistency.find_unconserved_metabolites(model))
        min_sets = []
        for mets in consistency.find_inconsistent_min_stoichiometry(model):
            min_sets.append(get_ids(mets))
    ann["data"] = {
        "unconserved_metabolites": unconserved,
        "minimal_unconservable_sets": min_sets,
    }
    ann["metric"] = len(unconserved) / len(model.metabolites)
    template = """This model contains {} ({:.2%}) unconserved
        metabolites: {}; and {} minimal unconservable sets: {}"""
    ann["message"] = wrapper.fill(template.format(
        len(unconserved),
        ann["metric"],
        truncate(unconserved),
        len(min_sets),
        truncate(min_sets),
    ))
    assert is_consistent, ann["message"]
Ejemplo n.º 39
0
def test_gene_product_annotation_wrong_ids(model, db):
    """
    Expect all annotations of genes/gene-products to be in the correct format.

    Computational tools identify databases and the identifiers belonging to
    them by the presence of specific patterns. An ID is only truly
    machine-readable when these patterns can be identified consistently.
    This test checks if the database cross-references in gene annotations
    conform to patterns defined according to the MIRIAM guidelines, i.e.
    matching those that are defined at https://identifiers.org/.

    The required formats, i.e., regex patterns are further outlined in
    `annotation.py`. This test does not carry out a web query for the
    composed URI, it merely controls that the regex patterns match the
    identifiers.

    Implementation:
    For those genes whose annotation keys match any of the tested
    databases, check if the corresponding values match the identifier
    pattern of each database. The test fails early when no gene carries an
    annotation for ``db`` at all.

    """
    ann = test_gene_product_annotation_wrong_ids.annotation
    # Stage 1: genes that do have an annotation for this database.
    all_genes = set(model.genes)
    lacking = annotation.generate_component_annotation_overview(
        model.genes, db)
    total = get_ids(all_genes.difference(lacking))
    ann["data"][db] = total
    ann["metric"][db] = 1.0
    no_annotation_msg = """There are no gene annotations for the {} database.
        """.format(db)
    ann["message"][db] = wrapper.fill(no_annotation_msg)
    assert len(total) > 0, ann["message"][db]
    # Stage 2: the stage 1 results are replaced by the pattern mismatches.
    mismatches = get_ids(
        annotation.generate_component_annotation_miriam_match(
            model.genes, "genes", db))
    ann["data"][db] = mismatches
    ann["metric"][db] = len(mismatches) / len(model.genes)
    template = """A total of {} gene annotations ({:.2%}) do not match the
        regular expression patterns defined on identifiers.org for the {}
        database: {}"""
    ann["message"][db] = wrapper.fill(template.format(
        len(mismatches), ann["metric"][db], db, truncate(mismatches)))
    assert len(mismatches) == 0, ann["message"][db]
Ejemplo n.º 40
0
def test_find_transport_reactions(read_only_model):
    """
    Expect at least one transport reaction in the read_only_model.

    The identifiers of the transport reactions are stored as data and their
    share of all reactions of the model as metric.
    """
    ann = test_find_transport_reactions.annotation
    transporter_ids = get_ids(
        helpers.find_transport_reactions(read_only_model))
    ann["data"] = transporter_ids
    n_transporters = len(transporter_ids)
    ann["metric"] = n_transporters / len(read_only_model.reactions)
    template = (
        """A total of {:d} ({:.2%}) transport reactions are defined in the
        model, this excludes purely metabolic reactions, exchanges, or
        pseudo-reactions: {}""")
    ann["message"] = wrapper.fill(template.format(
        n_transporters, ann["metric"], truncate(transporter_ids)))
    assert n_transporters >= 1, ann["message"]
Ejemplo n.º 41
0
def test_direct_metabolites_in_biomass(model, reaction_id):
    """
    Expect the ratio of direct metabolites to be below 0.5.

    Some biomass precursors are taken from the media and directly consumed by
    the biomass reaction. It might not be a problem for ions or
    metabolites for which the organism in question is auxotrophic. However,
    too many of these metabolites may be artifacts of automated gap-filling
    procedures. Many gap-filling algorithms attempt to minimise the number of
    added reactions. This can lead to many biomass precursors being
    "direct metabolites".

    This test reports the ratio of direct metabolites to the total amount of
    precursors to a given biomass reaction. It specifically looks for
    metabolites that are only in either exchange, transport or biomass
    reactions. Bear in mind that this may lead to false positives in heavily
    compartmentalized models.

    To pass this test, the ratio of direct metabolites should be less than 50%
    of all biomass precursors. This is an arbitrary threshold but it takes
    into account that while certain ions do not serve a relevant metabolic
    function, it may still be important to include them in the biomass
    reaction to account for the impact of their uptake energy costs.

    This threshold is subject to change in the future.

    Implementation:
    Identify biomass precursors (excluding ATP and H+), identify cytosol
    and extracellular compartment from an internal mapping table. Then,
    determine which precursors are only involved in transport, boundary and
    biomass reactions. Using FBA with the biomass function as the objective
    then determine whether the metabolite is taken up only to be consumed by
    the biomass reaction.

    Data, metric and message are stored per biomass reaction under the key
    ``reaction_id``. The test is skipped when the search for direct
    metabolites raises an ``OptimizationError``.

    """
    # TODO: Update the threshold as soon as we have an overview of the average!
    ann = test_direct_metabolites_in_biomass.annotation
    reaction = model.reactions.get_by_id(reaction_id)
    try:
        ann["data"][reaction_id] = get_ids(
            biomass.find_direct_metabolites(model, reaction))
    except OptimizationError:
        ann["data"][reaction_id] = []
        ann["metric"][reaction_id] = 1.0
        ann["message"][reaction_id] = "This model does not grow."
        # The skip reason has to be a string. Passing the whole
        # ``ann["message"]`` dict makes pytest raise a TypeError instead of
        # skipping, so only this reaction's message is handed over.
        pytest.skip(ann["message"][reaction_id])
    ann["metric"][reaction_id] = len(ann["data"][reaction_id]) / \
        len(biomass.find_biomass_precursors(model, reaction))
    ann["message"][reaction_id] = wrapper.fill(
        """{} contains a total of {} direct metabolites ({:.2%}).
        Specifically these are: {}.
        """.format(reaction_id, len(ann["data"][reaction_id]),
                   ann["metric"][reaction_id], ann["data"][reaction_id]))
    assert ann["metric"][reaction_id] < 0.5, ann["message"][reaction_id]
Ejemplo n.º 42
0
def test_gene_product_annotation_wrong_ids(model, db):
    """
    Expect gene/gene-product annotations to be in the correct format.

    To identify databases and the identifiers belonging to them,
    computational tools rely on the presence of specific patterns. Only when
    these patterns can be identified consistently is an ID truly
    machine-readable. This test checks if the database cross-references in
    gene annotations conform to patterns defined according to the MIRIAM
    guidelines, i.e. matching those that are defined at
    https://identifiers.org/.

    The required formats, i.e., regex patterns are further outlined in
    `annotation.py`. This test does not carry out a web query for the
    composed URI, it merely controls that the regex patterns match the
    identifiers.

    Implementation:
    For those genes whose annotation keys match any of the tested
    databases, check if the corresponding values match the identifier
    pattern of each database. If no gene is annotated for ``db`` the test
    fails before any pattern is checked.

    """
    ann = test_gene_product_annotation_wrong_ids.annotation
    genes = model.genes
    # First determine which genes are annotated for this database at all.
    annotated = set(genes).difference(
        annotation.generate_component_annotation_overview(genes, db))
    total = get_ids(annotated)
    ann["data"][db] = total
    ann["metric"][db] = 1.0
    ann["message"][db] = wrapper.fill(
        """There are no gene annotations for the {} database.
        """.format(db))
    assert len(total) > 0, ann["message"][db]
    # Then overwrite the bookkeeping with the identifiers that do not match.
    wrong_ids = get_ids(
        annotation.generate_component_annotation_miriam_match(
            genes, "genes", db))
    ann["data"][db] = wrong_ids
    n_wrong = len(wrong_ids)
    ann["metric"][db] = n_wrong / len(genes)
    ann["message"][db] = wrapper.fill(
        """A total of {} gene annotations ({:.2%}) do not match the
        regular expression patterns defined on identifiers.org for the {}
        database: {}""".format(
            n_wrong, ann["metric"][db], db, truncate(wrong_ids)))
    assert n_wrong == 0, ann["message"][db]
Ejemplo n.º 43
0
def test_reactions_presence(read_only_model):
    """
    Expect that at least one reaction is defined in the model.

    To be useful a metabolic model should consist at least of a few
    reactions. This test simply checks that there is at least one.
    """
    ann = test_reactions_presence.annotation
    assert hasattr(read_only_model, "reactions")
    reaction_ids = get_ids(read_only_model.reactions)
    ann["data"] = reaction_ids
    n_reactions = len(reaction_ids)
    template = "{:d} reactions are defined in the model."
    ann["message"] = template.format(n_reactions)
    assert n_reactions >= 1, ann["message"]
Ejemplo n.º 44
0
def test_metabolites_presence(read_only_model):
    """
    Expect that at least one metabolite is defined in the model.

    To be useful a metabolic model should consist at least of a few
    metabolites that are converted by reactions.
    This test simply checks that there is more than zero metabolites.
    """
    ann = test_metabolites_presence.annotation
    assert hasattr(read_only_model, "metabolites")
    metabolite_ids = get_ids(read_only_model.metabolites)
    ann["data"] = metabolite_ids
    n_metabolites = len(metabolite_ids)
    template = "{:d} metabolites are defined in the model."
    ann["message"] = template.format(n_metabolites)
    assert n_metabolites >= 1, ann["message"]
Ejemplo n.º 45
0
def test_find_deadends(read_only_model):
    """
    Expect no dead-ends to be present.

    Dead-ends are metabolites that can only be produced but not consumed by
    reactions in the model. They may indicate the presence of network gaps.

    The identifiers of all dead-end metabolites and their share of all
    metabolites in the model are recorded on the test annotation. The test
    passes only when no dead-end metabolite is found.
    """
    ann = test_find_deadends.annotation
    ann["data"] = get_ids(consistency.find_deadends(read_only_model))
    ann["metric"] = len(ann["data"]) / len(read_only_model.metabolites)
    ann["message"] = wrapper.fill(
        """A total of {} ({:.2%}) metabolites are not consumed by any reaction
        of the model: {}""".format(
            len(ann["data"]), ann["metric"], truncate(ann["data"])))
    # Compare the number of dead-ends with zero. Comparing the identifier
    # collection itself with the integer 0 is never true, which made the
    # test fail even for models without any dead-ends.
    assert len(ann["data"]) == 0, ann["message"]
Ejemplo n.º 46
0
def test_reactions_presence(model):
    """
    Expect that at least one reaction is defined in the model.

    A metabolic model without reactions is of little use, so this test
    requires that the number of reactions is greater than zero.

    Implementation:
    Check that the cobra.Model object has a non-empty "reactions"
    attribute. That list is populated from sbml:listOfReactions, which
    should contain at least one sbml:reaction.

    """
    ann = test_reactions_presence.annotation
    assert hasattr(model, "reactions")
    reaction_ids = get_ids(model.reactions)
    ann["data"] = reaction_ids
    num_reactions = len(reaction_ids)
    ann["message"] = "{:d} reactions are defined in the model.".format(
        num_reactions)
    assert num_reactions >= 1, ann["message"]
Ejemplo n.º 47
0
def test_find_transport_reactions(model):
    """
    Expect >= 1 transport reactions are present in the model.

    Cellular metabolism usually involves moving metabolites across a lipid
    bi-layer. This test reports how many reactions transport metabolites
    from one compartment to another. At least one such reaction must exist
    for a cell to take up nutrients and/or excrete waste.

    Implementation:
    A transport reaction is defined as follows:
    1. It contains metabolites from at least 2 compartments and
    2. at least 1 metabolite undergoes no chemical reaction, i.e.,
    the formula and/or annotation stays the same on both sides of the equation.

    A notable exception is transport via PTS, which additionally carries the
    following restriction:
    3. The transported metabolite(s) are transported into a compartment through
    the exchange of a phosphate.

    An example of transport via PTS would be
    pep(c) + glucose(e) -> glucose-6-phosphate(c) + pyr(c)

    Reactions similar to transport via PTS (referred to as "modified transport
    reactions") follow a similar pattern:
    A(x) + B-R(y) -> A-R(y) + B(y)

    Modified transport reactions can only be detected when a formula is
    defined for every metabolite of the reaction. Otherwise transport
    reactions are identified through annotations, which cannot detect
    modified transport reactions.

    """
    ann = test_find_transport_reactions.annotation
    transport_ids = get_ids(helpers.find_transport_reactions(model))
    ann["data"] = transport_ids
    num_transport = len(transport_ids)
    share = num_transport / len(model.reactions)
    ann["metric"] = share
    ann["message"] = wrapper.fill(
        """A total of {:d} ({:.2%}) transport reactions are defined in the
        model, this excludes purely metabolic reactions, exchanges, or
        pseudo-reactions: {}""".format(
            num_transport, share, truncate(transport_ids)))
    assert num_transport >= 1, ann["message"]
Ejemplo n.º 48
0
def test_sink_specific_sbo_presence(model):
    """Expect all sink reactions to be annotated with SBO:0000632.

    SBO:0000632 represents the term 'sink reaction'. The Systems Biology
    Ontology defines a sink reaction as follows: 'A modeling process to
    provide matter influx or efflux to a model, for example to replenish a
    metabolic network with raw materials (eg carbon / energy sources). Such
    reactions are conceptual, created solely for modeling purposes, and do not
    have a physical correspondence. Unlike the analogous demand (SBO:0000628)
    reactions, which are usually designated as irreversible, sink reactions
    always represent a reversible uptake/secretion processes, and act as a
    metabolite source with no cost to the cell. Sink reactions, also referred
    to as R_SINK_, are generally used for compounds that are metabolized by
    the cell but are produced by non-metabolic, un-modeled cellular processes.'
    Every sink reaction should be annotated with
    this. Sink reactions differ from exchange reactions in that the metabolites
    are not removed from the extracellular environment, but from any of the
    organism's compartments.

    Implementation:
    Check if each sink reaction has a non-zero "annotation"
    attribute that contains the key "sbo" with the associated
    value being one of the SBO terms above. The test is skipped when the
    model has no sink reactions and passes only when no sink reaction lacks
    the term.

    """
    ann = test_sink_specific_sbo_presence.annotation
    sinks = helpers.find_sink_reactions(model)
    # The recorded data are the sink reactions that LACK the expected term.
    ann["data"] = get_ids(sbo.check_component_for_specific_sbo_term(
        sinks, "SBO:0000632"))
    try:
        ann["metric"] = len(ann["data"]) / len(sinks)
    except ZeroDivisionError:
        # Without sink reactions there is nothing to check.
        ann["metric"] = 1.0
        ann["message"] = "No sink reactions found."
        pytest.skip(ann["message"])
    ann["message"] = wrapper.fill(
        """A total of {} sink reactions ({:.2%} of all sink reactions) lack
        annotation with the SBO term "SBO:0000632" for
        'sink reaction': {}""".format(
            len(ann["data"]), ann["metric"], truncate(ann["data"])
        ))
    # Passing requires that no sink reaction is missing the annotation. The
    # previous comparison against len(sinks) only passed when every sink
    # reaction lacked it.
    assert len(ann["data"]) == 0, ann["message"]
Ejemplo n.º 49
0
def test_biomass_specific_sbo_presence(model):
    """Expect all biomass reactions to be annotated with SBO:0000629.

    SBO:0000629 represents the term 'biomass production'. The Systems Biology
    Ontology defines biomass production as follows: 'Biomass production,
    often represented 'R_BIOMASS_', is usually the optimization target reaction
    of constraint-based models, and can consume multiple reactants to produce
    multiple products. It is also assumed that parts of the reactants are also
    consumed in unrepresented processes and hence products do not have to
    reflect all the atom composition of the reactants. Formulation of a
    biomass production process entails definition of the macromolecular
    content (eg. cellular protein fraction), metabolic constitution of
    each fraction (eg. amino acids), and subsequently the atomic composition
    (eg. nitrogen atoms). More complex biomass functions can additionally
    incorporate details of essential vitamins and cofactors required for
    growth.'
    Every reaction representing the biomass production should carry this
    annotation.

    Implementation:
    Check if each biomass reaction has a non-zero "annotation"
    attribute that contains the key "sbo" with the associated
    value being one of the SBO terms above. The test is skipped when no
    biomass reaction is found.

    """
    ann = test_biomass_specific_sbo_presence.annotation
    biomass = helpers.find_biomass_reaction(model)
    lacking_ids = get_ids(sbo.check_component_for_specific_sbo_term(
        biomass, "SBO:0000629"))
    ann["data"] = lacking_ids
    num_biomass = len(biomass)
    if num_biomass == 0:
        # Nothing to check without biomass reactions.
        ann["metric"] = 1.0
        ann["message"] = "No biomass reactions found."
        pytest.skip(ann["message"])
    num_lacking = len(lacking_ids)
    share = num_lacking / num_biomass
    ann["metric"] = share
    ann["message"] = wrapper.fill(
        """A total of {} biomass reactions ({:.2%} of all biomass reactions)
        lack annotation with the SBO term "SBO:0000629" for
        'biomass production': {}""".format(
            num_lacking, share, truncate(lacking_ids)
        ))
    assert num_lacking == 0, ann["message"]
Ejemplo n.º 50
0
def test_gene_product_annotation_overview(model, db):
    """
    Expect all genes to have annotations from common databases.

    Specific database cross-references are paramount to mapping information.
    Providing references to as many databases as possible makes the metabolic
    model more accessible to other researchers. It facilitates the use of a
    model in a broad array of computational pipelines and promotes the model
    itself to become an organism-specific knowledge base.

    For this test to pass, each gene annotation should contain
    cross-references to a number of databases. The current selection is
    listed in `annotation.py`, but an ongoing discussion can be found at
    https://github.com/opencobra/memote/issues/332. For each database this
    test checks for the presence of its corresponding namespace ID to comply
    with the MIRIAM guidelines i.e. they have to match those defined on
    https://identifiers.org/.

    Since each database is quite different and some potentially incomplete, it
    may not be feasible to achieve 100% coverage for each of them. Generally
    it should be possible, however, to obtain cross-references to at least
    one of the databases for all gene products consistently.

    Implementation:
    Check if the keys of the annotation attribute of each cobra.Gene of
    the model match with a selection of common genome databases. The
    annotation attribute of cobrapy components is a dictionary of
    key:value pairs.

    """
    ann = test_gene_product_annotation_overview.annotation
    lacking_ids = get_ids(
        annotation.generate_component_annotation_overview(
            model.genes, db))
    ann["data"][db] = lacking_ids
    num_lacking = len(lacking_ids)
    share = num_lacking / len(model.genes)
    ann["metric"][db] = share
    ann["message"][db] = wrapper.fill(
        """The following {} genes ({:.2%}) lack annotation for {}:
        {}""".format(num_lacking, share, db, truncate(lacking_ids)))
    assert num_lacking == 0, ann["message"][db]
Ejemplo n.º 51
0
def test_genes_presence(model):
    """
    Expect that at least one gene is defined in the model.

    A metabolic model can still be a useful tool without any genes. Certain
    methods, however, rely on the presence of genes and, more importantly,
    on the corresponding gene-protein-reaction rules. This test therefore
    requires that at least one gene is defined.

    Implementation:
    Check that the cobra.Model object has a non-empty "genes"
    attribute. That list is populated from fbc:listOfGeneProducts, which
    should contain at least one fbc:geneProduct.

    """
    ann = test_genes_presence.annotation
    assert hasattr(model, "genes")
    gene_ids = get_ids(model.genes)
    ann["data"] = gene_ids
    num_genes = len(gene_ids)
    ann["message"] = "{:d} genes are defined in the model.".format(
        num_genes)
    assert num_genes >= 1, ann["message"]
Ejemplo n.º 52
0
def test_find_disconnected(model):
    """
    Expect no disconnected metabolites to be present.

    Disconnected metabolites do not take part in any reaction of the model.
    They are most likely left over from the reconstruction process, but may
    also point to network and knowledge gaps.

    Implementation:
    Check for any metabolites of the cobra.Model object with an empty
    reaction attribute.

    """
    ann = test_find_disconnected.annotation
    orphan_ids = get_ids(consistency.find_disconnected(model))
    ann["data"] = orphan_ids
    num_orphans = len(orphan_ids)
    share = num_orphans / len(model.metabolites)
    ann["metric"] = share
    ann["message"] = wrapper.fill(
        """A total of {} ({:.2%}) metabolites are not associated with any
        reaction of the model: {}""".format(
            num_orphans, share, truncate(orphan_ids)))
    assert num_orphans == 0, ann["message"]
Ejemplo n.º 53
0
def test_find_deadends(model):
    """
    Expect no dead-ends to be present.

    Dead-ends are metabolites that can only be produced but not consumed by
    reactions in the model. They may indicate the presence of network and
    knowledge gaps.

    Implementation:
    Find dead-end metabolites structurally by considering only reaction
    equations and reversibility. FBA is not carried out. The test passes
    only when no dead-end metabolite is found.

    """
    ann = test_find_deadends.annotation
    ann["data"] = get_ids(consistency.find_deadends(model))
    ann["metric"] = len(ann["data"]) / len(model.metabolites)
    ann["message"] = wrapper.fill(
        """A total of {} ({:.2%}) metabolites are not consumed by any reaction
        of the model: {}""".format(
            len(ann["data"]), ann["metric"], truncate(ann["data"])))
    # Compare the number of dead-ends with zero. Comparing the identifier
    # collection itself with the integer 0 is never true, which made the
    # test fail even for models without any dead-ends.
    assert len(ann["data"]) == 0, ann["message"]
Ejemplo n.º 54
0
def test_reaction_id_namespace_consistency(model):
    """
    Expect reaction identifiers to be from the same namespace.

    In well-annotated models it is no problem if the pool of main identifiers
    for reactions consists of identifiers from several databases. In models
    that lack appropriate annotations, however, it may hamper the ability of
    other researchers to use them. Running the model through a computational
    pipeline may be difficult without first consolidating the namespace.

    Hence, this test checks if the main reaction identifiers can be
    attributed to one single namespace based on the regex patterns defined at
    https://identifiers.org/

    Implementation:
    Generate a pandas.DataFrame with each column corresponding to one
    database from the selection and each row to a reaction ID. A boolean
    entry indicates whether the reaction ID matches the regex pattern
    of the corresponding database. Since the Biocyc pattern matches quite
    broadly, assume that any instance of an identifier matching to Biocyc
    AND any other DB pattern is a false positive match for Biocyc and then set
    the boolean to ``false``. Sum the positive matches for each database and
    assume that the largest set is the 'main' identifier namespace.

    """
    ann = test_reaction_id_namespace_consistency.annotation
    overview = annotation.generate_component_id_namespace_overview(
        model, "reactions")
    # Number of matching identifiers per namespace (one entry per column).
    matches_per_namespace = overview.sum()
    namespaces = list(matches_per_namespace.index)
    largest = matches_per_namespace[namespaces].idxmax()
    # Every identifier is expected to match the largest namespace, so the
    # identifiers that do not are the deviating ones.
    all_ids = set(get_ids(model.reactions))
    conforming_ids = overview[overview[largest]].index.tolist()
    deviating_ids = list(all_ids.difference(conforming_ids))
    ann["data"] = deviating_ids
    num_deviating = len(deviating_ids)
    share = num_deviating / len(model.reactions)
    ann["metric"] = share
    ann["message"] = wrapper.fill(
        """{} reaction identifiers ({:.2%}) deviate from the largest found
        namespace ({}): {}""".format(
            num_deviating, share, largest, truncate(deviating_ids)))
    assert num_deviating == 0, ann["message"]
Ejemplo n.º 55
0
def test_exchange_specific_sbo_presence(model):
    """Expect all exchange reactions to be annotated with SBO:0000627.

    SBO:0000627 represents the term 'exchange reaction'. The Systems Biology
    Ontology defines an exchange reaction as follows: 'A modeling process to
    provide matter influx or efflux to a model, for example to replenish a
    metabolic network with raw materials (eg carbon / energy sources). Such
    reactions are conceptual, created solely for modeling purposes, and do not
    have a  physical correspondence. Exchange reactions, often represented as
    'R_EX_', can operate in the negative (uptake) direction or positive
    (secretion) direction. By convention, a negative flux through an exchange
    reaction represents uptake of the corresponding metabolite, and a positive
    flux represent discharge.' Every exchange reaction should be annotated with
    this. Exchange reactions differ from demand reactions in that the
    metabolites are removed from or added to the extracellular
    environment only.

    Implementation:
    Check if each exchange reaction has a non-zero "annotation"
    attribute that contains the key "sbo" with the associated
    value being one of the SBO terms above. The test is skipped when the
    model has no exchange reactions and passes only when no exchange
    reaction lacks the term.

    """
    ann = test_exchange_specific_sbo_presence.annotation
    exchanges = helpers.find_exchange_rxns(model)
    # The recorded data are the exchange reactions that LACK the term.
    ann["data"] = get_ids(sbo.check_component_for_specific_sbo_term(
        exchanges, "SBO:0000627"))
    try:
        ann["metric"] = len(ann["data"]) / len(exchanges)
        ann["message"] = wrapper.fill(
            """A total of {} exchange reactions ({:.2%} of all exchange
            reactions) lack annotation with the SBO term "SBO:0000627" for
            'exchange reaction': {}""".format(
                len(ann["data"]), ann["metric"], truncate(ann["data"])))
    except ZeroDivisionError:
        # Without exchange reactions there is nothing to check.
        ann["metric"] = 1.0
        ann["message"] = "The model has no exchange reactions."
        pytest.skip(ann["message"])
    # Passing requires that no exchange reaction is missing the annotation.
    # The previous comparison against len(exchanges) only passed when every
    # exchange reaction lacked it.
    assert len(ann["data"]) == 0, ann["message"]
Ejemplo n.º 56
0
def test_demand_specific_sbo_presence(model):
    """Expect all demand reactions to be annotated with SBO:0000628.

    SBO:0000628 represents the term 'demand reaction'. The Systems Biology
    Ontology defines a demand reaction as follows: 'A modeling process
    analogous to exchange reaction, but which operates upon "internal"
    metabolites. Metabolites that are consumed by these reactions are assumed
    to be used in intra-cellular processes that are not part of the model.
    Demand reactions, often represented 'R_DM_', can also deliver metabolites
    (from intra-cellular processes that are not considered in the model).'
    Every demand reaction should be annotated with
    this. Demand reactions differ from exchange reactions in that the
    metabolites are not removed from the extracellular environment, but from
    any of the organism's compartments. Demand reactions differ from sink
    reactions in that they are designated as irreversible.

    Implementation:
    Check if each demand reaction has a non-zero "annotation"
    attribute that contains the key "sbo" with the associated
    value being one of the SBO terms above. The test is skipped when the
    model has no demand reactions and passes only when no demand reaction
    lacks the term.

    """
    ann = test_demand_specific_sbo_presence.annotation
    demands = helpers.find_demand_reactions(model)
    # The recorded data are the demand reactions that LACK the term.
    ann["data"] = get_ids(sbo.check_component_for_specific_sbo_term(
        demands, "SBO:0000628"))
    try:
        ann["metric"] = len(ann["data"]) / len(demands)
        ann["message"] = wrapper.fill(
            """A total of {} demand reactions ({:.2%} of all demand
            reactions) lack annotation with the SBO term "SBO:0000628" for
            'demand reaction': {}""".format(
                len(ann["data"]), ann["metric"], truncate(ann["data"])))
    except ZeroDivisionError:
        # Without demand reactions there is nothing to check.
        ann["metric"] = 1.0
        ann["message"] = "The model has no demand reactions."
        pytest.skip(ann["message"])
    # Passing requires that no demand reaction is missing the annotation.
    # The previous comparison against len(demands) only passed when every
    # demand reaction lacked it.
    assert len(ann["data"]) == 0, ann["message"]
Ejemplo n.º 57
0
def test_find_reversible_oxygen_reactions(model):
    """
    Expect zero or more oxygen-containing reactions to be reversible.

    The directionality of oxygen-producing/-consuming reactions affects the
    model's ability to grow anaerobically i.e. create faux-anaerobic organisms.
    This test reports how many of these oxygen-containing reactions are
    reversible. This test does not have any mandatory 'pass' criteria.

    Implementation:
    First, find the metabolite representing atmospheric oxygen in the model on
    the basis of an internal mapping table or by specifically looking for the
    formula "O2". Then, find all reactions that produce or consume oxygen and
    report those that are reversible. When the model contains no
    oxygen-containing reactions the metric is reported as zero.

    """
    ann = test_find_reversible_oxygen_reactions.annotation
    o2_rxns = basic.find_oxygen_reactions(model)
    ann["data"] = get_ids([rxn for rxn in o2_rxns if rxn.reversibility])
    try:
        ann["metric"] = len(ann["data"]) / len(o2_rxns)
    except ZeroDivisionError:
        # A model without oxygen-containing reactions must not make this
        # purely informative test error out; there is simply nothing to
        # report.
        ann["metric"] = 0.0
        ann["message"] = "The model has no oxygen-containing reactions."
        return
    ann["message"] = wrapper.fill(
        """There are a total of {} reversible oxygen-containing reactions
        ({:.2%} of all oxygen-containing reactions): {}""".format(
            len(ann["data"]), ann["metric"], truncate(ann["data"])))
Ejemplo n.º 58
0
def find_met_in_model(model, mnx_id, compartment_id=None):
    """
    Return specific metabolites by looking up IDs in METANETX_SHORTLIST.

    The lookup tries three strategies in order and stops at the first one
    that yields a result: matching the MNX ID against metabolite IDs,
    matching shortlist cross-references against metabolite annotations, and
    matching shortlist cross-references against metabolite IDs.

    Parameters
    ----------
    model : cobra.Model
        The metabolic model under investigation.
    mnx_id : string
        Memote internal MetaNetX metabolite identifier used to map between
        cross-references in the METANETX_SHORTLIST.
    compartment_id : string, optional
        ID of the specific compartment where the metabolites should be found.
        Defaults to returning matching metabolites from all compartments.

    Returns
    -------
    list
        cobra.Metabolite(s) matching the mnx_id.

    Raises
    ------
    ValueError
        If ``mnx_id`` is not a column of METANETX_SHORTLIST.
    RuntimeError
        If ``compartment_id`` is given and either no candidate or more than
        one candidate is found in that compartment.

    """
    def compare_annotation(annotation):
        """
        Return annotation IDs that match to METANETX_SHORTLIST references.

        Compares the set of METANETX_SHORTLIST references for a given mnx_id
        and the annotation IDs stored in a given annotation dictionary.
        """
        query_values = set(utils.flatten(annotation.values()))
        ref_values = set(utils.flatten(METANETX_SHORTLIST[mnx_id]))
        return query_values & ref_values

    def compile_id_pattern(identifier):
        """
        Return a regex matching the identifier with an optional suffix.

        Identifiers in a model may or may not carry a compartment tag, e.g.
        `MNXM23141_c` vs. `MNXM23141`. The identifier is escaped so that
        characters with a special meaning in regular expressions (such as
        "+" or ".") are matched literally.
        """
        return re.compile('^{}(_[a-zA-Z0-9]+)?$'.format(
            re.escape('{}'.format(identifier))))

    # Make sure that the MNX ID we're looking up exists in the metabolite
    # shortlist.
    if mnx_id not in METANETX_SHORTLIST.columns:
        raise ValueError(
            "{} is not in the MetaNetX Shortlist! Make sure "
            "you typed the ID correctly, if yes, update the "
            "shortlist by updating and re-running the script "
            "generate_mnx_shortlists.py.".format(mnx_id)
        )

    metabolites = model.metabolites
    # 1. Try the MNX ID itself as a metabolite ID.
    candidates = metabolites.query(compile_id_pattern(mnx_id))
    if not candidates:
        # 2. Try the shortlist cross-references against the annotations.
        candidates = metabolites.query(
            compare_annotation, attribute='annotation')
    if not candidates:
        # 3. Try every other identifier provided by our shortlist of
        # MetaNetX' mapping table as a metabolite ID. Collect into a plain
        # list because several identifiers may hit the same metabolite.
        candidates = []
        for value in METANETX_SHORTLIST[mnx_id]:
            if value:
                for ident in value:
                    candidates.extend(metabolites.query(
                        compile_id_pattern(ident), attribute='id'))

    # Return all possible candidates if no specific compartment ID is
    # provided.
    if compartment_id is None:
        return candidates

    # Otherwise, return the candidate in that one specific compartment and
    # raise an exception unless exactly one candidate is found there.
    candidates_in_compartment = \
        [cand for cand in candidates if cand.compartment == compartment_id]

    if len(candidates_in_compartment) == 0:
        raise RuntimeError("It was not possible to identify "
                           "any metabolite in compartment {} corresponding to "
                           "the following MetaNetX identifier: {}."
                           "Make sure that a cross-reference to this ID in "
                           "the MetaNetX Database exists for your "
                           "identifier "
                           "namespace.".format(compartment_id, mnx_id))
    if len(candidates_in_compartment) > 1:
        raise RuntimeError("It was not possible to uniquely identify "
                           "a single metabolite in compartment {} that "
                           "corresponds to the following MetaNetX "
                           "identifier: {}."
                           "Instead these candidates were found: {}."
                           "Check that metabolite compartment tags are "
                           "correct. Consider switching to a namespace scheme "
                           "where identifiers are truly "
                           "unique.".format(compartment_id,
                                            mnx_id,
                                            utils.get_ids(
                                                candidates_in_compartment
                                            ))
                           )
    return candidates_in_compartment
Ejemplo n.º 59
0
def test_find_candidate_irreversible_reactions(model):
    u"""
    Identify reversible reactions that could be irreversible.

    If a reaction is neither a transport reaction, a biomass reaction nor a
    boundary reaction, it is counted as a purely metabolic reaction.
    This test checks if the reversibility attribute of each reaction
    agrees with a thermodynamics-based
    calculation of reversibility.

    Implementation:
    To determine reversibility we calculate
    the reversibility index ln_gamma (natural logarithm of gamma) of each
    reaction
    using the eQuilibrator API. We consider reactions, whose reactants'
    concentrations would need to change by more than three orders of
    magnitude for the reaction flux to reverse direction, to be likely
    candidates of irreversible reactions. This assumes default concentrations
    around 100 μM (~3 μM—3 mM) at pH = 7, I = 0.1 M and T = 298 K. The
    corresponding reversibility index is approximately 7. For
    further information on the thermodynamic and implementation details
    please refer to
    https://doi.org/10.1093/bioinformatics/bts317 and
    https://pypi.org/project/equilibrator-api/.

    Please note that currently eQuilibrator can only determine the
    reversibility index for chemically and redox balanced reactions whose
    metabolites can be mapped to KEGG compound identifiers (e.g. C00001). In
    addition
    to not being mappable to KEGG or the reaction not being balanced,
    there is a possibility that the metabolite cannot be broken down into
    chemical groups which is essential for the calculation of Gibbs energy
    using group contributions. This test collects each erroneous reaction
    and returns them as a tuple containing each list in the following order:
        1. Reactions with reversibility index
        2. Reactions with incomplete mapping to KEGG
        3. Reactions with metabolites that are problematic during calculation
        4. Chemically or redox unbalanced Reactions (after mapping to KEGG)

    This test simply reports the number of reversible reactions that, according
    to the reversibility index, are likely to be irreversible. When no
    reversibility index could be determined for any reaction the metric is
    reported as zero.

    """
    # With gamma = 1000, ln_gamma ~ 6.9. We use 7 as the cut-off.
    threshold = 7.0
    ann = test_find_candidate_irreversible_reactions.annotation
    met_rxns = basic.find_pure_metabolic_reactions(model)
    rev_index, incomplete, problematic, unbalanced = \
        thermo.find_thermodynamic_reversibility_index(met_rxns)
    ann["data"] = (
        # The reversibility index can be infinite so we convert it to a JSON
        # compatible string.
        [(r.id, str(i)) for r, i in rev_index],
        get_ids(incomplete),
        get_ids(problematic),
        get_ids(unbalanced)
    )
    num_irrev = sum(1 for r, i in rev_index if abs(i) >= threshold)
    ann["message"] = wrapper.fill(
        """Out of {} purely metabolic reactions, {} have an absolute
        reversibility index greater or equal to 7 and are therefore likely
        candidates for being irreversible.
        {} reactions could not be mapped to KEGG completely, {} contained
        'problematic' metabolites, and {} are chemically or redox imbalanced.
        """.format(len(met_rxns), num_irrev, len(incomplete), len(problematic),
                   len(unbalanced))
    )
    try:
        ann["metric"] = num_irrev / len(rev_index)
    except ZeroDivisionError:
        # No reaction received a reversibility index (for example because
        # none could be mapped), so no reaction is flagged either. Report
        # zero instead of letting this purely informative test error out.
        ann["metric"] = 0.0