def test_fbc_presence(sbml_version): """ Expect the FBC plugin to be present. The Flux Balance Constraints (FBC) Package extends SBML with structured and semantic descriptions for domain-specific model components such as flux bounds, multiple linear objective functions, gene-protein-reaction associations, metabolite chemical formulas, charge and related annotations which are relevant for parameterized GEMs and FBA models. The SBML and constraint-based modeling communities collaboratively develop this package and update it based on user input. Implementation: Parse the state of the FBC plugin from the SBML document. """ fbc_present = sbml_version[2] is not None ann = test_fbc_presence.annotation ann["data"] = fbc_present ann["metric"] = 1.0 - float(fbc_present) if fbc_present: ann["message"] = wrapper.fill("The FBC package *is* used.") else: ann["message"] = wrapper.fill("The FBC package is *not* used.") assert fbc_present, ann["message"]
def test_reaction_id_namespace_consistency(read_only_model): """ Expect reaction identifiers to be from the same namespace. In well-annotated models it is no problem if the pool of main identifiers for reactions consists of identifiers from several databases. However, in models that lack appropriate annotations, it may hamper the ability of other researchers to use it. Running the model through a computational pipeline may be difficult without first consolidating the namespace. Hence, this test checks if the main reaction identifiers can be attributed to one single namespace based on the regex patterns defined at https://identifiers.org/ """ ann = test_reaction_id_namespace_consistency.annotation overview = annotation.generate_component_id_namespace_overview( read_only_model, "reactions") distribution = overview.sum() cols = list(distribution.index) largest = distribution[cols].idxmax() if largest != 'bigg.reaction': warn( wrapper.fill( """memote currently only supports syntax checks for BiGG identifiers. Please consider mapping your IDs from {} to BiGG """.format(largest))) # Assume that all identifiers match the largest namespace. ann["data"] = overview[overview[largest]].index.tolist() ann["metric"] = len(ann["data"]) / len(read_only_model.reactions) ann["message"] = wrapper.fill( """{} reaction identifiers ({:.2%}) do not match the largest found namespace ({}): {}""".format(len(ann["data"]), ann["metric"], largest, truncate(ann["data"]))) assert len(ann["data"]) == 0, ann["message"]
def test_find_reactions_with_partially_identical_annotations(model): """ Expect there to be zero duplicate reactions. Identify reactions in a pairwise manner that are annotated with identical database references. This does not take into account a reaction's directionality or compartment. The main reason for having this test is to help cleaning up merged models or models from automated reconstruction pipelines as these are prone to having identical reactions with identifiers from different namespaces. It could also be useful to identify a 'type' of reaction that occurs in several compartments. Implementation: Identify duplicate reactions globally by checking if any two metabolic reactions have the same entries in their annotation attributes. The heuristic looks at annotations with the keys "metanetx.reaction", "kegg.reaction", "brenda", "rhea", "biocyc", "bigg.reaction" only. """ ann = test_find_reactions_with_partially_identical_annotations.annotation duplicates, total = \ basic.find_reactions_with_partially_identical_annotations(model) ann["data"] = duplicates ann["metric"] = total / len(model.reactions) ann["message"] = wrapper.fill( """Based on annotations there are {} different groups of overlapping annotation which corresponds to a total of {} duplicated reactions in the model.""".format(len(duplicates), total)) assert total == 0, ann["message"]
def test_find_duplicate_metabolites_in_compartments(model): """ Expect there to be zero duplicate metabolites in the same compartments. The main reason for having this test is to help cleaning up merged models or models from automated reconstruction pipelines as these are prone to having identical metabolites from different namespaces (hence different IDs). This test therefore expects that every metabolite in any particular compartment has unique inchikey values. Implementation: Identifies duplicate metabolites in each compartment by determining if any two metabolites have identical InChI-key annotations. For instance, this function would find compounds with IDs ATP1 and ATP2 in the cytosolic compartment, with both having the same InChI annotations. """ ann = test_find_duplicate_metabolites_in_compartments.annotation ann["data"] = basic.find_duplicate_metabolites_in_compartments(model) ann["metric"] = len(ann["data"]) / len(model.metabolites) ann["message"] = wrapper.fill( """There are a total of {} metabolites in the model which have duplicates in the same compartment: {}""".format( len(ann["data"]), truncate(ann["data"]))) assert len(ann["data"]) == 0, ann["message"]
def test_reaction_annotation_wrong_ids(read_only_model, db): """ Expect all annotations of reactions to be in the correct format. To identify databases and the identifiers belonging to them, computational tools rely on the presence of specific patterns. Only when these patterns can be identified consistently is an ID truly machine-readable. This test checks if the database cross-references in reaction annotations conform to patterns defined according to the MIRIAM guidelines, i.e. matching those that are defined at https://identifiers.org/. The required formats, i.e., regex patterns are further outlined in `annotation.py`. This test does not carry out a web query for the composed URI, it merely controls that the regex patterns match the identifiers. """ ann = test_reaction_annotation_wrong_ids.annotation ann["data"][db] = get_ids( annotation.generate_component_annotation_miriam_match( read_only_model.reactions, "reactions", db)) ann["metric"][db] = len(ann["data"][db]) / len(read_only_model.reactions) ann["message"][db] = wrapper.fill( """The provided reaction annotations for the {} database do not match the regular expression patterns defined on identifiers.org. A total of {} reaction annotations ({:.2%}) needs to be fixed: {}""".format( db, len(ann["data"][db]), ann["metric"][db], truncate(ann["data"][db]))) assert len(ann["data"][db]) == 0, ann["message"][db]
def test_find_metabolites_not_consumed_with_open_bounds(model): """ Expect metabolites to be consumable in complete medium. In complete medium, a model should be able to divert flux from every metabolite. This test opens all the boundary reactions i.e. simulates a complete medium and checks if any metabolite cannot be consumed individually using flux balance analysis. Metabolites that cannot be consumed this way are likely dead-end metabolites or upstream of reactions with fixed constraints. To pass this test all metabolites should be consumable. Implementation: Open all model boundary reactions, then for each metabolite in the model add a boundary reaction and minimize it with FBA. """ ann = test_find_metabolites_not_consumed_with_open_bounds.annotation ann["data"] = get_ids( consistency.find_metabolites_not_consumed_with_open_bounds(model) ) ann["metric"] = len(ann["data"]) / len(model.metabolites) ann["message"] = wrapper.fill( """A total of {} ({:.2%}) metabolites cannot be consumed in complete medium: {}""".format( len(ann["data"]), ann["metric"], truncate(ann["data"]))) assert len(ann["data"]) == 0, ann["message"]
def test_blocked_reactions(model): """ Expect all reactions to be able to carry flux in complete medium. Universally blocked reactions are reactions that during Flux Variability Analysis cannot carry any flux while all model boundaries are open. Generally blocked reactions are caused by network gaps, which can be attributed to scope or knowledge gaps. Implementation: Use flux variability analysis (FVA) implemented in cobra.flux_analysis.find_blocked_reactions with open_exchanges=True. Please refer to the cobrapy documentation for more information: https://cobrapy.readthedocs.io/en/stable/autoapi/cobra/flux_analysis/ variability/index.html#cobra.flux_analysis.variability. find_blocked_reactions """ ann = test_blocked_reactions.annotation ann["data"] = find_blocked_reactions(model, open_exchanges=True) ann["metric"] = len(ann["data"]) / len(model.reactions) ann["message"] = wrapper.fill( """There are {} ({:.2%}) blocked reactions in the model: {}""".format( len(ann["data"]), ann["metric"], truncate(ann["data"]))) assert len(ann["data"]) == 0, ann["message"]
def test_gene_sbo_presence(model): """Expect all genes to have a some form of SBO-Term annotation. The Systems Biology Ontology (SBO) allows researchers to annotate a model with terms which indicate the intended function of its individual components. The available terms are controlled and relational and can be viewed here http://www.ebi.ac.uk/sbo/main/tree. Check if each cobra.Gene has a non-zero "annotation" attribute that contains the key "sbo". """ ann = test_gene_sbo_presence.annotation ann["data"] = get_ids(sbo.find_components_without_sbo_terms( model, "genes")) try: ann["metric"] = len(ann["data"]) / len(model.genes) ann["message"] = wrapper.fill( """A total of {} genes ({:.2%}) lack annotation with any type of SBO term: {}""".format(len(ann["data"]), ann["metric"], truncate(ann["data"]))) except ZeroDivisionError: ann["metric"] = 1.0 ann["message"] = "The model has no genes." pytest.skip(ann["message"]) assert len(ann["data"]) == len(model.genes), ann["message"]
def test_find_metabolites_not_consumed_with_open_bounds(read_only_model): """ Expect metabolites to be consumable in complete medium. In complete medium, a model should be able to divert flux from every metabolite. This test opens all the boundary reactions i.e. simulates a complete medium and checks if any metabolite cannot be consumed individually using flux balance analysis. Metabolites that cannot be consumed this way are likely dead-end metabolites or upstream of reactions with fixed constraints. To pass this test all metabolites should be consumable. """ ann = test_find_metabolites_not_consumed_with_open_bounds.annotation ann["data"] = get_ids( consistency.find_metabolites_not_consumed_with_open_bounds( read_only_model ) ) ann["metric"] = len(ann["data"]) / len(read_only_model.metabolites) ann["message"] = wrapper.fill( """A total of {} ({:.2%}) metabolites cannot be consumed in complete medium: {}""".format( len(ann["data"]), ann["metric"], truncate(ann["data"]))) assert len(ann["data"]) == 0, ann["message"]
def test_protein_complex_presence(model): """ Expect that more than one enzyme complex is present in the model. Based on the gene-protein-reaction (GPR) rules, it is possible to infer whether a reaction is catalyzed by a single gene product, isozymes or by a heteromeric protein complex. This test checks that at least one such heteromeric protein complex is defined in any GPR of the model. For S. cerevisiae it could be shown that "essential proteins tend to [cluster] together in essential complexes" (https://doi.org/10.1074%2Fmcp.M800490-MCP200). This might also be a relevant metric for other organisms. Implementation: Identify GPRs which contain at least one logical AND that combines two different gene products. """ ann = test_protein_complex_presence.annotation ann["data"] = get_ids(basic.find_protein_complexes(model)) ann["metric"] = len(ann["data"]) / len(model.reactions) ann["message"] = wrapper.fill( """A total of {:d} reactions are catalyzed by complexes defined through GPR rules in the model.""".format(len(ann["data"]))) assert len(ann["data"]) >= 1, ann["message"]
def test_compartments_presence(model): """ Expect that two or more compartments are defined in the model. While simplified metabolic models may be perfectly viable, generally across the tree of life organisms contain at least one distinct compartment: the cytosol or cytoplasm. In the case of prokaryotes there is usually a periplasm, and eurkaryotes are more complex. In addition to the internal compartment, a metabolic model also reflects the extracellular environment i.e. the medium/ metabolic context in which the modelled cells grow. Hence, in total, at least two compartments can be expected from a metabolic model. Implementation: Check if the cobra.Model object has a non-empty "compartments" attribute, this list is populated from the list of sbml:listOfCompartments which should contain at least two sbml:compartment elements. """ ann = test_compartments_presence.annotation assert hasattr(model, "compartments") ann["data"] = list(model.compartments) ann["metric"] = 1.0 - float(len(ann["data"]) >= 2) ann["message"] = wrapper.fill( """A total of {:d} compartments are defined in the model: {}""".format( len(ann["data"]), truncate(ann["data"]))) assert len(ann["data"]) >= 2, ann["message"]
def test_ngam_presence(model): """ Expect a single non growth-associated maintenance reaction. The Non-Growth Associated Maintenance reaction (NGAM) is an ATP-hydrolysis reaction added to metabolic models to represent energy expenses that the cell invests in continuous processes independent of the growth rate. Memote tries to infer this reaction from a list of buzzwords, and the stoichiometry and components of a simple ATP-hydrolysis reaction. Implementation: From the list of all reactions that convert ATP to ADP select the reactions that match the irreversible reaction "ATP + H2O -> ADP + HO4P + H+", whose metabolites are situated within the main model compartment. The main model compartment is assumed to be the cytosol, yet, if that cannot be identified, it is assumed to be the compartment with the most metabolites. The resulting list of reactions is then filtered further by attempting to match the reaction name with any of the following buzzwords ('maintenance', 'atpm', 'requirement', 'ngam', 'non-growth', 'associated'). If this is possible only the filtered reactions are returned, if not the list is returned as is. """ ann = test_ngam_presence.annotation ann["data"] = get_ids(basic.find_ngam(model)) ann["metric"] = 1.0 - float(len(ann["data"]) == 1) ann["message"] = wrapper.fill( """A total of {} NGAM reactions could be identified: {}""".format(len(ann["data"]), truncate(ann["data"]))) assert len(ann["data"]) == 1, ann["message"]
def test_gene_sbo_presence(model): """Expect all genes to have a some form of SBO-Term annotation. The Systems Biology Ontology (SBO) allows researchers to annotate a model with terms which indicate the intended function of its individual components. The available terms are controlled and relational and can be viewed here http://www.ebi.ac.uk/sbo/main/tree. Check if each cobra.Gene has a non-zero "annotation" attribute that contains the key "sbo". """ ann = test_gene_sbo_presence.annotation ann["data"] = get_ids(sbo.find_components_without_sbo_terms( model, "genes")) try: ann["metric"] = len(ann["data"]) / len(model.genes) ann["message"] = wrapper.fill( """A total of {} genes ({:.2%}) lack annotation with any type of SBO term: {}""".format( len(ann["data"]), ann["metric"], truncate(ann["data"]))) except ZeroDivisionError: ann["metric"] = 1.0 ann["message"] = "The model has no genes." pytest.skip(ann["message"]) assert len(ann["data"]) == len(model.genes), ann["message"]
def test_gene_specific_sbo_presence(model): """Expect all genes to be annotated with SBO:0000243. SBO:0000243 represents the term 'gene'. Every gene should be annotated with this. Implementation: Check if each cobra.Gene has a non-zero "annotation" attribute that contains the key "sbo" with the associated value being one of the SBO terms above. """ ann = test_gene_specific_sbo_presence.annotation ann["data"] = get_ids(sbo.check_component_for_specific_sbo_term( model.genes, "SBO:0000243")) try: ann["metric"] = len(ann["data"]) / len(model.genes) ann["message"] = wrapper.fill( """A total of {} genes ({:.2%} of all genes) lack annotation with the SBO term "SBO:0000243" for 'gene': {}""".format( len(ann["data"]), ann["metric"], truncate(ann["data"]))) except ZeroDivisionError: ann["metric"] = 1.0 ann["message"] = "The model has no genes." pytest.skip(ann["message"]) assert len(ann["data"]) == len(model.genes), ann["message"]
def test_find_duplicate_reactions(model): """ Expect there to be zero duplicate reactions. Identify reactions in a pairwise manner that use the same set of metabolites including potentially duplicate metabolites. Moreover, it will take a reaction's directionality and compartment into account. The main reason for having this test is to help cleaning up merged models or models from automated reconstruction pipelines as these are prone to having identical reactions with identifiers from different namespaces. Implementation: Compare reactions in a pairwise manner. For each reaction, the metabolite annotations are checked for a description of the structure (via InChI and InChIKey).If they exist, substrates and products as well as the stoichiometries of any reaction pair are compared. Only reactions where the substrates, products, stoichiometry and reversibility are identical are considered to be duplicates. This test will not be able to identify duplicate reactions if there are no structure annotations. Further, it will report reactions with differing bounds as equal if they otherwise match the above conditions. """ ann = test_find_duplicate_reactions.annotation duplicates, num = basic.find_duplicate_reactions(model) ann["data"] = duplicates ann["metric"] = num / len(model.reactions) ann["message"] = wrapper.fill( """Based on metabolites, directionality and compartment there are a total of {} reactions in the model which have duplicates: {}""".format(num, truncate(duplicates))) assert num == 0, ann["message"]
def test_find_pure_metabolic_reactions(model): """ Expect at least one pure metabolic reaction to be defined in the model. If a reaction is neither a transport reaction, a biomass reaction nor a boundary reaction, it is counted as a purely metabolic reaction. This test requires the presence of metabolite formula to be able to identify transport reactions. This test is passed when the model contains at least one purely metabolic reaction i.e. a conversion of one metabolite into another. Implementation: From the list of all reactions, those that are boundary, transport and biomass reactions are removed and the remainder assumed to be pure metabolic reactions. Boundary reactions are identified using the attribute cobra.Model.boundary. Please read the description of "Transport Reactions" and "Biomass Reaction Identified" to learn how they are identified. """ ann = test_find_pure_metabolic_reactions.annotation ann["data"] = get_ids(basic.find_pure_metabolic_reactions(model)) ann["metric"] = len(ann["data"]) / len(model.reactions) ann["message"] = wrapper.fill( """A total of {:d} ({:.2%}) purely metabolic reactions are defined in the model, this excludes transporters, exchanges, or pseudo-reactions: {}""".format(len(ann["data"]), ann["metric"], truncate(ann["data"]))) assert len(ann["data"]) >= 1, ann["message"]
def test_find_reactions_with_identical_genes(model): """ Expect there to be zero duplicate reactions. Identify reactions in a pairwise manner that use identical sets of genes. It does *not* take into account a reaction's directionality, compartment, metabolites or annotations. The main reason for having this test is to help cleaning up merged models or models from automated reconstruction pipelines as these are prone to having identical reactions with identifiers from different namespaces. Implementation: Compare reactions in a pairwise manner and group reactions whose genes are identical. Skip reactions with missing genes. """ ann = test_find_reactions_with_identical_genes.annotation rxn_groups, num_dup = basic.find_reactions_with_identical_genes(model) ann["data"] = rxn_groups ann["metric"] = num_dup / len(model.reactions) ann["message"] = wrapper.fill( """Based only on equal genes there are {} different groups of identical reactions which corresponds to a total of {} duplicated reactions in the model.""".format(len(rxn_groups), num_dup)) assert num_dup == 0, ann["message"]
def test_find_constrained_pure_metabolic_reactions(model): """ Expect zero or more purely metabolic reactions to have fixed constraints. If a reaction is neither a transport reaction, a biomass reaction nor a boundary reaction, it is counted as a purely metabolic reaction. This test requires the presence of metabolite formula to be able to identify transport reactions. This test simply reports the number of purely metabolic reactions that have fixed constraints and does not have any mandatory 'pass' criteria. Implementation: From the pool of pure metabolic reactions identify reactions which are constrained to values other than the model's minimal or maximal possible bounds. """ ann = test_find_constrained_pure_metabolic_reactions.annotation pmr = basic.find_pure_metabolic_reactions(model) ann["data"] = get_ids_and_bounds( [rxn for rxn in pmr if basic.is_constrained_reaction(model, rxn)]) ann["metric"] = len(ann["data"]) / len(pmr) ann["message"] = wrapper.fill( """A total of {:d} ({:.2%}) purely metabolic reactions have fixed constraints in the model, this excludes transporters, exchanges, or pseudo-reactions: {}""".format(len(ann["data"]), ann["metric"], truncate(ann["data"])))
def test_gene_specific_sbo_presence(model): """Expect all genes to be annotated with SBO:0000243. SBO:0000243 represents the term 'gene'. Every gene should be annotated with this. Implementation: Check if each cobra.Gene has a non-zero "annotation" attribute that contains the key "sbo" with the associated value being one of the SBO terms above. """ ann = test_gene_specific_sbo_presence.annotation ann["data"] = get_ids( sbo.check_component_for_specific_sbo_term(model.genes, "SBO:0000243")) try: ann["metric"] = len(ann["data"]) / len(model.genes) ann["message"] = wrapper.fill( """A total of {} genes ({:.2%} of all genes) lack annotation with the SBO term "SBO:0000243" for 'gene': {}""".format(len(ann["data"]), ann["metric"], truncate(ann["data"]))) except ZeroDivisionError: ann["metric"] = 1.0 ann["message"] = "The model has no genes." pytest.skip(ann["message"]) assert len(ann["data"]) == len(model.genes), ann["message"]
def test_find_constrained_transport_reactions(model): """ Expect zero or more transport reactions to have fixed constraints. Cellular metabolism in any organism usually involves the transport of metabolites across a lipid bi-layer. Hence, this test reports how many of these reactions, which transports metabolites from one compartment to another, have fixed constraints. This test does not have any mandatory 'pass' criteria. Implementation: Please refer to "Transport Reactions" for details on how memote identifies transport reactions. From the pool of transport reactions identify reactions which are constrained to values other than the model's median lower and upper bounds. """ ann = test_find_constrained_transport_reactions.annotation transporters = helpers.find_transport_reactions(model) ann["data"] = get_ids_and_bounds([ rxn for rxn in transporters if basic.is_constrained_reaction(model, rxn) ]) ann["metric"] = len(ann["data"]) / len(transporters) ann["message"] = wrapper.fill( """A total of {:d} ({:.2%}) transport reactions have fixed constraints in the model: {}""".format(len(ann["data"]), ann["metric"], truncate(ann["data"])))
def test_reaction_mass_balance(model): """ Expect all reactions to be mass balanced. This will exclude biomass, exchange and demand reactions as they are unbalanced by definition. It will also fail all reactions where at least one metabolite does not have a formula defined. In steady state, for each metabolite the sum of influx equals the sum of efflux. Hence the net masses of both sides of any model reaction have to be equal. Reactions where at least one metabolite does not have a formula are not considered to be balanced, even though the remaining metabolites participating in the reaction might be. Implementation: For each reaction that isn't a boundary or biomass reaction check if each metabolite has a non-zero elements attribute and if so calculate if the overall element balance of reactants and products is equal to zero. """ ann = test_reaction_mass_balance.annotation internal_rxns = con_helpers.get_internals(model) ann["data"] = get_ids( consistency.find_mass_unbalanced_reactions(internal_rxns) ) ann["metric"] = len(ann["data"]) / len(internal_rxns) ann["message"] = wrapper.fill( """A total of {} ({:.2%}) reactions are mass unbalanced with at least one of the metabolites not having a formula or the overall mass not equal to 0: {}""".format( len(ann["data"]), ann["metric"], truncate(ann["data"]))) assert len(ann["data"]) == 0, ann["message"]
def test_transport_reaction_gpr_presence(model): """ Expect a small fraction of transport reactions not to have a GPR rule. As it is hard to identify the exact transport processes within a cell, transport reactions are often added purely for modeling purposes. Highlighting where assumptions have been made versus where there is proof may help direct the efforts to improve transport and transport energetics of the tested metabolic model. However, transport reactions without GPR may also be valid: Diffusion, or known reactions with yet undiscovered genes likely lack GPR. Implementation: Check which cobra.Reactions classified as transport reactions have a non-empty "gene_reaction_rule" attribute. """ # TODO: Update threshold with improved insight from meta study. ann = test_transport_reaction_gpr_presence.annotation ann["data"] = get_ids(basic.check_transport_reaction_gpr_presence(model)) ann["metric"] = len(ann["data"]) / len( helpers.find_transport_reactions(model)) ann["message"] = wrapper.fill( """There are a total of {} transport reactions ({:.2%} of all transport reactions) without GPR: {}""".format(len(ann["data"]), ann["metric"], truncate(ann["data"]))) assert ann["metric"] < 0.2, ann["message"]
def test_stoichiometric_consistency(model): """ Expect that the stoichiometry is consistent. Stoichiometric inconsistency violates universal constraints: 1. Molecular masses are always positive, and 2. On each side of a reaction the mass is conserved. A single incorrectly defined reaction can lead to stoichiometric inconsistency in the model, and consequently to unconserved metabolites. Similar to insufficient constraints, this may give rise to cycles which either produce mass from nothing or consume mass from the model. Implementation: This test first uses an implementation of the algorithm presented in section 3.1 by Gevorgyan, A., M. G Poolman, and D. A Fell. "Detection of Stoichiometric Inconsistencies in Biomolecular Models." Bioinformatics 24, no. 19 (2008): 2245. doi: 10.1093/bioinformatics/btn425 Should the model be inconsistent, then the list of unconserved metabolites is computed using the algorithm described in section 3.2 of the same publication. """ ann = test_stoichiometric_consistency.annotation is_consistent = consistency.check_stoichiometric_consistency( model) ann["data"] = [] if is_consistent else get_ids( consistency.find_unconserved_metabolites(model)) ann["metric"] = len(ann["data"]) / len(model.metabolites) ann["message"] = wrapper.fill( """This model contains {} ({:.2%}) unconserved metabolites: {}""".format( len(ann["data"]), ann["metric"], truncate(ann["data"]))) assert is_consistent, ann["message"]
def test_find_unique_metabolites(model): """ Expect there to be less metabolites when removing compartment tag. Metabolites may be transported into different compartments, which means that in a compartimentalized model the number of metabolites may be much higher than in a model with no compartments. This test counts only one occurrence of each metabolite and returns this as the number of unique metabolites. The test expects that the model is compartimentalized, and thus, that the number of unique metabolites is generally lower than the total number of metabolites. Implementation: Reduce the list of metabolites to a unique set by removing the compartment tag. The cobrapy SBML parser adds compartment tags to each metabolite ID. """ ann = test_find_unique_metabolites.annotation ann["data"] = list(basic.find_unique_metabolites(model)) ann["metric"] = len(ann["data"]) / len(model.metabolites) ann["message"] = wrapper.fill( """Not counting the same entities in other compartments, there is a total of {} ({:.2%}) unique metabolites in the model: {}""".format( len(ann["data"]), ann["metric"], truncate(ann["data"]))) assert len(ann["data"]) < len(model.metabolites), ann["message"]
def test_find_reactions_unbounded_flux_default_condition(model): """ Expect the fraction of unbounded reactions to be low. A large fraction of model reactions able to carry unlimited flux under default conditions indicates problems with reaction directionality, missing cofactors, incorrectly defined transport reactions and more. Implementation: Without changing the default constraints run flux variability analysis. From the FVA results identify those reactions that carry flux equal to the model's maximal or minimal flux. """ ann = test_find_reactions_unbounded_flux_default_condition.annotation unbounded_rxn_ids, fraction, _ = \ consistency.find_reactions_with_unbounded_flux_default_condition(model) ann["data"] = unbounded_rxn_ids ann["metric"] = fraction ann["message"] = wrapper.fill( """ A fraction of {:.2%} of the non-blocked reactions (in total {} reactions) can carry unbounded flux in the default model condition. Unbounded reactions may be involved in thermodynamically infeasible cycles: {}""".format( ann["metric"], len(ann["data"]), truncate(ann["data"]) ) ) # TODO: Arbitrary threshold right now! Update after meta study! assert ann["metric"] <= 0.1, ann["message"]
def test_gene_essentiality_from_data_qualitative(model, experiment, threshold=0.95): """ Expect a perfect accuracy when predicting gene essentiality. The in-silico gene essentiality is compared with experimental data and the accuracy is expected to be better than 0.95. In principal, Matthews' correlation coefficient is a more comprehensive metric but is a little fragile to not having any false negatives or false positives in the output. """ ann = test_gene_essentiality_from_data_qualitative.annotation exp = pytest.memote.experimental.essentiality[experiment] expected = exp.data test = exp.evaluate(model) ann["data"][experiment] = confusion_matrix( set(test.loc[test["essential"], "gene"]), set(expected.loc[expected["essential"], "gene"]), set(test.loc[~test["essential"], "gene"]), set(expected.loc[~expected["essential"], "gene"])) ann["metric"][experiment] = ann["data"][experiment]["ACC"] ann["message"][experiment] = wrapper.fill( """Ideally, every model would show a perfect accuracy of 1. In experiment '{}' the model has {:.2}.""".format( experiment, ann["data"][experiment]["MCC"])) assert ann["data"][experiment]["ACC"] > threshold
def test_reaction_charge_balance(read_only_model): """ Expect all reactions to be charge balanced. This will exclude biomass, exchange and demand reactions as they are unbalanced by definition. It will also fail all reactions where at least one metabolite does not have a charge defined. In steady state, for each metabolite the sum of influx equals the sum of outflux. Hence the net charges of both sides of any model reaction have to be equal. Reactions where at least one metabolite does not have a formula are not considered to be balanced, even though the remaining metabolites participating in the reaction might be. """ ann = test_reaction_charge_balance.annotation internal_rxns = con_helpers.get_internals(read_only_model) ann["data"] = get_ids( consistency.find_charge_unbalanced_reactions(internal_rxns)) ann["metric"] = len(ann["data"]) / len(internal_rxns) ann["message"] = wrapper.fill( """A total of {} ({:.2%}) reactions are charge unbalanced with at least one of the metabolites not having a charge or the overall charge not equal to 0: {}""".format( len(ann["data"]), ann["metric"], truncate(ann["data"]))) assert len(ann["data"]) == 0, ann["message"]
def test_transport_reaction_specific_sbo_presence(model): """Expect all transport reactions to be annotated properly. 'SBO:0000185', 'SBO:0000588', 'SBO:0000587', 'SBO:0000655', 'SBO:0000654', 'SBO:0000660', 'SBO:0000659', 'SBO:0000657', and 'SBO:0000658' represent the terms 'transport reaction' and 'translocation reaction', in addition to their children (more specific transport reaction labels). Every transport reaction that is not a pure metabolic or boundary reaction should be annotated with one of these terms. The results shown are relative to the total of all transport reactions. Implementation: Check if each transport reaction has a non-zero "annotation" attribute that contains the key "sbo" with the associated value being one of the SBO terms above. """ sbo_transport_terms = helpers.TRANSPORT_RXN_SBO_TERMS ann = test_transport_reaction_specific_sbo_presence.annotation transports = helpers.find_transport_reactions(model) ann["data"] = get_ids(sbo.check_component_for_specific_sbo_term( transports, sbo_transport_terms)) try: ann["metric"] = len(ann["data"]) / len(transports) ann["message"] = wrapper.fill( """A total of {} metabolic reactions ({:.2%} of all transport reactions) lack annotation with one of the SBO terms: {} for 'biochemical reaction': {}""".format( len(ann["data"]), ann["metric"], sbo_transport_terms, truncate(ann["data"]))) except ZeroDivisionError: ann["metric"] = 1.0 ann["message"] = "The model has no transport reactions." pytest.skip(ann["message"]) assert len(ann["data"]) == len(transports), ann["message"]
def test_find_constrained_transport_reactions(model): """ Expect zero or more transport reactions to have fixed constraints. Cellular metabolism in any organism usually involves the transport of metabolites across a lipid bi-layer. Hence, this test reports how many of these reactions, which transports metabolites from one compartment to another, have fixed constraints. This test does not have any mandatory 'pass' criteria. Implementation: Please refer to "Transport Reactions" for details on how memote identifies transport reactions. From the pool of transport reactions identify reactions which are constrained to values other than the model's median lower and upper bounds. """ ann = test_find_constrained_transport_reactions.annotation transporters = helpers.find_transport_reactions(model) ann["data"] = get_ids_and_bounds( [rxn for rxn in transporters if basic.is_constrained_reaction( model, rxn)]) ann["metric"] = len(ann["data"]) / len(transporters) ann["message"] = wrapper.fill( """A total of {:d} ({:.2%}) transport reactions have fixed constraints in the model: {}""".format(len(ann["data"]), ann["metric"], truncate(ann["data"])))
def test_metabolic_reaction_specific_sbo_presence(model): """Expect all metabolic reactions to be annotated with SBO:0000176. SBO:0000176 represents the term 'biochemical reaction'. Every metabolic reaction that is not a transport or boundary reaction should be annotated with this. The results shown are relative to the total amount of pure metabolic reactions. Implementation: Check if each pure metabolic reaction has a non-zero "annotation" attribute that contains the key "sbo" with the associated value being the SBO term above. """ ann = test_metabolic_reaction_specific_sbo_presence.annotation pure = basic.find_pure_metabolic_reactions(model) ann["data"] = get_ids(sbo.check_component_for_specific_sbo_term( pure, "SBO:0000176")) try: ann["metric"] = len(ann["data"]) / len(pure) ann["message"] = wrapper.fill( """A total of {} metabolic reactions ({:.2%} of all purely metabolic reactions) lack annotation with the SBO term "SBO:0000176" for 'biochemical reaction': {}""".format( len(ann["data"]), ann["metric"], truncate(ann["data"]))) except ZeroDivisionError: ann["metric"] = 1.0 ann["message"] = "The model has no metabolic reactions." pytest.skip(ann["message"]) assert len(ann["data"]) == len(pure), ann["message"]
def test_find_duplicate_metabolites_in_compartments(model): """ Expect there to be zero duplicate metabolites in the same compartments. The main reason for having this test is to help cleaning up merged models or models from automated reconstruction pipelines as these are prone to having identical metabolites from different namespaces (hence different IDs). This test therefore expects that every metabolite in any particular compartment has unique inchikey values. Implementation: Identifies duplicate metabolites in each compartment by determining if any two metabolites have identical InChI-key annotations. For instance, this function would find compounds with IDs ATP1 and ATP2 in the cytosolic compartment, with both having the same InChI annotations. """ ann = test_find_duplicate_metabolites_in_compartments.annotation ann["data"] = basic.find_duplicate_metabolites_in_compartments( model) ann["metric"] = len(ann["data"]) / len(model.metabolites) ann["message"] = wrapper.fill( """There are a total of {} metabolites in the model which have duplicates in the same compartment: {}""".format( len(ann["data"]), truncate(ann["data"]))) assert len(ann["data"]) == 0, ann["message"]
def test_transport_reaction_gpr_presence(model): """ Expect a small fraction of transport reactions not to have a GPR rule. As it is hard to identify the exact transport processes within a cell, transport reactions are often added purely for modeling purposes. Highlighting where assumptions have been made versus where there is proof may help direct the efforts to improve transport and transport energetics of the tested metabolic model. However, transport reactions without GPR may also be valid: Diffusion, or known reactions with yet undiscovered genes likely lack GPR. Implementation: Check which cobra.Reactions classified as transport reactions have a non-empty "gene_reaction_rule" attribute. """ # TODO: Update threshold with improved insight from meta study. ann = test_transport_reaction_gpr_presence.annotation ann["data"] = get_ids( basic.check_transport_reaction_gpr_presence(model) ) ann["metric"] = len(ann["data"]) / len( helpers.find_transport_reactions(model) ) ann["message"] = wrapper.fill( """There are a total of {} transport reactions ({:.2%} of all transport reactions) without GPR: {}""".format(len(ann["data"]), ann["metric"], truncate(ann["data"]))) assert ann["metric"] < 0.2, ann["message"]
def test_gene_product_annotation_presence(model): """ Expect all genes to have a non-empty annotation attribute. This test checks if any annotations at all are present in the SBML annotations field (extended by FBC package) for each gene product, irrespective of the type of annotation i.e. specific database, cross-references, ontology terms, additional information. For this test to pass the model is expected to have genes and each of them should have some form of annotation. Implementation: Check if the annotation attribute of each cobra.Gene object of the model is unset or empty. """ ann = test_gene_product_annotation_presence.annotation ann["data"] = get_ids(annotation.find_components_without_annotation( model, "genes")) ann["metric"] = len(ann["data"]) / len(model.genes) ann["message"] = wrapper.fill( """A total of {} genes ({:.2%}) lack any form of annotation: {}""".format( len(ann["data"]), ann["metric"], truncate(ann["data"]))) assert len(ann["data"]) == 0, ann["message"]
def test_demand_specific_sbo_presence(read_only_model): """Expect all demand reactions to be annotated with SBO:0000627. SBO:0000628 represents the term 'demand reaction'. The Systems Biology Ontology defines a demand reaction as follows: 'A modeling process analogous to exchange reaction, but which operates upon "internal" metabolites. Metabolites that are consumed by these reactions are assumed to be used in intra-cellular processes that are not part of the model. Demand reactions, often represented 'R_DM_', can also deliver metabolites (from intra-cellular processes that are not considered in the model).' Every demand reaction should be annotated with this. Demand reactions differ from exchange reactions in that the metabolites are not removed from the extracellular environment, but from any of the organism's compartments. Demand reactions differ from sink reactions in that they are designated as irreversible. """ ann = test_demand_specific_sbo_presence.annotation demands = helpers.find_demand_reactions(read_only_model) ann["data"] = get_ids(sbo.check_component_for_specific_sbo_term( demands, "SBO:0000628")) try: ann["metric"] = len(ann["data"]) / len(demands) ann["message"] = wrapper.fill( """A total of {} genes ({:.2%} of all demand reactions) lack annotation with the SBO term "SBO:0000628" for 'demand reaction': {}""".format( len(ann["data"]), ann["metric"], truncate(ann["data"]))) except ZeroDivisionError: ann["metric"] = 1.0 ann["message"] = "The model has no demand reactions." pytest.skip(ann["message"]) assert len(ann["data"]) == len(demands), ann["message"]
def test_stoichiometric_consistency(read_only_model): """ Expect that the stoichiometry is consistent. Stoichiometric inconsistency violates universal constraints: 1. Molecular masses are always positive, and 2. On each side of a reaction the mass is conserved. A single incorrectly defined reaction can lead to stoichiometric inconsistency in the model, and consequently to unconserved metabolites. Similar to insufficient constraints, this may give rise to cycles which either produce mass from nothing or consume mass from the model. This test uses an implementation of the algorithm presented by Gevorgyan, A., M. G Poolman, and D. A Fell. "Detection of Stoichiometric Inconsistencies in Biomolecular Models." Bioinformatics 24, no. 19 (2008): 2245. doi: 10.1093/bioinformatics/btn425 """ ann = test_stoichiometric_consistency.annotation is_consistent = consistency.check_stoichiometric_consistency( read_only_model) ann["data"] = [] if is_consistent else get_ids( consistency.find_unconserved_metabolites(read_only_model)) ann["metric"] = len(ann["data"]) / len(read_only_model.metabolites) ann["message"] = wrapper.fill( """This model contains {} ({:.2%}) unconserved metabolites: {}""".format( len(ann["data"]), ann["metric"], truncate(ann["data"]))) assert is_consistent, ann["message"]
def test_find_reactions_with_identical_genes(model): """ Expect there to be zero duplicate reactions. Identify reactions in a pairwise manner that use identical sets of genes. It does *not* take into account a reaction's directionality, compartment, metabolites or annotations. The main reason for having this test is to help cleaning up merged models or models from automated reconstruction pipelines as these are prone to having identical reactions with identifiers from different namespaces. Implementation: Compare reactions in a pairwise manner and group reactions whose genes are identical. Skip reactions with missing genes. """ ann = test_find_reactions_with_identical_genes.annotation rxn_groups, num_dup = basic.find_reactions_with_identical_genes(model) ann["data"] = rxn_groups ann["metric"] = num_dup / len(model.reactions) ann["message"] = wrapper.fill( """Based only on equal genes there are {} different groups of identical reactions which corresponds to a total of {} duplicated reactions in the model.""".format( len(rxn_groups), num_dup)) assert num_dup == 0, ann["message"]
def test_transport_reaction_specific_sbo_presence(read_only_model): """Expect all transport reactions to be annotated properly. 'SBO:0000185', 'SBO:0000588', 'SBO:0000587', 'SBO:0000655', 'SBO:0000654', 'SBO:0000660', 'SBO:0000659', 'SBO:0000657', and 'SBO:0000658' represent the terms 'transport reaction' and 'translocation reaction', in addition to their children (more specific transport reaction labels). Every transport reaction that is not a pure metabolic or boundary reaction should be annotated with one of these terms. The results shown are relative to the total of all transport reactions. """ sbo_transport_terms = helpers.TRANSPORT_RXN_SBO_TERMS ann = test_transport_reaction_specific_sbo_presence.annotation transports = helpers.find_transport_reactions(read_only_model) ann["data"] = get_ids(sbo.check_component_for_specific_sbo_term( transports, sbo_transport_terms)) try: ann["metric"] = len(ann["data"]) / len(transports) ann["message"] = wrapper.fill( """A total of {} metabolic reactions ({:.2%} of all transport reactions) lack annotation with one of the SBO terms: {} for 'biochemical reaction': {}""".format( len(ann["data"]), ann["metric"], sbo_transport_terms, truncate(ann["data"]))) except ZeroDivisionError: ann["metric"] = 1.0 ann["message"] = "The model has no transport reactions." pytest.skip(ann["message"]) assert len(ann["data"]) == len(transports), ann["message"]
def test_gene_protein_reaction_rule_presence(model): """ Expect all non-exchange reactions to have a GPR rule. Gene-Protein-Reaction rules express which gene has what function. The presence of this annotation is important to justify the existence of reactions in the model, and is required to conduct in silico gene deletion studies. However, reactions without GPR may also be valid: Spontaneous reactions, or known reactions with yet undiscovered genes likely lack GPR. Implementation: Check if each cobra.Reaction has a non-empty "gene_reaction_rule" attribute, which is set by the parser if there is an fbc:geneProductAssociation defined for the corresponding reaction in the SBML. """ ann = test_gene_protein_reaction_rule_presence.annotation missing_gpr_metabolic_rxns = set( basic.check_gene_protein_reaction_rule_presence(model) ).difference(set(model.boundary)) ann["data"] = get_ids(missing_gpr_metabolic_rxns) ann["metric"] = len(ann["data"]) / len(model.reactions) ann["message"] = wrapper.fill( """There are a total of {} reactions ({:.2%}) without GPR: {}""".format(len(ann["data"]), ann["metric"], truncate(ann["data"]))) assert len(ann["data"]) == 0, ann["message"]
def test_metabolites_charge_presence(model): """ Expect all metabolites to have charge information. To be able to ensure that reactions are charge-balanced, all model metabolites ought to be provided with a charge. Since it may be difficult to obtain charges for certain metabolites this test serves as a mere report. Models can still be stoichiometrically consistent even when charge information is not defined for each metabolite. Implementation: Check if each cobra.Metabolite has a non-empty "charge" attribute. This attribute is set by the parser if there is an fbc:charge attribute for the corresponding species in the SBML. """ ann = test_metabolites_charge_presence.annotation ann["data"] = get_ids( basic.check_metabolites_charge_presence(model)) ann["metric"] = len(ann["data"]) / len(model.metabolites) ann["message"] = wrapper.fill( """There are a total of {} metabolites ({:.2%}) without a charge: {}""".format( len(ann["data"]), ann["metric"], truncate(ann["data"]))) assert len(ann["data"]) == 0, ann["message"]
def test_detect_energy_generating_cycles(read_only_model, met): u""" Expect that no energy metabolite can be produced out of nothing. When a model is not sufficiently constrained to account for the thermodynamics of reactions, flux cycles may form which provide reduced metabolites to the model without requiring nutrient uptake. These cycles are referred to as erroneous energy-generating cycles. Their effect on the predicted growth rate in FBA may account for an increase of up to 25%, which makes studies involving the growth rates predicted from such models unreliable. This test uses an implementation of the algorithm presented by: Fritzemeier, C. J., Hartleb, D., Szappanos, B., Papp, B., & Lercher, M. J. (2017). Erroneous energy-generating cycles in published genome scale metabolic networks: Identification and removal. PLoS Computational Biology, 13(4), 1–14. http://doi.org/10.1371/journal.pcbi.1005494 """ ann = test_detect_energy_generating_cycles.annotation if met not in read_only_model.metabolites: pytest.skip("This test has been skipped since metabolite {} could " "not be found in the model.".format(met)) ann["data"][met] = consistency.detect_energy_generating_cycles( read_only_model, met) ann["message"][met] = wrapper.fill( """The model can produce '{}' without requiring resources. This is caused by improperly constrained reactions leading to erroneous energy-generating cycles. The following {} reactions are involved in those cycles: {}""".format( met, len(ann["data"][met]), truncate(ann["data"][met]))) assert len(ann["data"][met]) == 0, ann["message"][met]
def test_absolute_extreme_coefficient_ratio(model, threshold=1e9): """ Show the ratio of the absolute largest and smallest non-zero coefficients. This test will return the absolute largest and smallest, non-zero coefficients of the stoichiometric matrix. A large ratio of these values may point to potential numerical issues when trying to solve different mathematical optimization problems such as flux-balance analysis. To pass this test the ratio should not exceed 10^9. This threshold has been selected based on experience, and is likely to be adapted when more data on solver performance becomes available. Implementation: Compose the stoichiometric matrix, then calculate absolute coefficients and lastly use the maximal value and minimal non-zero value to calculate the ratio. """ ann = test_absolute_extreme_coefficient_ratio.annotation high, low = matrix.absolute_extreme_coefficient_ratio(model) ann["data"] = high / low # Inverse the Boolean: 0.0 = good; 1.0 = bad. ann["metric"] = 1.0 - float(ann["data"] < threshold) ann["message"] = wrapper.fill( """The ratio of the absolute values of the largest coefficient {} and the lowest, non-zero coefficient {} is: {:.3G}.""".format( high, low, ann["data"])) assert ann["data"] < threshold, ann["message"]
def test_find_pure_metabolic_reactions(model): """ Expect at least one pure metabolic reaction to be defined in the model. If a reaction is neither a transport reaction, a biomass reaction nor a boundary reaction, it is counted as a purely metabolic reaction. This test requires the presence of metabolite formula to be able to identify transport reactions. This test is passed when the model contains at least one purely metabolic reaction i.e. a conversion of one metabolite into another. Implementation: From the list of all reactions, those that are boundary, transport and biomass reactions are removed and the remainder assumed to be pure metabolic reactions. Boundary reactions are identified using the attribute cobra.Model.boundary. Please read the description of "Transport Reactions" and "Biomass Reaction Identified" to learn how they are identified. """ ann = test_find_pure_metabolic_reactions.annotation ann["data"] = get_ids( basic.find_pure_metabolic_reactions(model)) ann["metric"] = len(ann["data"]) / len(model.reactions) ann["message"] = wrapper.fill( """A total of {:d} ({:.2%}) purely metabolic reactions are defined in the model, this excludes transporters, exchanges, or pseudo-reactions: {}""".format(len(ann["data"]), ann["metric"], truncate(ann["data"]))) assert len(ann["data"]) >= 1, ann["message"]
def test_find_constrained_pure_metabolic_reactions(model): """ Expect zero or more purely metabolic reactions to have fixed constraints. If a reaction is neither a transport reaction, a biomass reaction nor a boundary reaction, it is counted as a purely metabolic reaction. This test requires the presence of metabolite formula to be able to identify transport reactions. This test simply reports the number of purely metabolic reactions that have fixed constraints and does not have any mandatory 'pass' criteria. Implementation: From the pool of pure metabolic reactions identify reactions which are constrained to values other than the model's minimal or maximal possible bounds. """ ann = test_find_constrained_pure_metabolic_reactions.annotation pmr = basic.find_pure_metabolic_reactions(model) ann["data"] = get_ids_and_bounds( [rxn for rxn in pmr if basic.is_constrained_reaction( model, rxn)]) ann["metric"] = len(ann["data"]) / len(pmr) ann["message"] = wrapper.fill( """A total of {:d} ({:.2%}) purely metabolic reactions have fixed constraints in the model, this excludes transporters, exchanges, or pseudo-reactions: {}""".format(len(ann["data"]), ann["metric"], truncate(ann["data"])))
def test_metabolic_reaction_specific_sbo_presence(read_only_model): """Expect all metabolic reactions to be annotated with SBO:0000176. SBO:0000176 represents the term 'biochemical reaction'. Every metabolic reaction that is not a transport or boundary reaction should be annotated with this. The results shown are relative to the total amount of pure metabolic reactions. """ ann = test_metabolic_reaction_specific_sbo_presence.annotation pure = basic.find_pure_metabolic_reactions(read_only_model) ann["data"] = get_ids(sbo.check_component_for_specific_sbo_term( pure, "SBO:0000176")) try: ann["metric"] = len(ann["data"]) / len(pure) ann["message"] = wrapper.fill( """A total of {} metabolic reactions ({:.2%} of all purely metabolic reactions) lack annotation with the SBO term "SBO:0000176" for 'biochemical reaction': {}""".format( len(ann["data"]), ann["metric"], truncate(ann["data"]))) except ZeroDivisionError: ann["metric"] = 1.0 ann["message"] = "The model has no metabolic reactions." pytest.skip(ann["message"]) assert len(ann["data"]) == len(pure), ann["message"]
def test_find_reactions_unbounded_flux_default_condition(read_only_model): """ Expect the fraction of unbounded reactions to be low. A large fraction of model reactions able to carry unlimited flux under default conditions indicates problems with reaction directionality, missing cofactors, incorrectly defined transport reactions and more. """ # TODO: Arbitrary threshold right now! Update after meta study! ann = test_find_reactions_unbounded_flux_default_condition.annotation unbounded_rxns, fraction, _ = \ consistency.find_reactions_with_unbounded_flux_default_condition( read_only_model ) ann["data"] = get_ids(unbounded_rxns) ann["metric"] = fraction ann["message"] = wrapper.fill( """ A fraction of {:.2%} of the non-blocked reactions (in total {} reactions) can carry unbounded flux in the default model condition. Unbounded reactions may be involved in thermodynamically infeasible cycles: {}""".format( len(ann["data"]), ann["metric"], truncate(ann["data"]) ) ) assert ann["metric"] <= 0.1, ann["message"]
def test_exchange_specific_sbo_presence(read_only_model): """Expect all exchange reactions to be annotated with SBO:0000627. SBO:0000627 represents the term 'exchange reaction'. The Systems Biology Ontology defines an exchange reaction as follows: 'A modeling process to provide matter influx or efflux to a model, for example to replenish a metabolic network with raw materials (eg carbon / energy sources). Such reactions are conceptual, created solely for modeling purposes, and do not have a physical correspondence. Exchange reactions, often represented as 'R_EX_', can operate in the negative (uptake) direction or positive (secretion) direction. By convention, a negative flux through an exchange reaction represents uptake of the corresponding metabolite, and a positive flux represent discharge.' Every exchange reaction should be annotated with this. Exchange reactions differ from demand reactions in that the metabolites are removed from or added to the extracellular environment only. """ ann = test_exchange_specific_sbo_presence.annotation exchanges = helpers.find_exchange_rxns(read_only_model) ann["data"] = get_ids(sbo.check_component_for_specific_sbo_term( exchanges, "SBO:0000627")) try: ann["metric"] = len(ann["data"]) / len(exchanges) ann["message"] = wrapper.fill( """A total of {} exchange reactions ({:.2%} of all exchange reactions) lack annotation with the SBO term "SBO:0000627" for 'exchange reaction': {}""".format( len(ann["data"]), ann["metric"], truncate(ann["data"]))) except ZeroDivisionError: ann["metric"] = 1.0 ann["message"] = "The model has no exchange reactions." pytest.skip(ann["message"]) assert len(ann["data"]) == len(exchanges), ann["message"]
def test_metabolite_annotation_overview(read_only_model, db): """ Expect all metabolites to have annotations from common databases. Specific database cross-references are paramount to mapping information. To provide references to as many databases as possible helps to make the metabolic model more accessible to other researchers. This does not only facilitate the use of a model in a broad array of computational pipelines, it also promotes the metabolic model itself to become an organism-specific knowledge base. For this test to pass, each metabolite annotation should contain cross-references to a number of databases (listed in `annotation.py`). For each database this test checks for the presence of its corresponding namespace ID to comply with the MIRIAM guidelines i.e. they have to match those defined on https://identifiers.org/. Since each database is quite different and some potentially incomplete, it may not be feasible to achieve 100% coverage for each of them. Generally it should be possible, however, to obtain cross-references to at least one of the databases for all metabolites consistently. """ ann = test_metabolite_annotation_overview.annotation ann["data"][db] = get_ids( annotation.generate_component_annotation_overview( read_only_model.metabolites, db)) # TODO: metric must also be a dict in this case. ann["metric"][db] = len(ann["data"][db]) / len(read_only_model.metabolites) ann["message"][db] = wrapper.fill( """The following {} metabolites ({:.2%}) lack annotation for {}: {}""".format(len(ann["data"][db]), ann["metric"][db], db, truncate(ann["data"][db]))) assert len(ann["data"][db]) == 0, ann["message"][db]
def test_growth_from_data_qualitative(model, experiment, threshold=0.95): """ Expect a perfect accuracy when predicting growth. The in-silico growth prediction is compared with experimental data and the accuracy is expected to be better than 0.95. In principal, Matthews' correlation coefficient is a more comprehensive metric but is a little fragile to not having any false negatives or false positives in the output. """ ann = test_growth_from_data_qualitative.annotation exp = pytest.memote.experimental.growth[experiment] expected = exp.data test = exp.evaluate(model) # Growth data sets need not use unique exchange reactions thus we use the # numeric index here to compute the confusion matrix. ann["data"][experiment] = confusion_matrix( set(test.loc[test["growth"], "exchange"].index), set(expected.loc[expected["growth"], "exchange"].index), set(test.loc[~test["growth"], "exchange"].index), set(expected.loc[~expected["growth"], "exchange"].index)) ann["metric"][experiment] = ann["data"][experiment]["ACC"] ann["message"][experiment] = wrapper.fill( """Ideally, every model would show a perfect accuracy of 1. In experiment '{}' the model has {:.2}.""".format( experiment, ann["data"][experiment]["MCC"])) assert ann["data"][experiment]["ACC"] > threshold
def test_gene_product_annotation_wrong_ids(model, db): """ Expect all annotations of genes/gene-products to be in the correct format. To identify databases and the identifiers belonging to them, computational tools rely on the presence of specific patterns. Only when these patterns can be identified consistently is an ID truly machine-readable. This test checks if the database cross-references in reaction annotations conform to patterns defined according to the MIRIAM guidelines, i.e. matching those that are defined at https://identifiers.org/. The required formats, i.e., regex patterns are further outlined in `annotation.py`. This test does not carry out a web query for the composed URI, it merely controls that the regex patterns match the identifiers. Implementation: For those genes whose annotation keys match any of the tested databases, check if the corresponding values match the identifier pattern of each database. """ ann = test_gene_product_annotation_wrong_ids.annotation ann["data"][db] = total = get_ids( set(model.genes).difference( annotation.generate_component_annotation_overview( model.genes, db))) ann["metric"][db] = 1.0 ann["message"][db] = wrapper.fill( """There are no gene annotations for the {} database. """.format(db)) assert len(total) > 0, ann["message"][db] ann["data"][db] = get_ids( annotation.generate_component_annotation_miriam_match( model.genes, "genes", db)) ann["metric"][db] = len(ann["data"][db]) / len(model.genes) ann["message"][db] = wrapper.fill( """A total of {} gene annotations ({:.2%}) do not match the regular expression patterns defined on identifiers.org for the {} database: {}""".format( len(ann["data"][db]), ann["metric"][db], db, truncate(ann["data"][db]))) assert len(ann["data"][db]) == 0, ann["message"][db]
def test_matrix_rank(model): """ Show the rank of the stoichiometric matrix. The rank of the stoichiometric matrix is system specific. It is calculated using singular value decomposition (SVD). Implementation: Compose the stoichiometric matrix, then estimate the rank, i.e. the dimension of the column space, of a matrix. The algorithm used by this function is based on the singular value decomposition of the matrix. """ ann = test_matrix_rank.annotation ann["data"] = matrix.matrix_rank(model) # Report the rank scaled by the number of reactions. ann["metric"] = ann["data"] / len(model.reactions) ann["message"] = wrapper.fill( """The rank of the S-Matrix is {}.""".format(ann["data"]))
def test_find_transport_reactions(model): """ Expect >= 1 transport reactions are present in the model. Cellular metabolism in any organism usually involves the transport of metabolites across a lipid bi-layer. This test reports how many of these reactions, which transports metabolites from one compartment to another, are present in the model, as at least one transport reaction must be present for cells to take up nutrients and/or excrete waste. Implementation: A transport reaction is defined as follows: 1. It contains metabolites from at least 2 compartments and 2. at least 1 metabolite undergoes no chemical reaction, i.e., the formula and/or annotation stays the same on both sides of the equation. A notable exception is transport via PTS, which also contains the following restriction: 3. The transported metabolite(s) are transported into a compartment through the exchange of a phosphate. An example of transport via PTS would be pep(c) + glucose(e) -> glucose-6-phosphate(c) + pyr(c) Reactions similar to transport via PTS (referred to as "modified transport reactions") follow a similar pattern: A(x) + B-R(y) -> A-R(y) + B(y) Such modified transport reactions can be detected, but only when the formula is defined for all metabolites in a particular reaction. If this is not the case, transport reactions are identified through annotations, which cannot detect modified transport reactions. """ ann = test_find_transport_reactions.annotation ann["data"] = get_ids(helpers.find_transport_reactions(model)) ann["metric"] = len(ann["data"]) / len(model.reactions) ann["message"] = wrapper.fill( """A total of {:d} ({:.2%}) transport reactions are defined in the model, this excludes purely metabolic reactions, exchanges, or pseudo-reactions: {}""".format( len(ann["data"]), ann["metric"], truncate(ann["data"]))) assert len(ann["data"]) >= 1, ann["message"]
def test_biomass_specific_sbo_presence(model): """Expect all biomass reactions to be annotated with SBO:0000629. SBO:0000629 represents the term 'biomass production'. The Systems Biology Ontology defines an exchange reaction as follows: 'Biomass production, often represented 'R_BIOMASS_', is usually the optimization target reaction of constraint-based models, and can consume multiple reactants to produce multiple products. It is also assumed that parts of the reactants are also consumed in unrepresented processes and hence products do not have to reflect all the atom composition of the reactants. Formulation of a biomass production process entails definition of the macromolecular content (eg. cellular protein fraction), metabolic constitution of each fraction (eg. amino acids), and subsequently the atomic composition (eg. nitrogen atoms). More complex biomass functions can additionally incorporate details of essential vitamins and cofactors required for growth.' Every reaction representing the biomass production should be annotated with this. Implementation: Check if each biomass reaction has a non-zero "annotation" attribute that contains the key "sbo" with the associated value being one of the SBO terms above. """ ann = test_biomass_specific_sbo_presence.annotation biomass = helpers.find_biomass_reaction(model) ann["data"] = get_ids(sbo.check_component_for_specific_sbo_term( biomass, "SBO:0000629")) try: ann["metric"] = len(ann["data"]) / len(biomass) except ZeroDivisionError: ann["metric"] = 1.0 ann["message"] = "No biomass reactions found." pytest.skip(ann["message"]) ann["message"] = wrapper.fill( """A total of {} biomass reactions ({:.2%} of all biomass reactions) lack annotation with the SBO term "SBO:0000629" for 'biomass production': {}""".format( len(ann["data"]), ann["metric"], truncate(ann["data"]) )) assert len(ann["data"]) == 0, ann["message"]