Beispiel #1
0
def test_metabolite_annotation_overview(read_only_model, db):
    """
    Expect all metabolites to have annotations from common databases.

    Specific database cross-references are paramount to mapping information.
    To provide references to as many databases as possible helps to make the
    metabolic model more accessible to other researchers. This does not only
    facilitate the use of a model in a broad array of computational pipelines,
    it also promotes the metabolic model itself to become an organism-specific
    knowledge base.

    For this test to pass, each metabolite annotation should contain
    cross-references to a number of databases (listed in `annotation.py`).
    For each database this test checks for the presence of its corresponding
    namespace ID to comply with the MIRIAM guidelines i.e. they have to match
    those defined on https://identifiers.org/.

    Since each database is quite different and some potentially incomplete, it
    may not be feasible to achieve 100% coverage for each of them. Generally
    it should be possible, however, to obtain cross-references to at least
    one of the databases for all metabolites consistently.
    """
    ann = test_metabolite_annotation_overview.annotation
    # Metabolites that have no cross-reference for this database.
    missing = get_ids(
        annotation.generate_component_annotation_overview(
            read_only_model.metabolites, db))
    ann["data"][db] = missing
    # TODO: metric must also be a dict in this case.
    ann["metric"][db] = len(missing) / len(read_only_model.metabolites)
    ann["message"][db] = wrapper.fill(
        """The following {} metabolites ({:.2%}) lack annotation for {}:
        {}""".format(len(missing), ann["metric"][db], db, truncate(missing)))
    assert len(missing) == 0, ann["message"][db]
Beispiel #2
0
def test_generate_component_annotation_overview(model, num, components):
    """
    Expect all components to have `num` annotations from common databases.

    The required databases are outlined in `annotation.py`.
    """
    overview = annotation.generate_component_annotation_overview(
        model, components)
    # Each database column should flag exactly `num` components.
    for _, column in overview.items():
        assert column.sum() == num
Beispiel #3
0
def test_generate_reaction_annotation_overview(model, num, db):
    """
    Expect all reactions to have `num` annotations from common databases.

    The required databases are outlined in `annotation.py`.
    """
    overview = annotation.generate_component_annotation_overview(
        model.reactions, db)
    # The overview should contain exactly the expected number of entries.
    assert len(overview) == num
Beispiel #4
0
def test_reaction_annotation_overview(read_only_model, store):
    """
    Expect all reactions to have annotations from common databases.

    The required databases are outlined in `annotation.py`.
    """
    overview = annotation.generate_component_annotation_overview(
        read_only_model, "reactions")
    store['rxn_annotation_overview'] = df2dict(overview)
    for db in annotation.REACTION_ANNOTATIONS:
        # Reactions whose overview flag for this database is False.
        missing = overview.index[~overview[db]]
        assert len(missing) == 0, (
            "The following reactions lack annotation for "
            "{}: {}".format(db, ", ".join(missing)))
Beispiel #5
0
def test_metabolite_annotation_wrong_ids(read_only_model, store):
    """
    Expect all annotations of metabolites to be in the correct format.

    The required formats, i.e., regex patterns are outlined in `annotation.py`.
    """
    has_annotation = annotation.generate_component_annotation_overview(
        read_only_model, "metabolites")
    matches = annotation.generate_component_annotation_miriam_match(
        read_only_model, "metabolites")
    # A wrong ID is one that is present but fails the MIRIAM regex match.
    wrong = DataFrame(
        has_annotation.values & (~matches.values),
        index=has_annotation.index,
        columns=has_annotation.columns)
    store['met_wrong_annotation_ids'] = df2dict(wrong)
    for db in annotation.METABOLITE_ANNOTATIONS:
        offenders = wrong.index[wrong[db]]
        assert len(offenders) == 0, (
            "The following metabolites use wrong IDs for "
            "{}: {}".format(db, ", ".join(offenders)))
Beispiel #6
0
def test_gene_product_annotation_wrong_ids(model, db):
    """
    Expect all annotations of genes/gene-products to be in the correct format.

    To identify databases and the identifiers belonging to them, computational
    tools rely on the presence of specific patterns. Only when these patterns
    can be identified consistently is an ID truly machine-readable. This test
    checks if the database cross-references in gene annotations conform
    to patterns defined according to the MIRIAM guidelines, i.e. matching
    those that are defined at https://identifiers.org/.

    The required formats, i.e., regex patterns are further outlined in
    `annotation.py`. This test does not carry out a web query for the composed
    URI, it merely controls that the regex patterns match the identifiers.

    Implementation:
    For those genes whose annotation keys match any of the tested
    databases, check if the corresponding values match the identifier pattern
    of each database.

    """
    ann = test_gene_product_annotation_wrong_ids.annotation
    # Genes that do carry an annotation for this database (the overview
    # returns those that lack one, so the set difference keeps the rest).
    # If no gene is annotated at all, the regex check below would be
    # vacuous, hence the guard assertion first.
    ann["data"][db] = total = get_ids(
        set(model.genes).difference(
            annotation.generate_component_annotation_overview(
                model.genes, db)))
    ann["metric"][db] = 1.0
    ann["message"][db] = wrapper.fill(
        """There are no gene annotations for the {} database.
        """.format(db))
    assert len(total) > 0, ann["message"][db]
    # Genes whose identifier for this database fails the MIRIAM regex.
    ann["data"][db] = get_ids(
        annotation.generate_component_annotation_miriam_match(
            model.genes, "genes", db))
    ann["metric"][db] = len(ann["data"][db]) / len(model.genes)
    ann["message"][db] = wrapper.fill(
        """A total of {} gene annotations ({:.2%}) do not match the
        regular expression patterns defined on identifiers.org for the {}
        database: {}""".format(
            len(ann["data"][db]), ann["metric"][db], db,
            truncate(ann["data"][db])))
    assert len(ann["data"][db]) == 0, ann["message"][db]
Beispiel #7
0
def test_gene_product_annotation_overview(model, db):
    """
    Expect all genes to have annotations from common databases.

    Specific database cross-references are paramount to mapping information.
    To provide references to as many databases as possible helps to make the
    metabolic model more accessible to other researchers. This does not only
    facilitate the use of a model in a broad array of computational pipelines,
    it also promotes the metabolic model itself to become an organism-specific
    knowledge base.

    For this test to pass, each gene annotation should contain
    cross-references to a number of databases. The current selection is
    listed in `annotation.py`, but an ongoing discussion can be found at
    https://github.com/opencobra/memote/issues/332. For each database this
    test checks for the presence of its corresponding namespace ID to comply
    with the MIRIAM guidelines, i.e. they have to match those defined on
    https://identifiers.org/.

    Since each database is quite different and some potentially incomplete, it
    may not be feasible to achieve 100% coverage for each of them. Generally
    it should be possible, however, to obtain cross-references to at least
    one of the databases for all gene products consistently.

    Implementation:
    Check if the keys of the annotation attribute of each cobra.Gene of
    the model match with a selection of common genome databases. The
    annotation attribute of cobrapy components is a dictionary of
    key:value pairs.

    """
    ann = test_gene_product_annotation_overview.annotation
    # Genes that have no cross-reference for this database.
    ann["data"][db] = get_ids(
        annotation.generate_component_annotation_overview(
            model.genes, db))
    ann["metric"][db] = len(ann["data"][db]) / len(model.genes)
    ann["message"][db] = wrapper.fill(
        """The following {} genes ({:.2%}) lack annotation for {}:
        {}""".format(len(ann["data"][db]), ann["metric"][db], db,
                     truncate(ann["data"][db])))
    assert len(ann["data"][db]) == 0, ann["message"][db]
Beispiel #8
0
def test_gene_product_annotation_wrong_ids(model, db):
    """
    Expect all annotations of genes/gene-products to be in the correct format.

    To identify databases and the identifiers belonging to them, computational
    tools rely on the presence of specific patterns. Only when these patterns
    can be identified consistently is an ID truly machine-readable. This test
    checks if the database cross-references in gene annotations conform
    to patterns defined according to the MIRIAM guidelines, i.e. matching
    those that are defined at https://identifiers.org/.

    The required formats, i.e., regex patterns are further outlined in
    `annotation.py`. This test does not carry out a web query for the composed
    URI, it merely controls that the regex patterns match the identifiers.

    Implementation:
    For those genes whose annotation keys match any of the tested
    databases, check if the corresponding values match the identifier pattern
    of each database.

    """
    ann = test_gene_product_annotation_wrong_ids.annotation
    # Genes that do carry an annotation for this database (the overview
    # returns those that lack one, so the set difference keeps the rest).
    # Guard against an empty set: the regex check below would be vacuous.
    ann["data"][db] = total = get_ids(
        set(model.genes).difference(
            annotation.generate_component_annotation_overview(model.genes,
                                                              db)))
    ann["metric"][db] = 1.0
    ann["message"][db] = wrapper.fill(
        """There are no gene annotations for the {} database.
        """.format(db))
    assert len(total) > 0, ann["message"][db]
    # Genes whose identifier for this database fails the MIRIAM regex.
    ann["data"][db] = get_ids(
        annotation.generate_component_annotation_miriam_match(
            model.genes, "genes", db))
    ann["metric"][db] = len(ann["data"][db]) / len(model.genes)
    ann["message"][db] = wrapper.fill(
        """A total of {} gene annotations ({:.2%}) do not match the
        regular expression patterns defined on identifiers.org for the {}
        database: {}""".format(len(ann["data"][db]), ann["metric"][db], db,
                               truncate(ann["data"][db])))
    assert len(ann["data"][db]) == 0, ann["message"][db]
Beispiel #9
0
def test_gene_product_annotation_overview(model, db):
    """
    Expect all genes to have annotations from common databases.

    Specific database cross-references are paramount to mapping information.
    To provide references to as many databases as possible helps to make the
    metabolic model more accessible to other researchers. This does not only
    facilitate the use of a model in a broad array of computational pipelines,
    it also promotes the metabolic model itself to become an organism-specific
    knowledge base.

    For this test to pass, each gene annotation should contain
    cross-references to a number of databases. The current selection is
    listed in `annotation.py`, but an ongoing discussion can be found at
    https://github.com/opencobra/memote/issues/332. For each database this
    test checks for the presence of its corresponding namespace ID to comply
    with the MIRIAM guidelines, i.e. they have to match those defined on
    https://identifiers.org/.

    Since each database is quite different and some potentially incomplete, it
    may not be feasible to achieve 100% coverage for each of them. Generally
    it should be possible, however, to obtain cross-references to at least
    one of the databases for all gene products consistently.

    Implementation:
    Check if the keys of the annotation attribute of each cobra.Gene of
    the model match with a selection of common genome databases. The
    annotation attribute of cobrapy components is a dictionary of
    key:value pairs.

    """
    ann = test_gene_product_annotation_overview.annotation
    # Genes that have no cross-reference for this database.
    ann["data"][db] = get_ids(
        annotation.generate_component_annotation_overview(model.genes, db))
    ann["metric"][db] = len(ann["data"][db]) / len(model.genes)
    ann["message"][db] = wrapper.fill(
        """The following {} genes ({:.2%}) lack annotation for {}:
        {}""".format(len(ann["data"][db]), ann["metric"][db], db,
                     truncate(ann["data"][db])))
    assert len(ann["data"][db]) == 0, ann["message"][db]
Beispiel #10
0
def test_reaction_annotation_wrong_ids(read_only_model, db):
    """
    Expect all annotations of reactions to be in the correct format.

    To identify databases and the identifiers belonging to them, computational
    tools rely on the presence of specific patterns. Only when these patterns
    can be identified consistently is an ID truly machine-readable. This test
    checks if the database cross-references in reaction annotations conform
    to patterns defined according to the MIRIAM guidelines, i.e. matching
    those that are defined at https://identifiers.org/.

    The required formats, i.e., regex patterns are further outlined in
    `annotation.py`. This test does not carry out a web query for the composed
    URI, it merely controls that the regex patterns match the identifiers.
    """
    ann = test_reaction_annotation_wrong_ids.annotation
    # Reactions that carry an annotation for this database at all.
    unannotated = annotation.generate_component_annotation_overview(
        read_only_model.reactions, db)
    total = get_ids(set(read_only_model.reactions).difference(unannotated))
    ann["data"][db] = total
    ann["metric"][db] = 1.0
    ann["message"][db] = wrapper.fill(
        """There are no reaction annotations for the {} database.
        """.format(db))
    assert len(total) > 0, ann["message"][db]
    # Reactions whose identifier fails the MIRIAM regex for this database.
    mismatched = get_ids(
        annotation.generate_component_annotation_miriam_match(
            read_only_model.reactions, "reactions", db))
    ann["data"][db] = mismatched
    ann["metric"][db] = len(mismatched) / len(read_only_model.reactions)
    ann["message"][db] = wrapper.fill(
        """The provided reaction annotations for the {} database do not match
        the regular expression patterns defined on identifiers.org. A total of
        {} reaction annotations ({:.2%}) needs to be fixed: {}""".format(
            db, len(mismatched), ann["metric"][db], truncate(mismatched)))
    assert len(mismatched) == 0, ann["message"][db]