Example #1
0
def test_bioentities_get_when_exists():
    """Check that `get` returns the subject stored under an equal Curie.

    The lookup key is built with `Curie.from_str`, while the stored key is
    built with the two-argument constructor.
    """
    stored_subject = Subject(
        Curie("FOO", "123"), "hello", "world", [], "protien",
        Curie("NCBITaxon", "12345"))
    bioentities = collections.BioEntities(
        {Curie("FOO", "123"): stored_subject})

    found = bioentities.get(Curie.from_str("FOO:123"))

    expected_subject = Subject(
        Curie("FOO", "123"), "hello", "world", [], "protien",
        Curie("NCBITaxon", "12345"))
    assert found == expected_subject
Example #2
0
def test_bioentities_load_from_file():
    """Load the truncated PomBase GPI fixture and check its contents.

    Verifies the number of loaded entities and the subject stored for one
    known identifier.
    """
    fixture_path = "tests/resources/truncated-pombase.gpi"
    pombase = collections.BioEntities.load_from_file(fixture_path)

    # Has 199 gpi lines in the file
    entity_count = len(pombase.entities.keys())
    assert entity_count == 199

    loaded_ste4 = pombase.get(Curie.from_str("PomBase:SPAC1565.04c"))
    expected_ste4 = Subject(
        Curie.from_str("PomBase:SPAC1565.04c"),
        "ste4",
        "adaptor protein Ste4",
        [],
        "protein",
        Curie.from_str("NCBITaxon:4896"))
    assert loaded_ste4 == expected_ste4
Example #3
0
def test_bioentities_merge_clobber():
    """Check that merging collections sharing a key keeps the argument's value.

    Both collections hold an entry for the same Curie; after the merge, the
    lookup is expected to yield the subject from the merged-in collection.
    """
    base_subject = Subject(
        Curie("FOO", "123"), "hello", "world", [], "protien",
        Curie("NCBITaxon", "12345"))
    base = collections.BioEntities({Curie("FOO", "123"): base_subject})

    overriding_subject = Subject(
        Curie("FOO", "123"), "different", "world", [], "dog",
        Curie("NCBITaxon", "12345"))
    overriding = collections.BioEntities(
        {Curie("FOO", "123"): overriding_subject})

    # Get the clobbered key, the value should be the subject in `overriding`
    merged_value = base.merge(overriding).get(Curie("FOO", "123"))
    expected_value = Subject(
        Curie("FOO", "123"), "different", "world", [], "dog",
        Curie("NCBITaxon", "12345"))
    assert merged_value == expected_value
Example #4
0
def test_bioentities_merge():
    """Check that merging collections with distinct keys yields their union."""
    foo_only = collections.BioEntities({
        Curie("FOO", "123"): Subject(
            Curie("FOO", "123"), "hello", "world", [], "protien",
            Curie("NCBITaxon", "12345")),
    })
    bar_only = collections.BioEntities({
        Curie("BAR", "987"): Subject(
            Curie("BAR", "987"), "goodbye", "world", [], "protien",
            Curie("NCBITaxon", "999")),
    })

    merged = foo_only.merge(bar_only)

    # The expected collection contains both entries, unchanged.
    combined = collections.BioEntities({
        Curie("FOO", "123"): Subject(
            Curie("FOO", "123"), "hello", "world", [], "protien",
            Curie("NCBITaxon", "12345")),
        Curie("BAR", "987"): Subject(
            Curie("BAR", "987"), "goodbye", "world", [], "protien",
            Curie("NCBITaxon", "999")),
    })
    assert merged == combined
Example #5
0
def test_bioentities_from_gpi_2_0():
    """Load the truncated MGI GPI 2.0 fixture and check one known subject."""
    gpi2_path = "tests/resources/mgi.truncated.gpi2"
    entities = collections.BioEntities.load_from_file(gpi2_path)

    lookup_key = Curie(namespace="MGI", identity="MGI:1918925")
    loaded = entities.get(lookup_key)

    expected = Subject(
        id=Curie.from_str("MGI:MGI:1918925"),
        label="0610010F05Rik",
        fullname=["RIKEN cDNA 0610010F05 gene"],
        synonyms=[],
        type=[Curie.from_str("SO:0001217")],
        taxon=Curie(namespace="NCBITaxon", identity="10090"),
        db_xrefs=[Curie.from_str("UniProtKB:Q68FF0")],
    )
    assert loaded == expected
Example #6
0
def test_gaf_writer():
    """Write one GoAssociation through GafWriter and compare the GAF line.

    The association is assembled from its parts, written to an in-memory
    buffer, and the first line not starting with "!" is compared against the
    expected tab-separated text.
    """
    subject = Subject(
        id=Curie("PomBase", "SPAC25B8.17"),
        label="ypf1",
        type=["protein"],
        fullname=[
            "intramembrane aspartyl protease of the perinuclear ER membrane Ypf1 (predicted)"
        ],
        synonyms=["ppp81"],
        taxon=Curie("NCBITaxon", "4896"))
    term = Term(id=Curie("GO", "0000006"), taxon=Curie("NCBITaxon", "4896"))
    evidence = Evidence(
        type=Curie("ECO", "0000266"),
        has_supporting_reference=[Curie("GO_REF", "0000024")],
        with_support_from=[
            ConjunctiveSet(elements=[Curie("SGD", "S000001583")])
        ])
    subject_extension = ExtensionUnit(
        relation=Curie("rdfs", "subClassOf"),
        term=Curie("UniProtKB", "P12345"))
    object_extension = ConjunctiveSet(elements=[
        ExtensionUnit(relation=Curie("BFO", "0000050"), term=Curie("X", "1"))
    ])

    association = GoAssociation(
        source_line="",
        subject=subject,
        object=term,
        negated=False,
        qualifiers=[],
        aspect=Aspect("C"),
        relation=Curie("BFO", "0000050"),
        interacting_taxon=Curie("NCBITaxon", "555"),
        evidence=evidence,
        provided_by=Provider("PomBase"),
        date=Date(year="2015", month="03", day="05", time=""),
        subject_extensions=[subject_extension],
        object_extensions=[object_extension],
        properties={})

    # `out` will get written with gaf lines from the above association object
    out = io.StringIO()
    writer = assocwriter.GafWriter(file=out)
    expected = "PomBase\tSPAC25B8.17\typf1\t\tGO:0000006\tGO_REF:0000024\tISO\tSGD:S000001583\tC\tintramembrane aspartyl protease of the perinuclear ER membrane Ypf1 (predicted)\tppp81\tprotein\ttaxon:4896|taxon:555\t20150305\tPomBase\tpart_of(X:1)\tUniProtKB:P12345"
    writer.write_assoc(association)
    print(out.getvalue())

    # Skip lines beginning with "!" and keep the first remaining line.
    body_lines = []
    for line in out.getvalue().split("\n"):
        if line.startswith("!"):
            continue
        body_lines.append(line.strip("\n"))
    gaf = body_lines[0]
    assert expected == gaf
Example #7
0
    def load_from_file(BioEntities, path: str):
        """Build a collection of subjects from the GPI file at `path`.

        Each line of the file is handed to an `entityparser.GpiParser`; every
        entity it yields becomes a `Subject` stored under the `Curie` parsed
        from the entity's "id".

        Any exception raised while opening, reading or parsing is logged with
        `logger.error` and not re-raised; whatever was collected up to that
        point is still returned.

        NOTE(review): the first parameter is called with the collected dict,
        so this is presumably a classmethod receiving the class itself; the
        decorator is not visible in this excerpt, so confirm.

        :param path: location of the GPI file to read
        :return: the result of calling `BioEntities` with the collected dict
        """
        collected = dict()  # type: Dict[Curie, Subject]
        try:
            parser = entityparser.GpiParser()
            with open(path) as gpi_file:
                for raw_line in gpi_file:
                    _ignored, parsed_entities = parser.parse_line(raw_line)
                    for record in parsed_entities:
                        # the parsed id is expected to be a well-formed curie
                        subject_id = Curie.from_str(record["id"])
                        label = record["label"]
                        full_name = record["full_name"]
                        synonyms = record["synonyms"]
                        entity_type = record["type"]
                        taxon = Curie.from_str(record["taxon"]["id"])
                        collected[subject_id] = Subject(
                            subject_id, label, full_name, synonyms,
                            entity_type, taxon)
        except Exception as err:
            message = "Failed to read GPI file: {}".format(str(err))
            logger.error(message)

        return BioEntities(collected)