def test_bioentities_get_when_exists():
    """``BioEntities.get`` returns the stored ``Subject`` when the CURIE is present."""
    foo_key = Curie("FOO", "123")
    foo_subject = Subject(
        Curie("FOO", "123"), "hello", "world", [], "protien",
        Curie("NCBITaxon", "12345"))
    store = collections.BioEntities({foo_key: foo_subject})

    # Look the entry up through a CURIE parsed from its string form rather
    # than the instance used as the dictionary key.
    found = store.get(Curie.from_str("FOO:123"))
    expected = Subject(
        Curie("FOO", "123"), "hello", "world", [], "protien",
        Curie("NCBITaxon", "12345"))

    assert found == expected
def test_bioentities_load_from_file():
    """Load the truncated PomBase GPI fixture, then check its size and one entry."""
    fixture_path = "tests/resources/truncated-pombase.gpi"
    pombase = collections.BioEntities.load_from_file(fixture_path)

    # Has 199 gpi lines in the file
    loaded_ids = pombase.entities.keys()
    assert len(loaded_ids) == 199

    # Spot-check a single entity from the fixture.
    found = pombase.get(Curie.from_str("PomBase:SPAC1565.04c"))
    expected = Subject(
        Curie.from_str("PomBase:SPAC1565.04c"),
        "ste4",
        "adaptor protein Ste4",
        [],
        "protein",
        Curie.from_str("NCBITaxon:4896"))
    assert found == expected
def test_bioentities_merge_clobber():
    """When both collections hold the same key, ``merge`` keeps the argument's value."""
    base = collections.BioEntities({
        Curie("FOO", "123"): Subject(
            Curie("FOO", "123"), "hello", "world", [], "protien",
            Curie("NCBITaxon", "12345")),
    })
    overriding = collections.BioEntities({
        Curie("FOO", "123"): Subject(
            Curie("FOO", "123"), "different", "world", [], "dog",
            Curie("NCBITaxon", "12345")),
    })

    merged = base.merge(overriding)

    # Fetch the key present in both inputs; the value should be the subject
    # that came from `overriding`.
    clobbered = merged.get(Curie("FOO", "123"))
    expected = Subject(
        Curie("FOO", "123"), "different", "world", [], "dog",
        Curie("NCBITaxon", "12345"))
    assert clobbered == expected
def test_bioentities_merge():
    """Merging collections with distinct keys yields one holding both entries."""
    left = collections.BioEntities({
        Curie("FOO", "123"): Subject(
            Curie("FOO", "123"), "hello", "world", [], "protien",
            Curie("NCBITaxon", "12345")),
    })
    right = collections.BioEntities({
        Curie("BAR", "987"): Subject(
            Curie("BAR", "987"), "goodbye", "world", [], "protien",
            Curie("NCBITaxon", "999")),
    })

    merged = left.merge(right)

    # The expected result contains the entry from each input collection.
    expected = collections.BioEntities({
        Curie("FOO", "123"): Subject(
            Curie("FOO", "123"), "hello", "world", [], "protien",
            Curie("NCBITaxon", "12345")),
        Curie("BAR", "987"): Subject(
            Curie("BAR", "987"), "goodbye", "world", [], "protien",
            Curie("NCBITaxon", "999")),
    })
    assert merged == expected
def test_bioentities_from_gpi_2_0():
    """Load the truncated MGI ``.gpi2`` fixture and compare one entity field by field."""
    entities = collections.BioEntities.load_from_file(
        "tests/resources/mgi.truncated.gpi2")

    # The identity part itself carries an "MGI:" prefix, so the string form
    # of this CURIE is "MGI:MGI:1918925".
    lookup_key = Curie(namespace="MGI", identity="MGI:1918925")
    found = entities.get(lookup_key)

    expected = Subject(
        id=Curie.from_str("MGI:MGI:1918925"),
        label="0610010F05Rik",
        fullname=["RIKEN cDNA 0610010F05 gene"],
        synonyms=[],
        type=[Curie.from_str("SO:0001217")],
        taxon=Curie(namespace="NCBITaxon", identity="10090"),
        db_xrefs=[Curie.from_str("UniProtKB:Q68FF0")])
    assert found == expected
def test_gaf_writer():
    """Write one ``GoAssociation`` with ``GafWriter`` and check the emitted GAF row.

    The first output line that does not start with "!" is compared against
    the expected tab-separated line.
    """
    # Build the pieces of the association one at a time, in the same order
    # the fields are passed to GoAssociation below.
    subject = Subject(
        id=Curie("PomBase", "SPAC25B8.17"),
        label="ypf1",
        type=["protein"],
        fullname=[
            "intramembrane aspartyl protease of the perinuclear ER membrane Ypf1 (predicted)"
        ],
        synonyms=["ppp81"],
        taxon=Curie("NCBITaxon", "4896"))
    go_term = Term(id=Curie("GO", "0000006"), taxon=Curie("NCBITaxon", "4896"))
    aspect = Aspect("C")
    relation = Curie("BFO", "0000050")
    interacting_taxon = Curie("NCBITaxon", "555")
    evidence = Evidence(
        type=Curie("ECO", "0000266"),
        has_supporting_reference=[Curie("GO_REF", "0000024")],
        with_support_from=[
            ConjunctiveSet(elements=[Curie("SGD", "S000001583")])
        ])
    provider = Provider("PomBase")
    annotation_date = Date(year="2015", month="03", day="05", time="")
    subject_extensions = [
        ExtensionUnit(relation=Curie("rdfs", "subClassOf"),
                      term=Curie("UniProtKB", "P12345"))
    ]
    object_extensions = [
        ConjunctiveSet(elements=[
            ExtensionUnit(relation=Curie("BFO", "0000050"),
                          term=Curie("X", "1"))
        ])
    ]

    association = GoAssociation(
        source_line="",
        subject=subject,
        object=go_term,
        negated=False,
        qualifiers=[],
        aspect=aspect,
        relation=relation,
        interacting_taxon=interacting_taxon,
        evidence=evidence,
        provided_by=provider,
        date=annotation_date,
        subject_extensions=subject_extensions,
        object_extensions=object_extensions,
        properties=dict())

    # `out` receives the gaf lines the writer produces for the association.
    out = io.StringIO()
    writer = assocwriter.GafWriter(file=out)

    # Expected GAF row, split at tab boundaries for readability; adjacent
    # literals are joined into a single string constant.
    expected = (
        "PomBase\tSPAC25B8.17\typf1\t\t"
        "GO:0000006\tGO_REF:0000024\tISO\tSGD:S000001583\tC\t"
        "intramembrane aspartyl protease of the perinuclear ER membrane Ypf1 (predicted)\t"
        "ppp81\tprotein\ttaxon:4896|taxon:555\t20150305\tPomBase\t"
        "part_of(X:1)\tUniProtKB:P12345"
    )

    writer.write_assoc(association)
    written = out.getvalue()
    print(written)

    # Drop the lines beginning with "!" and keep the first remaining line.
    data_lines = []
    for raw_line in written.split("\n"):
        if raw_line.startswith("!"):
            continue
        data_lines.append(raw_line.strip("\n"))
    gaf = data_lines[0]

    assert expected == gaf
def load_from_file(BioEntities, path: str):
    """Build a ``BioEntities`` collection from the GPI file at ``path``.

    Each line of the file is handed to ``entityparser.GpiParser.parse_line``;
    every entity it yields is stored as a ``Subject`` keyed by the ``Curie``
    parsed from the entity's ``"id"``.

    Any exception raised while opening or parsing the file is logged and
    swallowed, so whatever was collected before the failure (possibly
    nothing) is still returned.

    NOTE(review): the first parameter is named ``BioEntities`` and is called
    with the collected dict, so this is presumably a classmethod whose
    decorator is outside this view -- confirm.

    Args:
        BioEntities: callable invoked with the ``Curie -> Subject`` dict to
            produce the return value.
        path: location of the GPI file to read.

    Returns:
        The result of ``BioEntities(entities)``.
    """
    collected = dict()  # type: Dict[Curie, Subject]
    try:
        parser = entityparser.GpiParser()
        with open(path) as handle:
            for raw_line in handle:
                _unused, records = parser.parse_line(raw_line)
                for record in records:
                    # The parser is expected to supply a well-formed curie
                    # string under "id".
                    subject_id = Curie.from_str(record["id"])
                    # Fields are read in the order they are passed to Subject.
                    label = record["label"]
                    full_name = record["full_name"]
                    synonyms = record["synonyms"]
                    entity_type = record["type"]
                    taxon = Curie.from_str(record["taxon"]["id"])
                    collected[subject_id] = Subject(
                        subject_id, label, full_name, synonyms, entity_type,
                        taxon)
    except Exception as err:
        message = "Failed to read GPI file: {}".format(str(err))
        logger.error(message)

    return BioEntities(collected)