def test_errors_gaf():
    """Parse ``tests/resources/errors.gaf`` and check the resulting report.

    Asserts the total number of report messages, the number of messages of
    type ``Report.INVALID_IDSPACE`` and the number of associations returned,
    writes the associations with ``GafWriter`` and finally checks the
    extension elements that remain on each association.
    """
    config = assocparser.AssocParserConfig(
        ecomap=EcoMap()
    )
    p = GafParser(config=config)
    # Use a context manager so the fixture is closed even if parsing raises.
    with open("tests/resources/errors.gaf", "r") as gaf_file:
        assocs = p.parse(gaf_file, skipheader=True)
    msgs = p.report.messages
    print(json.dumps(p.report.to_report_json(), indent=4))

    n_invalid_idspace = 0
    for m in msgs:
        print("MESSAGE: {}".format(m))
        if m['type'] == assocparser.Report.INVALID_IDSPACE:
            n_invalid_idspace += 1
    assert len(msgs) == 13
    assert n_invalid_idspace == 1
    assert len(assocs) == 2

    w = GafWriter()
    w.write(assocs)
    for a in assocs:
        if a.object_extensions != []:
            # our test file has no ORs, so in DNF this is always the first
            xs = a.object_extensions[0].elements
            print(xs)
            for x in xs:
                print('X: {}'.format(x))
                # ensure that invalid expressions have been eliminated
                assert x.relation == association.Curie("BFO", "0000050")
                assert x.term == association.Curie.from_str('X:1')
            assert len(xs) == 1
def test_errors_gaf():
    """Parse ``tests/resources/errors.gaf`` (header skipped) and check the report.

    Asserts the number of report messages, the number of
    ``Report.INVALID_IDSPACE`` messages and the number of associations, writes
    the associations with ``GafWriter`` and checks the extension expressions
    left on each association dict.
    """
    p = GafParser()
    p.config.ecomap = EcoMap()
    # Use a context manager so the fixture is closed even if parsing raises.
    with open("tests/resources/errors.gaf", "r") as gaf_file:
        assocs = p.parse(gaf_file, skipheader=True)
    msgs = p.report.messages
    print("MESSAGES: {}".format(len(msgs)))

    n_invalid_idspace = 0
    for m in msgs:
        print("MESSAGE: {}".format(m))
        if m['type'] == assocparser.Report.INVALID_IDSPACE:
            n_invalid_idspace += 1
    assert len(msgs) == 17
    assert n_invalid_idspace == 1

    # we expect 6
    assert len(assocs) == 6

    w = GafWriter()
    w.write(assocs)
    for a in assocs:
        if 'object_extensions' in a:
            # our test file has no ORs, so in DNF this is always the first
            xs = a['object_extensions']['union_of'][0]['intersection_of']
            for x in xs:
                print('X: {}'.format(x))
                # ensure that invalid expressions have been eliminated
                assert x['property'] == 'foo'
                assert x['filler'] == 'X:1'
            assert len(xs) == 1
def test_errors_gaf():
    """Parse ``tests/resources/errors.gaf`` and check the resulting report.

    Asserts the number of report messages and of associations returned, writes
    the associations with ``GafWriter`` and checks the extension expressions
    left on each association dict.
    """
    p = GafParser()
    p.config.ecomap = EcoMap()
    # Use a context manager so the fixture is closed even if parsing raises.
    with open("tests/resources/errors.gaf", "r") as gaf_file:
        assocs = p.parse(gaf_file)
    msgs = p.report.messages
    print("MESSAGES: {}".format(len(msgs)))
    for m in msgs:
        print("MESSAGE: {}".format(m))
    assert len(msgs) == 15

    # seven associations are expected to be returned
    assert len(assocs) == 7

    from ontobio.io import GafWriter
    w = GafWriter()
    w.write(assocs)
    for a in assocs:
        if 'object_extensions' in a:
            # our test file has no ORs, so in DNF this is always the first
            xs = a['object_extensions']['union_of'][0]['intersection_of']
            for x in xs:
                print('X: {}'.format(x))
                # ensure that invalid expressions have been eliminated
                assert x['property'] == 'foo'
                assert x['filler'] == 'X:1'
            assert len(xs) == 1
def test_errors_gaf():
    """Parse ``tests/resources/errors.gaf`` and check the number of report messages."""
    p = GafParser()
    p.config.ecomap = EcoMap()
    # Use a context manager so the fixture is closed even if parsing raises.
    # The returned associations are not inspected here; only the report is.
    with open("tests/resources/errors.gaf", "r") as gaf_file:
        p.parse(gaf_file)
    msgs = p.report.messages
    print("MESSAGES: {}".format(len(msgs)))
    for m in msgs:
        print("MESSAGE: {}".format(m))
    assert len(msgs) == 8
def test_skim_gaf():
    """Skim the ``POMBASE`` GAF file and check the returned 3-tuples.

    Expects 370 tuples whose first element starts with ``PomBase:`` and whose
    third element starts with ``GO:``.
    """
    p = GafParser()
    p.config.ecomap = EcoMap()
    # Use a context manager so the fixture is closed even if skimming raises.
    with open(POMBASE, "r") as gaf_file:
        results = p.skim(gaf_file)
    assert len(results) == 370
    for r in results:
        print(str(r))
        (s, sn, o) = r
        assert o.startswith('GO:')
        assert s.startswith('PomBase:')
def test_alt_id_repair():
    """Parse one GAF line against the ``ALT_ID_ONT`` ontology.

    The line annotates to ``GO:0043623``; the test expects at least one
    association back and that the first one still carries that object id.
    """
    parser = GafParser()
    ontology = OntologyFactory().create(ALT_ID_ONT)
    parser.config.ecomap = EcoMap()
    parser.config.ontology = ontology

    gaf_stream = io.StringIO("SGD\tS000000819\tAFG3\t\tGO:0043623\tPMID:8681382|SGD_REF:S000055187\tIMP\t\tP\tMitochondrial inner membrane m-AAA protease component\tYER017C|AAA family ATPase AFG3|YTA10\tgene\ttaxon:559292\t20170428\tSGD")
    parsed = parser.parse(gaf_stream, skipheader=True)

    assert len(parsed) > 0
    first_object = parsed[0]["object"]
    assert first_object["id"] == "GO:0043623"
def test_skim_gpad():
    """Skim the ``POMBASE_GPAD`` file and check the returned 3-tuples.

    Expects 1984 tuples whose first element starts with ``PomBase:`` or
    ``PR:`` and whose third element starts with ``GO:``.
    """
    p = GpadParser()
    p.config.ecomap = EcoMap()
    # Use a context manager so the fixture is closed even if skimming raises.
    with open(POMBASE_GPAD, "r") as gpad_file:
        results = p.skim(gpad_file)
    assert len(results) == 1984
    for r in results:
        print(str(r))
        (s, sn, o) = r
        assert o.startswith('GO:')
        assert s.startswith(('PomBase:', 'PR:'))
def parse_with(f, p):
    """Parse file ``f`` with parser ``p`` and assert on the associations.

    Args:
        f: path of the association file; when it equals ``POMBASE`` the file
            is treated as GAF and extra GAF-only assertions are made.
        p: parser instance; its ``config.ecomap`` and ``config.ontology`` are
            overwritten here.

    The first association is checked in detail, every association gets basic
    sanity checks, and the report may only contain messages for rules 59 and 27.
    """
    p.config.ecomap = EcoMap()
    is_gaf = f == POMBASE
    ont = OntologyFactory().create(ONT)

    if is_gaf:
        # only do ontology checking on GAF parse;
        # this is because ontology is made from GAF
        p.config.ontology = ont
    else:
        p.config.ontology = None

    # Use a context manager so the input is closed even if parsing raises.
    with open(f, "r") as assoc_file:
        results = p.parse(assoc_file, skipheader=True)
    print(p.report.to_markdown())
    r1 = results[0]
    # TODO: test datafile does not have ECOs yet!!
    assert ecomap.ecoclass_to_coderef(str(r1.evidence.type))[0] == 'ISO' or str(r1.evidence.type) == 'ECO:0000201'
    assert r1.evidence.with_support_from == [
        association.ConjunctiveSet([association.Curie.from_str('SGD:S000001583')])
    ]
    assert r1.evidence.has_supporting_reference == [
        association.Curie.from_str('GO_REF:0000024')
    ]

    if is_gaf:
        assert r1.subject.label == 'ypf1'
        assert association.ymd_str(r1.date, "") == '20150305'

    for r in results:
        sid = r.subject.id
        prov = r.provided_by
        assert prov == 'PomBase' or prov == 'UniProt'
        assert r.object.id.namespace == "GO"
        assert sid.namespace == 'PomBase' or (not is_gaf and sid.namespace == 'PR')
        if is_gaf:
            assert str(r.subject.taxon) == 'NCBITaxon:4896'

    print("MESSAGES (sample): {}".format(p.report.messages[0:5]))
    # Messages that aren't about upgrading qualifiers in rule 59 should be 0
    unexpected = [
        msg for msg in p.report.messages
        if msg["rule"] != 59 and msg["rule"] != 27
    ]
    assert len(unexpected) == 0
def test_alt_id_repair():
    """Check that GO ids in GAF lines are repaired using ``ALT_ID_ONT``.

    Each case is a single GAF line plus the object id expected on the first
    parsed association.
    """
    parser = GafParser()
    ontology = OntologyFactory().create(ALT_ID_ONT)
    parser.config.ecomap = EcoMap()
    parser.config.ontology = ontology

    cases = (
        # GO:1 is obsolete and has been replaced, so the replacement id GO:2 is expected.
        (
            "SGD\tS000000819\tAFG3\t\tGO:1\tPMID:8681382|SGD_REF:S000055187\tIMP\t\tP\tMitochondrial inner membrane m-AAA protease component\tYER017C|AAA family ATPase AFG3|YTA10\tgene\ttaxon:559292\t20170428\tSGD",
            "GO:2",
        ),
        # GO:4 is obsolete due to it being merged into GO:3
        (
            "SGD\tS000000819\tAFG3\t\tGO:4\tPMID:8681382|SGD_REF:S000055187\tIMP\t\tP\tMitochondrial inner membrane m-AAA protease component\tYER017C|AAA family ATPase AFG3|YTA10\tgene\ttaxon:559292\t20170428\tSGD",
            "GO:3",
        ),
    )
    for gaf_line, expected_id in cases:
        parsed = parser.parse(io.StringIO(gaf_line), skipheader=True)
        assert parsed[0]["object"]["id"] == expected_id
def test_skim_gaf_qualifiers():
    """Skim the ``QGAF`` file with and without excluded relations.

    The first pass expects 5 tuples; after setting ``exclude_relations`` to
    ``contributes_to`` and ``colocalizes_with`` the second pass expects 3.
    In both passes subjects must start with ``MGI:`` or ``PomBase`` and
    objects with ``GO:``.
    """
    p = GafParser()
    p.config.ecomap = EcoMap()
    p.config.remove_double_prefixes = True

    # Use a context manager so the fixture is closed even if skimming raises.
    with open(QGAF, "r") as gaf_file:
        results = p.skim(gaf_file)
    for r in results:
        print(str(r))
        (s, sn, o) = r
        assert o.startswith('GO:')
        assert s.startswith(('MGI:', 'PomBase'))
    assert len(results) == 5  # ensure NOTs are skipped

    p.config.exclude_relations = ['contributes_to', 'colocalizes_with']
    with open(QGAF, "r") as gaf_file:
        results = p.skim(gaf_file)
    for r in results:
        (s, sn, o) = r
        assert o.startswith('GO:')
        assert s.startswith(('MGI:', 'PomBase'))
    assert len(results) == 3  # ensure NOTs and excludes relations skipped
def parse_with(f, p):
    """Parse file ``f`` with parser ``p`` and assert on the association dicts.

    Args:
        f: path of the association file; when it equals ``POMBASE`` the file
            is treated as GAF and extra GAF-only assertions are made.
        p: parser instance; its ``config.ecomap`` and ``config.ontology`` are
            overwritten here.

    The first association is checked in detail, every association gets basic
    sanity checks, and the parser report must contain no messages.
    """
    p.config.ecomap = EcoMap()
    is_gaf = f == POMBASE
    ont = OntologyFactory().create(ONT)

    if is_gaf:
        # only do ontology checking on GAF parse;
        # this is because ontology is made from GAF
        p.config.ontology = ont
    else:
        p.config.ontology = None

    # Use a context manager so the input is closed even if parsing raises.
    with open(f, "r") as assoc_file:
        results = p.parse(assoc_file, skipheader=True)
    print(p.report.to_markdown())
    r1 = results[0]
    # TODO: test datafile does not have ECOs yet!!
    assert r1['evidence']['type'] == 'ISO' or r1['evidence']['type'] == 'ECO:0000201'
    assert r1['evidence']['with_support_from'] == ['SGD:S000001583']
    assert r1['evidence']['has_supporting_reference'] == ['GO_REF:0000024']

    if is_gaf:
        assert r1['subject']['label'] == 'ypf1'
        assert r1['date'] == '20150305'

    for r in results:
        sid = r['subject']['id']
        prov = r['provided_by']
        assert prov == 'PomBase' or prov == 'UniProt'
        assert r['object']['id'].startswith('GO:')
        assert sid.startswith('PomBase:') or (not is_gaf and sid.startswith('PR'))
        if is_gaf:
            assert r['subject']['taxon']['id'] == 'NCBITaxon:4896'

    print("MESSAGES (sample): {}".format(p.report.messages[0:5]))
    assert len(p.report.messages) == 0
    print(p.report.to_markdown())
def test_errors_gaf():
    """Parse ``tests/resources/errors.gaf`` and check the resulting report.

    Asserts the number of report messages and of associations returned, writes
    the associations with ``GafWriter`` and checks the object extensions
    collected across all associations.
    """
    p = GafParser()
    p.config.ecomap = EcoMap()
    # Use a context manager so the fixture is closed even if parsing raises.
    with open("tests/resources/errors.gaf", "r") as gaf_file:
        assocs = p.parse(gaf_file)
    msgs = p.report.messages
    print("MESSAGES: {}".format(len(msgs)))
    for m in msgs:
        print("MESSAGE: {}".format(m))
    assert len(msgs) == 15

    # seven associations are expected to be returned
    assert len(assocs) == 7

    from ontobio.io import GafWriter
    w = GafWriter()
    w.write(assocs)

    # Flatten the extensions of every association into one list.
    xs = [x for a in assocs for x in a.get('object_extensions', [])]
    for x in xs:
        print('X: {}'.format(x))
        # ensure that invalid expressions have been eliminated
        assert x['property'] == 'foo'
        assert x['filler'] == 'X:1'
    assert len(xs) == 3
def test_convert_gaf_to_gpad():
    """Run ``convert`` on the ``POMBASE`` file with a GAF parser, a GPAD writer and a GPAD parser."""
    gaf_parser = GafParser()
    gaf_parser.config.ecomap = EcoMap()
    gpad_writer = GpadWriter()
    gpad_parser = GpadParser()
    convert(POMBASE, gaf_parser, gpad_writer, gpad_parser)
from ontobio.io import assocparser from ontobio.io.gpadparser import GpadParser from ontobio.io import gafparser from ontobio.io.gafparser import GafParser from ontobio.io import GafWriter from ontobio.io.assocwriter import GpadWriter from ontobio.assoc_factory import AssociationSetFactory from ontobio.ontol_factory import OntologyFactory from ontobio.model import association from ontobio.rdfgen import relations from ontobio.ecomap import EcoMap ecomap = EcoMap() ecomap.mappings() import tempfile import logging import pytest import io import json POMBASE = "tests/resources/truncated-pombase.gaf" POMBASE_GPAD = "tests/resources/truncated-pombase.gpad" ONT = "tests/resources/go-truncated-pombase.json" QGAF = "tests/resources/test-qualifiers.gaf" def test_skim_gaf(): p = GafParser() p.config.ecomap = EcoMap()
def __init__(self, filter_rule: FilterRule):
    """Create a fresh ``EcoMap`` for this instance and remember ``filter_rule``."""
    self.ecomap = EcoMap()
    self.filter_rule = filter_rule
class RdfTransform(object):
    """
    base class for all RDF generators
    """

    def __init__(self, writer=None):
        """Set up the transform.

        Args:
            writer: object with an ``add(s, p, o)`` method; a new
                ``TurtleRdfWriter`` is created when omitted.
        """
        if writer is None:
            writer = TurtleRdfWriter()
        self.writer = writer
        self.include_subject_info = False
        self.ecomap = EcoMap()
        self._emit_header_done = False
        self.uribase = 'http://example.org/'

    def genid(self):
        """Return a new ``URIRef`` built from a random UUID URN."""
        return URIRef(uuid.uuid4().urn)

    def blanknode(self):
        """Return a new blank node."""
        return BNode()

    def uri(self, id):
        """Expand ``id`` to a ``URIRef``.

        ``id`` is either an identifier string or a dict whose ``'id'`` entry
        holds the identifier.
        """
        # allow either atoms or objects
        if isinstance(id, dict):
            return self.uri(id['id'])
        logging.info("Expand: {}".format(id))
        return URIRef(expand_uri(id))

    def emit(self, s, p, o):
        """Add the triple ``(s, p, o)`` to the writer and return it as a tuple."""
        logging.debug("TRIPLE: {} {} {}".format(s, p, o))
        self.writer.add(s, p, o)
        return (s, p, o)

    def emit_type(self, s, t):
        """Emit ``s rdf:type t``."""
        return self.emit(s, RDF.type, t)

    def emit_label(self, s, t):
        """Emit ``s rdfs:label t``.

        ``t`` is passed to the writer unchanged.
        NOTE(review): callers presumably need to pass an RDF literal here - confirm.
        """
        # The original body referenced an undefined name ``o`` and raised
        # NameError on every call; the label is the ``t`` argument.
        return self.emit(s, RDFS.label, t)

    def eco_class(self, code, coderef=None):
        """Map an evidence code (and optional reference) to the URI of its ECO class."""
        eco_cls_id = self.ecomap.coderef_to_ecoclass(code, coderef)
        logging.debug(self.ecomap._mappings)
        logging.debug('ECO: {},{}->{}'.format(code, coderef, eco_cls_id))
        return self.uri(eco_cls_id)

    def translate_evidence(self, association, stmt):
        """Emit the reified axiom and evidence triples for one association.

        Shape of the output::

            _:1 a Axiom
                subject s
                predicate p
                object o
                evidence [ a ECO ; ...]

        Args:
            association: dict with an ``'evidence'`` dict providing ``'type'``
                and ``'has_supporting_reference'``, and optionally ``'id'``
                and ``'with_support_from'``.
            stmt: the ``(s, p, o)`` triple being annotated.
        """
        ev = association['evidence']
        if 'id' in ev:
            ev_id = self.uri(ev['id'])
        else:
            ev_id = self.genid()

        stmt_id = self.blanknode()  # OWL reification: must be blank
        (s, p, o) = stmt
        self.emit_type(stmt_id, OWL.Axiom)
        self.emit(stmt_id, OWL.subject, s)
        self.emit(stmt_id, OWL.predicate, p)
        self.emit(stmt_id, OWL.object, o)
        self.emit(stmt_id, self.uri(evt.axiom_has_evidence), ev_id)

        ev_cls = self.eco_class(self.uri(ev['type']))
        self.emit_type(ev_id, OWL.NamedIndividual)
        self.emit_type(ev_id, ev_cls)

        # The original emitted the with_support_from triples in two identical
        # loops; one pass produces the same set of triples.
        if 'with_support_from' in ev:
            for w in ev['with_support_from']:
                self.emit(ev_id, self.uri(evt.evidence_with_support_from), self.uri(w))
        for ref in ev['has_supporting_reference']:
            self.emit(ev_id, HAS_SUPPORTING_REFERENCE, self.uri(ref))
class RdfTransform(object):
    """
    base class for all RDF generators
    """

    def __init__(self, writer=None):
        """Set up the transform.

        Args:
            writer: object providing ``add(s, p, o)``, ``base`` and ``graph``;
                a new ``TurtleRdfWriter`` is created when omitted.
        """
        if writer is None:
            writer = TurtleRdfWriter()
        self.writer = writer
        self.include_subject_info = False
        self.ecomap = EcoMap()
        self._emit_header_done = False
        self.uribase = writer.base
        self.ecomap.mappings()
        # Raw string: "\." and "\-" are invalid escapes in a normal string
        # literal. The compiled pattern is unchanged.
        self.bad_chars_regex = re.compile(r"[^\.:_\-0-9a-zA-Z]")
        self.ro_lookup = dict(relations.label_relation_lookup())

    def blanknode(self):
        """Return a new blank node."""
        return BNode()

    def uri(self, id):
        """Expand ``id`` to a ``URIRef``.

        ``id`` is either an identifier string or a dict whose ``'id'`` entry
        holds the identifier. Characters outside ``. : _ - 0-9 a-z A-Z`` are
        replaced by ``_`` before expansion. When expansion changes the
        identifier, its prefix is bound on the writer's graph.
        """
        # allow either atoms or objects
        if isinstance(id, dict):
            return self.uri(id['id'])
        id = self.bad_chars_regex.sub("_", id)
        uri = curie_util.expand_uri(id, cmaps=[prefix_context])
        if uri != id:
            # If URI is different, then that means we found an curie expansion,
            # and we should add the prefix
            prefix = id.split(":")[0]
            self.writer.graph.bind(prefix, prefix_context[prefix])
        return URIRef(uri)

    def lookup_relation(self, label):
        """Return the URI cached for a relation label, or ``None`` if unknown.

        Underscores in ``label`` are replaced by spaces before the lookup.
        """
        label = label.replace('_', ' ')
        if label in self.ro_lookup:
            return self.uri(self.ro_lookup[label])
        return None

    def emit(self, s, p, o):
        """Add the triple ``(s, p, o)`` to the writer and return it as a tuple."""
        logger.debug("TRIPLE: {} {} {}".format(s, p, o))
        self.writer.add(s, p, o)
        return (s, p, o)

    def emit_type(self, s, t):
        """Emit ``s rdf:type t``."""
        return self.emit(s, RDF.type, t)

    def emit_label(self, s, o):
        """Emit ``s rdfs:label o`` with ``o`` wrapped in a ``Literal``."""
        return self.emit(s, RDFS.label, Literal(o))

    def emit_not(self, s, t):
        """Type ``s`` as the complement of class ``t`` via a fresh blank-node class."""
        bn = self.blanknode()
        self.emit_type(bn, OWL.Class)
        self.emit(bn, OWL.complementOf, URIRef(expand_uri(t)))
        return self.emit_type(s, bn)

    def eco_class(self, code, coderef=None):
        """Map an evidence code (and optional reference) to the URI of its ECO class."""
        eco_cls_id = self.ecomap.coderef_to_ecoclass(code, coderef)
        logger.debug(self.ecomap._mappings)
        logger.debug('ECO: {},{}->{}'.format(code, coderef, eco_cls_id))
        return self.uri(eco_cls_id)

    def translate_evidence(self, association, stmt):
        """Emit the reified axiom and evidence triples for one association.

        Shape of the output::

            _:1 a Axiom
                owl:annotatedSource s
                owl:annotatedProperty p
                owl:annotatedTarget o
                evidence [ a ECO ; ...]

        Args:
            association: dict with an ``'evidence'`` dict providing ``'type'``
                and ``'has_supporting_reference'``, and optionally ``'id'``
                and ``'with_support_from'``.
            stmt: the ``(s, p, o)`` triple being annotated.
        """
        ev = association['evidence']
        if 'id' in ev:
            ev_id = self.uri(ev['id'])
        else:
            ev_id = genid(base=self.writer.base + '/')

        stmt_id = self.blanknode()  # OWL reification: must be blank
        (s, p, o) = stmt
        self.emit_type(stmt_id, OWL.Axiom)
        self.emit(stmt_id, OWL.annotatedSource, s)
        self.emit(stmt_id, OWL.annotatedProperty, p)
        self.emit(stmt_id, OWL.annotatedTarget, o)
        self.emit(stmt_id, self.uri(evt.axiom_has_evidence), ev_id)

        ev_cls = self.eco_class(self.uri(ev['type']))
        self.emit_type(ev_id, OWL.NamedIndividual)
        self.emit_type(ev_id, ev_cls)

        # The original emitted the with_support_from triples in two identical
        # loops; one pass produces the same set of triples.
        if 'with_support_from' in ev:
            for w in ev['with_support_from']:
                self.emit(ev_id, self.uri(evt.evidence_with_support_from), self.uri(w))
        for ref in ev['has_supporting_reference']:
            ref_node = self.uri(ref)
            # A reference that does not expand as a CURIE is emitted as a literal.
            if ref == expand_uri(ref):
                ref_node = Literal(ref)
            self.emit(ev_id, HAS_SUPPORTING_REFERENCE, ref_node)
class RdfTransform(object):
    """
    base class for all RDF generators
    """

    def __init__(self, writer=None):
        """Set up the transform.

        Args:
            writer: object providing ``add(s, p, o)``, ``base`` and ``graph``;
                a new ``TurtleRdfWriter`` is created when omitted.
        """
        if writer is None:
            writer = TurtleRdfWriter()
        self.writer = writer
        self.include_subject_info = False
        self.ecomap = EcoMap()
        self._emit_header_done = False
        self.uribase = writer.base
        self.ro = None  # filled on first use by get_relation_ontology()
        self.ecomap.mappings()

    def blanknode(self):
        """Return a new blank node."""
        return BNode()

    def uri(self, id):
        """Expand ``id`` to a ``URIRef``.

        ``id`` is either an identifier string or a dict whose ``'id'`` entry
        holds the identifier. When expansion changes the identifier, its
        prefix is bound on the writer's graph.
        """
        # allow either atoms or objects
        if isinstance(id, dict):
            return self.uri(id['id'])
        logging.info("Expand: {}".format(id))
        uri = curie_util.expand_uri(id, cmaps=[prefix_context])
        if uri != id:
            # If URI is different, then that means we found an curie expansion,
            # and we should add the prefix
            prefix = id.split(":")[0]
            self.writer.graph.bind(prefix, prefix_context[prefix])
        return URIRef(uri)

    def get_relation_ontology(self):
        """Return the ``'ro'`` ontology, creating it on first use and caching it."""
        if self.ro is None:
            ofa = OntologyFactory()
            self.ro = ofa.create('ro')
        return self.ro

    def lookup_relation(self, label):
        """Return the URI of the first ontology search hit for ``label``.

        Underscores in ``label`` are replaced by spaces before searching.
        Returns ``None`` when the search yields nothing.
        """
        ro = self.get_relation_ontology()
        label = label.replace('_', ' ')
        # Every hit is passed through self.uri(), as in the original.
        results = [self.uri(x) for x in ro.search(label)]
        if results:
            return results[0]
        return None

    def emit(self, s, p, o):
        """Add the triple ``(s, p, o)`` to the writer and return it as a tuple."""
        logging.debug("TRIPLE: {} {} {}".format(s, p, o))
        self.writer.add(s, p, o)
        return (s, p, o)

    def emit_type(self, s, t):
        """Emit ``s rdf:type t``."""
        return self.emit(s, RDF.type, t)

    def emit_label(self, s, t):
        """Emit ``s rdfs:label t`` with ``t`` wrapped in a ``Literal``."""
        # The original body referenced an undefined name ``o`` and raised
        # NameError on every call; the label is the ``t`` argument.
        return self.emit(s, RDFS.label, Literal(t))

    def eco_class(self, code, coderef=None):
        """Map an evidence code (and optional reference) to the URI of its ECO class."""
        eco_cls_id = self.ecomap.coderef_to_ecoclass(code, coderef)
        logging.debug(self.ecomap._mappings)
        logging.debug('ECO: {},{}->{}'.format(code, coderef, eco_cls_id))
        return self.uri(eco_cls_id)

    def translate_evidence(self, association, stmt):
        """Emit the reified axiom and evidence triples for one association.

        Shape of the output::

            _:1 a Axiom
                owl:annotatedSource s
                owl:annotatedProperty p
                owl:annotatedTarget o
                evidence [ a ECO ; ...]

        Args:
            association: dict with an ``'evidence'`` dict providing ``'type'``
                and ``'has_supporting_reference'``, and optionally ``'id'``
                and ``'with_support_from'``.
            stmt: the ``(s, p, o)`` triple being annotated.
        """
        ev = association['evidence']
        if 'id' in ev:
            ev_id = self.uri(ev['id'])
        else:
            ev_id = genid(base=self.writer.base + '/')

        stmt_id = self.blanknode()  # OWL reification: must be blank
        (s, p, o) = stmt
        self.emit_type(stmt_id, OWL.Axiom)
        self.emit(stmt_id, OWL.annotatedSource, s)
        self.emit(stmt_id, OWL.annotatedProperty, p)
        self.emit(stmt_id, OWL.annotatedTarget, o)
        self.emit(stmt_id, self.uri(evt.axiom_has_evidence), ev_id)

        ev_cls = self.eco_class(self.uri(ev['type']))
        self.emit_type(ev_id, OWL.NamedIndividual)
        self.emit_type(ev_id, ev_cls)

        # The original emitted the with_support_from triples in two identical
        # loops; one pass produces the same set of triples.
        if 'with_support_from' in ev:
            for w in ev['with_support_from']:
                self.emit(ev_id, self.uri(evt.evidence_with_support_from), self.uri(w))
        for ref in ev['has_supporting_reference']:
            ref_node = self.uri(ref)
            # A reference that does not expand as a CURIE is emitted as a literal.
            if ref == expand_uri(ref):
                ref_node = Literal(ref)
            self.emit(ev_id, HAS_SUPPORTING_REFERENCE, ref_node)