Code Example #1
File: test_gafparser.py  Project: kshefchek/ontobio
def test_errors_gaf():
    config = assocparser.AssocParserConfig(
        ecomap=EcoMap()
    )
    p = GafParser(config=config)
    assocs = p.parse(open("tests/resources/errors.gaf", "r"), skipheader=True)
    msgs = p.report.messages
    print(json.dumps(p.report.to_report_json(), indent=4))
    # print("MESSAGES: {}".format(len(msgs)))
    n_invalid_idspace = 0
    for m in msgs:
        print("MESSAGE: {}".format(m))
        if m['type'] == assocparser.Report.INVALID_IDSPACE:
            n_invalid_idspace += 1
    assert len(msgs) == 13
    assert n_invalid_idspace == 1
    assert len(assocs) == 2

    w = GafWriter()
    w.write(assocs)
    for a in assocs:
        if a.object_extensions != []:
            # our test file has no ORs, so in DNF this is always the first
            xs = a.object_extensions[0].elements
            print(xs)
            for x in xs:
                print('X: {}'.format(x))
                # ensure that invalid expressions have been eliminated
                assert x.relation == association.Curie("BFO", "0000050")
                assert x.term == association.Curie.from_str('X:1')
            assert len(xs) == 1
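The loop above only inspects the first disjunct because the test data contains no ORs. As a rough sketch (not part of the test), the full disjunctive-normal-form structure implied by the attributes used above can be walked like this, where each ConjunctiveSet is one OR branch and its elements carry a relation and a term Curie:

# Illustrative sketch only: walk every OR branch of an association's
# object extensions, not just the first one. Attribute names follow the
# assertions in the test above (relation and term are Curie objects).
for conj in a.object_extensions:        # one ConjunctiveSet per OR branch
    for element in conj.elements:       # AND-ed extension units
        print(element.relation, element.term)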
Code Example #2
File: test_gafparser.py  Project: realmarcin/ontobio
def test_errors_gaf():
    p = GafParser()
    p.config.ecomap = EcoMap()
    assocs = p.parse(open("tests/resources/errors.gaf", "r"), skipheader=True)
    msgs = p.report.messages
    print("MESSAGES: {}".format(len(msgs)))
    n_invalid_idspace = 0
    for m in msgs:
        print("MESSAGE: {}".format(m))
        if m['type'] == assocparser.Report.INVALID_IDSPACE:
            n_invalid_idspace += 1
    assert len(msgs) == 17
    assert n_invalid_idspace == 1

    # we expect 6
    assert len(assocs) == 6

    w = GafWriter()
    w.write(assocs)
    for a in assocs:
        if 'object_extensions' in a:
            # our test file has no ORs, so in DNF this is always the first
            xs = a['object_extensions']['union_of'][0]['intersection_of']
            for x in xs:
                print('X: {}'.format(x))
                # ensure that invalid expressions have been eliminated
                assert x['property'] == 'foo'
                assert x['filler'] == 'X:1'
            assert len(xs) == 1
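In this older dict-based result model, the lookup path in the loop above implies a nested DNF structure for object extensions. A hypothetical example of that shape (illustrative values, not the actual contents of errors.gaf):

# Hypothetical association dict, showing only the shape implied by the
# lookups above: union_of is the OR level, intersection_of the AND level.
a = {
    "object_extensions": {
        "union_of": [
            {"intersection_of": [
                {"property": "part_of", "filler": "X:1"},
            ]},
        ],
    },
}
xs = a["object_extensions"]["union_of"][0]["intersection_of"]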
Code Example #3
def test_errors_gaf():
    p = GafParser()
    p.config.ecomap = EcoMap()
    assocs = p.parse(open("tests/resources/errors.gaf", "r"))
    msgs = p.report.messages
    print("MESSAGES: {}".format(len(msgs)))
    for m in msgs:
        print("MESSAGE: {}".format(m))
    assert len(msgs) == 15

    # we expect 7 associations to parse successfully
    assert len(assocs) == 7
    from ontobio.io import GafWriter
    w = GafWriter()
    w.write(assocs)
    for a in assocs:
        if 'object_extensions' in a:
            # our test file has no ORs, so in DNF this is always the first
            xs = a['object_extensions']['union_of'][0]['intersection_of']
            for x in xs:
                print('X: {}'.format(x))
                # ensure that invalid expressions have been eliminated
                assert x['property'] == 'foo'
                assert x['filler'] == 'X:1'
            assert len(xs) == 1
Code Example #4
def test_errors_gaf():
    p = GafParser()
    p.config.ecomap = EcoMap()
    assocs = p.parse(open("tests/resources/errors.gaf", "r"))
    msgs = p.report.messages
    print("MESSAGES: {}".format(len(msgs)))
    for m in msgs:
        print("MESSAGE: {}".format(m))
    assert len(msgs) == 8
Code Example #5
File: test_gafparser.py  Project: cbp44/ontobio
def test_skim_gaf():
    p = GafParser()
    p.config.ecomap = EcoMap()
    results = p.skim(open(POMBASE, "r"))
    assert len(results) == 370
    for r in results:
        print(str(r))
        (s, sn, o) = r
        assert o.startswith('GO:')
        assert s.startswith('PomBase:')
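skim() yields (subject id, subject name, object id) tuples rather than full association objects. A small sketch (not part of the test) showing how those tuples can be collapsed into a subject-to-terms mapping:

from collections import defaultdict

# Sketch: group the skim() tuples by subject, collecting the GO terms.
terms_by_subject = defaultdict(set)
for subject_id, subject_name, object_id in results:
    terms_by_subject[subject_id].add(object_id)
print(len(terms_by_subject))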
Code Example #6
File: test_gafparser.py  Project: lpalbou/ontobio
def test_alt_id_repair():
    p = GafParser()
    ont = OntologyFactory().create(ALT_ID_ONT)
    p.config.ecomap = EcoMap()
    p.config.ontology = ont
    gaf = io.StringIO("SGD\tS000000819\tAFG3\t\tGO:0043623\tPMID:8681382|SGD_REF:S000055187\tIMP\t\tP\tMitochondrial inner membrane m-AAA protease component\tYER017C|AAA family ATPase AFG3|YTA10\tgene\ttaxon:559292\t20170428\tSGD")

    assocs = p.parse(gaf, skipheader=True)
    assert len(assocs) > 0
    assert assocs[0]["object"]["id"] == "GO:0043623"
Code Example #7
File: test_gafparser.py  Project: cbp44/ontobio
def test_skim_gpad():
    p = GpadParser()
    p.config.ecomap = EcoMap()
    results = p.skim(open(POMBASE_GPAD, "r"))
    assert len(results) == 1984
    for r in results:
        print(str(r))
        (s, sn, o) = r
        assert o.startswith('GO:')
        assert s.startswith('PomBase:') or s.startswith('PR:')
Code Example #8
def parse_with(f, p):
    p.config.ecomap = EcoMap()
    is_gaf = f == POMBASE
    ont = OntologyFactory().create(ONT)

    if is_gaf:
        # only do ontology checking on GAF parse;
        # this is because ontology is made from GAF
        p.config.ontology = ont
    else:
        p.config.ontology = None

    results = p.parse(open(f, "r"), skipheader=True)
    print(p.report.to_markdown())
    r1 = results[0]
    # TODO: test datafile does not have ECOs yet!!
    assert ecomap.ecoclass_to_coderef(str(
        r1.evidence.type))[0] == 'ISO' or str(
            r1.evidence.type) == 'ECO:0000201'
    assert r1.evidence.with_support_from == [
        association.ConjunctiveSet(
            [association.Curie.from_str('SGD:S000001583')])
    ]
    assert r1.evidence.has_supporting_reference == [
        association.Curie.from_str('GO_REF:0000024')
    ]

    if is_gaf:
        assert r1.subject.label == 'ypf1'
        assert association.ymd_str(r1.date, "") == '20150305'

    for r in results:
        #print(str(r))
        sid = r.subject.id
        prov = r.provided_by
        assert prov == 'PomBase' or prov == 'UniProt'
        assert r.object.id.namespace == "GO"
        assert sid.namespace == 'PomBase' or (not is_gaf
                                              and sid.namespace == 'PR')
        if is_gaf:
            assert str(r.subject.taxon) == 'NCBITaxon:4896'

    # for m in p.report.messages:
    #     print("MESSAGE: {}".format(m))
    print("MESSAGES (sample): {}".format(p.report.messages[0:5]))
    # Messages that aren't about upgrading qualifiers in rule 59 should be 0
    assert len([
        msg for msg in p.report.messages
        if msg["rule"] != 59 and msg["rule"] != 27
    ]) == 0
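The evidence check above maps an ECO class back to a GAF evidence code with ecoclass_to_coderef; the RDF generators later on this page go the other way with coderef_to_ecoclass. A minimal sketch of that round trip using the module-level EcoMap (exact ECO IDs depend on the bundled mapping table):

from ontobio.ecomap import EcoMap

# Sketch of the two EcoMap lookups used elsewhere in this file.
ecomap = EcoMap()
ecomap.mappings()
eco_class = ecomap.coderef_to_ecoclass("ISO")          # GAF code -> ECO class
code, coderef = ecomap.ecoclass_to_coderef(eco_class)  # ECO class -> (code, ref)
print(eco_class, code, coderef)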
Code Example #9
def test_alt_id_repair():
    p = GafParser()
    ont = OntologyFactory().create(ALT_ID_ONT)
    p.config.ecomap = EcoMap()
    p.config.ontology = ont

    gaf = io.StringIO("SGD\tS000000819\tAFG3\t\tGO:1\tPMID:8681382|SGD_REF:S000055187\tIMP\t\tP\tMitochondrial inner membrane m-AAA protease component\tYER017C|AAA family ATPase AFG3|YTA10\tgene\ttaxon:559292\t20170428\tSGD")
    assocs = p.parse(gaf, skipheader=True)
    # GO:1 is obsolete and is replaced_by GO:2, so we should see that class ID.
    assert assocs[0]["object"]["id"] == "GO:2"

    gaf = io.StringIO("SGD\tS000000819\tAFG3\t\tGO:4\tPMID:8681382|SGD_REF:S000055187\tIMP\t\tP\tMitochondrial inner membrane m-AAA protease component\tYER017C|AAA family ATPase AFG3|YTA10\tgene\ttaxon:559292\t20170428\tSGD")
    assocs = p.parse(gaf, skipheader=True)
    # GO:4 is obsolete because it was merged into GO:3, so we should see GO:3
    assert assocs[0]["object"]["id"] == "GO:3"
Code Example #10
File: test_gafparser.py  Project: cbp44/ontobio
def test_skim_gaf_qualifiers():
    p = GafParser()
    p.config.ecomap = EcoMap()
    p.config.remove_double_prefixes = True
    results = p.skim(open(QGAF, "r"))
    for r in results:
        print(str(r))
        (s, sn, o) = r
        assert o.startswith('GO:')
        assert s.startswith('MGI:') or s.startswith('PomBase')
    assert len(results) == 5  # ensure NOTs are skipped

    p.config.exclude_relations = ['contributes_to', 'colocalizes_with']
    results = p.skim(open(QGAF, "r"))
    for r in results:
        (s, sn, o) = r
        assert o.startswith('GO:')
        assert s.startswith('MGI:') or s.startswith('PomBase')
    assert len(results) == 3  # ensure NOTs and excludes relations skipped
Code Example #11
def parse_with(f, p):
    p.config.ecomap = EcoMap()
    is_gaf = f == POMBASE
    ont = OntologyFactory().create(ONT)

    if is_gaf:
        # only do ontology checking on GAF parse;
        # this is because ontology is made from GAF
        p.config.ontology = ont
    else:
        p.config.ontology = None

    results = p.parse(open(f, "r"), skipheader=True)
    print(p.report.to_markdown())
    r1 = results[0]
    # TODO: test datafile does not have ECOs yet!!
    assert r1['evidence']['type'] == 'ISO' or r1['evidence'][
        'type'] == 'ECO:0000201'
    assert r1['evidence']['with_support_from'] == ['SGD:S000001583']
    assert r1['evidence']['has_supporting_reference'] == ['GO_REF:0000024']

    if is_gaf:
        assert r1['subject']['label'] == 'ypf1'
        assert r1['date'] == '20150305'

    for r in results:
        #print(str(r))
        sid = r['subject']['id']
        prov = r['provided_by']
        assert prov == 'PomBase' or prov == 'UniProt'
        assert r['object']['id'].startswith('GO:')
        assert sid.startswith('PomBase:') or (not is_gaf
                                              and sid.startswith('PR'))
        if is_gaf:
            assert r['subject']['taxon']['id'] == 'NCBITaxon:4896'

    # for m in p.report.messages:
    #     print("MESSAGE: {}".format(m))
    print("MESSAGES (sample): {}".format(p.report.messages[0:5]))
    assert len(p.report.messages) == 0
    print(p.report.to_markdown())
Code Example #12
def test_errors_gaf():
    p = GafParser()
    p.config.ecomap = EcoMap()
    assocs = p.parse(open("tests/resources/errors.gaf", "r"))
    msgs = p.report.messages
    print("MESSAGES: {}".format(len(msgs)))
    for m in msgs:
        print("MESSAGE: {}".format(m))
    assert len(msgs) == 15

    # we expect 7 associations to parse successfully
    assert len(assocs) == 7
    from ontobio.io import GafWriter
    w = GafWriter()
    w.write(assocs)
    xs = [x for a in assocs for x in a.get('object_extensions', [])]
    for x in xs:
        print('X: {}'.format(x))
        # ensure that invalid expressions have been eliminated
        assert x['property'] == 'foo'
        assert x['filler'] == 'X:1'
    assert len(xs) == 3
Code Example #13
File: test_gafparser.py  Project: cbp44/ontobio
def test_convert_gaf_to_gpad():
    p = GafParser()
    p.config.ecomap = EcoMap()
    w = GpadWriter()
    p2 = GpadParser()
    convert(POMBASE, p, w, p2)
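convert() is a helper defined elsewhere in the test module, so its body is not shown here. A hypothetical round trip in the same spirit, assuming the writers accept a file= handle (an assumption, not confirmed by these snippets):

import io

# Hypothetical GAF -> GPAD round trip: parse the GAF, serialize as GPAD
# into a buffer, then parse the GPAD text again. file= on the writer is
# an assumption about the writer constructor.
buf = io.StringIO()
gaf_assocs = GafParser().parse(open(POMBASE, "r"), skipheader=True)
GpadWriter(file=buf).write(gaf_assocs)
gpad_assocs = GpadParser().parse(io.StringIO(buf.getvalue()), skipheader=True)
print(len(gaf_assocs), len(gpad_assocs))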
Code Example #14
File: test_gafparser.py  Project: cbp44/ontobio
from ontobio.io import assocparser
from ontobio.io.gpadparser import GpadParser
from ontobio.io import gafparser
from ontobio.io.gafparser import GafParser
from ontobio.io import GafWriter
from ontobio.io.assocwriter import GpadWriter
from ontobio.assoc_factory import AssociationSetFactory
from ontobio.ontol_factory import OntologyFactory
from ontobio.model import association
from ontobio.rdfgen import relations

from ontobio.ecomap import EcoMap

ecomap = EcoMap()
ecomap.mappings()

import tempfile
import logging
import pytest
import io
import json

POMBASE = "tests/resources/truncated-pombase.gaf"
POMBASE_GPAD = "tests/resources/truncated-pombase.gpad"
ONT = "tests/resources/go-truncated-pombase.json"
QGAF = "tests/resources/test-qualifiers.gaf"


def test_skim_gaf():
    p = GafParser()
    p.config.ecomap = EcoMap()
Code Example #15
File: filter_rule.py  Project: deepakunni3/ontobio
    def __init__(self, filter_rule: FilterRule):
        self.filter_rule = filter_rule
        self.ecomap = EcoMap()
Code Example #16
class RdfTransform(object):
    """
    base class for all RDF generators
    """
    def __init__(self, writer=None):
        if writer is None:
            writer = TurtleRdfWriter()
        self.writer = writer
        self.include_subject_info = False
        self.ecomap = EcoMap()
        self._emit_header_done = False
        self.uribase = 'http://example.org/'

    def genid(self):
        return URIRef(uuid.uuid4().urn)

    def blanknode(self):
        return BNode()

    def uri(self, id):
        # allow either atoms or objects
        if isinstance(id, dict):
            return self.uri(id['id'])
        logging.info("Expand: {}".format(id))
        return URIRef(expand_uri(id))

    def emit(self, s, p, o):
        logging.debug("TRIPLE: {} {} {}".format(s, p, o))
        self.writer.add(s, p, o)
        return (s, p, o)

    def emit_type(self, s, t):
        return self.emit(s, RDF.type, t)

    def emit_label(self, s, t):
        return self.emit(s, RDFS.label, t)

    def eco_class(self, code, coderef=None):
        eco_cls_id = self.ecomap.coderef_to_ecoclass(code, coderef)
        logging.debug(self.ecomap._mappings)
        logging.debug('ECO: {},{}->{}'.format(code, coderef, eco_cls_id))
        return self.uri(eco_cls_id)

    def translate_evidence(self, association, stmt):
        """

        ``
        _:1 a Axiom
            subject s
            predicate p
            object o
            evidence [ a ECO ; ...]
        ``
        
        """
        ev = association['evidence']
        ev_id = None
        if 'id' in ev:
            ev_id = self.uri(ev['id'])
        else:
            ev_id = self.genid()

        stmt_id = self.blanknode()  ## OWL reification: must be blank
        (s, p, o) = stmt
        self.emit_type(stmt_id, OWL.Axiom)

        self.emit(stmt_id, OWL.subject, s)
        self.emit(stmt_id, OWL.predicate, p)
        self.emit(stmt_id, OWL.object, o)

        self.emit(stmt_id, self.uri(evt.axiom_has_evidence), ev_id)

        ev_cls = self.eco_class(self.uri(ev['type']))
        self.emit_type(ev_id, OWL.NamedIndividual)
        self.emit_type(ev_id, ev_cls)
        if 'with_support_from' in ev:
            for w in ev['with_support_from']:
                self.emit(ev_id, self.uri(evt.evidence_with_support_from),
                          self.uri(w))
        for ref in ev['has_supporting_reference']:
            self.emit(ev_id, HAS_SUPPORTING_REFERENCE, self.uri(ref))
        if 'with_support_from' in ev:
            for ref in ev['with_support_from']:
                self.emit(ev_id, self.uri(evt.evidence_with_support_from),
                          self.uri(ref))
Code Example #17
class RdfTransform(object):
    """
    base class for all RDF generators
    """
    def __init__(self, writer=None):
        if writer is None:
            writer = TurtleRdfWriter()

        self.writer = writer
        self.include_subject_info = False
        self.ecomap = EcoMap()
        self._emit_header_done = False
        self.uribase = writer.base
        self.ecomap.mappings()
        self.bad_chars_regex = re.compile(r"[^\.:_\-0-9a-zA-Z]")
        self.ro_lookup = dict(relations.label_relation_lookup())

    def blanknode(self):
        return BNode()

    def uri(self, id):
        # allow either atoms or objects
        if isinstance(id, dict):
            return self.uri(id['id'])
        # logger.info("Expand: {}".format(id))

        id = self.bad_chars_regex.sub("_", id)
        uri = curie_util.expand_uri(id, cmaps=[prefix_context])
        if uri != id:
            # If the URI differs from the id, we found a CURIE expansion, so bind the prefix
            prefix = id.split(":")[0]
            self.writer.graph.bind(prefix, prefix_context[prefix])

        return URIRef(uri)

    def lookup_relation(self, label):
        label = label.replace('_', ' ')

        # Return the cached label -> URI or None
        if label in self.ro_lookup:
            return self.uri(self.ro_lookup[label])
        else:
            return None

    def emit(self, s, p, o):
        logger.debug("TRIPLE: {} {} {}".format(s, p, o))
        self.writer.add(s, p, o)
        return (s, p, o)

    def emit_type(self, s, t):
        return self.emit(s, RDF.type, t)

    def emit_label(self, s, o):
        return self.emit(s, RDFS.label, Literal(o))

    def emit_not(self, s, t):
        bn = self.blanknode()
        self.emit_type(bn, OWL.Class)
        self.emit(bn, OWL.complementOf, URIRef(expand_uri(t)))
        return self.emit_type(s, bn)

    def eco_class(self, code, coderef=None):
        eco_cls_id = self.ecomap.coderef_to_ecoclass(code, coderef)
        logger.debug(self.ecomap._mappings)
        logger.debug('ECO: {},{}->{}'.format(code, coderef, eco_cls_id))
        return self.uri(eco_cls_id)

    def translate_evidence(self, association, stmt):
        """

        ``
        _:1 a Axiom
            owl:annotatedSource s
            owl:annotatedProperty p
            owl:annotatedTarget o
            evidence [ a ECO ; ...]
        ``

        """
        ev = association['evidence']
        ev_id = None
        if 'id' in ev:
            ev_id = self.uri(ev['id'])
        else:
            ev_id = genid(base=self.writer.base + '/')

        stmt_id = self.blanknode()  ## OWL reification: must be blank
        (s, p, o) = stmt
        self.emit_type(stmt_id, OWL.Axiom)

        self.emit(stmt_id, OWL.annotatedSource, s)
        self.emit(stmt_id, OWL.annotatedProperty, p)
        self.emit(stmt_id, OWL.annotatedTarget, o)

        self.emit(stmt_id, self.uri(evt.axiom_has_evidence), ev_id)

        ev_cls = self.eco_class(self.uri(ev['type']))
        self.emit_type(ev_id, OWL.NamedIndividual)
        self.emit_type(ev_id, ev_cls)
        if 'with_support_from' in ev:
            for w in ev['with_support_from']:
                self.emit(ev_id, self.uri(evt.evidence_with_support_from),
                          self.uri(w))
        for ref in ev['has_supporting_reference']:
            o = self.uri(ref)
            if ref == expand_uri(ref):
                o = Literal(ref)
            self.emit(ev_id, HAS_SUPPORTING_REFERENCE, o)
        if 'with_support_from' in ev:
            for ref in ev['with_support_from']:
                self.emit(ev_id, self.uri(evt.evidence_with_support_from),
                          self.uri(ref))
Code Example #18
class RdfTransform(object):
    """
    base class for all RDF generators
    """
    def __init__(self, writer=None):
        if writer is None:
            writer = TurtleRdfWriter()
        self.writer = writer
        self.include_subject_info = False
        self.ecomap = EcoMap()
        self._emit_header_done = False
        self.uribase = writer.base
        self.ro = None
        self.ecomap.mappings()

    def blanknode(self):
        return BNode()

    def uri(self, id):
        # allow either atoms or objects
        if isinstance(id, dict):
            return self.uri(id['id'])
        logging.info("Expand: {}".format(id))

        uri = curie_util.expand_uri(id, cmaps=[prefix_context])
        if uri != id:
            # If the URI differs from the id, we found a CURIE expansion, so bind the prefix
            prefix = id.split(":")[0]
            self.writer.graph.bind(prefix, prefix_context[prefix])

        return URIRef(uri)

    def get_relation_ontology(self):
        if self.ro is None:
            ofa = OntologyFactory()
            self.ro = ofa.create('ro')
        return self.ro

    def lookup_relation(self, label):
        ro = self.get_relation_ontology()
        label = label.replace('_', ' ')
        results = [self.uri(x) for x in ro.search(label)]
        if len(results) > 0:
            return results[0]

    def emit(self, s, p, o):
        logging.debug("TRIPLE: {} {} {}".format(s, p, o))
        self.writer.add(s, p, o)
        return (s, p, o)

    def emit_type(self, s, t):
        return self.emit(s, RDF.type, t)

    def emit_label(self, s, t):
        return self.emit(s, RDFS.label, t)

    def eco_class(self, code, coderef=None):
        eco_cls_id = self.ecomap.coderef_to_ecoclass(code, coderef)
        logging.debug(self.ecomap._mappings)
        logging.debug('ECO: {},{}->{}'.format(code, coderef, eco_cls_id))
        return self.uri(eco_cls_id)

    def translate_evidence(self, association, stmt):
        """

        ``
        _:1 a Axiom
            owl:annotatedSource s
            owl:annotatedProperty p
            owl:annotatedTarget o
            evidence [ a ECO ; ...]
        ``

        """
        ev = association['evidence']
        ev_id = None
        if 'id' in ev:
            ev_id = self.uri(ev['id'])
        else:
            ev_id = genid(base=self.writer.base + '/')

        stmt_id = self.blanknode()  ## OWL reification: must be blank
        (s, p, o) = stmt
        self.emit_type(stmt_id, OWL.Axiom)

        self.emit(stmt_id, OWL.annotatedSource, s)
        self.emit(stmt_id, OWL.annotatedProperty, p)
        self.emit(stmt_id, OWL.annotatedTarget, o)

        self.emit(stmt_id, self.uri(evt.axiom_has_evidence), ev_id)

        ev_cls = self.eco_class(self.uri(ev['type']))
        self.emit_type(ev_id, OWL.NamedIndividual)
        self.emit_type(ev_id, ev_cls)
        if 'with_support_from' in ev:
            for w in ev['with_support_from']:
                self.emit(ev_id, self.uri(evt.evidence_with_support_from),
                          self.uri(w))
        for ref in ev['has_supporting_reference']:
            o = self.uri(ref)
            if ref == expand_uri(ref):
                o = Literal(ref)
            self.emit(ev_id, HAS_SUPPORTING_REFERENCE, o)
        if 'with_support_from' in ev:
            for ref in ev['with_support_from']:
                self.emit(ev_id, self.uri(evt.evidence_with_support_from),
                          self.uri(ref))
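The translate_evidence docstrings above describe the evidence attachment as an OWL axiom annotation (owl:Axiom with annotatedSource/Property/Target). A minimal, self-contained rdflib sketch of that reification pattern, with placeholder URIs rather than the ontobio helpers:

from rdflib import Graph, BNode, URIRef
from rdflib.namespace import OWL, RDF

# Sketch of the reification pattern emitted by translate_evidence,
# using placeholder URIs (not taken from the ontobio code).
g = Graph()
s = URIRef("http://example.org/gene/1")
p = URIRef("http://example.org/relation/involved_in")
o = URIRef("http://purl.obolibrary.org/obo/GO_0043623")
g.add((s, p, o))

axiom = BNode()                       # the reification node must be blank
g.add((axiom, RDF.type, OWL.Axiom))
g.add((axiom, OWL.annotatedSource, s))
g.add((axiom, OWL.annotatedProperty, p))
g.add((axiom, OWL.annotatedTarget, o))

evidence = BNode()
g.add((axiom, URIRef("http://example.org/axiom_has_evidence"), evidence))
g.add((evidence, RDF.type, URIRef("http://purl.obolibrary.org/obo/ECO_0000315")))
print(g.serialize(format="turtle"))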