Exemple #1
0
def resolve_keyword(keyword):
    """Return the URIs of ontology entities whose label or synonym is keyword.

    The SPARQL query considers every entity reachable from obo:DOID_4 through
    zero or more rdfs:subClassOf links and keeps those whose rdfs:label or
    narrow/related/exact synonym matches the keyword as a whole string,
    ignoring case.

    A diagnostic line is printed when nothing matches or when more than one
    binding comes back; the function itself never fails on those cases.

    :param keyword: text to look up; it is regex-escaped before rendering.
    :return: list with the ``entity`` value of every binding (possibly empty).
    """
    template = make_template("""
    prefix anno: <http://www.eha.io/types/annotation_prop/>
    prefix oboInOwl: <http://www.geneontology.org/formats/oboInOwl#>
    prefix obo: <http://purl.obolibrary.org/obo/>
    prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    SELECT ?entity
    WHERE {
        BIND (obo:DOID_4 AS ?disease)
        ?entity rdfs:subClassOf* ?disease .
        ?entity oboInOwl:hasNarrowSynonym|oboInOwl:hasRelatedSynonym|oboInOwl:hasExactSynonym|rdfs:label ?label
        FILTER regex(?label, "^({{keyword | escape}})$", "i")
    }
    """)
    # The keyword is embedded in a regex, so neutralise its metacharacters.
    sparql = template.render(keyword=re.escape(keyword))
    response = sparql_utils.query(sparql)
    matches = response.json()['results']['bindings']
    match_count = len(matches)
    if match_count == 0:
        print("no match for", keyword.encode('ascii', 'xmlcharrefreplace'))
    elif match_count > 1:
        print("multiple matches for",
              keyword.encode('ascii', 'xmlcharrefreplace'))
        print(matches)
    entity_uris = []
    for match in matches:
        entity_uris.append(match['entity']['value'])
    return entity_uris
Exemple #2
0
def print_result(result):
    """Print every binding of a SPARQL JSON result in a readable layout.

    For each binding, every variable is printed as ``[name]`` followed by its
    value, and a ``~~--~~--~~`` separator line closes the binding:

    * a value containing ``;;`` (the delimiter used to combine grouped
      results) is printed as the list of its parts;
    * a value that is an annotation URI is resolved with a second query and
      the annotated slice of the source document text is printed instead;
    * any other value is printed as-is.

    Only single-argument ``print(...)`` calls are used so the output is the
    same whether or not the print function is enabled (Python 2 or 3).

    :param result: response object whose ``json()`` returns the standard
        SPARQL results structure (``results`` -> ``bindings``).
    """
    for row in result.json()['results']['bindings']:
        for key, value in row.items():
            raw_val = value['value']
            print("[" + key + "]")
            # Check for the delimiter used to combine results in a
            # "group by" query group.
            if ";;" in raw_val:
                print(raw_val.split(";;"))
                continue
            # If the value references an annotation, query it and display
            # the full text.
            if raw_val.startswith('http://www.eha.io/types/annotation/'):
                query = make_template("""
                prefix anno: <http://www.eha.io/types/annotation_prop/>
                prefix dep: <http://www.eha.io/types/annotation_prop/dep/>
                prefix con: <http://www.eha.io/types/content/>
                SELECT ?phraseStart ?phraseEnd ?prepStart ?sourceText
                WHERE {
                    <{{annotation_uri}}> anno:start ?phraseStart
                        ; anno:end ?phraseEnd
                        ; anno:source_doc/con:text ?sourceText
                }
                """).render(annotation_uri=raw_val)
                # Use dedicated names here: reusing ``result``/``binding``
                # would clobber the parameter and the outer loop variable.
                annotation_result = sparql_utils.query(query)
                annotation_rows = (
                    annotation_result.json()['results']['bindings'])
                if not annotation_rows:
                    print("Could not resolve source text for:")
                    print("%s %s" % (key, raw_val))
                for annotation_row in annotation_rows:
                    text = annotation_row['sourceText']['value']
                    start = int(annotation_row['phraseStart']['value'])
                    end = int(annotation_row['phraseEnd']['value'])
                    print(text[start:end])
            else:
                print(raw_val)
            print("")
        print("~~--~~--~~")
Exemple #3
0
def print_result(result):
    """Pretty-print the bindings of a SPARQL JSON response.

    Each variable of each binding is written as a ``[name]`` header followed
    by its value; bindings are separated by a ``~~--~~--~~`` line. Values
    holding the ``;;`` group delimiter are shown as a list of parts, and
    values that are annotation URIs are replaced by the slice of source text
    the annotation covers (fetched with a follow-up query).

    All output goes through single-argument ``print(...)`` calls, which
    behave identically as a Python 2 statement and a Python 3 function.

    :param result: response object exposing ``json()`` with the usual
        ``results`` / ``bindings`` layout.
    """
    annotation_prefix = 'http://www.eha.io/types/annotation/'
    for row in result.json()['results']['bindings']:
        for key, value in row.items():
            raw_val = value['value']
            print("[" + key + "]")
            # Check for the delimiter used to combine results in a
            # "group by" query group.
            if ";;" in raw_val:
                print(raw_val.split(";;"))
                continue
            if not raw_val.startswith(annotation_prefix):
                # Plain value: show it followed by the usual blank line.
                print(raw_val)
                print("")
                continue
            # The value references an annotation: query it and display the
            # annotated text instead of the bare URI.
            query = make_template("""
                prefix anno: <http://www.eha.io/types/annotation_prop/>
                prefix dep: <http://www.eha.io/types/annotation_prop/dep/>
                prefix con: <http://www.eha.io/types/content/>
                SELECT ?phraseStart ?phraseEnd ?prepStart ?sourceText
                WHERE {
                    <{{annotation_uri}}> anno:start ?phraseStart
                        ; anno:end ?phraseEnd
                        ; anno:source_doc/con:text ?sourceText
                }
                """).render(annotation_uri=raw_val)
            # Separate names keep the ``result`` parameter and the outer
            # loop variable intact while the follow-up query is processed.
            lookup = sparql_utils.query(query)
            lookup_rows = lookup.json()['results']['bindings']
            if not lookup_rows:
                print("Could not resolve source text for:")
                print("%s %s" % (key, raw_val))
            for lookup_row in lookup_rows:
                text = lookup_row['sourceText']['value']
                start = int(lookup_row['phraseStart']['value'])
                end = int(lookup_row['phraseEnd']['value'])
                print(text[start:end])
            print("")
        print("~~--~~--~~")
def resolve_keyword(keyword):
    """Resolve a keyword to the ontology entities it names.

    Runs a SPARQL query over entities that are (transitive) subclasses of
    obo:DOID_4 and selects those having an rdfs:label or a narrow, related or
    exact synonym equal to the keyword, compared case-insensitively.

    Prints a notice when there is no match or more than one match.

    :param keyword: the text to resolve; regex-escaped before being rendered
        into the query.
    :return: list of the ``entity`` values from the result bindings.
    """
    # Escape first so keyword characters cannot act as regex operators.
    escaped_keyword = re.escape(keyword)
    query_text = make_template("""
    prefix anno: <http://www.eha.io/types/annotation_prop/>
    prefix oboInOwl: <http://www.geneontology.org/formats/oboInOwl#>
    prefix obo: <http://purl.obolibrary.org/obo/>
    prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    SELECT ?entity
    WHERE {
        BIND (obo:DOID_4 AS ?disease)
        ?entity rdfs:subClassOf* ?disease .
        ?entity oboInOwl:hasNarrowSynonym|oboInOwl:hasRelatedSynonym|oboInOwl:hasExactSynonym|rdfs:label ?label
        FILTER regex(?label, "^({{keyword | escape}})$", "i")
    }
    """).render(keyword=escaped_keyword)
    rows = sparql_utils.query(query_text).json()['results']['bindings']
    if not rows:
        print("no match for", keyword.encode('ascii', 'xmlcharrefreplace'))
    elif len(rows) > 1:
        print("multiple matches for", keyword.encode('ascii', 'xmlcharrefreplace'))
        print(rows)
    return [row['entity']['value'] for row in rows]
Exemple #5
0
     ?descriptor anno:start ?d_start
         ; anno:end ?d_end
         ; anno:root/anno:pos ?pos
         .
     FILTER (?pos NOT IN ("X", "PUNCT"))
     ?target anno:category "diseases"
         ; anno:start ?t_start
         ; anno:end ?t_end
         ; ^dc:relation ?rel
         .
     ?rel rdf:label "malaria" .
     # The descriptor is outside of the target
     FILTER ( ?d_end <= ?t_start || ?t_end <= ?d_start )
 } LIMIT 100
 """
 result = sparql_utils.query(query)
 print_result(result)
 print "Pathogens and the sentences they appear in"
 query = prefixes+"""
 SELECT ?phrase ?target
 WHERE {
     ?phrase anno:start ?p_start
         ; anno:end ?p_end
         ; dep:ROOT ?noop
         .
     ?target anno:category "pathogens"
         ; anno:start ?t_start
         ; anno:end ?t_end
         .
     ?phrase anno:source_doc ?same_source .
     ?target anno:source_doc ?same_source .
    max_items = int(args.max_items)
    article_query_template = make_template("""
    prefix con: <http://www.eha.io/types/content/>
    prefix anno: <http://www.eha.io/types/annotation_prop/>
    prefix eha: <http://www.eha.io/types/>
    SELECT ?item_uri ?content
    WHERE {
        ?item_uri con:text ?content
        FILTER NOT EXISTS {
            ?item_uri anno:annotated_by eha:spacy_0
        }
    }
    ORDER BY asc(?item_uri)
    LIMIT 100
    """)
    items_processed = 0
    while max_items < 0 or items_processed < max_items:
        print("Items processed: ", str(items_processed))
        result = sparql_utils.query(article_query_template.render())
        bindings = result.json()['results']['bindings']
        if len(bindings) == 0:
            print("No more results")
            break
        else:
            items_processed += len(bindings)
            for binding in bindings:
                item_uri = binding['item_uri']['value']
                content = binding['content']['value']
                print("Parsing ", item_uri)
                create_annotations(item_uri, content)
Exemple #7
0
     ?descriptor anno:start ?d_start
         ; anno:end ?d_end
         ; anno:root/anno:pos ?pos
         .
     FILTER (?pos NOT IN ("X", "PUNCT"))
     ?target anno:category "diseases"
         ; anno:start ?t_start
         ; anno:end ?t_end
         ; ^dc:relation ?rel
         .
     ?rel rdf:label "malaria" .
     # The descriptor is outside of the target
     FILTER ( ?d_end <= ?t_start || ?t_end <= ?d_start )
 } LIMIT 100
 """
 result = sparql_utils.query(query)
 print_result(result)
 print "Pathogens and the sentences they appear in"
 query = prefixes + """
 SELECT ?phrase ?target
 WHERE {
     ?phrase anno:start ?p_start
         ; anno:end ?p_end
         ; dep:ROOT ?noop
         .
     ?target anno:category "pathogens"
         ; anno:start ?t_start
         ; anno:end ?t_end
         .
     ?phrase anno:source_doc ?same_source .
     ?target anno:source_doc ?same_source .
    query_template = make_template("""
    prefix con: <http://www.eha.io/types/content/>
    prefix anno: <http://www.eha.io/types/annotation_prop/>
    prefix eha: <http://www.eha.io/types/>
    SELECT ?item_uri ?content
    WHERE {
        ?item_uri con:text ?content
        # FILTER(strstarts(str(?item_uri), "http://t11.tater.io/documents/"))
        FILTER NOT EXISTS {
            ?item_uri anno:annotated_by eha:annie_1
        }
    }
    ORDER BY rand()
    LIMIT 100
    """)
    items_processed = 0
    while max_items < 0 or items_processed < max_items:
        print("Items processed: ", str(items_processed))
        result = sparql_utils.query(query_template.render())
        bindings = result.json()['results']['bindings']
        if len(bindings) == 0:
            print("No more results")
            break
        else:
            items_processed += len(bindings)
            for binding in bindings:
                item_uri = binding['item_uri']['value']
                content = binding['content']['value']
                print("Annotating ", item_uri)
                create_annotations(item_uri, content)
        query = prefixes+"""
        SELECT ?p1 ?p2
        WHERE {
            ?p1 anno:start ?p1start
                ; anno:end ?p1end
                ; anno:source_doc ?same_source
                .
            ?dep_rel rdf:type anno:dependency_relation .
            ?parent ?dep_rel ?p1 .
            ?p2 anno:start ?p2start
                ; anno:end ?p2end
                ; anno:source_doc ?same_source
                ; anno:category "diseases"
                .
            FILTER ( ?p1start <= ?p2start && ?p1end >= ?p2end )
            FILTER (?p1 != ?p2)
        }
        """
        resp = sparql_utils.query(query)
        print("Finished in", datetime.datetime.now() - start)
        start = datetime.datetime.now()
        print("Testing query speed with containment predicate...")
        query = prefixes+"""
        SELECT ?p1 ?p2
        WHERE {
            ?p1 anno:contains ?p2
        }
        """
        resp = sparql_utils.query(query)
        print("Finished in", datetime.datetime.now() - start)
Exemple #10
0
import requests
from templater import make_template
import sparql_utils

# Report how many articles each annotator has processed.
# Every ?article that has a pro:post is counted; the annotated_by pattern is
# OPTIONAL, so articles without any annotator are still counted, in a group
# whose ?annotator is unbound.
result = sparql_utils.query("""
prefix pro: <http://www.eha.io/types/promed/>
prefix eha: <http://www.eha.io/types/>
prefix anno: <http://www.eha.io/types/annotation_prop/>
SELECT
    ?annotator
    (count(?article) AS ?articles)
WHERE {
    ?article pro:post ?post .
    OPTIONAL {
        ?article anno:annotated_by ?annotator
    }
}
GROUP BY ?annotator
""")
# Parenthesised single-argument form: prints the raw response body and is
# valid both as a Python 2 print statement and a Python 3 function call.
print(result.content)
Exemple #11
0
    query_template = make_template("""
    prefix con: <http://www.eha.io/types/content/>
    prefix anno: <http://www.eha.io/types/annotation_prop/>
    prefix eha: <http://www.eha.io/types/>
    SELECT ?item_uri ?content
    WHERE {
        ?item_uri con:text ?content
        # FILTER(strstarts(str(?item_uri), "http://t11.tater.io/documents/"))
        FILTER NOT EXISTS {
            ?item_uri anno:annotated_by eha:annie_1
        }
    }
    ORDER BY rand()
    LIMIT 100
    """)
    items_processed = 0
    while max_items < 0 or items_processed < max_items:
        print("Items processed: ", str(items_processed))
        result = sparql_utils.query(query_template.render())
        bindings = result.json()['results']['bindings']
        if len(bindings) == 0:
            print("No more results")
            break
        else:
            items_processed += len(bindings)
            for binding in bindings:
                item_uri = binding['item_uri']['value']
                content = binding['content']['value']
                print("Annotating ", item_uri)
                create_annotations(item_uri, content)
Exemple #12
0
        query = prefixes + """
        SELECT ?p1 ?p2
        WHERE {
            ?p1 anno:start ?p1start
                ; anno:end ?p1end
                ; anno:source_doc ?same_source
                .
            ?dep_rel rdf:type anno:dependency_relation .
            ?parent ?dep_rel ?p1 .
            ?p2 anno:start ?p2start
                ; anno:end ?p2end
                ; anno:source_doc ?same_source
                ; anno:category "diseases"
                .
            FILTER ( ?p1start <= ?p2start && ?p1end >= ?p2end )
            FILTER (?p1 != ?p2)
        }
        """
        resp = sparql_utils.query(query)
        print("Finished in", datetime.datetime.now() - start)
        start = datetime.datetime.now()
        print("Testing query speed with containment predicate...")
        query = prefixes + """
        SELECT ?p1 ?p2
        WHERE {
            ?p1 anno:contains ?p2
        }
        """
        resp = sparql_utils.query(query)
        print("Finished in", datetime.datetime.now() - start)