def resolve_keyword(keyword):
    """Return the URIs of disease entities whose name matches *keyword*.

    A SPARQL query selects every entity that is ``obo:DOID_4`` or one of its
    transitive subclasses and whose ``rdfs:label`` or narrow/related/exact
    synonym equals *keyword* in full, ignoring case.

    A diagnostic line is printed when nothing matches or when more than one
    entity matches.  The return value is a list holding the ``?entity`` value
    of every result binding (empty when there is no match).
    """
    # NOTE(review): re.escape() inserts backslashes; confirm that the
    # template's `escape` filter keeps them valid inside the SPARQL string
    # literal below.
    sparql_template = make_template("""
    prefix anno: <http://www.eha.io/types/annotation_prop/>
    prefix oboInOwl: <http://www.geneontology.org/formats/oboInOwl#>
    prefix obo: <http://purl.obolibrary.org/obo/>
    prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    SELECT ?entity
    WHERE {
        BIND (obo:DOID_4 AS ?disease)
        ?entity rdfs:subClassOf* ?disease .
        ?entity oboInOwl:hasNarrowSynonym|oboInOwl:hasRelatedSynonym|oboInOwl:hasExactSynonym|rdfs:label ?label
        FILTER regex(?label, "^({{keyword | escape}})$", "i")
    }
    """)
    rendered = sparql_template.render(keyword=re.escape(keyword))
    matches = sparql_utils.query(rendered).json()['results']['bindings']

    match_count = len(matches)
    if match_count != 1:
        # Only build the printable form when a diagnostic is actually emitted.
        printable = keyword.encode('ascii', 'xmlcharrefreplace')
        if match_count == 0:
            print("no match for", printable)
        else:
            print("multiple matches for", printable)
            print(matches)
    return [match['entity']['value'] for match in matches]
def _print_annotation_text(key, annotation_uri):
    """Print the slice of source text that the given annotation covers.

    Queries the annotation's start/end offsets and the text of its source
    document, then prints ``text[start:end]`` for every row returned.  When
    the query returns no rows, a "could not resolve" notice with *key* and
    *annotation_uri* is printed instead.
    """
    query = make_template("""
    prefix anno: <http://www.eha.io/types/annotation_prop/>
    prefix dep: <http://www.eha.io/types/annotation_prop/dep/>
    prefix con: <http://www.eha.io/types/content/>
    SELECT ?phraseStart ?phraseEnd ?prepStart ?sourceText
    WHERE {
        <{{annotation_uri}}> anno:start ?phraseStart ;
            anno:end ?phraseEnd ;
            anno:source_doc/con:text ?sourceText
    }
    """).render(annotation_uri=annotation_uri)
    rows = sparql_utils.query(query).json()['results']['bindings']
    if not rows:
        print("Could not resolve source text for:")
        print("%s %s" % (key, annotation_uri))
    for row in rows:
        text = row['sourceText']['value']
        start = int(row['phraseStart']['value'])
        end = int(row['phraseEnd']['value'])
        print(text[start:end])


def print_result(result):
    """Pretty-print the bindings of a SPARQL JSON response.

    *result* must expose ``json()`` returning the standard
    ``{'results': {'bindings': [...]}}`` structure.  For every row, each
    variable is printed as ``[name]`` followed by its value:

    * values containing ``;;`` are split on that delimiter and printed as a
      list;
    * values that are annotation URIs are looked up and replaced by the
      text span they cover;
    * anything else is printed verbatim.

    A blank line follows each row and a ``~~--~~--~~`` marker ends the
    output.  All prints use a single parenthesized argument so the function
    behaves the same under Python 2 and Python 3.
    """
    for binding in result.json()['results']['bindings']:
        for key, value in binding.items():
            raw_val = value['value']
            print("[" + key + "]")
            # Check for the delimiter used to combine results in a
            # "group by" query group.
            if ";;" in raw_val:
                print(raw_val.split(";;"))
                continue
            # If the value references an annotation, query it and display
            # the full text.  The lookup lives in a helper so it cannot
            # rebind this function's `result` / `binding` names.
            if raw_val.startswith('http://www.eha.io/types/annotation/'):
                _print_annotation_text(key, raw_val)
            else:
                print(raw_val)
        print("")
    print("~~--~~--~~")
def resolve_keyword(keyword):
    """Look up the disease entities that *keyword* names.

    The SPARQL query considers ``obo:DOID_4`` and all of its transitive
    subclasses, and keeps those whose ``rdfs:label`` or narrow/related/exact
    synonym matches the whole of *keyword* case-insensitively.

    Prints a notice when there are zero or several matches, and returns the
    list of ``?entity`` values from the result bindings.
    """
    # NOTE(review): the keyword is regex-escaped here and passed through the
    # template's `escape` filter; confirm the combination is safe inside a
    # SPARQL string literal.
    escaped_keyword = re.escape(keyword)
    sparql = make_template("""
    prefix anno: <http://www.eha.io/types/annotation_prop/>
    prefix oboInOwl: <http://www.geneontology.org/formats/oboInOwl#>
    prefix obo: <http://purl.obolibrary.org/obo/>
    prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    SELECT ?entity
    WHERE {
        BIND (obo:DOID_4 AS ?disease)
        ?entity rdfs:subClassOf* ?disease .
        ?entity oboInOwl:hasNarrowSynonym|oboInOwl:hasRelatedSynonym|oboInOwl:hasExactSynonym|rdfs:label ?label
        FILTER regex(?label, "^({{keyword | escape}})$", "i")
    }
    """).render(keyword=escaped_keyword)
    response = sparql_utils.query(sparql)
    found = response.json()['results']['bindings']

    if not found:
        print("no match for", keyword.encode('ascii', 'xmlcharrefreplace'))
    elif len(found) > 1:
        print("multiple matches for", keyword.encode('ascii', 'xmlcharrefreplace'))
        print(found)

    entity_uris = []
    for row in found:
        entity_uris.append(row['entity']['value'])
    return entity_uris
?descriptor anno:start ?d_start ; anno:end ?d_end ; anno:root/anno:pos ?pos . FILTER (?pos NOT IN ("X", "PUNCT")) ?target anno:category "diseases" ; anno:start ?t_start ; anno:end ?t_end ; ^dc:relation ?rel . ?rel rdf:label "malaria" . # The descriptor is outside of the target FILTER ( ?d_end <= ?t_start || ?t_end <= ?d_start ) } LIMIT 100 """ result = sparql_utils.query(query) print_result(result) print "Pathogens and the sentences they appear in" query = prefixes+""" SELECT ?phrase ?target WHERE { ?phrase anno:start ?p_start ; anno:end ?p_end ; dep:ROOT ?noop . ?target anno:category "pathogens" ; anno:start ?t_start ; anno:end ?t_end . ?phrase anno:source_doc ?same_source . ?target anno:source_doc ?same_source .
# Upper bound on the number of items to process; a negative value means
# "keep going until the query returns nothing".
max_items = int(args.max_items)
# Selects up to 100 items that have text content but have not yet been
# marked as annotated by eha:spacy_0, in ascending URI order.
article_query_template = make_template("""
prefix con: <http://www.eha.io/types/content/>
prefix anno: <http://www.eha.io/types/annotation_prop/>
prefix eha: <http://www.eha.io/types/>
SELECT ?item_uri ?content
WHERE {
    ?item_uri con:text ?content
    FILTER NOT EXISTS {
        ?item_uri anno:annotated_by eha:spacy_0
    }
}
ORDER BY asc(?item_uri)
LIMIT 100
""")
items_processed = 0
# NOTE(review): the same query is re-run every iteration, so this loop
# presumably relies on create_annotations() recording anno:annotated_by for
# each item it handles -- confirm, otherwise the same rows come back forever.
while max_items < 0 or items_processed < max_items:
    print("Items processed: ", str(items_processed))
    result = sparql_utils.query(article_query_template.render())
    bindings = result.json()['results']['bindings']
    if not bindings:
        print("No more results")
        break
    if max_items >= 0:
        # The query always returns a batch of up to 100 rows; trim it to the
        # remaining budget so the run never exceeds max_items.
        bindings = bindings[:max_items - items_processed]
    items_processed += len(bindings)
    for binding in bindings:
        item_uri = binding['item_uri']['value']
        content = binding['content']['value']
        print("Parsing ", item_uri)
        create_annotations(item_uri, content)
?descriptor anno:start ?d_start ; anno:end ?d_end ; anno:root/anno:pos ?pos . FILTER (?pos NOT IN ("X", "PUNCT")) ?target anno:category "diseases" ; anno:start ?t_start ; anno:end ?t_end ; ^dc:relation ?rel . ?rel rdf:label "malaria" . # The descriptor is outside of the target FILTER ( ?d_end <= ?t_start || ?t_end <= ?d_start ) } LIMIT 100 """ result = sparql_utils.query(query) print_result(result) print "Pathogens and the sentences they appear in" query = prefixes + """ SELECT ?phrase ?target WHERE { ?phrase anno:start ?p_start ; anno:end ?p_end ; dep:ROOT ?noop . ?target anno:category "pathogens" ; anno:start ?t_start ; anno:end ?t_end . ?phrase anno:source_doc ?same_source . ?target anno:source_doc ?same_source .
# Selects up to 100 randomly ordered items that have text content but have
# not yet been marked as annotated by eha:annie_1.
query_template = make_template("""
prefix con: <http://www.eha.io/types/content/>
prefix anno: <http://www.eha.io/types/annotation_prop/>
prefix eha: <http://www.eha.io/types/>
SELECT ?item_uri ?content
WHERE {
    ?item_uri con:text ?content
    # FILTER(strstarts(str(?item_uri), "http://t11.tater.io/documents/"))
    FILTER NOT EXISTS {
        ?item_uri anno:annotated_by eha:annie_1
    }
}
ORDER BY rand()
LIMIT 100
""")
items_processed = 0
# A negative max_items means "no limit": loop until the query returns no rows.
# NOTE(review): the same query is re-run every iteration, so this loop
# presumably relies on create_annotations() recording anno:annotated_by for
# each item it handles -- confirm, otherwise it may never terminate.
while max_items < 0 or items_processed < max_items:
    print("Items processed: ", str(items_processed))
    result = sparql_utils.query(query_template.render())
    bindings = result.json()['results']['bindings']
    if not bindings:
        print("No more results")
        break
    if max_items >= 0:
        # The query always returns a batch of up to 100 rows; trim it to the
        # remaining budget so the run never exceeds max_items.
        bindings = bindings[:max_items - items_processed]
    items_processed += len(bindings)
    for binding in bindings:
        item_uri = binding['item_uri']['value']
        content = binding['content']['value']
        print("Annotating ", item_uri)
        create_annotations(item_uri, content)
# First measurement: pair every ?p1 that is the object of a predicate typed
# anno:dependency_relation with a different "diseases" annotation ?p2 from
# the same source document whose start/end offsets fall inside ?p1's,
# using explicit offset comparisons in the FILTER.
query = prefixes+""" SELECT ?p1 ?p2 WHERE { ?p1 anno:start ?p1start ; anno:end ?p1end ; anno:source_doc ?same_source . ?dep_rel rdf:type anno:dependency_relation . ?parent ?dep_rel ?p1 . ?p2 anno:start ?p2start ; anno:end ?p2end ; anno:source_doc ?same_source ; anno:category "diseases" . FILTER ( ?p1start <= ?p2start && ?p1end >= ?p2end ) FILTER (?p1 != ?p2) } """
# The response body is not inspected; only the elapsed time is reported.
resp = sparql_utils.query(query)
# `start` is presumably set just before this excerpt -- confirm.
print("Finished in", datetime.datetime.now() - start)
# Restart the clock for the second measurement.
start = datetime.datetime.now()
print("Testing query speed with containment predicate...")
# Second measurement: read (?p1, ?p2) pairs directly from anno:contains
# triples instead of comparing offsets.
query = prefixes+""" SELECT ?p1 ?p2 WHERE { ?p1 anno:contains ?p2 } """
resp = sparql_utils.query(query)
print("Finished in", datetime.datetime.now() - start)
import requests
from templater import make_template
import sparql_utils

# Count articles per annotator.  An article is anything with a pro:post
# property; the OPTIONAL clause keeps articles that have no
# anno:annotated_by value, so they are counted under an unbound ?annotator.
result = sparql_utils.query("""
prefix pro: <http://www.eha.io/types/promed/>
prefix eha: <http://www.eha.io/types/>
prefix anno: <http://www.eha.io/types/annotation_prop/>
SELECT ?annotator (count(?article) AS ?articles)
WHERE {
    ?article pro:post ?post .
    OPTIONAL {
        ?article anno:annotated_by ?annotator
    }
}
GROUP BY ?annotator
""")
# A single parenthesized argument prints identically under Python 2 and is
# valid syntax under Python 3.
# NOTE(review): if `result` is a requests response, `.content` is bytes on
# Python 3 and prints as b'...'; consider `.text` once Python 2 is dropped.
print(result.content)
# First measurement: pair every ?p1 that is the object of a predicate typed
# anno:dependency_relation with a different "diseases" annotation ?p2 from
# the same source document whose start/end offsets fall inside ?p1's,
# using explicit offset comparisons in the FILTER.
query = prefixes + """ SELECT ?p1 ?p2 WHERE { ?p1 anno:start ?p1start ; anno:end ?p1end ; anno:source_doc ?same_source . ?dep_rel rdf:type anno:dependency_relation . ?parent ?dep_rel ?p1 . ?p2 anno:start ?p2start ; anno:end ?p2end ; anno:source_doc ?same_source ; anno:category "diseases" . FILTER ( ?p1start <= ?p2start && ?p1end >= ?p2end ) FILTER (?p1 != ?p2) } """
# The response body is not inspected; only the elapsed time is reported.
resp = sparql_utils.query(query)
# `start` is presumably set just before this excerpt -- confirm.
print("Finished in", datetime.datetime.now() - start)
# Restart the clock for the second measurement.
start = datetime.datetime.now()
print("Testing query speed with containment predicate...")
# Second measurement: read (?p1, ?p2) pairs directly from anno:contains
# triples instead of comparing offsets.
query = prefixes + """ SELECT ?p1 ?p2 WHERE { ?p1 anno:contains ?p2 } """
resp = sparql_utils.query(query)
print("Finished in", datetime.datetime.now() - start)