def run_query_new(query, endpoint):
    """Run a SPARQL query and format every result row as tab-separated text.

    Variant of the reporting routine modified for specific queries: the
    count on the first line is half (integer division) the number of rows.

    Keyword arguments:
    query -- the SPARQL query handed to vivotools.vivo_sparql_query
    endpoint -- the SPARQL endpoint, passed as baseURL

    Returns the report text: a count line labelled with the first result
    variable and, when there are rows, a header line naming every variable
    followed by one line per result row.
    """
    data = vivotools.vivo_sparql_query(query, baseURL=endpoint)
    columns = data["head"]["vars"]
    rows = data["results"]["bindings"]

    # Nothing matched: only the zero-count line is reported.
    if not rows:
        return "0 \t\t" + sparql_variable_rename(columns[0]) + "\n"

    pieces = [str(len(rows) // 2), " \\ ",
              sparql_variable_rename(columns[0]), "\n"]

    # Header line: one renamed variable per column.
    for column in columns:
        pieces.append("\t" + sparql_variable_rename(column) + "\t\t\t")
    pieces.append("\n")

    # One line per result row, columns in header order.
    for row in rows:
        for column in columns:
            pieces.append("\t" + row[column]["value"] + "\t")
        pieces.append("\n")

    return "".join(pieces)
def run_query(query, endpoint, query_file):
    """Send a SPARQL query to an endpoint and build a text report.

    Keyword arguments:
    query -- the SPARQL query handed to vivotools.vivo_sparql_query
    endpoint -- the SPARQL endpoint, passed as baseURL
    query_file -- name of the query file; used only in the log message

    Returns a string whose first line is the number of result rows and the
    renamed variable, followed by one indented line per row holding that
    variable's value. The report is rebuilt for each variable in head.vars,
    so only the last variable's report is returned. Returns "" when the
    result declares no variables.
    """
    logging.info("Sending query " + query_file)
    data = vivotools.vivo_sparql_query(query, baseURL=endpoint)
    bindings = data["results"]["bindings"]

    # Start from an empty report so a result without variables cannot leave
    # the name unbound at the return statement.
    report = ""
    for var in data["head"]["vars"]:
        lines = [str(len(bindings)) + "\t\t" +
                 sparql_variable_rename(var) + "\n"]
        # Index rows with the variable name as given; re-encoding it to
        # ASCII would drop characters and miss the key.
        for error_record in bindings:
            lines.append("\t\t\t" + error_record[var]["value"] + "\n")
        report = "".join(lines)
    return report
def make_phone_dictionary(phone_dictionary=None, debug=False):
    """
    Extract all phone numbers in VIVO and organize them into a dictionary
    keyed by uri and each with a dictionary of phone and primary phone

    phone_dictionary -- optional dictionary to update in place; a new one
                        is created when omitted
    debug -- when true, print the query, the row count and up to the first
             two result rows

    Returns the dictionary. Each value holds a 'phone' and/or 'primary' key.
    """
    # A fresh dictionary per call; a mutable default would be shared
    # between calls.
    if phone_dictionary is None:
        phone_dictionary = {}

    query = tempita.Template("""
    SELECT ?uri ?phone ?primary
    WHERE {
        {?uri vivo:phoneNumber ?phone .}
        UNION
        {?uri vivo:primaryPhoneNumber ?primary .}
    }""")
    query = query.substitute()
    result = vt.vivo_sparql_query(query)
    try:
        bindings = list(result["results"]["bindings"])
    except (KeyError, TypeError):
        # No usable result structure: treat as no rows.
        bindings = []
    if debug:
        # There may be fewer than two rows, so slice instead of indexing.
        sample = " ".join("%s" % (row,) for row in bindings[:2])
        print("%s %s %s" % (query, len(bindings), sample))

    # The UNION yields phone and primary phone on separate rows for the
    # same uri, so merge the rows per uri instead of letting the later row
    # replace the earlier one.
    found = {}
    for b in bindings:
        uri = b['uri']['value']
        numbers = {}
        if 'phone' in b:
            numbers['phone'] = b['phone']['value']
        if 'primary' in b:
            numbers['primary'] = b['primary']['value']
        if numbers:
            found.setdefault(uri, {}).update(numbers)
    phone_dictionary.update(found)
    return phone_dictionary
def run_query(query, endpoint, query_file):
    """Query a SPARQL endpoint and return reportable results in text format.

    Keyword arguments:
    query -- the SPARQL query handed to vivotools.vivo_sparql_query
    endpoint -- an open SPARQL endpoint, passed as baseURL
    query_file -- the SPARQL query file name; used only in the log message

    Returns a string whose first line is the number of result rows and the
    renamed variable, followed by one indented line per row holding that
    variable's value. The report is rebuilt for each variable in head.vars,
    so only the last variable's report is returned. Returns "" when the
    result declares no variables.
    """
    logging.info("Sending query " + query_file)
    data = vivotools.vivo_sparql_query(query, baseURL=endpoint)
    bindings = data["results"]["bindings"]

    # Start from an empty report so a result without variables cannot leave
    # the name unbound at the return statement.
    report = ""
    for var in data["head"]["vars"]:
        lines = [str(len(bindings)) + "\t\t" +
                 sparql_variable_rename(var) + "\n"]
        # Index rows with the variable name as given; re-encoding it to
        # ASCII would drop characters and miss the key.
        for error_record in bindings:
            lines.append("\t\t\t" + error_record[var]["value"] + "\n")
        report = "".join(lines)
    return report
def t32_dept_counts(uri):
    """
    Given a URI of a unit, count five things the NIH wants to know about
    the unit for a T32 application

    Returns a dictionary with any of the keys faculty_count, predoc_count,
    postdoc_count, predoc_supported and postdoc_supported, taken from the
    first result row. A count missing from that row is left out; an empty
    dictionary is returned when there is no first row.
    """
    # NOTE(review): the query groups by ?uri, which no pattern binds (the
    # unit is substituted in as <{{uri}}>) -- confirm this is intended.
    query = tempita.Template("""
    #
    # Count the faculty, graduate students and postdocs in a unit
    #
    SELECT
        (COUNT(DISTINCT ?fac) AS ?faculty_count)
        (COUNT(DISTINCT ?pre) AS ?predoc_count)
        (COUNT(DISTINCT ?pos) AS ?postdoc_count)
        (COUNT(DISTINCT ?pre_sup) AS ?predoc_supported)
        (COUNT(DISTINCT ?pos_sup) AS ?postdoc_supported)
    WHERE {
        {
            ?fac ufVivo:homeDept <{{uri}}> .
            ?fac a vivo:FacultyMember .
        }
        UNION
        {
            ?pre ufVivo:homeDept <{{uri}}> .
            ?pre a vivo:GraduateStudent .
        }
        UNION
        {
            ?pos ufVivo:homeDept <{{uri}}> .
            ?pos a vivo:Postdoc .
        }
        UNION
        {
            ?pre_sup ufVivo:homeDept <{{uri}}> .
            ?pre_sup a vivo:GraduateStudent .
            ?pre_sup vivo:hasPrincipalInvestigatorRole ?role .
            ?role vivo:roleIn ?grant .
            ?grant a vivo:Grant .
        }
        UNION
        {
            ?pos_sup ufVivo:homeDept <{{uri}}> .
            ?pos_sup a vivo:Postdoc .
            ?pos_sup vivo:hasPrincipalInvestigatorRole ?role .
            ?role vivo:roleIn ?grant .
            ?grant a vivo:Grant .
        }
    }
    GROUP BY ?uri
    """)
    query = query.substitute(uri=uri)
    result = vt.vivo_sparql_query(query)

    t32 = {}
    try:
        first_row = result['results']['bindings'][0]
    except (KeyError, IndexError, TypeError):
        # No usable first row: nothing to report.
        return t32

    for vname in ['faculty_count', 'predoc_count', 'postdoc_count',
                  'predoc_supported', 'postdoc_supported']:
        try:
            t32[vname] = first_row[vname]['value']
        except (KeyError, TypeError):
            # This count is absent from the row; leave it out.
            pass
    return t32
def run_error_query(query_error):
    """Run an error query and report the first result row.

    query_error -- handed to vivotools.vivo_sparql_query as the query

    Returns "<renamed variable>:" followed by tabs, the variable's value in
    the first result row and a newline. The report is rebuilt for each
    variable in head.vars, so only the last variable's line is returned.
    Returns "" when the result declares no variables.
    """
    # Portable on every platform: a trailing empty component makes
    # os.path.join end the directory with the separator.
    path = os.path.join(os.getcwd(), "error_queries", "")
    # NOTE(review): the path is only logged; query_error itself is what gets
    # sent to the endpoint. Confirm whether the file was meant to be read.
    logging.info("Reading error query from " + path + query_error)
    # NOTE(review): `endpoint` is not a parameter of this function;
    # presumably a module-level name -- verify it is defined.
    data = vivotools.vivo_sparql_query(query_error, baseURL=endpoint)
    print(data)

    # Start from an empty report so a result without variables cannot leave
    # the name unbound at the return statement.
    report = ""
    for var in data["head"]["vars"]:
        report = (sparql_variable_rename(var) + ":" + "\t\t\t" +
                  data["results"]["bindings"][0][var]["value"] + "\n")
    return report
def case0(last,first,middle,debug):
    """
    Find faculty members whose last name matches `last` (case-insensitive
    regex). `first` and `middle` are accepted but not used. When `debug` is
    true, the query and the result are printed. Returns the query result.
    """
    template = tempita.Template("""
    #search on last name
    SELECT ?x ?lname
    WHERE {
        ?x rdf:type core:FacultyMember .
        ?x foaf:lastName ?lname .
        FILTER (regex(?lname, '''{{last}}''', "i"))
    }""")
    sparql = template.substitute(last=last)
    matches = vivotools.vivo_sparql_query(sparql)
    if debug:
        print("%s %s" % (sparql, matches))
    return matches
def case01(last,first,middle,debug):
    """
    Find people that have a ufid and whose last name matches `last`
    (case-insensitive regex). `first` and `middle` are accepted but not
    used. When `debug` is true, the query and the result are printed.
    Returns the query result.
    """
    template = tempita.Template("""
    #search on last name
    SELECT ?x ?lname ?ufid
    WHERE {
        ?x rdf:type foaf:Person .
        ?x foaf:lastName ?lname .
        ?x ufVivo:ufid ?ufid .
        FILTER (regex(?lname, '''{{last}}''', "i"))
    }""")
    sparql = template.substitute(last=last)
    matches = vivotools.vivo_sparql_query(sparql)
    if debug:
        print("%s %s" % (sparql, matches))
    return matches
def find_person(ufid):
    """
    Given a UFID, find the URI of the person that has that ufid

    Returns the uri from the first result row, or None when the result has
    no such row.
    """
    query = tempita.Template("""
    SELECT ?uri
    WHERE {
        ?uri ufVivo:ufid "{{ufid}}" .
    }
    """)
    query = query.substitute(ufid=ufid)
    result = vt.vivo_sparql_query(query)
    try:
        return result['results']['bindings'][0]['uri']['value']
    except (KeyError, IndexError, TypeError):
        # No match, or no usable result structure.
        return None
def find_dept(deptid):
    """
    Given a UF deptid, find the URI of the org that has that deptid

    Returns the uri from the first result row, or None when the result has
    no such row.
    """
    query = tempita.Template("""
    SELECT ?uri
    WHERE {
        ?uri ufVivo:deptID "{{deptid}}" .
    }
    """)
    query = query.substitute(deptid=deptid)
    result = vt.vivo_sparql_query(query)
    try:
        return result['results']['bindings'][0]['uri']['value']
    except (KeyError, IndexError, TypeError):
        # No match, or no usable result structure.
        return None
def case3(last,first,middle,debug):
    """
    Find faculty members by last name, first initial and middle initial.
    All three filters are case-insensitive regexes; `first` and `middle`
    are anchored to the start of the name, `last` is not. Only people with
    a middle name can match. When `debug` is true, the query and the result
    are printed. Returns the query result.
    """
    template = tempita.Template("""
    #search on last, first initial, middle initial
    SELECT ?x ?fname ?lname ?mname
    WHERE {
        ?x rdf:type core:FacultyMember .
        ?x foaf:firstName ?fname .
        ?x foaf:lastName ?lname .
        ?x core:middleName ?mname .
        FILTER (regex(?fname, '''^{{first}}''', "i"))
        FILTER (regex(?mname, '''^{{middle}}''', "i"))
        FILTER (regex(?lname, '''{{last}}''', "i"))
    }""")
    sparql = template.substitute(last=last, first=first, middle=middle)
    matches = vivotools.vivo_sparql_query(sparql)
    if debug:
        print("%s %s" % (sparql, matches))
    return matches
def make_items(debug=False):
    """
    Extract all the papers for the feed and organize them as feed items in
    a list

    debug -- when true, print the query, the row count, up to the first two
             result rows and the cutoff date

    Returns a list of PyRSS2Gen.RSSItem, one per document whose date is on
    or after TODAY minus DAYS_SINCE_PUBLICATION days.
    """
    query = tempita.Template("""SELECT DISTINCT ?x ?dt ?label
    WHERE {
      ?x rdf:type bibo:Document .
      ?x core:dateTimeValue ?dtv .
      ?x rdfs:label ?label .
      ?x ufVivo:dateHarvested ?dh .
      ?dtv core:dateTime ?dt .
      FILTER regex(?dh,"{{expression}}")
    }
    ORDER BY DESC(?dt)""")
    query = query.substitute(expression=make_date_expression())
    result = vivotools.vivo_sparql_query(query)
    try:
        bindings = list(result["results"]["bindings"])
    except (KeyError, TypeError):
        # No usable result structure: treat as no rows.
        bindings = []
    if debug:
        # There may be fewer than two rows, so slice instead of indexing.
        sample = " ".join("%s" % (row,) for row in bindings[:2])
        print("%s %s %s" % (query, len(bindings), sample))

    date_cutoff = TODAY - datetime.timedelta(days=DAYS_SINCE_PUBLICATION)
    if debug:
        print("%s %s" % ("Cutoff date for publications is ", date_cutoff))

    items = []
    for b in bindings:
        title = b['label']['value']
        uri = b['x']['value']
        dt = b['dt']['value']
        # The value is sliced as YYYY-MM-DD; anything after the first ten
        # characters is ignored.
        date_published = datetime.date(int(dt[0:4]), int(dt[5:7]),
                                       int(dt[8:10]))
        if date_published >= date_cutoff:
            # pubDate is the time the item is built, not date_published.
            items.append(PyRSS2Gen.RSSItem(title=title, link=uri,
                                           pubDate=datetime.datetime.now()))
    return items
def run_query_new(query,endpoint):
    """Run a SPARQL query and format every result row as tab-separated text.

    query -- the SPARQL query handed to vivotools.vivo_sparql_query
    endpoint -- the SPARQL endpoint, passed as baseURL

    Returns the report text: a count line labelled with the first result
    variable and, when there are rows, a header line naming every variable
    followed by one line per result row. When there are rows, the list of
    variables is also printed.
    """
    data = vivotools.vivo_sparql_query(query, baseURL=endpoint)
    rows = data["results"]["bindings"]

    # Nothing matched: only the zero-count line is reported.
    if not rows:
        return "0 \t\t" + sparql_variable_rename(data["head"]["vars"][0]) + "\n"

    pieces = [str(len(rows)), " ",
              sparql_variable_rename(data["head"]["vars"][0]), "\n"]
    print(data["head"]["vars"])

    # Header line: one renamed variable per column.
    for column in data["head"]["vars"]:
        pieces.append("\t\t" + sparql_variable_rename(column) + "\t\t\t\t\t")
    pieces.append("\n")

    # One line per result row, columns in header order.
    for row in rows:
        for column in data["head"]["vars"]:
            pieces.append(row[column]["value"] + "\t\t\t\t\t\t")
        pieces.append("\n")

    return "".join(pieces)
def make_data_dictionary(pred="vivo:totalAwardAmount", debug=False):
    """
    Build a dictionary mapping each uri to its value for the predicate
    pred. When several rows share a uri, the value from the last row wins.
    With debug set, the query and the number of result rows are printed.
    """
    sparql = """
    SELECT ?uri ?data
    WHERE {
        ?uri {{pred}} ?data .
    }
    """.replace('{{pred}}', pred)
    response = vivo_sparql_query(sparql)
    if debug:
        print("%s %s" % ("Query = ", sparql))
    rows = response['results']['bindings']
    if debug:
        print("%s %s" % ("Result = ", len(rows)))
    # Pairs are consumed in row order, so later rows replace earlier ones.
    return dict((row['uri']['value'], row['data']['value']) for row in rows)
def run_query(query, endpoint, query_file):
    """Run a counting query and, on a non-zero count, its error query.

    Keyword arguments:
    query -- the SPARQL query handed to vivotools.vivo_sparql_query
    endpoint -- the SPARQL endpoint, passed as baseURL
    query_file -- name of the query file; the error query name is derived
                  from it by dropping the last three characters and
                  appending "_error.rq"

    Returns a dictionary with two keys. "report" is the first-row value and
    the renamed variable, for the last variable in head.vars. "error_report"
    is the output of run_error_query, or "" when every value was zero or
    the error query raised IOError.
    """
    # assume initially there are no errors
    report = ""
    error_report = ""
    error = False
    data = vivotools.vivo_sparql_query(query, baseURL=endpoint)
    for var in data["head"]["vars"]:
        # Index the row with the variable name as given; re-encoding it to
        # ASCII would drop characters and miss the key.
        error_value = data["results"]["bindings"][0][var]["value"]
        # insert variable name and error value
        report = error_value + "\t\t" + sparql_variable_rename(var) + "\n"
        if int(error_value) != 0:
            error = True
    if error:
        # A non-zero value was seen: run the corresponding error query.
        query_error = query_file[:-3] + "_error" + ".rq"
        try:
            error_report = run_error_query(query_error)
        except IOError:
            # query_error already ends in ".rq"; do not append it again.
            logging.critical("Critical error: " + query_error +
                             " does not exist.")
    return {"report": report, "error_report": error_report}
# VIVO URI tester
# usage: python uritester.py (integer) > outputfile
# NOTE(review): the usage line mentions an (integer) count of URIs, but the
# statements below never read a command-line argument -- confirm.
import vivotools

# Print two URIs obtained from vivotools.get_vivo_uri().
for _ in range(2):
    print(vivotools.get_vivo_uri())

query = """
#search on last, first initial, middle initial
SELECT ?x ?fname ?lname ?mname
WHERE {
    ?x rdf:type core:FacultyMember .
    ?x foaf:firstName ?fname .
    ?x foaf:lastName ?lname .
    FILTER (regex(?fname, "^D", "i"))
    FILTER (regex(?lname, "Nelson", "i"))
}"""

# Run the sample query and show the raw result.
result = vivotools.vivo_sparql_query(query)
print(result)
FILTER (?uritype IN (vivo:College, vivo:University, vivo:Department, ufVivo:AdministrativeUnit,vivo:Center, vivo:Institute, vivo:School, vivo:ExtensionUnit, vivo:Library, vivo:Program, vivo:Committee, vivo:Foundation, vivo:Laboratory, vivo:Division, vivo:Company, vivo:Association, vivo:ClinicalOrganization, vivo:Hospital, vivo:Publisher, ufVivo:NonGovernmentalOrganization, vivo:ResearchOrganization, vivo:StudentOrganization, vivo:Team, vivo:Consortium, vivo:Museum, vivo:AcademicDepartment)) . FILTER (!bound(?successor)) . } GROUP BY ?uri ?childuri ORDER BY ?uri ?childuri""" # Data from VIVO SPARQL query org_result = vivo_sparql_query(query)["results"]["bindings"] org_dict = {} for org in org_result: uri = org["uri"]["value"] org_dict[uri] = org_dict.get(uri, 0)+1 print len(org_dict), "orgs" result = {} uri = "http://vivo.ufl.edu/individual/n1278130" # UF #uri = "http://vivo.ufl.edu/individual/n8763427" # CTSI #uri = "http://vivo.ufl.edu/individual/n142500" # CTSI Biobehavioral core result = tree_build(uri, org_result) result["org_count"] = len(org_dict) orgs_file = open("orgs.json", "w") print >>orgs_file, json.dumps(result, indent=4)
the result as a JSON object Version 0.1 MC 2013-12-28 -- Initial version. """ __author__ = "Michael Conlon" __copyright__ = "Copyright 2013, University of Florida" __license__ = "BSD 3-Clause license" __version__ = "0.1" import vivotools as vt import json from datetime import datetime print datetime.now(),"Start" query = """ SELECT ?p ?o WHERE { <http://vivo.ufl.edu/individual/n25562> ?p ?o } ORDER BY ?p """ data=vt.vivo_sparql_query(query,debug=True) # show the encoded query # issue the query, return the data print "Retrieved data:\n" + json.dumps(data, sort_keys=True, indent=4) print "Items found = ",len(data["results"]["bindings"]) print datetime.now(),"Finish"
import json
import vivotools

# Find every person whose last name matches "Conlon" (case-insensitive).
query = """
SELECT ?x ?lname
WHERE {
    ?x rdf:type foaf:Person .
    ?x foaf:lastName ?lname .
    FILTER (regex(?lname,"Conlon","i"))
}
"""

# Print two URIs obtained from vivotools.get_vivo_uri().
for _ in range(2):
    print(vivotools.get_vivo_uri())

# Issue the query and show the returned JSON object.
data = vivotools.vivo_sparql_query(query)
print("Retrieved data:\n" + json.dumps(data, sort_keys=True, indent=4))

# Count the rows, then list the uri and last name of each one.
rows = data["results"]["bindings"]
print("%s %s" % ("Items found = ", len(rows)))
for item in rows:
    print("%s %s" % (item["x"]["value"], item["lname"]["value"]))
def _rows_with_nonzero_count(result):
    """Return the result rows if the first row has a non-zero count, else []."""
    if 'results' not in result or 'bindings' not in result['results']:
        return []
    rows = result['results']['bindings']
    # A query that matches nothing can return no rows at all; check before
    # looking at the first row.
    if not rows or 'count' not in rows[0]:
        return []
    if int(rows[0]['count']['value']) == 0:
        return []
    return rows


def update_conc(conc, concept, debug=False):
    """
    for a concept, update the entry in the concordance for the concept or
    create a new entry if one does not exist.

    conc -- the concordance, a dictionary keyed by concept uri
    concept -- a result row with 'uri', 'concept_name' and 'npub' values
    debug -- accepted but not used

    The entry's 'concepts' and 'authors' dictionaries are each replaced
    only when the corresponding query returns rows with a non-zero count
    in the first row. Returns conc.
    """
    concept_uri = str(concept['uri']['value'])
    concept_name = concept['concept_name']['value']
    npubs = concept['npub']['value']
    if concept_uri in conc:
        entry = conc[concept_uri]
    else:
        entry = {'name': concept_name, 'npubs': npubs,
                 'concepts': {}, 'authors': {}}

    # First we get the concordant concepts

    query = """
    #
    # For a specified concept, find all the concepts that co-occur with the
    # specified concept in one or more academic articles.  For each
    # co-occuring concept, return the name, uri and count of papers in which
    # the concept and the specified concept co-occur
    #
    SELECT ?concept_uri (MIN(DISTINCT ?xconcept_name) AS ?concept_name)
        (COUNT(DISTINCT ?pub_uri) AS ?count)
    WHERE {
        ?pub_uri vivo:hasSubjectArea <{uri}> .
        ?pub_uri a bibo:AcademicArticle .
        ?pub_uri vivo:hasSubjectArea ?concept_uri .
        ?concept_uri rdfs:label ?xconcept_name .
        FILTER(str(?concept_uri) != "{uri}")
    }
    GROUP BY ?concept_uri
    ORDER BY DESC(?count)
    """
    query = query.replace("{uri}", concept_uri)
    rows = _rows_with_nonzero_count(vivo_sparql_query(query))
    if rows:
        print("%s %s" % ('concept', len(rows)))

        # Replace concept content with current content

        entry['concepts'] = dict(
            (row['concept_name']['value'],
             {'concept_uri': row['concept_uri']['value'],
              'count': row['count']['value']})
            for row in rows)

    # Second we get the concordant authors

    query = """
    #
    # For a specified concept, find all the current UF authors that co-occur with the
    # specified concept in one or more academic articles.  For each
    # co-occuring author, return the name, uri and count of papers in which
    # the author and the specified concept co-occur
    #
    SELECT ?author_uri (MIN(DISTINCT ?xauthor_name) AS ?author_name)
        (COUNT(DISTINCT ?pub_uri) AS ?count)
    WHERE {
        ?pub_uri vivo:hasSubjectArea <{uri}> .
        ?pub_uri a bibo:AcademicArticle .
        ?pub_uri vivo:informationResourceInAuthorship ?a .
        ?a vivo:linkedAuthor ?author_uri .
        ?author_uri a ufVivo:UFCurrentEntity .
        ?author_uri rdfs:label ?xauthor_name .
    }
    GROUP BY ?author_uri
    ORDER BY DESC(?count)
    """
    query = query.replace("{uri}", concept_uri)
    rows = _rows_with_nonzero_count(vivo_sparql_query(query))
    if rows:
        print("%s %s" % ('author', len(rows)))

        # Replace author content with current content

        entry['authors'] = dict(
            (row['author_name']['value'],
             {'author_uri': row['author_uri']['value'],
              'count': row['count']['value']})
            for row in rows)

    conc[concept_uri] = entry
    return conc
__author__ = "Michael Conlon"
__copyright__ = "Copyright 2013, University of Florida"
__license__ = "BSD 3-Clause license"
__version__ = "0.1"

import vivotools as vt
import random
from datetime import datetime

# Every individual that is both a faculty member and a current UF entity.
query = """
SELECT ?uri
WHERE {
    ?uri a vivo:FacultyMember .
    ?uri a ufVivo:UFCurrentEntity .
}
"""

print("%s %s" % (datetime.now(), "Gathering Current UF Faculty from VIVO"))
data = vt.vivo_sparql_query(query)
print("%s %s %s" % (datetime.now(), "Current UF Faculty found = ",
                    len(data["results"]["bindings"])))

print("%s %s" % (datetime.now(), "Load data structure with results"))
d = [item["uri"]["value"] for item in data["results"]["bindings"]]

print("%s %s" % (datetime.now(), "Select random sample"))
random.shuffle(d)

print("%s %s" % (datetime.now(), "Show selected faculty by VIVO URI"))
# Show up to 100 URIs; slicing copes with fewer than 100 results, where
# indexing d[0..99] would raise IndexError.
for uri in d[:100]:
    print(uri)
print("%s %s" % (datetime.now(), "Finished"))
def run_error_query(query_error):
    """Run an error query and report the first result row.

    query_error -- handed to vivotools.vivo_sparql_query as the query

    Returns "<renamed variable>:" followed by tabs, the variable's value in
    the first result row and a newline. The report is rebuilt for each
    variable in head.vars, so only the last variable's line is returned.
    Returns "" when the result declares no variables.
    """
    data = vivotools.vivo_sparql_query(query_error)

    # Start from an empty report so a result without variables cannot leave
    # the name unbound at the return statement.
    report = ""
    for var in data["head"]["vars"]:
        # Index the row with the variable name as given; re-encoding it to
        # ASCII would drop characters and miss the key.
        report = (sparql_variable_rename(var) + ":" + "\t\t\t" +
                  data["results"]["bindings"][0][var]["value"] + "\n")
    return report