Esempio n. 1
0
def run_query_new(query, endpoint):
    """Run a SPARQL query and format every result row as tab-separated text.

    Variant tailored to specific queries: the count on the first line is
    half the number of result rows.

    Keyword arguments:
    query -- the SPARQL query handed to vivotools.vivo_sparql_query
    endpoint -- the SPARQL endpoint, passed as baseURL

    Returns a string: a summary line, then (when there are rows) a line of
    renamed variable names and one line per result row.
    """
    response = vivotools.vivo_sparql_query(query, baseURL=endpoint)
    rows = response["results"]["bindings"]
    columns = response["head"]["vars"]

    # Nothing matched: a single "0" summary line is the whole report.
    if not rows:
        return "0 \t\t" + sparql_variable_rename(columns[0]) + "\n"

    # Summary line; the separator between count and name is 13 spaces.
    pieces = [str(len(rows) / 2), " " * 13,
              sparql_variable_rename(columns[0]), "\n"]

    # Column heading line.
    for column in columns:
        pieces.append("\t" + sparql_variable_rename(column) + "\t\t\t")
    pieces.append("\n")

    # One line per result row, values in variable order.
    for row in rows:
        for column in columns:
            pieces.append("\t" + row[column]["value"] + "\t")
        pieces.append("\n")
    return "".join(pieces)
Esempio n. 2
0
def run_query(query, endpoint, query_file):
    """Run a SPARQL query and return a text report of the values it found.

    Keyword arguments:
    query -- the SPARQL query handed to vivotools.vivo_sparql_query
    endpoint -- the SPARQL endpoint, passed as baseURL
    query_file -- name of the query file, used only in the log message

    Returns a string: a header line with the number of result rows and the
    renamed variable, followed by one indented line per row holding that
    variable's value.  The report is rebuilt for each variable in the
    response, so only the last variable's report is returned; a response
    with no variables yields an empty string.
    """
    logging.info("Sending query " + query_file)
    data = vivotools.vivo_sparql_query(query, baseURL=endpoint)
    bindings = data["results"]["bindings"]

    # Start from an empty report so a response without variables returns ""
    # instead of raising UnboundLocalError at the return statement.
    report = ""
    for var in data["head"]["vars"]:
        # With zero rows this is just the "0" header line.
        report = (str(len(bindings)) + "\t\t" +
                  sparql_variable_rename(var) + "\n")
        key = var.encode('ascii', 'ignore')
        for error_record in bindings:
            report += "\t\t\t" + error_record[key]["value"] + "\n"
    return report
def make_phone_dictionary(phone_dictionary={}, debug=False):
    """
    Extract all phone numbers in VIVO and organize them into a dictionary
    keyed by uri and each with a dictionary of phone and primary phone
    """
    query = tempita.Template("""
    SELECT ?uri ?phone ?primary
    WHERE {
    {?uri vivo:phoneNumber ?phone .}
    UNION {?uri vivo:primaryPhoneNumber ?primary .}
    }""")
    query = query.substitute()
    result = vt.vivo_sparql_query(query)
    try:
        count = len(result["results"]["bindings"])
    except:
        count = 0
    if debug:
        print query, count,\
        result["results"]["bindings"][0], result["results"]["bindings"][1]
    #
    i = 0
    while i < count:
        b = result["results"]["bindings"][i]
        uri = b['uri']['value']
        dict = {}
        if 'phone' in b:
            dict['phone'] = b['phone']['value']
        if 'primary' in b:
            dict['primary'] = b['primary']['value']
        if dict != {}:
            phone_dictionary[uri] = dict
        i = i + 1
    return phone_dictionary
Esempio n. 4
0
def run_query(query, endpoint, query_file):
    """Query a SPARQL endpoint and return reportable results in text format.

    Keyword arguments:
    query -- the SPARQL query handed to vivotools.vivo_sparql_query
    endpoint -- an open SPARQL endpoint, passed as baseURL
    query_file -- the SPARQL query file name, used only for logging

    Returns a string: a header line with the row count and the renamed
    variable, then one indented line per result row with that variable's
    value.  The report is rebuilt for every variable in the response, so
    only the last variable's report is returned; with no variables the
    result is an empty string.
    """
    logging.info("Sending query " + query_file)
    data = vivotools.vivo_sparql_query(query, baseURL=endpoint)
    rows = data["results"]["bindings"]

    # Initialised up front: without this, a response that lists no
    # variables hit the return statement with report unbound.
    report = ""
    for var in data["head"]["vars"]:
        header = str(len(rows)) + "\t\t" + sparql_variable_rename(var) + "\n"
        key = var.encode('ascii', 'ignore')
        # Zero rows leaves just the "0" header line.
        report = header + "".join(
            "\t\t\t" + row[key]["value"] + "\n" for row in rows)
    return report
def t32_dept_counts(uri):
    """
    Given a URI of a unit, count five things the NIH wants to know about
    the unit for a T32 application

    Returns a dictionary with whichever of 'faculty_count', 'predoc_count',
    'postdoc_count', 'predoc_supported' and 'postdoc_supported' are present
    in the first result row.  Values are stored exactly as returned in the
    binding's 'value' field.  Missing counts are omitted; an empty or
    malformed result gives an empty dictionary.
    """
    query = tempita.Template("""
#
# Count the faculty, graduate students and postdocs in a unit
#
    SELECT
           (COUNT(DISTINCT ?fac) AS ?faculty_count)
           (COUNT(DISTINCT ?pre) AS ?predoc_count)
           (COUNT(DISTINCT ?pos) AS ?postdoc_count)
           (COUNT(DISTINCT ?pre_sup) AS ?predoc_supported)
           (COUNT(DISTINCT ?pos_sup) AS ?postdoc_supported)
    WHERE {
        {
        ?fac ufVivo:homeDept <{{uri}}> .
        ?fac a vivo:FacultyMember .
        }
        UNION {
        ?pre ufVivo:homeDept <{{uri}}> .
        ?pre a vivo:GraduateStudent .
        }
        UNION {
        ?pos ufVivo:homeDept <{{uri}}> .
        ?pos a vivo:Postdoc .
        }
        UNION {
        ?pre_sup ufVivo:homeDept <{{uri}}> .
        ?pre_sup a vivo:GraduateStudent .
        ?pre_sup vivo:hasPrincipalInvestigatorRole ?role .
        ?role vivo:roleIn ?grant .
        ?grant a vivo:Grant .
        }
        UNION {
        ?pos_sup ufVivo:homeDept <{{uri}}> .
        ?pos_sup a vivo:Postdoc .
        ?pos_sup vivo:hasPrincipalInvestigatorRole ?role .
        ?role vivo:roleIn ?grant .
        ?grant a vivo:Grant .
        }
    }
    GROUP BY ?uri
    """)
    query = query.substitute(uri=uri)
    result = vt.vivo_sparql_query(query)

    # Only the first row is read.  Catch just the lookup failures the old
    # bare except was there for, so unrelated errors are no longer hidden.
    try:
        row = result['results']['bindings'][0]
    except (KeyError, IndexError, TypeError):
        row = {}

    t32 = {}
    for vname in ['faculty_count', 'predoc_count', 'postdoc_count',
                  'predoc_supported','postdoc_supported']:
        try:
            t32[vname] = row[vname]['value']
        except (KeyError, TypeError):
            # Count absent from the row: leave it out of the result.
            pass
    return t32
Esempio n. 6
0
def run_error_query(query_error):
    # portable for *NIX/Windows
    if os.name == "nt":
        path = os.getcwd() + "\\error_queries\\"
    elif os.name == "posix":
        path = os.getcwd() + "/error_queries/"
    logging.info("Reading error query from " + path + query_error)
    data = vivotools.vivo_sparql_query(query_error, baseURL=endpoint)
    print data
    for var in data["head"]["vars"]:
        report = sparql_variable_rename(var) + ":" +"\t\t\t" + data["results"]["bindings"][0][var.encode('ascii', 'ignore')]["value"] + "\n"
    return report
def case0(last,first,middle,debug):
    query = tempita.Template("""
#search on last name
SELECT ?x ?lname WHERE
{
?x rdf:type core:FacultyMember .
?x foaf:lastName ?lname .
FILTER (regex(?lname, '''{{last}}''', "i"))
}""")
    query = query.substitute(last=last)
    result = vivotools.vivo_sparql_query(query)
    if debug:
        print query,result
    return result
def case01(last,first,middle,debug):
    query = tempita.Template("""
#search on last name
SELECT ?x ?lname ?ufid WHERE
{
?x rdf:type foaf:Person .
?x foaf:lastName ?lname .
?x ufVivo:ufid ?ufid .
FILTER (regex(?lname, '''{{last}}''', "i"))
}""")
    query = query.substitute(last=last)
    result = vivotools.vivo_sparql_query(query)
    if debug:
        print query,result
    return result
def find_person(ufid):
    """
    Given a UFID, find the URI of the person that has that ufid

    Returns the URI from the first result row, or None when the query
    returns no usable row.
    """
    query = tempita.Template("""
    SELECT ?uri
    WHERE {
        ?uri ufVivo:ufid "{{ufid}}" .
    }
    """)
    query = query.substitute(ufid=ufid)
    result = vt.vivo_sparql_query(query)
    try:
        uri = result['results']['bindings'][0]['uri']['value']
    except (KeyError, IndexError, TypeError):
        # No match or malformed result.  Other exceptions are real errors
        # and are allowed to propagate rather than being swallowed.
        uri = None
    return uri
Esempio n. 10
0
def find_dept(deptid):
    """
    Given a UF deptid, find the URI of the org that has that deptid

    Returns the URI from the first result row, or None when the query
    returns no usable row.
    """
    query = tempita.Template("""
    SELECT ?uri
    WHERE {
        ?uri ufVivo:deptID "{{deptid}}" .
    }
    """)
    query = query.substitute(deptid=deptid)
    result = vt.vivo_sparql_query(query)
    try:
        uri = result['results']['bindings'][0]['uri']['value']
    except (KeyError, IndexError, TypeError):
        # No match or malformed result.  Other exceptions are real errors
        # and are allowed to propagate rather than being swallowed.
        uri = None
    return uri
def case3(last,first,middle,debug):
    query = tempita.Template("""
#search on last, first initial, middle initial
SELECT ?x ?fname ?lname ?mname WHERE
{
?x rdf:type core:FacultyMember .
?x foaf:firstName ?fname .
?x foaf:lastName ?lname .
?x core:middleName ?mname .
FILTER (regex(?fname, '''^{{first}}''', "i"))
FILTER (regex(?mname, '''^{{middle}}''', "i"))
FILTER (regex(?lname, '''{{last}}''', "i"))
}""")
    query = query.substitute(last=last,first=first,middle=middle)
    result = vivotools.vivo_sparql_query(query)
    if debug:
        print query,result
    return result
def make_items(debug=False):
    """
    Extract all the papers for the feed and organize them as feed items
    in a list
    """
    query = tempita.Template("""SELECT DISTINCT ?x ?dt ?label WHERE {
      ?x rdf:type bibo:Document .
      ?x core:dateTimeValue ?dtv .
      ?x rdfs:label ?label . 
      ?x ufVivo:dateHarvested ?dh .
      ?dtv core:dateTime ?dt .
      FILTER regex(?dh,"{{expression}}")
      }
      ORDER BY DESC(?dt)""")
    query = query.substitute(expression=make_date_expression())
    #print query
    result = vivotools.vivo_sparql_query(query)
    #print result
    try:
        count = len(result["results"]["bindings"])
    except:
        count = 0
    if debug:
        print query, count, result["results"]["bindings"][0],\
            result["results"]["bindings"][1]
    i = 0
    date_cutoff = TODAY - datetime.timedelta(days=DAYS_SINCE_PUBLICATION)
    if debug:
        print "Cutoff date for publications is ", date_cutoff
    items = []
    while i < count:
        b = result["results"]["bindings"][i]
        title = b['label']['value']
        uri = b['x']['value']
        dt = b['dt']['value']
        date_published = datetime.date(int(dt[0:4]), int(dt[5:7]),
                                       int(dt[8:10]))
        if date_published >= date_cutoff:
            items.append(PyRSS2Gen.RSSItem(title=title,
                                           link=uri,
                                           pubDate=datetime.datetime.now()))
        i = i + 1
    return items
Esempio n. 13
0
def run_query_new(query,endpoint):
    error = False
    result=""
    data = vivotools.vivo_sparql_query(query, baseURL = endpoint)
    #print data
    if (len(data["results"]["bindings"]) == 0):
        result = "0 \t\t" + (sparql_variable_rename(data["head"]["vars"][0])) + "\n"
        return result
    else:
        result += str(len(data["results"]["bindings"])) +" "+ (sparql_variable_rename(data["head"]["vars"][0])) + "\n"
        print data["head"]["vars"]
        for var in data["head"]["vars"]:
            result += ("\t\t"+sparql_variable_rename(var)) + "\t\t\t\t\t"
        result += "\n"
        for item in data["results"]["bindings"]:
            for var in data["head"]["vars"]:
                result += item[var]["value"] + "\t\t\t\t\t\t"
            result += "\n"
        #print result
        return result
def make_data_dictionary(pred="vivo:totalAwardAmount", debug=False):
    """
    Create a dictionary of the specified data pred.  Key is uri.
    """
    query = """
    SELECT ?uri ?data
    WHERE {
        ?uri {{pred}} ?data .
    }
    """
    query = query.replace('{{pred}}', pred)
    result = vivo_sparql_query(query)
    if debug:
        print "Query = ", query
        print "Result = ", len(result['results']['bindings'])
    dictionary = {}
    for row in result['results']['bindings']:
        uri = row['uri']['value']
        data = row['data']['value']
        dictionary[uri] = data
    return dictionary
def make_data_dictionary(pred="vivo:totalAwardAmount", debug=False):
    """
    Create a dictionary of the specified data pred.  Key is uri.
    """
    query = """
    SELECT ?uri ?data
    WHERE {
        ?uri {{pred}} ?data .
    }
    """
    query = query.replace('{{pred}}', pred)
    result = vivo_sparql_query(query)
    if debug:
        print "Query = ", query
        print "Result = ", len(result['results']['bindings'])
    dictionary = {}
    for row in result['results']['bindings']:
        uri = row['uri']['value']
        data = row['data']['value']
        dictionary[uri] = data
    return dictionary
Esempio n. 16
0
def run_query(query, endpoint, query_file):
    """Run a count query and, if any count is non-zero, its error query.

    query -- the SPARQL query handed to vivotools.vivo_sparql_query
    endpoint -- the SPARQL endpoint, passed as baseURL
    query_file -- name of the query file; its last three characters are
        replaced by "_error.rq" to name the matching error query

    Returns {"report": ..., "error_report": ...}.  "report" is the value
    and renamed name of the last variable in the first result row.
    "error_report" is the output of run_error_query, or "" when every
    value is zero or the error query raised IOError.
    """
    report = ""
    error_report = ""
    error = False
    data = vivotools.vivo_sparql_query(query, baseURL=endpoint)
    rows = data["results"]["bindings"]
    for var in data["head"]["vars"]:
        error_value = rows[0][var.encode('ascii', 'ignore')]["value"]
        # Value first, then the variable name.
        report = error_value + "\t\t" + sparql_variable_rename(var) + "\n"
        if int(error_value) != 0:
            error = True

    if error:
        # A non-zero value was seen, so run the corresponding error query.
        # This is done once here: inside the loop it was repeated for every
        # variable after the first non-zero one, with the same argument.
        query_error = str(query_file[:-3] + "_error" + ".rq")
        try:
            error_report = run_error_query(query_error)
        except IOError:
            # query_error already ends in ".rq"; the old message appended
            # the extension a second time.
            logging.critical("Critical error: " + query_error +
                             " does not exist.")
    return {"report":report, "error_report":error_report}
# VIVO URI tester
# usage: python uritester.py (integer) > outputfile
# specify the number of URIs desired in (integer), and write to outputfile

import vivotools
print vivotools.get_vivo_uri()
print vivotools.get_vivo_uri()

query="""
#search on last, first initial, middle initial
SELECT ?x ?fname ?lname ?mname WHERE
{
?x rdf:type core:FacultyMember .
?x foaf:firstName ?fname .
?x foaf:lastName ?lname .
FILTER (regex(?fname, "^D", "i"))
FILTER (regex(?lname, "Nelson", "i"))
}"""
result = vivotools.vivo_sparql_query(query)
print result
Esempio n. 18
0
        FILTER (?uritype IN (vivo:College, vivo:University, vivo:Department,
            ufVivo:AdministrativeUnit,vivo:Center, vivo:Institute, vivo:School, vivo:ExtensionUnit, vivo:Library, vivo:Program, vivo:Committee,
            vivo:Foundation, vivo:Laboratory, vivo:Division, vivo:Company,
            vivo:Association, vivo:ClinicalOrganization, vivo:Hospital,
            vivo:Publisher, ufVivo:NonGovernmentalOrganization,
            vivo:ResearchOrganization,
            vivo:StudentOrganization, vivo:Team, vivo:Consortium,
            vivo:Museum, vivo:AcademicDepartment)) .
        FILTER (!bound(?successor)) .
        }
        GROUP BY ?uri ?childuri
        ORDER BY ?uri ?childuri"""

# Data from VIVO SPARQL query

org_result = vivo_sparql_query(query)["results"]["bindings"]

org_dict = {}
for org in org_result:
    uri = org["uri"]["value"]
    org_dict[uri] = org_dict.get(uri, 0)+1
print len(org_dict), "orgs"

result = {}
uri = "http://vivo.ufl.edu/individual/n1278130" # UF
#uri = "http://vivo.ufl.edu/individual/n8763427" # CTSI
#uri = "http://vivo.ufl.edu/individual/n142500" # CTSI Biobehavioral core
result = tree_build(uri, org_result)
result["org_count"] = len(org_dict)
orgs_file = open("orgs.json", "w")
print >>orgs_file, json.dumps(result, indent=4)
Esempio n. 19
0
    the result as a JSON object

    Version 0.1 MC 2013-12-28
    --  Initial version.

"""

__author__      = "Michael Conlon"
__copyright__   = "Copyright 2013, University of Florida"
__license__     = "BSD 3-Clause license"
__version__     = "0.1"

import vivotools as vt
import json
from datetime import datetime

print datetime.now(),"Start"

query = """
    SELECT ?p ?o
    WHERE {
      <http://vivo.ufl.edu/individual/n25562> ?p ?o
    }
    ORDER BY ?p
    """

data=vt.vivo_sparql_query(query,debug=True) # show the encoded query                                # issue the query, return the data
print "Retrieved data:\n" + json.dumps(data, sort_keys=True, indent=4)
print "Items found = ",len(data["results"]["bindings"])
print datetime.now(),"Finish"
import json, vivotools

query = """
    SELECT ?x ?lname WHERE
    {
    ?x rdf:type foaf:Person .
    ?x foaf:lastName ?lname .
    FILTER (regex(?lname,"Conlon","i"))
    }
"""

print vivotools.get_vivo_uri()
print vivotools.get_vivo_uri()
data=vivotools.vivo_sparql_query(query)                                 # issue the query, return the data
print "Retrieved data:\n" + json.dumps(data, sort_keys=True, indent=4)  # show the returned json object
print "Items found = ",len(data["results"]["bindings"])                 # count the items in the result set
for item in data["results"]["bindings"]:                                # for each item, show the uri and last name
    print item["x"]["value"],item["lname"]["value"]

Esempio n. 21
0
def update_conc(conc, concept, debug=False):
    """
    for a concept, update the entry in the concordance for the concept
    or create a new entry if one does not exist.
    """
    concept_uri = str(concept['uri']['value'])
    concept_name = concept['concept_name']['value']
    npubs = concept['npub']['value']
    if concept_uri in conc:
        entry = conc[concept_uri]
    else:
        entry = {'name' : concept_name,
                 'npubs' : npubs,
                 'concepts' : {},
                 'authors': {}}

    #   First we get the concordant concepts

    query = """
    #
    #   For a specified concept, find all the concepts that co-occur with the
    #   specified concept in one or more academic articles.  For each
    #   co-occuring concept, return the name, uri and count of papers in which
    #   the concept and the specified concept co-occur
    #
    SELECT ?concept_uri (MIN(DISTINCT ?xconcept_name) AS ?concept_name)
        (COUNT(DISTINCT ?pub_uri) AS ?count)
    WHERE {
        ?pub_uri vivo:hasSubjectArea <{uri}> .
        ?pub_uri a bibo:AcademicArticle .
        ?pub_uri vivo:hasSubjectArea ?concept_uri .
        ?concept_uri rdfs:label ?xconcept_name .
        FILTER(str(?concept_uri) !=
            "{uri}")
    }
    GROUP BY ?concept_uri
    ORDER BY DESC(?count)
    """
    query = query.replace("{uri}", concept_uri)
    result = vivo_sparql_query(query)
    if 'results' in result and 'bindings' in result['results'] and \
       'count' in result['results']['bindings'][0] and \
       int(result['results']['bindings'][0]['count']['value']) != 0:
        rows = result['results']['bindings']
        print 'concept',len(rows)

        # Replace concept content with current content

        concept_dict = {}
        for row in rows:
            concept_name = row['concept_name']['value']
            concept_dict[concept_name] = {'concept_uri':
                                          row['concept_uri']['value'],
                                          'count':
                                          row['count']['value']}
        entry['concepts'] = concept_dict
          
    #   Second we get the concordant authors

        query = """
    #
    #   For a specified concept, find all the current UF authors that co-occur with the
    #   specified concept in one or more academic articles.  For each
    #   co-occuring author, return the name, uri and count of papers in which
    #   the author and the specified concept co-occur
    #
    SELECT ?author_uri (MIN(DISTINCT ?xauthor_name) AS ?author_name)
        (COUNT(DISTINCT ?pub_uri) AS ?count)
    WHERE {
        ?pub_uri vivo:hasSubjectArea <{uri}> .
        ?pub_uri a bibo:AcademicArticle .
        ?pub_uri vivo:informationResourceInAuthorship ?a .
        ?a vivo:linkedAuthor ?author_uri .
        ?author_uri a ufVivo:UFCurrentEntity .
        ?author_uri rdfs:label ?xauthor_name .
    }
    GROUP BY ?author_uri
    ORDER BY DESC(?count)
    """
    query = query.replace("{uri}", concept_uri)
    result = vivo_sparql_query(query)
    if 'results' in result and 'bindings' in result['results'] and \
       'count' in result['results']['bindings'][0] and \
       int(result['results']['bindings'][0]['count']['value']) != 0:
        rows = result['results']['bindings']
        print 'author',len(rows)

        # Replace concept content with current content

        author_dict = {}
        for row in rows:
            author_name = row['author_name']['value']
            author_dict[author_name] = {'author_uri':
                                          row['author_uri']['value'],
                                          'count':
                                          row['count']['value']}
        entry['authors'] = author_dict
       
 
    conc[concept_uri] = entry
    return conc
Esempio n. 22
0
__author__      = "Michael Conlon"
__copyright__   = "Copyright 2013, University of Florida"
__license__     = "BSD 3-Clause license"
__version__     = "0.1"

import vivotools as vt
import random
from datetime import datetime

query = """
    SELECT ?uri WHERE
    {
    ?uri a vivo:FacultyMember .
    ?uri a ufVivo:UFCurrentEntity .
    }
"""
print datetime.now(),"Gathering Current UF Faculty from VIVO"
data = vt.vivo_sparql_query(query) 
print datetime.now(),"Current UF Faculty found = ",len(data["results"]["bindings"])
print datetime.now(),"Load data structure with results"
d = []
for item in data["results"]["bindings"]:
    d.append(item["uri"]["value"])
print datetime.now(),"Select random sample"
random.shuffle(d)
print datetime.now(),"Show selected faculty by VIVO URI"
for i in range(100):
    print d[i]
print datetime.now(),"Finished"

Esempio n. 23
0
def run_error_query(query_error):
    """Run an error query and report the values of its first result row.

    query_error -- passed unchanged to vivotools.vivo_sparql_query

    Returns "<renamed variable>:<tabs><value>" plus a newline for the last
    variable in the response (each variable overwrites the previous one),
    or an empty string when the response has no variables or no rows.
    """
    data = vivotools.vivo_sparql_query(query_error)

    # Start empty so a response without variables returns "" instead of
    # raising UnboundLocalError at the return statement.
    report = ""
    rows = data["results"]["bindings"]
    if not rows:
        # No result row to report on; avoid IndexError on rows[0].
        return report
    first_row = rows[0]
    for var in data["head"]["vars"]:
        report = (sparql_variable_rename(var) + ":" + "\t\t\t" +
                  first_row[var.encode('ascii', 'ignore')]["value"] + "\n")
    return report