Python QueryHelper Examples

Programming Language: Python

Namespace/Package Name: queryHelper

Class/Type: QueryHelper

Examples at hotexamples.com: 2

Python QueryHelper - 2 examples found. These are the top rated real world Python examples of queryHelper.QueryHelper extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

entityTriples(1)

getAllClasses(1)

getAllProperties(1)

getClassDirectSupers(1)

getOntology(1)

getPropDirectSupers(1)

Example #1

Show file

File: graph.py Project: easton402/OntoSPy

	def __loadRDF(self, source, text, endpoint, rdf_format):
		"""
		Load RDF triples from `source` into self.rdfgraph, then set up the query helper.

		Args:
			source: the serialized RDF itself (when `text` is truthy), the address
				of a SPARQL endpoint (when `endpoint` is truthy), otherwise a
				URI/path string or an open file object exposing `read` and `name`.
			text: truthy if `source` holds serialized RDF.
			endpoint: truthy if `source` is a SPARQL endpoint.
			rdf_format: serialization name handed to the parser. When empty it
				falls back to "turtle" for text and to guess_fileformat() for
				URIs and files.

		Raises:
			Exception: if `source` is neither a string nor a named file object.
			Errors raised while parsing are reported through printDebug and re-raised.
		"""
		# str on Python 3; str and unicode on Python 2
		string_types = (type(""), type(u""))

		# WORK OUT WHAT KIND OF SOURCE WE GOT

		if text:
			self.IS_TEXT = True
			rdf_format = rdf_format or "turtle"

		elif endpoint:
			self.IS_ENDPOINT = True
			# replace graph with ConjunctiveGraph
			self.rdfgraph = rdflib.ConjunctiveGraph(store=SPARQLStore(source))
			self.graphuri = source	# default uri is www location

		elif isinstance(source, string_types):
			self.IS_URL = True
			if source.startswith("www."): #support for lazy people
				source = "http://%s" % str(source)
			self.graphuri = source	# default uri is www location
			rdf_format = rdf_format or guess_fileformat(source)

		elif hasattr(source, "read") and hasattr(source, "name"):
			# duck-typed file check: the `file` builtin only exists on Python 2
			self.IS_FILE = True
			self.graphuri = source.name # default uri is filename
			rdf_format = rdf_format or guess_fileformat(source.name)

		else:
			raise Exception("You passed an unknown object. Only URIs and files are accepted.")

		# FINALLY, TRY LOADING
		# Branch on the arguments of *this* call, not on the IS_* flags: the flags
		# are never reset, so when the method runs more than once on the same
		# object they may still describe an earlier source.

		try:
			if text:
				self.rdfgraph.parse(data=source, format=rdf_format)
				printDebug("----------\nLoaded %d triples from text" % len(self.rdfgraph))
			elif endpoint:
				printDebug("Accessing SPARQL Endpoint <%s>" % self.graphuri)
				printDebug("(note: support for sparql endpoints is still experimental)")
			else:
				self.rdfgraph.parse(source, format=rdf_format)
				printDebug("----------\nLoaded %d triples from <%s>" % (len(self.rdfgraph), self.graphuri))
		except Exception:
			printDebug("\nError Parsing Graph (assuming RDF serialization was *%s*)\n" % (rdf_format))
			raise

		# set up the query helper too
		self.queryHelper = QueryHelper(self.rdfgraph)

Example #2

Show file

File: graph.py Project: easton402/OntoSPy

class Graph(object):
	"""
	Object that scans an RDF graph for schema definitions (aka 'ontologies')
	
	In [1]: import ontospy2
	INFO:rdflib:RDFLib Version: 4.2.0

	In [2]: g = ontospy2.Graph("npgcore_latest.ttl")
	Loaded 3478 triples
	Ontologies found: 1
	
	"""

	def __init__(self, source, text=False, endpoint=False, rdf_format=None):
		"""
		Create an empty graph wrapper, load `source` into it and scan the result.

		All four arguments are handed over unchanged to __loadRDF(); _scan() is
		then called to fill the entity lists initialised here.
		"""
		super(Graph, self).__init__()

		# where the triples came from (switched on by __loadRDF)
		self.IS_TEXT = False
		self.IS_URL = False
		self.IS_FILE = False
		self.IS_ENDPOINT = False

		# the underlying rdflib graph and its identifier
		self.rdfgraph = rdflib.Graph()
		self.graphuri = None
		self.queryHelper = None # only available once a graph has been loaded

		# entities extracted by the scan
		self.namespaces = []
		self.ontologies = []
		self.classes = []
		self.properties = []
		self.objectProperties = []
		self.datatypeProperties = []
		self.annotationProperties = []

		# classes / properties that have no parents
		self.toplayer = []
		self.toplayerProperties = []

		# load first, then extract the entities
		self.__loadRDF(source, text, endpoint, rdf_format)
		self._scan()

	
	def __repr__(self):
		return "<OntoSPy Graph (%d triples)>" % (len(self.rdfgraph))
				


	
	def __loadRDF(self, source, text, endpoint, rdf_format):
		"""
		Load RDF triples from `source` into self.rdfgraph, then set up the query helper.

		Args:
			source: the serialized RDF itself (when `text` is truthy), the address
				of a SPARQL endpoint (when `endpoint` is truthy), otherwise a
				URI/path string or an open file object exposing `read` and `name`.
			text: truthy if `source` holds serialized RDF.
			endpoint: truthy if `source` is a SPARQL endpoint.
			rdf_format: serialization name handed to the parser. When empty it
				falls back to "turtle" for text and to guess_fileformat() for
				URIs and files.

		Raises:
			Exception: if `source` is neither a string nor a named file object.
			Errors raised while parsing are reported through printDebug and re-raised.
		"""
		# str on Python 3; str and unicode on Python 2
		string_types = (type(""), type(u""))

		# WORK OUT WHAT KIND OF SOURCE WE GOT

		if text:
			self.IS_TEXT = True
			rdf_format = rdf_format or "turtle"

		elif endpoint:
			self.IS_ENDPOINT = True
			# replace graph with ConjunctiveGraph
			self.rdfgraph = rdflib.ConjunctiveGraph(store=SPARQLStore(source))
			self.graphuri = source	# default uri is www location

		elif isinstance(source, string_types):
			self.IS_URL = True
			if source.startswith("www."): #support for lazy people
				source = "http://%s" % str(source)
			self.graphuri = source	# default uri is www location
			rdf_format = rdf_format or guess_fileformat(source)

		elif hasattr(source, "read") and hasattr(source, "name"):
			# duck-typed file check: the `file` builtin only exists on Python 2
			self.IS_FILE = True
			self.graphuri = source.name # default uri is filename
			rdf_format = rdf_format or guess_fileformat(source.name)

		else:
			raise Exception("You passed an unknown object. Only URIs and files are accepted.")

		# FINALLY, TRY LOADING
		# Branch on the arguments of *this* call, not on the IS_* flags: the flags
		# are never reset, so when the method runs more than once on the same
		# object they may still describe an earlier source.

		try:
			if text:
				self.rdfgraph.parse(data=source, format=rdf_format)
				printDebug("----------\nLoaded %d triples from text" % len(self.rdfgraph))
			elif endpoint:
				printDebug("Accessing SPARQL Endpoint <%s>" % self.graphuri)
				printDebug("(note: support for sparql endpoints is still experimental)")
			else:
				self.rdfgraph.parse(source, format=rdf_format)
				printDebug("----------\nLoaded %d triples from <%s>" % (len(self.rdfgraph), self.graphuri))
		except Exception:
			printDebug("\nError Parsing Graph (assuming RDF serialization was *%s*)\n" % (rdf_format))
			raise

		# set up the query helper too
		self.queryHelper = QueryHelper(self.rdfgraph)




	def serialize(self, rdf_format="turtle"):
		""" Shortcut that outputs the graph """
		return self.rdfgraph.serialize(format=rdf_format)
			
	
	def sparql(self, stringa):
		""" wrapper around a sparql query """
		qres = self.rdfgraph.query(stringa)
		return list(qres)
			

	def __extractNamespaces(self):
		""" 
		Extract graph namespaces.
		Namespaces are given in this format:

			In [01]: for x in graph.namespaces():
					....:			print x
			('xml', rdflib.URIRef('http://www.w3.org/XML/1998/namespace'))
			('', rdflib.URIRef('http://cohereweb.net/ontology/cohere.owl#'))
			(u'owl', rdflib.URIRef('http://www.w3.org/2002/07/owl#'))
			('rdfs', rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#'))
			('rdf', rdflib.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#'))
			(u'xsd', rdflib.URIRef('http://www.w3.org/2001/XMLSchema#'))

		We assume that a base namespace is implied by an empty prefix		
		"""

		exit = []

		if self.IS_ENDPOINT==True:
			return False

		else:
			
			if self.graphuri not in [y for x,y in self.rdfgraph.namespaces()]:
				# if not base namespace is set, try to simulate one 
				self.rdfgraph.bind("_file_", rdflib.Namespace(self.graphuri))
	
			self.namespaces = sorted(self.rdfgraph.namespaces())
		


	
	# ------------	
	# === main method === #	 
	# ------------
	
	def _scan(self, source=None, text=False, endpoint=False, rdf_format=None):
		""" 
		Run every extraction step over the loaded graph.

		When `source` is truthy it is first loaded through __loadRDF(), with
		`text`, `endpoint` and `rdf_format` passed along. Namespaces, ontologies,
		classes, properties and the top layer are then computed, in that order,
		and the counts are reported through printDebug().

		In [1]: g.scan("npgcore_latest.ttl")
		Ontologies found: 1
		Out[3]: [<OntoSPy: Ontology object for uri *http://ns.nature.com/terms/*>]

		"""
		if source: # add triples dynamically
			self.__loadRDF(source, text, endpoint, rdf_format)

		printDebug("started scanning...\n----------")

		self.__extractNamespaces()

		self.__extractOntologies()
		printDebug("Ontologies found: %d" % len(self.ontologies))

		self.__extractClasses()
		printDebug("Classes found...: %d" % len(self.classes))

		self.__extractProperties()
		# one report line per property bucket
		property_report = (
			("Properties found: %d", self.properties),
			("Annotation......: %d", self.annotationProperties),
			("Datatype........: %d", self.datatypeProperties),
			("Object..........: %d", self.objectProperties),
		)
		for template, bucket in property_report:
			printDebug(template % len(bucket))

		self.__computeTopLayer()

			
		




	
	
	def __extractOntologies(self, exclude_BNodes = False, return_string=False):
		"""
		returns Ontology class instances
		
        [ a owl:Ontology ;
            vann:preferredNamespacePrefix "bsym" ;
            vann:preferredNamespaceUri "http://bsym.bloomberg.com/sym/" ],
			
			
				
		"""
		out = []
	
		qres = self.queryHelper.getOntology()

		if qres:
			# NOTE: SPARQL returns a list of rdflib.query.ResultRow (~ tuples..)
			
			for candidate in qres:
				if isBlankNode(candidate[0]):
					if exclude_BNodes:
						continue
					else:
						checkDC_ID = [x for x in self.rdfgraph.objects(candidate[0], rdflib.namespace.DC.identifier)]
						if checkDC_ID:
							out += [Ontology(checkDC_ID[0])]
						else:
							vannprop = rdflib.URIRef("http://purl.org/vocab/vann/preferredNamespaceUri")
							vannpref = rdflib.URIRef("http://purl.org/vocab/vann/preferredNamespacePrefix")
							checkDC_ID = [x for x in self.rdfgraph.objects(candidate[0], vannprop)]
							if checkDC_ID:
								checkDC_prefix = [x for x in self.rdfgraph.objects(candidate[0], vannpref)]
								if checkDC_prefix:
									out += [Ontology(checkDC_ID[0], prefPrefix=checkDC_prefix[0])]
								else:
									out += [Ontology(checkDC_ID[0])]
						
				else:
					out += [Ontology(candidate[0])]
			
			
		else:
			pass
			# printDebug("No owl:Ontologies found")
			
		#finally		
		self.ontologies = out
		# add all annotations/triples
		for onto in self.ontologies:
			onto.triples = self.queryHelper.entityTriples(onto.uri)
		


	##################
	#  
	#  METHODS for MANIPULATING RDFS/OWL CLASSES 
	# 
	#  RDFS:class vs OWL:class cf. http://www.w3.org/TR/owl-ref/ section 3.1
	#
	##################


	def __extractClasses(self):
		""" 
		2015-06-04: removed sparql 1.1 queries
		2015-05-25: optimized via sparql queries in order to remove BNodes
		2015-05-09: new attempt 
		
		Note: queryHelper.getAllClasses() returns a list of tuples, 
		(class, classRDFtype) 
		so in some cases that's duplicates if a class is both RDFS.CLass and OWL.Class
		In this case we keep only OWL.Class as it is more informative.
		"""
		self.classes = [] # @todo: keep adding? 
		
		qres = self.queryHelper.getAllClasses()

		for candidate in qres:
			
			test_existing_cl = self.getClass(uri=candidate[0])
			if not test_existing_cl:
				# create it
				self.classes += [OntoClass(candidate[0], candidate[1], self.namespaces)]
			else:
				# update it
				if candidate[1] == rdflib.OWL.Class:
					# prefer OWL.Class over RDFS.Class
					test_existing_cl.rdftype = rdflib.OWL.Class 
					
				
		
		#add more data
		for aClass in self.classes:
			
			aClass.triples = self.queryHelper.entityTriples(aClass.uri)
			aClass._buildGraph() # force construction of mini graph
			
			aClass.queryHelper = self.queryHelper
			
			# attach to an ontology 
			for uri in aClass.getValuesForProperty(rdflib.RDFS.isDefinedBy):
				onto = self.getOntology(str(uri))
				if onto:
					onto.classes += [aClass]
					aClass.ontology = onto
					
			# add direct Supers				
			directSupers = self.queryHelper.getClassDirectSupers(aClass.uri)
			
			for x in directSupers:
				superclass = self.getClass(uri=x[0])
				if superclass: 
					aClass._parents.append(superclass)
					
					# add inverse relationships (= direct subs for superclass)
					if aClass not in superclass.children():
						 superclass._children.append(aClass)
			



	def __extractProperties(self):
		""" 
		2015-06-04: removed sparql 1.1 queries
		2015-06-03: analogous to get classes	
		
		# instantiate properties making sure duplicates are pruned
		# but the most specific rdftype is kept 
		# eg OWL:ObjectProperty over RDF:property
			
		"""
		self.properties = [] # @todo: keep adding? 
		self.annotationProperties = [] 
		self.objectProperties = []
		self.datatypeProperties = [] 
		
		qres = self.queryHelper.getAllProperties()
				
		for candidate in qres:

			test_existing_prop = self.getProperty(uri=candidate[0])
			if not test_existing_prop:
				# create it
				self.properties += [OntoProperty(candidate[0], candidate[1], self.namespaces)]
			else:
				# update it
				if candidate[1] and (test_existing_prop.rdftype == rdflib.RDF.Property):
					test_existing_prop.rdftype = inferMainPropertyType(candidate[1])


		#add more data
		for aProp in self.properties:
			
			if aProp.rdftype == rdflib.OWL.DatatypeProperty:
				self.datatypeProperties += [aProp]
			elif aProp.rdftype == rdflib.OWL.AnnotationProperty:
				self.annotationProperties += [aProp]
			elif aProp.rdftype == rdflib.OWL.ObjectProperty:
				self.objectProperties += [aProp]
			else:
				pass
			
			aProp.triples = self.queryHelper.entityTriples(aProp.uri)
			aProp._buildGraph() # force construction of mini graph

			# attach to an ontology [2015-06-15: no property type distinction yet]
			for uri in aProp.getValuesForProperty(rdflib.RDFS.isDefinedBy):
				onto = self.getOntology(str(uri))
				if onto:
					onto.properties += [aProp]
					aProp.ontology = onto
					
					
					
			self.__buildDomainRanges(aProp)
			
			# add direct Supers				
			directSupers = self.queryHelper.getPropDirectSupers(aProp.uri)
			
			for x in directSupers:
				superprop = self.getProperty(uri=x[0])
				if superprop: 
					aProp._parents.append(superprop)
				
					# add inverse relationships (= direct subs for superprop)
					if aProp not in superprop.children():
						 superprop._children.append(aProp)
		
					
					

	def getClass(self, id=None, uri=None, match=None):
		""" 
		get the saved-class with given ID or via other methods...
		
		Note: it tries to guess what is being passed..
	
		In [1]: g.getClass(uri='http://www.w3.org/2000/01/rdf-schema#Resource')
		Out[1]: <Class *http://www.w3.org/2000/01/rdf-schema#Resource*>
		
		In [2]: g.getClass(10)
		Out[2]: <Class *http://purl.org/ontology/bibo/AcademicArticle*> 

		In [3]: g.getClass(match="person")
		Out[3]: 
		[<Class *http://purl.org/ontology/bibo/PersonalCommunicationDocument*>,
		 <Class *http://purl.org/ontology/bibo/PersonalCommunication*>,
		 <Class *http://xmlns.com/foaf/0.1/Person*>]
		
		"""
		
		if not id and not uri and not match:
			return None
			
		if type(id) == type("string"):
			uri = id
			id = None
			if not uri.startswith("http://"):
				match = uri
				uri = None
		if match:
			if type(match) != type("string"):
				return []
			res = []
			for x in self.classes:
				if match.lower() in x.uri.lower():
					res += [x]
			return res
		else:
			for x in self.classes:
				if id and x.id == id:
					return x
				if uri and x.uri.lower() == uri.lower():
					return x
			return None


	def getProperty(self, id=None, uri=None, match=None):
		""" 
		get the saved-class with given ID or via other methods...
		
		Note: analogous to getClass method		
		"""
		
		if not id and not uri and not match:
			return None
			
		if type(id) == type("string"):
			uri = id
			id = None
			if not uri.startswith("http://"):
				match = uri
				uri = None
		if match:
			if type(match) != type("string"):
				return []
			res = []
			for x in self.properties:
				if match.lower() in x.uri.lower():
					res += [x]
			return res
		else:
			for x in self.properties:
				if id and x.id == id:
					return x
				if uri and x.uri.lower() == uri.lower():
					return x
			return None
			

	def getOntology(self, id=None, uri=None, match=None):
		""" 
		get the saved-ontology with given ID or via other methods...	
		"""
		
		if not id and not uri and not match:
			return None
			
		if type(id) == type("string"):
			uri = id
			id = None
			if not uri.startswith("http://"):
				match = uri
				uri = None
		if match:
			if type(match) != type("string"):
				return []
			res = []
			for x in self.ontologies:
				if match.lower() in x.uri.lower():
					res += [x]
			return res
		else:
			for x in self.ontologies:
				if id and x.id == id:
					return x
				if uri and x.uri.lower() == uri.lower():
					return x
			return None
			
								

	def __computeTopLayer(self):

		exit = []
		for c in self.classes:
			if not c.parents():
				exit += [c]
		self.toplayer = exit # sorted(exit, key=lambda x: x.id) # doesnt work

		# properties 
		exit = []
		for c in self.properties:
			if not c.parents():
				exit += [c]
		self.toplayerProperties = exit # sorted(exit, key=lambda x: x.id) # doesnt work
		

	def printClassTree(self, element = None, showids=True, labels=False):
		""" 
		Print nicely into stdout the class tree of an ontology 
		
		Note: indentation is made so that ids up to 3 digits fit in, plus a space.
		[123]1--
		[1]123--
		[12]12--
		"""
		
		if not element:	 # first time
			for x in self.toplayer:
				printGenericTree(x, 0, showids, labels)
		
		else:
			printGenericTree(element, 0, showids, labels)		


	def printPropertyTree(self, element = None, level=0, showids=True, labels=False):
		""" 
		Print nicely into stdout the property tree of an ontology 
		
		Note: indentation is made so that ids up to 3 digits fit in, plus a space.
		[123]1--
		[1]123--
		[12]12--
		"""
		
		if not element:	 # first time
			for x in self.toplayerProperties:
				printGenericTree(x, 0, level, showids)
		
		else:
			printGenericTree(element, 0, showids, labels)
			
			

	###########

	# METHODS for MANIPULATING RDFS/OWL PROPERTIES

	###########



	def __buildDomainRanges(self, aProp):			
		"""
		Link a property with the classes found as its domain and range.

		Every non-blank object of rdfs:domain / rdfs:range in aProp.rdfgraph is
		looked up with getClass(). A known class is added to aProp.domains /
		aProp.ranges and gets aProp added to its own domain_of / range_of; an
		unknown one is stored as the raw node. Blank nodes are ignored.
		"""
		domain_nodes = aProp.rdfgraph.objects(None, rdflib.RDFS.domain)
		range_nodes = aProp.rdfgraph.objects(None, rdflib.RDFS.range)

		for node in domain_nodes:
			if isBlankNode(node):
				continue
			owner = self.getClass(uri=str(node))
			if owner:
				aProp.domains += [owner]
				owner.domain_of += [aProp]
			else:
				aProp.domains += [node]  # edge case: it's not an OntoClass instance?

		for node in range_nodes:
			if isBlankNode(node):
				continue
			owner = self.getClass(uri=str(node))
			if owner:
				aProp.ranges += [owner]
				owner.range_of += [aProp]
			else:
				aProp.ranges += [node]