예제 #1
0
def convert_graph(options, closureClass = None) :
	"""
	Entry point for external scripts (CGI or command line) to parse RDF file(s), possibly execute OWL and/or RDFS closures,
	and serialize back the result in some format.
	Note that this entry point can be used requiring no entailment at all.
	Because both the input and the output format for the package can be RDF/XML or Turtle, such usage would
	simply mean a format conversion.
	
	If OWL 2 RL processing is required, that also means that the owl:imports statements are interpreted. Ie,
	ontologies can be spread over several files. Note, however, that the output of the process would then include all 
	imported ontologies, too.

	As a side effect, all Python warnings are silenced via C{warnings.filterwarnings("ignore")}.

	@param options: object with specific attributes, namely:
	  - options.sources: list of uris or file names for the source data; for each one if the name ends with 'ttl', it is considered to be turtle, RDF/XML otherwise (this can be overwritten by the options.iformat, though). Duplicates are processed only once. The list itself is not modified.
	  - options.source: optional, legacy; a single extra uri or file name that is processed together with options.sources
	  - options.text: direct Turtle encoding of a graph as a text string (useful, eg, for a CGI call using a text field); parsed as "n3"
	  - options.owlClosure: can be yes or no
	  - options.rdfsClosure: can be yes or no
	  - options.owlExtras: can be yes or no; whether the extra rules beyond OWL 2 RL are used or not.
	  - options.axioms: whether relevant axiomatic triples are added before chaining (can be a boolean, or the strings "yes" or "no")
	  - options.daxioms: further datatype axiomatic triples are added to the output (can be a boolean, or the strings "yes" or "no")
	  - options.format: output format; compared against the module constants TURTLE and JSON, any other value results in RDF/XML
	  - options.iformat: optional input format, can be "turtle", "rdfxml", or "auto". "auto" means that the suffix of the file is considered: '.ttl' is for turtle, rdfxml otherwise. Defaults to AUTO when the attribute is missing.
	  - options.trimming: optional; whether the extension to OWLRL should also include trimming. Treated as "no" when the attribute is missing.
	@param closureClass: explicit class reference. If set, this overrides the various different other options to be used as an extension. 
	@return: the serialized graph, ie, whatever C{graph.serialize} returns for the chosen output format
	@raise Exception: if neither a source (uri or file name) nor a non-blank text is provided
	"""
	import re
	import warnings

	def __convert_to_turtle(graph) :
		"""Serialize the graph with the serializer registered under the name "turtle"."""
		return graph.serialize(format="turtle")

	def __convert_to_json(graph) :
		"""Serialize the graph with the serializer registered under the name "json"."""
		return graph.serialize(format="json")

	def __convert_to_XML(graph) :
		"""Serialize the graph to RDF/XML, preferring the "pretty-xml" serializer and falling back to plain "xml"."""
		try :
			return graph.serialize(format="pretty-xml")
		except Exception :
			# there are cases when the pretty serialization goes wild, eg, when a blank node is used as a class name
			# as a fall back the ugly but safer simple xml serialization is used
			return graph.serialize(format="xml")

	def __modify_request_header() :
		"""Older versions of RDFlib, though they added an accept header, did not include anything for turtle. This is
		taken care of here."""
		# The major version is compared numerically: comparing the version strings directly is lexicographic
		# and would, eg, classify "10.0.0" as older than "3.0.0". A version string that does not start with a
		# number is treated as a modern version, ie, nothing is patched.
		major = re.match(r"\d+", str(rdflib.__version__))
		if major is not None and int(major.group()) < 3 :
			from rdflib.URLInputSource import headers
			# The xhtml+xml probably refers to the ability of distill RDFa, though the current RDFLib distribution
			# is fairly poor on that one...
			headers['Accept'] = "application/rdf+xml, text/turtle, text/n3, application/xml;q=0.8, application/xhtml+xml;q=0.5"
		# Unfortunately, there is no clean way of doing that in rdflib 3.0, which knows only n3...

	def __check_yes_or_true(opt) :
		"""True if the option is the boolean True or one of the accepted "yes"/"true" spellings."""
		return opt in (True, "yes", "Yes", "True", "true")

	warnings.filterwarnings("ignore")

	# Collect the sources into a local set. This filters out a possible error, namely to process the same file twice,
	# and it also leaves the caller's options.sources list untouched.
	sources = set(options.sources or ())

	# Older versions of the service used a single 'source' attribute; newer ones do not have it at all.
	# It is merged in *before* the check below, so that a call using only 'source' is not rejected.
	legacy_source = getattr(options, "source", None)
	if legacy_source is not None and legacy_source != "" :
		sources.add(legacy_source)

	has_text = options.text is not None and len(options.text.strip()) > 0
	if not sources and not has_text :
		raise Exception("No graph specified either via a URI or text")

	__modify_request_header()

	graph = Graph()

	# The attribute may be missing when the function is called by older versions of the service
	iformat = getattr(options, "iformat", AUTO)

	# Get the sources first
	for inp in sources : __parse_input(iformat, inp, graph)

	# add the possible extra text (ie, the text input on the HTML page)
	if options.text is not None :
		graph.parse(StringIO.StringIO(options.text),format="n3")

	# Get all the options right
	owlClosure  = __check_yes_or_true(options.owlClosure)
	rdfsClosure = __check_yes_or_true(options.rdfsClosure)
	owlExtras   = __check_yes_or_true(options.owlExtras)
	# 'trimming' is optional: a missing attribute means no trimming
	trimming    = __check_yes_or_true(getattr(options, "trimming", False))
	axioms      = __check_yes_or_true(options.axioms)
	daxioms     = __check_yes_or_true(options.daxioms)

	if owlClosure : interpret_owl_imports(iformat, graph)

	# adds to the 'beauty' of the output
	graph.bind("owl","http://www.w3.org/2002/07/owl#")
	graph.bind("xsd","http://www.w3.org/2001/XMLSchema#")

	#@@@@ some smarter choice should be used later to decide what the closure class is!!! That should
	# also control the import management. Eg, if the superclass includes OWL...
	if closureClass is not None :
		closure_class = closureClass
	else :
		closure_class = return_closure_class(owlClosure, rdfsClosure, owlExtras, trimming)

	DeductiveClosure(closure_class, improved_datatypes = True, rdfs_closure = rdfsClosure, axiomatic_triples = axioms, datatype_axioms = daxioms).expand(graph)

	if options.format == TURTLE :
		return __convert_to_turtle(graph)
	elif options.format == JSON :
		return __convert_to_json(graph)
	else :
		return __convert_to_XML(graph)