def identify_rdf_parser(self):
    """Return an RDF parser object suited to ``self.rdf_format``.

    Mapping:
      * ``'ttl'``   -> N-Triples parser (Turtle itself is not supported by
                       LODStats; an error is logged and the caller is expected
                       to have converted the data).
      * ``'nt'``    -> N-Triples parser.
      * ``'nq'``    -> N-Quads parser.
      * ``'rdf'``   -> RDF/XML parser.
      * ``'sparql'`` / ``'sitemap'`` -> ``None`` (no streaming parser applies).

    @raise NameError: for ``'n3'`` (unsupported serialization) and for any
                      unrecognised format string.
    """
    fmt = self.rdf_format  # local name avoids shadowing the builtin `format`
    if fmt == 'ttl':
        # Turtle is not supported; warn loudly and fall back to N-Triples.
        logger.error(
            "Turtle is not supported by LODStats, should be converted to ntriples!"
        )
        return RDF.NTriplesParser()
    if fmt == 'n3':
        # Was: `parser = None` followed by raise — the assignment was dead code.
        raise NameError(
            "n3 serialization is not supported, please convert to nt")
    if fmt == 'nt':
        return RDF.NTriplesParser()
    if fmt == 'nq':
        return RDF.Parser(name='nquads')
    if fmt == 'rdf':
        return RDF.Parser(name="rdfxml")
    if fmt in ('sparql', 'sitemap'):
        # Handled by other machinery; nothing to parse as a triple stream.
        return None
    raise NameError("unsupported format")
def get_parser(url, format=None):
    """Pick an RDF parser for *url*.

    When *format* is not supplied it is detected from the URL via
    ``get_format``. ``'sparql'`` and ``'sitemap'`` sources have no streaming
    parser, so ``None`` is returned for them; any format outside the known
    set raises ``NameError``.
    """
    if format is None:
        format = get_format(url)
    if format in ('sparql', 'sitemap'):
        return None
    if format == 'ttl':
        return RDF.TurtleParser()
    if format in ('nt', 'n3'):
        # FIXME: this probably won't do for n3
        return RDF.NTriplesParser()
    if format == 'nq':
        return RDF.Parser(name='nquads')
    if format == 'rdf':
        return RDF.Parser(name="rdfxml")
    raise NameError("unsupported format")
def __init__(self, filePath=None):
    """
    Constructor - reads the PASS process model from a file, or creates a new one.

    The filePath can either be a local path or an absolute URI on the
    internet specifying where the model to load is defined. In the current
    implementation the namespace is cut off for certain namespaces and a
    local search is performed! If no filePath is given, a new model with a
    single base layer is created instead.

    @param string filePath : (Absolute) path to the file to read the PASS
                             process model from
    @return :
    @author
    """
    # State shared by both the "create new" and "load from file" paths.
    self._resources = []
    self._classMapper = ClassMapper()
    self._attrMapper = AttributeMapper()
    self._filePath = None
    self._changeListeners = []
    self._currentlyLoading = False  # suppresses change events while loading
    self._model = None
    # Now decide whether to load a model or create a new one
    if (filePath is None):
        # Import here because it is only needed here (prevents import loops)
        from PASSProcessModel import *
        from BaseLayer import *
        self._model = PASSProcessModel(self)
        layer = BaseLayer(self)
        self._model.hasModelComponent.append(layer)
    else:
        # ========= Load a model =======
        # Check the type of the path and whether the path exists
        isString = isinstance(filePath, str)
        if (not isString):
            raise Exception("Parameter \"filePath\" must be of type str!")
        # If the base path is used to define the uri, delete it and replace
        # it with a relative path
        filePath = filePath.replace(ModelManager.DEFAULT_BASE_URI, "./")
        # NOTE(review): write access to the *directory* is used as the
        # "valid file address" check — presumably because the model will be
        # saved back later; a read-only but readable file is rejected here.
        hasWriteAccess = os.access(os.path.dirname(filePath), os.W_OK)
        validExtension = os.path.splitext(
            filePath)[-1] in ModelManager.POSSIBLE_EXTENSIONS
        # Raise exceptions if above restrictions are not valid
        if (not hasWriteAccess):
            raise Exception(
                "Parameter \"filePath\" must point to a valid file address!"
            )
        if (not validExtension):
            raise Exception(
                "Parameter \"filePath\" must have a valid extension!")
        # ========= Continue if everything is alright ========
        # Set the variable to not fire change events currently
        self._currentlyLoading = True
        # Set the file path
        self._filePath = filePath
        # ========= Now load the model =========
        storage = RDF.MemoryStorage()
        if storage is None:
            raise Exception(
                "Failed to create storage for reading from the file!")
        model = RDF.Model(storage)
        if (model is None):
            raise Exception(
                "Faile to create model for reading from the file!")
        # ========= Now start parsing =========
        # Select parser by file type; anything that is not .nt goes through
        # the generic raptor parser (RDF/XML etc.)
        if (os.path.splitext(filePath)[-1] == ".nt"):
            parser = RDF.NTriplesParser()
        else:
            parser = RDF.Parser("raptor")
        # Read every statement from the file into the in-memory model
        uri = RDF.Uri(string="file:" + filePath)
        for statement in parser.parse_as_stream(uri, uri):
            model.add_statement(statement)
        # Get all class types of each subject node by rdf:type and store
        # them all in the classTypes dict (subject-string -> list of types)
        typeQuery = RDF.Query(
            "SELECT ?s ?o WHERE { ?s <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> ?o }"
        )
        classTypes = {}
        for result in typeQuery.execute(model):
            # Check the type of s: resources are keyed by URI, blank nodes
            # by their blank-node identifier
            if (result["s"].is_resource()):
                subjectString = str(result["s"].uri)
            elif (result["s"].is_blank()):
                subjectString = str(result["s"].blank_identifier)
            else:
                raise Exception(
                    "Received a RDFNode that is a subject and neither of type normal nor blank!"
                )
            # Now insert it into the dict and append the type
            if (not (subjectString in classTypes)):
                classTypes[subjectString] = []
            classTypes[subjectString].append(str(result["o"]))
        # Now generate the instances depending on their rdf:type links
        # stored in the classTypes dict
        ownClasses = {}
        for (key, value) in list(classTypes.items()):
            className = self._classMapper.getClassName(value)
            # Dynamic import of the mapped class by name.
            # NOTE(review): exec on a string built from file contents — the
            # model file effectively controls which module gets imported;
            # only load trusted model files.
            exec(str("from " + className + " import *"), globals())
            classConstructor = globals()[className]
            # http:// subjects become URI-backed instances, everything else
            # is treated as a blank node
            if (key.startswith("http://")):
                newClass = classConstructor(self, uri=key)
            else:
                newClass = classConstructor(self,
                                            isBlank=True,
                                            blankNodeId=key)
            # Set the PASSProcessModel reference; only one model per file
            # is supported — a second one overwrites the first with a warning
            if (className == "PASSProcessModel"):
                if (self._model is not None):
                    print(
                        "WARNING! Two Process Models were read from the file while only one can be instanciated!"
                    )
                self._model = newClass
            ownClasses[key] = newClass
        # Go through all triples with the component id and perform them
        # before the others to set the right ids on the
        # PASSProcessModelElements
        tripleQuery = RDF.Query(
            "SELECT ?s ?o WHERE { ?s <http://www.imi.kit.edu/abstract-pass-ont#hasModelComponentID> ?o }"
        )
        for result in tripleQuery.execute(model):
            self._convertTriples(
                result, ownClasses,
                "http://www.imi.kit.edu/abstract-pass-ont#hasModelComponentID"
            )
        # Go through all triples and include them - eventually generate
        # additional class instances or literals if this object has not
        # been created yet
        tripleQuery = RDF.Query("SELECT ?s ?a ?o WHERE { ?s ?a ?o }")
        for result in tripleQuery.execute(model):
            self._convertTriples(result, ownClasses)
        # Finished loading — re-enable change events
        self._currentlyLoading = False
#!/usr/bin/env python
"""Stream N-Triples from stdin and emit (subject URI, literal object) CSV rows."""
import sys
import RDF
import csv

parser = RDF.NTriplesParser()
csvwriter = csv.writer(sys.stdout,
                       delimiter=',',
                       quotechar='"',
                       quoting=csv.QUOTE_MINIMAL)

# Header goes through the same CSV writer so it is comma-delimited like the
# data rows (the old code printed a tab-separated header above comma rows).
csvwriter.writerow(["id", "comment_en"])

for line in sys.stdin:
    if line.startswith("#"):  # skip comment lines
        continue
    for statement in parser.parse_string_as_stream(line, "http://localhost/"):
        if not statement.subject.is_resource():
            continue
        subjectString = str(statement.subject.uri)
        # Only emit a row when the object really is a literal; previously a
        # row was written for *every* statement, reusing a stale (or never
        # assigned) objectString from an earlier iteration.
        if statement.object.is_literal():
            objectString = statement.object.literal[0]
            csvwriter.writerow([subjectString, objectString])