def __init__(self):
    """Create the triple sink and reset all parser state.

    The sink is asked, in this order, for the term of the empty URI
    (stored as ``document``), the ``#type`` URI and the ``#li`` URI.
    """
    sink = FormulaSink()
    self.sink = sink

    # Term for the empty URI, used as the bottom entry of the parent stack.
    self.document = sink.termFor(uri="")

    # Parent stack, seeded with a "root" marker above the document term.
    self.parents = ["root", self.document]
    self.parentsLenStack = []
    self.expectingIndividual = 1

    # Terms looked up once here and kept on the instance.
    self.rdftype = sink.termFor(uri="http://...#type")
    self.rdfli = sink.termFor(uri="http://...#li")

    # No property value is pending when parsing begins.
    self.awaitingFirstValue = 0
    self.firstValueBuffer = None
def __init__(self):
    """Create the triple sink and reset all parser state.

    The sink is asked, in this order, for the term of the empty URI
    (stored as ``document``), the ``#type`` URI and the ``#li`` URI.
    """
    sink = FormulaSink()
    self.sink = sink

    # Term for the empty URI, used as the bottom entry of the parent stack.
    self.document = sink.termFor(uri="")

    # Parent stack, seeded with a "root" marker above the document term.
    self.parents = ["root", self.document]
    self.parentsLenStack = []
    self.expectingIndividual = 1

    # Terms looked up once here and kept on the instance.
    self.rdftype = sink.termFor(uri="http://...#type")
    self.rdfli = sink.termFor(uri="http://...#li")

    # No property value is pending when parsing begins.
    self.awaitingFirstValue = 0
    self.firstValueBuffer = None
class docHandler(xml.sax.ContentHandler):
    """SAX handler that turns namespaced XML into triples sent to a sink.

    Elements are read as alternating "stripes":

    * An element whose local name starts with an uppercase letter stands
      for an individual: a fresh term typed (via ``rdftype``) with the
      element's URI.
    * An element whose local name starts with a lowercase letter is a
      property named by the element's URI:

      - with no value at all, ``endElementNS`` raises;
      - with exactly one value, the enclosing individual is linked to
        that value when the element closes;
      - with more than one value, an anonymous individual is inferred as
        the property's value and every value is attached to it with the
        ``rdfli`` property.

    Each ``characters`` call contributes one value: the term the sink
    returns for the whole chunk of text.  (Expanding text into one term
    per character is not implemented.)

    ``parents`` is a stack whose top is index 0.  It starts as
    ``["root", document]``; an even length means a property is on top,
    an odd length means an individual is on top.

    NOTE(review): there is a single ``firstValueBuffer`` for the whole
    parse, not one per open property.  An individual nested inside a
    property therefore appears to be flushed against the wrong stack
    entries when it closes, and to be overwritten if it has property
    children of its own.  Behaviour is left unchanged here; confirm the
    intended output before relying on nested input.
    """

    def __init__(self):
        # The base initialiser was previously skipped; call it explicitly
        # (not via super(), which fails on Python 2 old-style classes).
        xml.sax.ContentHandler.__init__(self)
        self.sink = FormulaSink()
        self.document = self.sink.termFor(uri="")
        self.parents = ["root", self.document]
        # Stack (top at index 0) of len(self.parents) at each element start.
        self.parentsLenStack = []
        self.expectingIndividual = 1
        self.rdftype = self.sink.termFor(uri="http://...#type")
        self.rdfli = self.sink.termFor(uri="http://...#li")
        # Set when a property has opened and not yet received a value.
        self.awaitingFirstValue = 0
        # Holds a property's first value until we know whether more follow.
        self.firstValueBuffer = None

    def instanceOf(self, uri=None):
        """Return a fresh term from the sink, typed with *uri* if given."""
        result = self.sink.termFor()
        if uri is not None:
            self.sink.insert(
                (result, self.rdftype, self.sink.termFor(uri=uri)))
        return result

    def startElementNS(self, name, qname, attrs):
        """Open an element as an individual or a property stripe.

        *name* is indexed as ``(namespace, localname)``.  Raises
        ``RuntimeError`` when the local name starts with neither an
        uppercase nor a lowercase letter (including an empty local name).
        """
        self.parentsLenStack.insert(0, len(self.parents))

        namespace = name[0]
        local = name[1]
        # NOTE(review): a None namespace (element without a namespace)
        # would fail on .endswith below; the original had the same limit.
        if namespace.endswith(("/", "#")):
            uri = namespace + local
        else:
            uri = namespace + "/" + local

        initial = local[0:1]
        if initial.isupper():
            self.prepareForIndividual()
            me = self.instanceOf(uri)
            self.gotIndividual(me)
            self.parents.insert(0, me)
        elif initial.islower():
            self.prepareForProperty()
            me = self.sink.termFor(uri=uri)
            self.parents.insert(0, me)
            self.awaitingFirstValue = 1
        else:
            raise RuntimeError("not upper or lower?")

    def prepareForIndividual(self):
        """Ensure a property is on top of the stack, inferring ``rdfli``."""
        if len(self.parents) % 2 == 1:
            print("# need to infer a property stripe")
            self.parents.insert(0, self.rdfli)

    def prepareForProperty(self):
        """Ensure an individual is on top of the stack.

        When a property is on top, a fresh untyped individual is pushed
        and linked as the value of that property.
        """
        if len(self.parents) % 2 == 0:
            print("# need to infer an individual stripe")
            i = self.instanceOf()
            self.parents.insert(0, i)
            self.sink.insert(
                (self.parents[2], self.parents[1], self.parents[0]))

    def endElementNS(self, name, qname):
        """Close an element: flush a buffered value and unwind the stack.

        Raises ``Error`` if a property is still waiting for its first
        value.
        """
        if self.awaitingFirstValue:
            # NOTE(review): `Error` is not defined in the visible code;
            # presumably it is defined elsewhere in this module.
            raise Error("property with no value given")
            # use this syntax for named things?
        if self.firstValueBuffer is not None:
            self.sink.insert(
                (self.parents[1], self.parents[0], self.firstValueBuffer))
            self.firstValueBuffer = None

        # Drop everything pushed since the matching startElementNS.
        finalLen = self.parentsLenStack.pop(0)
        excess = len(self.parents) - finalLen
        if excess > 0:
            del self.parents[:excess]

    def characters(self, content):
        """Treat a chunk of character data as a single individual."""
        self.prepareForIndividual()
        self.gotIndividual(self.sink.termFor(content))

    def gotIndividual(self, term):
        """Record *term* as a value of the property on top of the stack.

        The first value of a property is only buffered.  When a second
        value arrives, a list-like individual is inferred, the buffered
        value is attached to it, and then *term* is attached as well.
        """
        if self.awaitingFirstValue:
            self.awaitingFirstValue = 0
            self.firstValueBuffer = term
            return
        if self.firstValueBuffer is not None:
            # we have multiple values; we need a list!
            self.prepareForProperty()    # say it's a List?
            self.prepareForIndividual()
            self.sink.insert(
                (self.parents[1], self.parents[0], self.firstValueBuffer))
            self.firstValueBuffer = None
        self.sink.insert((self.parents[1], self.parents[0], term))
class docHandler(xml.sax.ContentHandler):
    """SAX handler that turns namespaced XML into triples sent to a sink.

    Elements are read as alternating "stripes":

    * An element whose local name starts with an uppercase letter stands
      for an individual: a fresh term typed (via ``rdftype``) with the
      element's URI.
    * An element whose local name starts with a lowercase letter is a
      property named by the element's URI:

      - with no value at all, ``endElementNS`` raises;
      - with exactly one value, the enclosing individual is linked to
        that value when the element closes;
      - with more than one value, an anonymous individual is inferred as
        the property's value and every value is attached to it with the
        ``rdfli`` property.

    Each ``characters`` call contributes one value: the term the sink
    returns for the whole chunk of text.  (Expanding text into one term
    per character is not implemented.)

    ``parents`` is a stack whose top is index 0.  It starts as
    ``["root", document]``; an even length means a property is on top,
    an odd length means an individual is on top.

    NOTE(review): there is a single ``firstValueBuffer`` for the whole
    parse, not one per open property.  An individual nested inside a
    property therefore appears to be flushed against the wrong stack
    entries when it closes, and to be overwritten if it has property
    children of its own.  Behaviour is left unchanged here; confirm the
    intended output before relying on nested input.
    """

    def __init__(self):
        # The base initialiser was previously skipped; call it explicitly
        # (not via super(), which fails on Python 2 old-style classes).
        xml.sax.ContentHandler.__init__(self)
        self.sink = FormulaSink()
        self.document = self.sink.termFor(uri="")
        self.parents = ["root", self.document]
        # Stack (top at index 0) of len(self.parents) at each element start.
        self.parentsLenStack = []
        self.expectingIndividual = 1
        self.rdftype = self.sink.termFor(uri="http://...#type")
        self.rdfli = self.sink.termFor(uri="http://...#li")
        # Set when a property has opened and not yet received a value.
        self.awaitingFirstValue = 0
        # Holds a property's first value until we know whether more follow.
        self.firstValueBuffer = None

    def instanceOf(self, uri=None):
        """Return a fresh term from the sink, typed with *uri* if given."""
        result = self.sink.termFor()
        if uri is not None:
            self.sink.insert(
                (result, self.rdftype, self.sink.termFor(uri=uri)))
        return result

    def startElementNS(self, name, qname, attrs):
        """Open an element as an individual or a property stripe.

        *name* is indexed as ``(namespace, localname)``.  Raises
        ``RuntimeError`` when the local name starts with neither an
        uppercase nor a lowercase letter (including an empty local name).
        """
        self.parentsLenStack.insert(0, len(self.parents))

        namespace = name[0]
        local = name[1]
        # NOTE(review): a None namespace (element without a namespace)
        # would fail on .endswith below; the original had the same limit.
        if namespace.endswith(("/", "#")):
            uri = namespace + local
        else:
            uri = namespace + "/" + local

        initial = local[0:1]
        if initial.isupper():
            self.prepareForIndividual()
            me = self.instanceOf(uri)
            self.gotIndividual(me)
            self.parents.insert(0, me)
        elif initial.islower():
            self.prepareForProperty()
            me = self.sink.termFor(uri=uri)
            self.parents.insert(0, me)
            self.awaitingFirstValue = 1
        else:
            raise RuntimeError("not upper or lower?")

    def prepareForIndividual(self):
        """Ensure a property is on top of the stack, inferring ``rdfli``."""
        if len(self.parents) % 2 == 1:
            print("# need to infer a property stripe")
            self.parents.insert(0, self.rdfli)

    def prepareForProperty(self):
        """Ensure an individual is on top of the stack.

        When a property is on top, a fresh untyped individual is pushed
        and linked as the value of that property.
        """
        if len(self.parents) % 2 == 0:
            print("# need to infer an individual stripe")
            i = self.instanceOf()
            self.parents.insert(0, i)
            self.sink.insert(
                (self.parents[2], self.parents[1], self.parents[0]))

    def endElementNS(self, name, qname):
        """Close an element: flush a buffered value and unwind the stack.

        Raises ``Error`` if a property is still waiting for its first
        value.
        """
        if self.awaitingFirstValue:
            # NOTE(review): `Error` is not defined in the visible code;
            # presumably it is defined elsewhere in this module.
            raise Error("property with no value given")
            # use this syntax for named things?
        if self.firstValueBuffer is not None:
            self.sink.insert(
                (self.parents[1], self.parents[0], self.firstValueBuffer))
            self.firstValueBuffer = None

        # Drop everything pushed since the matching startElementNS.
        finalLen = self.parentsLenStack.pop(0)
        excess = len(self.parents) - finalLen
        if excess > 0:
            del self.parents[:excess]

    def characters(self, content):
        """Treat a chunk of character data as a single individual."""
        self.prepareForIndividual()
        self.gotIndividual(self.sink.termFor(content))

    def gotIndividual(self, term):
        """Record *term* as a value of the property on top of the stack.

        The first value of a property is only buffered.  When a second
        value arrives, a list-like individual is inferred, the buffered
        value is attached to it, and then *term* is attached as well.
        """
        if self.awaitingFirstValue:
            self.awaitingFirstValue = 0
            self.firstValueBuffer = term
            return
        if self.firstValueBuffer is not None:
            # we have multiple values; we need a list!
            self.prepareForProperty()    # say it's a List?
            self.prepareForIndividual()
            self.sink.insert(
                (self.parents[1], self.parents[0], self.firstValueBuffer))
            self.firstValueBuffer = None
        self.sink.insert((self.parents[1], self.parents[0], term))