def evaluateObject(self, subj_py):
    """Return the first capture group of a regex search, or None.

    ``subj_py`` is a two-item sequence ``(text, pattern)``.  The pattern is
    compiled and searched for anywhere in the text; on a match the text of
    group 1 is returned, otherwise the function falls through to None.
    """
    if verbosity() > 80:
        progress("scrape input:" + repr(subj_py))
    text, pattern = subj_py
    match = re.compile(pattern).search(text)
    if not match:
        if verbosity() > 80:
            progress("scrape didn't match")
        return None
    if verbosity() > 80:
        progress("scrape matched:" + match.group(1))
    return match.group(1)
def evaluateObject(self, subj_py):
    """Return the first capture group of a regex search, or None.

    ``subj_py`` is a two-item sequence ``(text, pattern)``.  The pattern is
    compiled and searched for anywhere in the text; on a match the text of
    group 1 is returned, otherwise the function falls through to None.
    """
    if verbosity() > 80:
        progress("scrape input:" + repr(subj_py))
    text, pattern = subj_py
    match = re.compile(pattern).search(text)
    if not match:
        if verbosity() > 80:
            progress("scrape didn't match")
        return None
    if verbosity() > 80:
        progress("scrape matched:" + match.group(1))
    return match.group(1)
def unify(self, other, vars, existentials, bindings):
    """Try to unify this term with ``other``.

    Returns ``bindings`` unchanged when both are the same object, a new
    list ``bindings + [(self, other)]`` when this term is listed in
    ``vars`` or ``existentials``, and 0 when unification is impossible.
    """
    if verbosity() > 97:
        progress("Unifying symbol %s with %s vars=%s" % (self, other, vars))
    if self is other:
        return bindings
    bindable = vars + existentials
    if self not in bindable:
        return 0
    if verbosity() > 80:
        progress("Unifying term MATCHED %s to %s" % (self, other))
    return bindings + [(self, other)]
def evaluateObject(self, subj_py):
    """Return the stored command-line argument at 1-based position ``subj_py``.

    Returns None when ``self.store.argv`` is None or empty, when the subject
    cannot be converted to an integer, or when the position lies outside
    ``1..len(argv)``.
    """
    if verbosity() > 80:
        progress("os:argv input:" + repr(subj_py))
    argv = self.store.argv
    if not argv:  # None or []
        return None
    try:
        index = int(subj_py) - 1
    except (ValueError, TypeError):
        if verbosity() > 30:
            progress("os:argv input is not a number: " + repr(subj_py))
        return None
    # Positions of 0 or below used to turn into a negative index and silently
    # return an argument counted from the end of the list.
    if 0 <= index < len(argv):
        return argv[index]
    return None
def evaluateObject(self, subj_py):
    """Search text with a regular expression and return its capture groups.

    ``subj_py`` is a two-item sequence ``(text, pattern)``.  Returns the
    tuple produced by the match object's ``groups()`` on a match, and None
    when the pattern is not found.
    """
    if verbosity() > 80:
        progress("search input:" + repr(subj_py))
    text, pattern = subj_py
    match = re.compile(pattern).search(text)
    if match is None:
        if verbosity() > 80:
            progress("search didn't match")
        return None
    groups = match.groups()
    if verbosity() > 80:
        # Log the whole tuple: group(1) does not exist for a pattern without
        # groups and is None for an unmatched optional group, and either
        # case used to make the trace message itself raise.
        progress("search matched:" + repr(groups))
    return groups
def evaluateObject(self, subj_py):
    """Concatenate the items of ``subj_py`` into a single string.

    Items that are not strings are coerced first: longs and Decimals via
    ``make_string``, everything else via ``repr``.  A warning is logged
    for each coerced item when verbosity is above 34.
    """
    if verbosity() > 80:
        progress("Concatenation input:" + repr(subj_py))
    pieces = []
    for item in subj_py:
        if not isString(item):
            if type(item) == type(long()) or isinstance(item, Decimal):
                item = make_string(item)
            else:
                item = repr(item)
            if verbosity() > 34:
                progress("Warning: Coercing to string for concat:" + repr(item))
        pieces.append(item)
    return "".join(pieces)
def evaluateObject(self, subj_py):
    """Search text with a regular expression and return its capture groups.

    ``subj_py`` is a two-item sequence ``(text, pattern)``.  Returns the
    tuple produced by the match object's ``groups()`` on a match, and None
    when the pattern is not found.
    """
    if verbosity() > 80:
        progress("search input:" + repr(subj_py))
    text, pattern = subj_py
    match = re.compile(pattern).search(text)
    if match is None:
        if verbosity() > 80:
            progress("search didn't match")
        return None
    groups = match.groups()
    if verbosity() > 80:
        # Log the whole tuple: group(1) does not exist for a pattern without
        # groups and is None for an unmatched optional group, and either
        # case used to make the trace message itself raise.
        progress("search matched:" + repr(groups))
    return groups
def unify(self, other, vars, existentials, bindings):
    """Try to unify this term with ``other``.

    Returns ``bindings`` unchanged when both are the same object, a new
    list ``bindings + [(self, other)]`` when this term is listed in
    ``vars`` or ``existentials``, and 0 when unification is impossible.
    """
    if verbosity() > 97:
        progress("Unifying symbol %s with %s vars=%s" % (self, other, vars))
    if self is other:
        return bindings
    bindable = vars + existentials
    if self not in bindable:
        return 0
    if verbosity() > 80:
        progress("Unifying term MATCHED %s to %s" % (self, other))
    return bindings + [(self, other)]
def evaluateObject(self, subj_py):
    """Parse a time string and return it rendered by ``isodate.fullString``.

    ``subj_py`` is a two-item sequence ``(text, format)``.  The text is
    parsed with ``time.strptime``, converted with ``calendar.timegm``,
    truncated to an int and handed to ``isodate.fullString``.  Returns None
    when any step of that conversion raises.
    """
    if verbosity() > 80:
        progress("strTime:parse input:" + repr(subj_py))
    text, fmt = subj_py
    try:
        seconds = int(calendar.timegm(time.strptime(text, fmt)))
        return isodate.fullString(seconds)
    except Exception:
        # Best effort: unparseable input yields None.  Catching Exception
        # rather than everything lets KeyboardInterrupt/SystemExit through.
        return None
def _scan(self, x, context=None):
    """Walk ``x`` and record where generated/anonymous terms occur.

    For a NonEmptyList or N3Set every member is passed to ``_scanObj``
    with the given ``context``.  For a Formula every statement's
    predicate, subject and object is examined: anonymous variables and
    generated fragments are noted in ``self._inContext`` (the single
    formula they were seen in, or "many") and counted per position in
    ``self._occurringAs``; each term other than ``x`` itself is then
    scanned recursively with ``x`` as its context.

    NOTE(review): nesting below was reconstructed; in particular confirm
    that ``_breakloops`` is meant to run once per formula, after the
    statement loop.
    """
    # progress("Scanning ", x, " &&&&&&&&")
    # assert self.context._redirections.get(x, None) is None, "Should not be redirected: "+`x`
    if verbosity() > 98:
        progress(
            "scanning %s a %s in context %s" % (` x `, ` x.__class__ `, ` context `),
            x.generated(),
            self._inContext.get(x, "--"),
        )
    if isinstance(x, NonEmptyList) or isinstance(x, N3Set):
        for y in x:
            self._scanObj(context, y)
    if isinstance(x, Formula):
        for s in x.statements:
            for p in PRED, SUBJ, OBJ:
                y = s[p]
                if isinstance(y, AnonymousVariable) or (isinstance(y, Fragment) and y.generated()):
                    z = self._inContext.get(y, None)
                    if z == "many":
                        continue  # forget it
                    if z is None:
                        # First sighting: remember the formula it lives in.
                        self._inContext[y] = x
                    elif z is not x:
                        # Seen in a different formula before: mark and skip.
                        self._inContext[y] = "many"
                        continue
                    # z is reused here as the occurrence count for position p.
                    z = self._occurringAs[p].get(y, 0)
                    self._occurringAs[p][y] = z + 1
                    # progress("&&&&&&&&& %s now occurs %i times as %s" %(`y`, z+1, "CPSO"[p]))
                # else:
                #     progress("&&&&&&&&& yyyy %s has class %s " %(`y`, `y.__class__`))
                if x is not y:
                    self._scan(y, x)
        self._breakloops(x)
def unify(self, other, vars, existentials, bindings):
    """Unify this list with ``other``, element by element.

    This list may contain variables; ``other`` may contain existentials
    but not variables.  Returns 0 when unification is impossible,
    otherwise a list of ``(var, value)`` pairs.
    """
    if verbosity() > 90:
        progress("Unifying list %s with %s vars=%s" % (self.value(), other.value(), vars))
    if not isinstance(other, NonEmptyList):
        return 0
    if other is self:
        return bindings
    known = len(bindings)
    head_bindings = self.first.unify(other.first, vars, existentials, bindings)
    if head_bindings == 0:
        return 0
    if len(head_bindings) <= known:
        # The heads matched without binding anything new: carry on as is.
        return self.rest.unify(other.rest, vars, existentials, bindings)
    # New bindings appeared: retire the bound names from working copies
    # and apply the bindings to both tails before unifying them.
    open_vars = vars[:]
    open_existentials = existentials[:]
    tail_bindings = bindings[:]
    for var, val in head_bindings[known:]:
        if var in open_vars:
            open_vars.remove(var)
            tail_bindings.append((var, val))
        else:
            open_existentials.remove(var)
    other_tail = other.rest.substitution(head_bindings)
    self_tail = self.rest.substitution(head_bindings)
    return self_tail.unify(other_tail, open_vars, open_existentials, tail_bindings)
def evaluateObject(self, subj_py):
    """Format the current local time.

    An empty-string subject yields ``isodate.asString`` of the current
    time; any other subject is used as a ``time.strftime`` format applied
    to the current local time.  @@@@ Ignores TZ
    """
    if verbosity() > 80:
        progress("time:localTime input:" + repr(subj_py))
    if subj_py != "":
        return time.strftime(subj_py, time.localtime(time.time()))
    return isodate.asString(time.time())
def evaluateObject(self, subj_py):
    """Parse a time string and return its epoch seconds as a string.

    ``subj_py`` is a two-item sequence ``(text, format)``.  The text is
    parsed with ``time.strptime`` and converted with ``calendar.timegm``.
    Returns None when parsing or conversion raises.
    """
    if verbosity() > 80:
        progress("strTime:parse input:" + repr(subj_py))
    text, fmt = subj_py
    try:
        return str(calendar.timegm(time.strptime(text, fmt)))
    except Exception:
        # Best effort: unparseable input yields None.  Catching Exception
        # rather than everything lets KeyboardInterrupt/SystemExit through.
        return None
def evaluateObject(self, subj_py):
    """Parse a time string and return its epoch seconds as a string.

    ``subj_py`` is a two-item sequence ``(text, format)``.  The text is
    parsed with ``time.strptime`` and converted with ``calendar.timegm``.
    Returns None when parsing or conversion raises.
    """
    if verbosity() > 80:
        progress("strTime:parse input:" + repr(subj_py))
    # The input used to be bound to a local called ``str``; the later
    # ``str(...)`` call then hit that string instead of the builtin, raised
    # TypeError, and the blanket handler turned every call into None.
    text, fmt = subj_py
    try:
        return str(calendar.timegm(time.strptime(text, fmt)))
    except Exception:
        # Best effort: unparseable input yields None.
        return None
def _propertyAttr(self, ns, name, value):
    """Parse a propertyAttr production (7.2.25).

    Emits one statement for the attribute ``ns + name`` with ``value`` as
    object: a symbol when the predicate is rdf:type, otherwise a literal
    carrying the current datatype and language.  When there is no current
    subject a blank node is created for it first.  An attribute without a
    namespace is resolved against this document when the "L" flag is set
    and raises BadSyntax otherwise.
    """
    if verbosity() > 50:
        progress("_propertyAttr ns=%s name=%s value=%s" % (ns, name, value))

    if self._subject == None:  # Property as attribute
        self._subject = self.newBlankNode()
        linking_quad = (self._context, self._predicate,
                        self._subject, self._subject)
        self.sink.makeStatement(linking_quad, why=self._reason2)

    if not ns:
        if "L" not in self.flags:  # assume local?
            raise BadSyntax(sys.exc_info(),
                            "No namespace on property attribute %s=%s" % (name, value))
        ns = self._thisDoc + "#"

    predicate_uri = ns + name
    if predicate_uri == RDF_NS_URI + "type":  # special case
        target = self.sink.newSymbol(self.uriref(value))  # SYN#7.2.11 step 2/3
    else:
        target = self.sink.newLiteral(value, self._datatype, self._language)

    predicate = self.sink.newSymbol(self.uriref(predicate_uri))
    self.sink.makeStatement(
        (self._context, predicate, self._subject, target),
        why=self._reason2)
    self._state = STATE_NOVALUE  # NOT looking for value
    return
def unify(self, other, vars, existentials, bindings):
    """Unify this list with ``other``, element by element.

    This list may contain variables; ``other`` may contain existentials
    but not variables.  Returns 0 when unification is impossible,
    otherwise a list of ``(var, value)`` pairs.
    """
    if verbosity() > 90:
        progress("Unifying list %s with %s vars=%s" % (self.value(), other.value(), vars))
    if not isinstance(other, NonEmptyList):
        return 0
    if other is self:
        return bindings
    known = len(bindings)
    head_bindings = self.first.unify(other.first, vars, existentials, bindings)
    if head_bindings == 0:
        return 0
    if len(head_bindings) <= known:
        # The heads matched without binding anything new: carry on as is.
        return self.rest.unify(other.rest, vars, existentials, bindings)
    # New bindings appeared: retire the bound names from working copies
    # and apply the bindings to both tails before unifying them.
    open_vars = vars[:]
    open_existentials = existentials[:]
    tail_bindings = bindings[:]
    for var, val in head_bindings[known:]:
        if var in open_vars:
            open_vars.remove(var)
            tail_bindings.append((var, val))
        else:
            open_existentials.remove(var)
    other_tail = other.rest.substitution(head_bindings)
    self_tail = self.rest.substitution(head_bindings)
    return self_tail.unify(other_tail, open_vars, open_existentials, tail_bindings)
def bind(self, prefix, uri):
    """Record a prefix hint for a namespace, for later use in output.

    This is only a hint: the parser passes on the prefix it met so that
    output can reuse a human-readable name for the same namespace rather
    than inventing one.  Nothing is recorded when the namespace already
    has a prefix; when the prefix is already taken, the call is retried
    with an underscore appended.
    """
    if ':' not in uri:
        # @@ should raise an exception, but sax callbacks crash.
        warn("@@URI must be absolute: %s" % uri)
    if self.prefixes.get(uri, None) is not None:
        return  # this namespace already has a prefix
    if self.namespaces.get(prefix, None) is not None:
        self.bind(prefix + "_", uri)  # Recursion unnecessary
        return
    # For conventions
    self.prefixes[uri] = prefix
    self.namespaces[prefix] = uri
    if verbosity() > 29:
        progress("RDFSink.bind: prefix %s: to <%s>. " % (prefix, uri))
def evaluateSubject(self, obj_py):
    """Concatenate the strings in ``obj_py``.

    Returns None as soon as an item is not a string; otherwise returns the
    items joined in order (the empty string for an empty sequence).
    """
    if verbosity() > 80:
        progress("Concat input:" + repr(obj_py))
    pieces = []
    for item in obj_py:
        if not isString(item):
            return None  # Can't
        pieces.append(item)
    return "".join(pieces)
def evaluateObject(self, subj_py):
    """Format the current local time.

    An empty-string subject yields ``isodate.asString`` of the current
    time; any other subject is used as a ``time.strftime`` format applied
    to the current local time.  @@@@ Ignores TZ
    """
    if verbosity() > 80:
        progress("time:localTime input:" + repr(subj_py))
    if subj_py != "":
        return time.strftime(subj_py, time.localtime(time.time()))
    return isodate.asString(time.time())
def evaluateSubject(self, obj_py):
    """Concatenate the strings in ``obj_py``.

    Returns None as soon as an item is not a string; otherwise returns the
    items joined in order (the empty string for an empty sequence).
    """
    if verbosity() > 80:
        progress("Concat input:" + repr(obj_py))
    pieces = []
    for item in obj_py:
        if not isString(item):
            return None  # Can't
        pieces.append(item)
    return "".join(pieces)
def evaluateObject(self, subj_py):
    """Concatenate the items of ``subj_py`` into a single string.

    Items that are not strings are coerced first: longs and Decimals via
    ``make_string``, everything else via ``repr``.  A warning is logged
    for each coerced item when verbosity is above 34.
    """
    if verbosity() > 80:
        progress("Concatenation input:" + repr(subj_py))
    pieces = []
    for item in subj_py:
        if not isString(item):
            if type(item) == type(long()) or isinstance(item, Decimal):
                item = make_string(item)
            else:
                item = repr(item)
            if verbosity() > 34:
                progress("Warning: Coercing to string for concat:" + repr(item))
        pieces.append(item)
    return "".join(pieces)
def _scan(self, x, context=None):
    """Walk ``x`` and record where generated/anonymous terms occur.

    For a NonEmptyList or N3Set every member is passed to ``_scanObj``
    with the given ``context``.  For a Formula every statement's
    predicate, subject and object is examined: anonymous variables and
    generated fragments are noted in ``self._inContext`` (the single
    formula they were seen in, or "many") and counted per position in
    ``self._occurringAs``; each term other than ``x`` itself is then
    scanned recursively with ``x`` as its context.

    NOTE(review): nesting below was reconstructed; in particular confirm
    that ``_breakloops`` is meant to run once per formula, after the
    statement loop.
    """
    # progress("Scanning ", x, " &&&&&&&&")
    # assert self.context._redirections.get(x, None) is None, "Should not be redirected: "+`x`
    if verbosity() > 98:
        progress(
            "scanning %s a %s in context %s" % (
                ` x `, ` x.__class__ `, ` context `),
            x.generated(), self._inContext.get(x, "--"))
    if isinstance(x, NonEmptyList) or isinstance(x, N3Set):
        for y in x:
            self._scanObj(context, y)
    if isinstance(x, Formula):
        for s in x.statements:
            for p in PRED, SUBJ, OBJ:
                y = s[p]
                if (isinstance(y, AnonymousVariable) or
                        (isinstance(y, Fragment) and y.generated())):
                    z = self._inContext.get(y, None)
                    if z == "many":
                        continue  # forget it
                    if z is None:
                        # First sighting: remember the formula it lives in.
                        self._inContext[y] = x
                    elif z is not x:
                        # Seen in a different formula before: mark and skip.
                        self._inContext[y] = "many"
                        continue
                    # z is reused here as the occurrence count for position p.
                    z = self._occurringAs[p].get(y, 0)
                    self._occurringAs[p][y] = z + 1
                    # progress("&&&&&&&&& %s now occurs %i times as %s" %(`y`, z+1, "CPSO"[p]))
                # else:
                #     progress("&&&&&&&&& yyyy %s has class %s " %(`y`, `y.__class__`))
                if x is not y:
                    self._scan(y, x)
        self._breakloops(x)
def evaluateObject(self, subj_py):
    """Reformat an epoch-seconds time string with a strftime format.

    ``subj_py`` is a two-item sequence ``(seconds_text, format)``.  The
    text is converted with ``int`` and rendered in UTC via ``time.gmtime``.
    Returns None when conversion or formatting raises.
    """
    if verbosity() > 80:
        progress("strTime:format input:" + repr(subj_py))
    seconds_text, fmt = subj_py
    try:
        return time.strftime(fmt, time.gmtime(int(seconds_text)))
    except Exception:
        # Best effort: bad input yields None.  Catching Exception rather
        # than everything lets KeyboardInterrupt/SystemExit through.
        return None
def evaluateObject(self, subj_py):
    """Reformat an ISO time string with a strftime format.  Ignores TZ@@

    ``subj_py`` is a two-item sequence ``(iso_text, format)``.  The text
    is parsed with ``isodate.parse`` and rendered via ``time.gmtime``.
    Returns None when parsing or formatting raises.
    """
    if verbosity() > 80:
        progress("strTime:format input:" + repr(subj_py))
    iso_text, fmt = subj_py
    try:
        return time.strftime(fmt, time.gmtime(isodate.parse(iso_text)))
    except Exception:
        # Best effort: bad input yields None.  Catching Exception rather
        # than everything lets KeyboardInterrupt/SystemExit through.
        return None
def evaluateObject(self, subj_py):
    """Reformat an ISO time string with a strftime format.  Ignores TZ@@

    ``subj_py`` is a two-item sequence ``(iso_text, format)``.  The text
    is parsed with ``isodate.parse`` and rendered via ``time.gmtime``.
    Returns None when parsing or formatting raises.
    """
    if verbosity() > 80:
        progress("strTime:format input:" + repr(subj_py))
    iso_text, fmt = subj_py
    try:
        return time.strftime(fmt, time.gmtime(isodate.parse(iso_text)))
    except Exception:
        # Best effort: bad input yields None.  Catching Exception rather
        # than everything lets KeyboardInterrupt/SystemExit through.
        return None
def evaluateObject(self, subj_py):
    """Reformat an epoch-seconds time string with a strftime format.

    ``subj_py`` is a two-item sequence ``(seconds_text, format)``.  The
    text is converted with ``int`` and rendered in UTC via ``time.gmtime``.
    Returns None when conversion or formatting raises.
    """
    if verbosity() > 80:
        progress("strTime:format input:" + repr(subj_py))
    seconds_text, fmt = subj_py
    try:
        return time.strftime(fmt, time.gmtime(int(seconds_text)))
    except Exception:
        # Best effort: bad input yields None.  Catching Exception rather
        # than everything lets KeyboardInterrupt/SystemExit through.
        return None
def do(ele, level=0):
    """Convert one DOM node of a dict/string/array document into ``kb``.

    Text nodes are ignored and give None.  A ``dict`` element becomes a
    blank node with one statement per key/value pair, a ``string`` element
    becomes a literal, and an ``array`` element becomes a first/rest chain
    ending in ``kb.store.nil``.  Any other tag raises RuntimeError.

    NOTE(review): text children of an ``array`` produce None entries that
    are still passed to ``kb.newSymbol`` - confirm this is intended.
    """
    if isinstance(ele, Text):
        if verbosity() > 70:
            progress("Ignoring text '%s'" % ele.nodeValue)
        return None

    tag = ele.localName
    if verbosity() > 20:
        progress(" " * level, tag)

    if tag == "string":
        return kb.literal(ele.firstChild.data)

    if tag == "dict":
        node = kb.newBlankNode()
        pred = None
        for child in ele.childNodes:
            if isinstance(child, Text):
                if verbosity() > 70:
                    progress("Ignoring text '%s'" % child.nodeValue)
                continue
            if child.localName == "key":
                # An empty key falls back to a fixed placeholder name.
                pred = kb.newSymbol(child.firstChild.data or "nullProp")
            else:
                kb.add(node, pred, do(child, level + 1))
        return node

    if tag == "array":
        items = [do(child) for child in ele.childNodes]
        items.reverse()
        # Build the chain back to front so each cell can point at its tail.
        tail = kb.store.nil
        for item in items:
            cell = kb.newBlankNode()
            kb.add(cell, kb.store.first, kb.newSymbol(item))
            kb.add(cell, kb.store.rest, tail)
            tail = cell
        return tail

    raise RuntimeError("Unexpected tag %s" % tag)
def do(ele, level=0):
    """Convert one DOM node of a dict/string/array document into ``kb``.

    Text nodes are ignored and give None.  A ``dict`` element becomes a
    blank node with one statement per key/value pair, a ``string`` element
    becomes a literal, and an ``array`` element becomes a first/rest chain
    ending in ``kb.store.nil``.  Any other tag raises RuntimeError.

    NOTE(review): text children of an ``array`` produce None entries that
    are still passed to ``kb.newSymbol`` - confirm this is intended.
    """
    if isinstance(ele, Text):
        if verbosity() > 70:
            progress("Ignoring text '%s'" % ele.nodeValue)
        return None

    tag = ele.localName
    if verbosity() > 20:
        progress(" " * level, tag)

    if tag == "string":
        return kb.literal(ele.firstChild.data)

    if tag == "dict":
        node = kb.newBlankNode()
        pred = None
        for child in ele.childNodes:
            if isinstance(child, Text):
                if verbosity() > 70:
                    progress("Ignoring text '%s'" % child.nodeValue)
                continue
            if child.localName == "key":
                # An empty key falls back to a fixed placeholder name.
                pred = kb.newSymbol(child.firstChild.data or "nullProp")
            else:
                kb.add(node, pred, do(child, level + 1))
        return node

    if tag == "array":
        items = [do(child) for child in ele.childNodes]
        items.reverse()
        # Build the chain back to front so each cell can point at its tail.
        tail = kb.store.nil
        for item in items:
            cell = kb.newBlankNode()
            kb.add(cell, kb.store.first, kb.newSymbol(item))
            kb.add(cell, kb.store.rest, tail)
            tail = cell
        return tail

    raise RuntimeError("Unexpected tag %s" % tag)
def declareUniversal(self, v, key=None):
    """Add ``v`` to this formula's universal variables.

    Does nothing when ``v`` is already declared.  After adding it, raises
    ValueError when ``occurringIn`` reports that a symbol with the same
    URI occurs in this formula.  ``key`` is accepted but not used.
    """
    # A guard that rejected callers not passing key=AnonymousUniversal
    # (pointing them to newUniversal instead) is currently switched off.
    if verbosity() > 90:
        progress("Declare universal:", v)
    if v in self._universalVariables:
        return
    self._universalVariables.add(v)
    same_uri_symbol = self.newSymbol(v.uriref())
    if self.occurringIn(Set([same_uri_symbol])):
        raise ValueError("Are you trying to confuse me with %s?" % v)
def occurringIn(self, vars):
    """Collect which of ``vars`` occur in the members of this list.

    Walks the first/rest chain up to the EmptyList and merges each
    member's own ``occurringIn(vars)`` result into one accumulated value.
    """
    found = []
    if verbosity() > 98:
        progress("----occuringIn: ", repr(self))
    cell = self
    while not isinstance(cell, EmptyList):
        member, cell = cell.first, cell.rest
        found = merge(found, member.occurringIn(vars))
    return found
def occurringIn(self, vars):
    """Collect which of ``vars`` occur in the members of this list.

    Walks the first/rest chain up to the EmptyList and merges each
    member's own ``occurringIn(vars)`` result into one accumulated value.
    """
    found = []
    if verbosity() > 98:
        progress("----occuringIn: ", repr(self))
    cell = self
    while not isinstance(cell, EmptyList):
        member, cell = cell.first, cell.rest
        found = merge(found, member.occurringIn(vars))
    return found
def evaluateObject(self, subj_py):
    """Format the current UTC time.

    The subject is a ``time.strftime`` format string; an empty string
    selects the standard format ``%Y-%m-%dT%H:%M:%SZ``.  When formatting
    raises, the result of ``isodate.asString`` for the current time is
    returned instead.
    """
    if verbosity() > 80:
        progress("time:gmTime input:" + repr(subj_py))
    fmt = subj_py
    if fmt == "":
        fmt = "%Y-%m-%dT%H:%M:%SZ"
    try:
        return time.strftime(fmt, time.gmtime(time.time()))
    except Exception:
        # The fallback used to call the ``time`` module itself (``time()``),
        # which raised TypeError instead of producing a value.
        return isodate.asString(time.time())
def declareUniversal(self, v, key=None):
    """Add ``v`` to this formula's universal variables.

    Does nothing when ``v`` is already declared.  After adding it, raises
    ValueError when ``occurringIn`` reports that a symbol with the same
    URI occurs in this formula.  ``key`` is accepted for compatibility
    but not used.
    """
    # The guard that rejected callers not passing key=AnonymousUniversal
    # (pointing them to newUniversal) was disabled with ``if False`` and
    # could never run, so the dead branch is no longer carried here.
    if verbosity() > 90:
        progress("Declare universal:", v)
    if v in self._universalVariables:
        return
    self._universalVariables.add(v)
    same_uri_symbol = self.newSymbol(v.uriref())
    if self.occurringIn(Set([same_uri_symbol])):
        raise ValueError("Internal error: declareUniversal: %s?" % v)
def evaluateObject(self, subj_py):
    """Format the current UTC time.

    The subject is a ``time.strftime`` format string; an empty string
    selects the standard format ``%Y-%m-%dT%H:%M:%SZ``.  When formatting
    raises, the result of ``isodate.asString`` for the current time is
    returned instead.
    """
    if verbosity() > 80:
        progress("time:gmTime input:" + repr(subj_py))
    fmt = subj_py
    if fmt == "":
        fmt = "%Y-%m-%dT%H:%M:%SZ"
    try:
        return time.strftime(fmt, time.gmtime(time.time()))
    except Exception:
        # The fallback used to call the ``time`` module itself (``time()``),
        # which raised TypeError instead of producing a value.
        return isodate.asString(time.time())
def evalObj45(self, subj, queue, bindings, proof, query):
    """Concatenate the values of the terms in ``subj`` into a new literal.

    Each term's ``value()`` is coerced to a string when needed (longs and
    Decimals via ``make_string``, anything else via ``repr``).  A term
    whose ``value()`` raises UnknownType contributes its ``string``
    attribute instead.  ``queue``, ``bindings``, ``proof`` and ``query``
    are not used here.
    """
    # raise RuntimeError('I got here!')
    terms = list(subj)
    if verbosity() > 80:
        progress("Concatenation input:" + repr(terms))
    pieces = []
    for term in terms:
        try:
            text = term.value()
            if not isString(text):
                if type(text) == type(long()) or isinstance(text, Decimal):
                    text = make_string(text)
                else:
                    text = repr(text)
                if verbosity() > 34:
                    progress("Warning: Coercing to string for concat:" + repr(text))
            pieces.append(text)
        except UnknownType:
            progress("Warning: Coercing to string for concat:" + repr(term))
            pieces.append(term.string)
    return subj.store.newLiteral(''.join(pieces))
def occurringIn(self, vars):
    """Return the set of ``vars`` that occur in this statement.

    Unions the ``occurringIn(vars)`` results of the predicate, subject
    and object, skipping any part that is this statement itself.
    """
    found = Set()
    if verbosity() > 98:
        progress("----occuringIn: ", repr(self))
    for position in PRED, SUBJ, OBJ:
        term = self[position]
        if term is not self:
            found = found | term.occurringIn(vars)
    return found
def declareUniversal(self, v, key=None):
    """Add ``v`` to this formula's universal variables.

    Does nothing when ``v`` is already declared.  After adding it, raises
    ValueError when ``occurringIn`` reports that a symbol with the same
    URI occurs in this formula.  ``key`` is accepted for compatibility
    but not used.
    """
    # The guard that rejected callers not passing key=AnonymousUniversal
    # (pointing them to newUniversal) was disabled with ``if False`` and
    # could never run, so the dead branch is no longer carried here.
    if verbosity() > 90:
        progress("Declare universal:", v)
    if v in self._universalVariables:
        return
    self._universalVariables.add(v)
    same_uri_symbol = self.newSymbol(v.uriref())
    if self.occurringIn(Set([same_uri_symbol])):
        raise ValueError("Internal error: declareUniversal: %s?" % v)
def occurringIn(self, vars):
    """Return the set of ``vars`` that occur in this statement.

    Unions the ``occurringIn(vars)`` results of the predicate, subject
    and object, skipping any part that is this statement itself.
    """
    found = Set()
    if verbosity() > 98:
        progress("----occuringIn: ", repr(self))
    for position in PRED, SUBJ, OBJ:
        term = self[position]
        if term is not self:
            found = found | term.occurringIn(vars)
    return found
def evalObj45(self, subj, queue, bindings, proof, query):
    """Concatenate the values of the terms in ``subj`` into a new literal.

    Each term's ``value()`` is coerced to a string when needed (longs and
    Decimals via ``make_string``, anything else via ``repr``).  A term
    whose ``value()`` raises UnknownType contributes its ``string``
    attribute instead.  ``queue``, ``bindings``, ``proof`` and ``query``
    are not used here.
    """
    # raise RuntimeError('I got here!')
    terms = list(subj)
    if verbosity() > 80:
        progress("Concatenation input:" + repr(terms))
    pieces = []
    for term in terms:
        try:
            text = term.value()
            if not isString(text):
                if type(text) == type(long()) or isinstance(text, Decimal):
                    text = make_string(text)
                else:
                    text = repr(text)
                if verbosity() > 34:
                    progress("Warning: Coercing to string for concat:" + repr(text))
            pieces.append(text)
        except UnknownType:
            progress("Warning: Coercing to string for concat:" + repr(term))
            pieces.append(term.string)
    return subj.store.newLiteral(''.join(pieces))
def newStatement(self, s, why):
    """Record ``why`` as the reason for believing statement ``s``.

    Raises RuntimeError when any part of ``s.quad`` after the first is a
    Formula that is not its own canonical form.  When ``why`` is a
    Premise, BecauseOfRule or BecauseOfData, ``s`` is also added to that
    reason's ``statements``.
    """
    if verbosity() > 80 and why is not dontAsk:
        progress("Believing %s because of %s" % (s, why))
    import formula
    for term in s.quad[1:]:
        if isinstance(term, formula.Formula) and term.canonical is not term:
            raise RuntimeError(term)
    assert why is not self
    self.reasonForStatement[s] = why
    if isinstance(why, (Premise, BecauseOfRule, BecauseOfData)):
        why.statements.add(s)
def newStatement(self, s, why):
    """Record ``why`` as the reason for believing statement ``s``.

    Raises RuntimeError when any part of ``s.quad`` after the first is a
    Formula that is not its own canonical form.  When ``why`` is a
    Premise, BecauseOfRule or BecauseOfData, ``s`` is also added to that
    reason's ``statements``.
    """
    if verbosity() > 80 and why is not dontAsk:
        progress("Believing %s because of %s" % (s, why))
    import formula
    for term in s.quad[1:]:
        if isinstance(term, formula.Formula) and term.canonical is not term:
            raise RuntimeError(term)
    assert why is not self
    self.reasonForStatement[s] = why
    if isinstance(why, (Premise, BecauseOfRule, BecauseOfData)):
        why.statements.add(s)
def occurringIn(self, vars):
    """Return the set of ``vars`` that occur in this statement.

    Unions the ``occurringIn(vars)`` results of the predicate, subject
    and object, skipping any part that is this statement itself.
    """
    found = Set()
    if verbosity() > 98:
        progress("----occuringIn: ", repr(self))
    for position in PRED, SUBJ, OBJ:
        term = self[position]
        if term is not self:
            found = found | term.occurringIn(vars)
    return found
def evaluateObject(self, subj_py):
    """Return True when all items of ``subj_py`` are equal as strings.

    Items that are not strings are coerced first (longs and Decimals via
    ``make_string``, anything else via ``repr``).  Returns False at the
    first item that differs from the first one; an empty sequence gives
    True.
    """
    reference = None
    for item in subj_py:
        if not isString(item):
            if type(item) == type(long()) or isinstance(item, Decimal):
                item = make_string(item)
            else:
                item = repr(item)
            if verbosity() > 34:
                progress("Warning: Coercing to string for codepoint-equal:" + repr(item))
        if reference is None:
            reference = item
        elif reference != item:
            return False
    return True
def substitution(self, bindings, why=None, cannon=False, keepOpen=False):
    """Return this formula, or a copy with ``bindings`` substituted.

    When this formula is itself a key of ``bindings`` its bound value is
    returned.  When none of the bound names occur in it, the formula is
    returned unchanged.  Otherwise a new formula is loaded with the
    substitution applied; it is returned as is when ``keepOpen`` is true
    and canonicalized otherwise.
    """
    assert isinstance(bindings, dict)
    store = self.store
    if self in bindings:
        return bindings[self]
    occurring = self.occurringIn(bindings.keys())
    if occurring == Set():
        return self  # phew!
    replacement = store.newFormula()
    if verbosity() > 90:
        progress("substitution: formula" + repr(self) + " becomes new " + repr(replacement),
                 " because of ", occurring)
    replacement.loadFormulaWithSubstitution(self, bindings, why=why)
    if not keepOpen:
        return replacement.canonicalize(cannon=cannon)
    return replacement
def canonicalize(F):
    """Return the canonical version of this formula.

    Call this when the formula is in its final form, with all its
    statements.  When a canonical version is already recorded it is
    returned; otherwise the formula records itself as canonical and is
    returned.

    LIMITATION: the basic Formula class does NOT canonicalize, so it
    will not spot identical formulae.  The IndexedFormula will.
    """
    if F.canonical is not None:
        if verbosity() > 70:
            progress("Canonicalize -- @@ already canonical:" + repr(F))
        return F.canonical
    # @@@@@@@@ no canonicalization @@ warning
    F.canonical = F
    return F
def canonicalize(F):
    """Return the canonical version of this formula.

    Call this when the formula is in its final form, with all its
    statements.  When a canonical version is already recorded it is
    returned; otherwise the formula records itself as canonical and is
    returned.

    LIMITATION: the basic Formula class does NOT canonicalize, so it
    will not spot identical formulae.  The IndexedFormula will.
    """
    if F.canonical is not None:
        if verbosity() > 70:
            progress("Canonicalize -- @@ already canonical:" + repr(F))
        return F.canonical
    # @@@@@@@@ no canonicalization @@ warning
    F.canonical = F
    return F
def _scanObj(self, context, x):
    """Note that ``x`` appears as an object inside ``context``.

    Tracks in ``self._inContext`` whether ``x`` has been seen in just one
    context (marking it "many" otherwise, after which it is ignored),
    recurses into the members of lists and sets, and counts object
    occurrences of anonymous variables and generated fragments in
    ``self._occurringAs[OBJ]``.
    """
    seen_in = self._inContext.get(x, None)
    if seen_in == "many":
        return  # forget it
    if seen_in is None:
        self._inContext[x] = context
    elif seen_in is not context:
        self._inContext[x] = "many"
        return
    if isinstance(x, NonEmptyList) or isinstance(x, N3Set):
        for member in x:
            self._scanObj(context, member)
    if isinstance(x, AnonymousVariable) or (isinstance(x, Fragment) and x.generated()):
        count = self._occurringAs[OBJ].get(x, 0) + 1
        self._occurringAs[OBJ][x] = count
        if verbosity() > 98:
            # This method only ever counts object occurrences, so the role
            # letter is fixed.  It used to be looked up by the occurrence
            # count, which printed the wrong letter and raised IndexError
            # from the ninth occurrence on.
            progress("scan: %s, a %s, now has %i occurrences as %s"
                     % (x, x.__class__, count, "O"))
def substitution(self, bindings, why=None, cannon=False, keepOpen=False):
    """Return this formula, or a copy with ``bindings`` substituted.

    When this formula is itself a key of ``bindings`` its bound value is
    returned.  When none of the bound names occur in it, the formula is
    returned unchanged.  Otherwise a new formula is loaded with the
    substitution applied; it is returned as is when ``keepOpen`` is true
    and canonicalized otherwise.
    """
    assert isinstance(bindings, dict)
    store = self.store
    if self in bindings:
        return bindings[self]
    occurring = self.occurringIn(bindings.keys())
    if occurring == Set():
        return self  # phew!
    replacement = store.newFormula()
    if verbosity() > 90:
        progress("substitution: formula" + repr(self) + " becomes new " + repr(replacement),
                 " because of ", occurring)
    replacement.loadFormulaWithSubstitution(self, bindings, why=why)
    if not keepOpen:
        return replacement.canonicalize(cannon=cannon)
    return replacement
def evaluateObject(self, subj_py):
    """Return True when all items of ``subj_py`` are equal as strings.

    Items that are not strings are coerced first (longs and Decimals via
    ``make_string``, anything else via ``repr``).  Returns False at the
    first item that differs from the first one; an empty sequence gives
    True.
    """
    reference = None
    for item in subj_py:
        if not isString(item):
            if type(item) == type(long()) or isinstance(item, Decimal):
                item = make_string(item)
            else:
                item = repr(item)
            if verbosity() > 34:
                progress("Warning: Coercing to string for codepoint-equal:" + repr(item))
        if reference is None:
            reference = item
        elif reference != item:
            return False
    return True
def bind(self, prefix, uri):
    """Record a prefix hint for a namespace, for later use in output.

    This is only a hint: the parser passes on the prefix it met so that
    output can reuse a human-readable name for the same namespace rather
    than inventing one.  Nothing is recorded when the namespace already
    has a prefix; when the prefix is already taken, the call is retried
    with an underscore appended.
    """
    if ':' not in uri:
        # @@ should raise an exception, but sax callbacks crash.
        warn("@@URI must be absolute: %s" % uri)
    if self.prefixes.get(uri, None) is not None:
        return  # this namespace already has a prefix
    if self.namespaces.get(prefix, None) is not None:
        self.bind(prefix + "_", uri)  # Recursion unnecessary
        return
    # For conventions
    self.prefixes[uri] = prefix
    self.namespaces[prefix] = uri
    if verbosity() > 29:
        progress("RDFSink.bind: prefix %s: to <%s>. " % (prefix, uri))
def newStatement(self, s, why):
    """Record statement ``s`` as believed because of a merge.

    Here ``why`` is not a reason but the source: it is added to
    ``self.fodder`` and stored as the entry for ``s`` in
    ``self.reasonForStatement``.
    """
    if verbosity() > 80:
        message = "Merge: Believing %s because of merge" % (s)
        progress(message)
    self.fodder.add(why)
    self.reasonForStatement[s] = why
def evaluateObject(self, subj_py):
    """Return the string subject as a reference relative to the base URI.

    The result comes from ``uripath.refTo(uripath.base(), subj_py)``.  A
    non-string subject logs a warning and gives None.
    """
    if verbosity() > 80:
        progress("os:baseRelative input:" + repr(subj_py))
    if not isString(subj_py):
        progress("Warning: os:baseRelative input is not a string: " + repr(subj_py))
        return None
    return uripath.refTo(uripath.base(), subj_py)
def selectDefaultPrefix(self, printFunction):
    """Choose the default namespace and output the prefix bindings.

    The default is the namespace whose fragments have the most
    occurrences.  The RDF namespace itself is suppressed because the XML
    syntax has problems with it being default, as it is used for
    attributes.

    ``printFunction`` is called once with a dummy serializer (flags plus
    "d") so that namespace use can be counted without producing output.
    With the "d" flag already set, no default is chosen and the prefixes
    are simply dumped.

    NOTE(review): nesting below was reconstructed; confirm that the
    ``store.bind`` call sits inside the "no prefix yet" branch.
    """
    if "d" in self.flags:
        self.defaultNamespace = None
        self.dumpPrefixes()
        return
    # Dry run into a throwaway sink to collect per-namespace counts.
    dummySink = self.sink.dummyClone()
    dummySerializer = Serializer(self.context, sink=dummySink, flags=self.flags + "d", sorting=self.sorting)
    printFunction(dummySerializer)
    best = 0
    mp = None
    counts = dummySink.namespaceCounts()
    for r, count in counts.items():
        if verbosity() > 25:
            progress(" Count is %3i for %s" % (count, r))
        # Ties are broken by comparing the namespace strings, so the choice
        # does not depend on dictionary iteration order.
        if (r != RDF_NS_URI and count > 0 and (count > best or (count == best and mp > r))):  # Must be repeatable for retests
            best = count
            mp = r
    if verbosity() > 20:
        progress("# Most popular Namespace is %s with %i" % (mp, best))
    self.defaultNamespace = mp
    # Make up prefixes for things which don't have them:
    for r, count in counts.items():
        if count > 1 and r != mp:
            if self.store.prefixes.get(r, None) is None:
                # Start from the last path segment of the namespace URI...
                p = r
                if p[-1] in "/#":
                    p = p[:-1]
                slash = p.rfind("/")
                if slash >= 0:
                    p = p[slash + 1:]
                # ...and keep its leading run of characters from prefixchars.
                i = 0
                while i < len(p):
                    if p[i] in prefixchars:
                        i = i + 1
                    else:
                        break
                p = p[:i]
                if len(p) < 6 and self.store.namespaces.get(
                        p, None) is None:  # and p[:3]!='xml':
                    pref = p
                else:
                    # Try truncations in a fixed order of lengths; when all
                    # are taken (for/else), append a number from 2 upward.
                    p = p[:5]
                    for l in (3, 2, 4, 1, 5):
                        if self.store.namespaces.get(
                                p[:l], None) is None:  # and p[:l][:3]!='xml':
                            pref = p[:l]
                            break
                    else:
                        n = 2
                        while 1:
                            pref = p[:3] + ` n `
                            if self.store.namespaces.get(pref, None) is None:
                                break
                            n = n + 1
                self.store.bind(pref, r)
                if verbosity() > 50:
                    progress("Generated @prefix %s: <%s>."
                             % (pref, r))
    if self.defaultNamespace is not None:
        self.sink.setDefaultNamespace(self.defaultNamespace)
    # progress("&&&& Counts: ", counts)
    prefixes = self.store.namespaces.keys(
        )  # bind in same way as input did FYI
    prefixes.sort()  # For repeatability of test results
    for pfx in prefixes:
        r = self.store.namespaces[pfx]
        try:
            # Only namespaces that were counted and actually used get bound.
            count = counts[r]
            if count > 0:
                self.sink.bind(pfx, r)
        except KeyError:
            pass
    return
def endElementNS(self, name, qname):
    """Handle an end element event.

    Emits whatever statement the current parser state calls for (XML
    literal, plain literal value, or list terminator), then pops the
    saved frame from ``self._stack`` to restore state, context,
    predicate, subject, delayed statement and base, and finally flushes.
    While still nested inside an XML literal, the element end is handed
    to the literal handlers and only the stack entry is dropped.

    NOTE(review): nesting below was reconstructed; confirm in particular
    where ``self.testdata`` is reset in the literal branch.
    """
    if verbosity() > 80:
        indent = "- " * len(self._stack)
        progress(indent+'# End %s, State was'%name[1], self._state, ", delayed was ", `self._delayedStatement`)
    if self._delayedStatement == 1:
        if verbosity() > 80:
            progress("Delayed subject "+`self._subject`)
        # Close the subject saved in the enclosing frame (slot 3).
        self._stack[-1][3] = self._stack[-1][3].close()
    if self._state == STATE_LITERAL:
        self._litDepth = self._litDepth - 1
        if self._litDepth == 0:
            # Outermost element of the XML literal has just ended.
            buf = self.testdata
            if XMLLiteralsAsDomTrees:
                e = self.domDocument.documentElement.firstChild
                if e is None:
                    raise ValueError("Weird: " + `self.domDocument.documentElement`)
                # progress('e is '+`e`)
                # Skip leading text nodes.
                while e.nodeType == e.TEXT_NODE:
                    e = e.nextSibling
                #progress("@@@ e=", e, e.nodeName)
                self.domElement = e  # Leave for literal parser to pick up
                if self.sink:
                    self.sink.makeStatement(( self._context, self._predicate, self._subject, self.sink.newXMLLiteral(e) ), why=self._reason2)
            else:
                self._datatype = self.sink.newSymbol("http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral")
                self.sink.makeStatement(( self._context, self._predicate, self._subject, self.sink.newLiteral(buf, self._datatype) ), why=self._reason2)
            self.testdata = ""
        else:
            if XMLLiteralsAsDomTrees:
                self.literal_element_end_DOM(name, qname)
            else:
                self.literal_element_end(name, qname)
            self._stack.pop()
            return  # don't pop state
    elif self._state == STATE_VALUE:
        buf = self.testdata
        if self._datatype == None:  # RDFCore changes 2003 - can't have dt and lang
            lang = self._language
        else:
            lang = None
        obj = self.sink.newLiteral(buf, self._datatype, lang)
        self.sink.makeStatement(( self._context, self._predicate, self._subject, obj), why=self._reason2)
        self.testdata = ""
    elif self._state == STATE_LIST:
        # Terminate the list: current subject's rest is nil.
        self.sink.makeStatement(( self._context, self.sink.newSymbol(List_NS + "rest"), self._subject, self.sink.newSymbol(List_NS + "nil") ), why=self._reason2)
    elif self._state == STATE_DESCRIPTION:
        self._items.pop()
    elif self._state == STATE_NOVALUE or \
         self._state == STATE_NO_SUBJECT or \
         self._state == STATE_OUTERMOST or \
         self._state == STATE_NOT_RDF:  # [email protected] 2002-09-11
        pass
    else:
        raise RuntimeError, ("Unknown RDF parser state '%s' in end tag" % self._state, self._stack)
    # c1 = self._context
    # if self._subject is c1 and self_context is not c1:
    #     self._subject = self._subject.close() # close before use
    # Restore the frame saved when this element started.
    l = self._stack.pop()  #
    self._state = l[0]
    self._context = l[1]
    self._predicate = l[2]
    self._subject = l[3]
    if self._delayedStatement != None:
        if self._delayedStatement == 1:
            pass
            # progress("Delayed subject "+`self._subject`)
            # self._subject = self._subject.close()
        else:
            # A held-back statement: close its object, then emit it.
            c, p, s, o = self._delayedStatement
            o = o.close()
            self.sink.makeStatement((c, p, s, o), why=self._reason2)
            self._delayedStatement = None
    self._delayedStatement = l[4]
    self._base = l[5]
    self.flush()
def startElementNS(self, name, qname, attrs):
    """Handle the SAX start-of-element event.

    Pushes the current parser frame (state, context, predicate, subject,
    delayed statement, base) onto self._stack, updates base URI, language
    and datatype from the element's attributes, and then dispatches on
    self._state to interpret the element as a node element, a property
    element, a list item or part of an XML literal.

    name  -- element name, indexed as name[0] (may be false/empty) and
             name[1]; the two parts are concatenated to form the tag URI
    qname -- qualified name; only passed on to the literal handlers and
             quoted in error messages
    attrs -- mapping of attributes keyed by (namespace, local name) tuples

    Raises:
      BadSyntax      -- forbidden predicate URI, nodeID that is not an
                        NCName, or an element where no value is expected
      ValueError     -- an ID attribute on a property element
      SyntaxError    -- unrecognised parseType value
      RuntimeError   -- self._state is not a known state
      AssertionError -- conflicting attributes on a property element
    """
    if self._state != STATE_LITERAL:
        self.flush()
    self.bnode = None

    tagURI = (name[0] or "") + name[1]

    if verbosity() > 80:
        indent = ". " * len(self._stack)
        if not attrs:
            progress(indent + '# State was', self._state,
                     ', start tag: <' + tagURI + '>')
        else:
            # Use dedicated loop names here: rebinding `name` in this
            # trace loop would hand an attribute key, instead of the
            # element name, to the literal handlers and the error message
            # further down whenever tracing is switched on.
            line = '# State =%s, start tag= <%s ' % (self._state, tagURI)
            for attrName, attrValue in attrs.items():
                line = (line + " " + repr(attrName) + '=' + '"'
                        + repr(attrValue) + '"')
            progress(indent + line + '>')

    self._stack.append([self._state, self._context, self._predicate,
                        self._subject, self._delayedStatement, self._base])

    self._delayedStatement = None

    self._base = uripath.join(
        self._base, attrs.get((XML_NS_URI, "base"), self._base))
    hashPos = self._base.find("#")
    if hashPos >= 0:
        # Drop any fragment. See rdf-tests/rdfcore/xmlbase/test013.rdf
        self._base = self._base[:hashPos]

    try:
        # If relative, make absolute. Not needed for standard.
        tagURI = uripath.join(self._base, tagURI)
    except ValueError:
        pass  # Needed for portable RDF generated with --rdf=z

    self._language = attrs.get((XML_NS_URI, "lang"), None)

    datatypeURI = attrs.get((RDF_NS_URI, "datatype"), None)
    if datatypeURI is not None:
        self._datatype = self.sink.newSymbol(self.uriref(datatypeURI))
    else:
        self._datatype = None

    if self._state == STATE_OUTERMOST:
        if tagURI == RDF_NS_URI + "RDF":
            self._state = STATE_NO_SUBJECT
        elif "R" not in self.flags:
            self._state = STATE_NOT_RDF  # Ignore random XML without rdf:RDF
        else:
            self._nodeElement(tagURI, attrs)  # Parse it as RDF.
            # http://www.w3.org/2000/10/rdf-tests/rdfcore/rdf-element-not-mandatory/test001.rdf

    elif self._state == STATE_NOT_RDF:
        if tagURI == RDF_NS_URI + "RDF" and "T" in self.flags:
            self._state = STATE_NO_SUBJECT
        # Otherwise: ignore embedded RDF

    elif self._state == STATE_NO_SUBJECT:
        # MS1.0 6.2 obj :: description | container
        self._nodeElement(tagURI, attrs)

    elif self._state == STATE_DESCRIPTION:
        # Expect predicate (property) PropertyElt
        # propertyElt #MS1.0 6.12
        # http://www.w3.org/2000/03/rdf-tracking/#rdf-containers-syntax-ambiguity
        if tagURI == RDF_NS_URI + "li":
            # rdf:li becomes the next numbered membership property.
            item = self._items[-1] + 1
            self._predicate = self.sink.newSymbol(
                "%s_%s" % (RDF_NS_URI, item))
            self._items[-1] = item
        else:
            if tagURI in propertyElementExceptions:
                raise BadSyntax(sys.exc_info(),
                                'Invalid predicate URI: %s' % tagURI)
            self._predicate = self.sink.newSymbol(tagURI)

        self._state = STATE_VALUE  # May be looking for value but see parse type
        self.testdata = ""  # Flush value data

        properties = []
        gotSubject = 0
        haveResource = 0
        haveParseType = 0
        haveExtras = 0
        for (ns, localName), value in attrs.items():
            if localName == "ID":
                # I consider these a bug
                print("# Warning: ID=%s on statement ignored" % value)
                raise ValueError("ID attribute? Reification not supported.")

            elif localName == "parseType":
                haveParseType = 1
                if value == "Resource":
                    c = self._context
                    s = self._subject
                    # @@ not according to current syntax
                    self.idAboutAttr(attrs)
                    self.sink.makeStatement(
                        (c, self._predicate, s, self._subject),
                        why=self._reason2)
                    self._state = STATE_DESCRIPTION  # Nest description

                elif value == "Quote":
                    c = self._context
                    s = self._subject
                    # set subject and context for nested description
                    self.idAboutAttr(attrs)
                    # Forget anonymous genid - context is subject
                    self._subject = self.sink.newFormula()
                    if self._predicate is self.merge:  # magic :-(
                        # St C P S retrofit subject of outer level!
                        self._stack[-1][3] = self._subject
                        self._delayedStatement = 1  # flag
                    else:
                        self._delayedStatement = (
                            c, self._predicate, s, self._subject)
                    self._context = self._subject
                    self._subject = None
                    # Inside quote, there is no subject
                    self._state = STATE_NO_SUBJECT

                elif (value == "Collection"
                      or value[-11:] == ":collection"):
                    # Is this a daml:collection qname?
                    self._state = STATE_LIST  # Linked list of obj's

                elif value == "Literal" or "S" in self.flags:
                    # Strictly, other types are literal SYN#7.2.20
                    # That's an XML subtree not a string
                    self._state = STATE_LITERAL
                    self._litDepth = 1
                    self.LiteralNS = [{}]
                    self.testdata = ''
                    self._datatype = self.sink.newSymbol(
                        "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral")
                    if XMLLiteralsAsDomTrees:
                        self.domDocument = \
                            self.domImplementation.createDocument(
                                'http://www.w3.org/1999/02/22-rdf-syntax-ns',
                                'envelope', None)
                        self.domElement = self.domDocument.documentElement

                else:
                    raise SyntaxError("Unknown parse type '%s'" % value)

            elif localName == "nodeID":
                assert not gotSubject
                if not isXML.isNCName(value):
                    raise BadSyntax(sys.exc_info(),
                                    'A nodeID must be a NCName %s' % value)
                # Reuse the node already allocated for this nodeID, if any.
                obj = self._nodeIDs.get(value, None)
                if obj is None:
                    obj = self.newBlankNode()
                    self._nodeIDs[value] = obj
                self.sink.makeStatement(
                    (self._context, self._predicate, self._subject, obj),
                    why=self._reason2)
                self._state = STATE_NOVALUE  # NOT looking for value
                self._subject = obj
                gotSubject = 1

            elif localName == "resource":
                haveResource = 1
                assert not gotSubject
                target = self.sink.newSymbol(self.uriref(value))
                self.sink.makeStatement(
                    (self._context, self._predicate, self._subject, target),
                    why=self._reason2)
                self._state = STATE_NOVALUE  # NOT looking for value
                self._subject = target
                gotSubject = 1

            elif localName == "datatype":
                pass  # Already set

            elif ns == XML_NS_URI or localName[:3] == "xml":
                # Ignore (lang is already done)
                # see rdf-tests/rdfcore/unrecognised-xml-attributes/test002.rdf
                pass

            else:
                haveExtras = 1
                # wait till subject is clear
                properties.append((ns, localName, value))

        assert haveResource + haveParseType <= 1
        assert haveParseType + haveExtras <= 1

        if not gotSubject and properties:
            # Property attributes with no explicit object: hang them on a
            # fresh blank node that becomes the object of this property.
            obj = self.newBlankNode()
            self.sink.makeStatement(
                (self._context, self._predicate, self._subject, obj),
                why=self._reason2)
            self._state = STATE_NOVALUE  # NOT looking for value
            self._subject = obj

        for ns, localName, value in properties:
            self._propertyAttr(ns, localName, value)

    elif self._state == STATE_LIST:
        # damlCollection :: objs - make list
        # Subject and predicate are set and dangling.
        c = self._context
        s = self._subject  # The tail of the list so far
        p = self._predicate
        pair = self.newBlankNode()  # The new pair
        self.sink.makeStatement((c, p, s, pair),  # Link in new pair
                                why=self._reason2)
        # set subject (the next item) and context
        self.idAboutAttr(attrs)
        if tagURI != RDF_NS_URI + "Description":
            self.sink.makeStatement(
                (c,
                 self.sink.newSymbol(RDF_NS_URI + "type"),
                 self._subject,
                 self.sink.newSymbol(tagURI)),
                why=self._reason2)

        self.sink.makeStatement(
            (c,
             self.sink.newSymbol(List_NS + "first"),
             pair,
             self._subject),
            why=self._reason2)  # new item
        if "S" in self.flags:  # Strictly to spec
            # NOTE(review): every other quad in this method is ordered
            # (context, predicate, subject, object); here the List class
            # sits in the subject slot and the item in the object slot,
            # which looks swapped -- confirm before relying on it.
            self.sink.makeStatement(
                (c,
                 self.sink.newSymbol(RDF_NS_URI + "type"),
                 self.sink.newSymbol(List_NS + "List"),
                 self._subject),
                why=self._reason2)  # new item

        # Leave dangling link  #@check
        self._stack[-1][2] = self.sink.newSymbol(List_NS + "rest")
        # Underlying state tracks tail of growing list
        self._stack[-1][3] = pair

    elif self._state == STATE_VALUE:
        # Value :: Obj in this case #MS1.0 6.17 6.2
        c = self._context
        p = self._predicate
        s = self._subject
        # Parse the object thing's attributes
        self._nodeElement(tagURI, attrs)
        self.sink.makeStatement((c, p, s, self._subject),
                                why=self._reason2)
        # When we return, cannot have literal now
        self._stack[-1][0] = STATE_NOVALUE

    elif self._state == STATE_NOVALUE:
        stackDump = "".join([repr(frame) + "\n" for frame in self._stack])
        raise BadSyntax(
            sys.exc_info(),
            """Expected no value, found name=%s; qname=%s, attrs=%s in nested context:\n%s"""
            % (name, qname, attrs, stackDump))

    elif self._state == STATE_LITERAL:
        self._litDepth = self._litDepth + 1
        if XMLLiteralsAsDomTrees:
            self.literal_element_start_DOM(name, qname, attrs)
        else:
            self.literal_element_start(name, qname, attrs)
        # @@ need to capture the literal

    else:
        # Unknown state
        raise RuntimeError("Unknown state in RDF parser", self._stack)