def rx2nt(path, url=None, debug=0, namespaceAware=0, scope=''):
    '''
    Return the statements of an RxML document as a string of N-Triples.

    path is either a stream-like object or a string that is a file path;
    it is passed, together with the other arguments, straight through to
    rx2statements.
    '''
    triples = rx2statements(path, url, debug, namespaceAware, scope)
    #serialize into an in-memory buffer and hand back its contents
    buf = StringIO.StringIO()
    RxPath.writeTriples(triples, buf)
    return buf.getvalue()
def ParseRDF(context, contents, type='unknown', uri=''):
    '''
    Parse a string of RDF and return a one-item list holding the RxPath
    DOM built from its statements.

    contents, type and uri are converted with StringValue first. When uri
    is empty a fresh 'urn:uuid:' URI is generated for it. The namespace map
    and schema class are taken from the context node's owner document when
    it has them.
    '''
    contents, type, uri = (StringValue(contents), StringValue(type),
                           StringValue(uri))
    if not uri:
        from Ft.Lib import Uuid
        uri = 'urn:uuid:' + Uuid.UuidAsString(Uuid.GenerateUuid())

    ownerDoc = context.node.ownerDocument
    nsRevMap = getattr(ownerDoc, 'nsRevMap', None)
    schemaClass = getattr(context.node.ownerDocument, 'schemaClass',
                          RxPath.defaultSchemaClass)

    parsed = RxPath.parseRDFFromString(contents, uri, type)
    dom = RxPath.RxPathDOMFromStatements(parsed, nsRevMap, uri, schemaClass)
    return [dom]
def main(argv=sys.argv, out=sys.stdout): if '-n' in argv: print >> out, rx2nt(argv[2]) elif '-z' in argv: print >> out, zml2RDF_XML(file(argv[2]), addRootElement=False) elif '-r' in argv: model, db = RxPath.deserializeRDF(argv[2]) nsMap = {'bnode': BNODE_BASE, 'rx': RX_NS} revNsMap = dict([(x[1], x[0]) for x in nsMap.items() if x[0] and ':' not in x[0]]) rdfDom = RxPath.createDOM(RxPath.FtModel(model), revNsMap) print >> out, getRXAsZMLFromNode(rdfDom.childNodes, nsMap) else: print >> out, '''
def applyXslt(self, xslStylesheet, topLevelParams=None, extFunctionMap=None,
              baseUri='file:', styleSheetCache=None):
    '''
    Run xslStylesheet against self.dom with a new RxSLTProcessor.

    Returns a (result, stylesheet) pair where stylesheet is the
    processor's stylesheet attribute after the transformation.
    '''
    xsltProcessor = RxPath.RxSLTProcessor()
    output = RxPath.applyXslt(
        self.dom,
        xslStylesheet,
        topLevelParams,
        extFunctionMap,
        baseUri,
        styleSheetCache,
        processor=xsltProcessor)
    return output, xsltProcessor.stylesheet
def testXPathSecurity(self):
    '''
    test that we can't access insecure 4Suite extension functions
    after importing raccoon

    Evaluating xf:env-var() must raise XPath.RuntimeException; the test
    fails if the expression is evaluated without raising.
    '''
    from rx import RxPath
    from Ft.Xml import XPath
    node = None
    context = XPath.Context.Context(node, processorNss=raccoon.DefaultNsMap)
    #NOTE(review): not referenced below (only by the debugging line);
    #kept in case the import itself is needed -- confirm
    from Ft.Xml.XPath import BuiltInExtFunctions
    #print BuiltInExtFunctions.ExtFunctions[(XPath.FT_EXT_NAMESPACE, 'env-var')]
    try:
        RxPath.evalXPath('xf:env-var("foo")', context)
    except XPath.RuntimeException:
        pass #expected: the insecure function is not available
    else:
        #without this branch the test could never fail
        self.fail('xf:env-var("foo") was evaluated without raising '
                  'XPath.RuntimeException')
def If(context, cond, v1, v2=None):
    """
    just like Ft.Xml.XPath.BuiltInExtFunctions.If but the then and else
    parameters are strings that are evaluated dynamically, thus supporting
    the short circuit logic you expect from if expressions

    Only the selected branch is evaluated. With a false condition and no
    else branch (v2 is None) an empty nodeset is returned.
    """
    from Ft.Xml.XPath import Conversions
    from rx import raccoon
    queryCache = getattr(context.node.ownerDocument, 'queryCache', None)
    expCache = raccoon.RequestProcessor.expCache

    #pick the branch first, then evaluate it in a single place
    if Conversions.BooleanValue(cond):
        chosen = v1
    elif v2 is None:
        return []
    else:
        chosen = v2
    expression = Conversions.StringValue(chosen)
    return RxPath.evalXPath(expression, context, expCache, queryCache)
def processContents(self, result, kw, contextNode, contents):
    '''
    Hand contents to the server's updateStoreWithRDF and return whatever
    that call returns.

    The server is read from kw['__server__']; the parse type comes from
    kw['parseType'] (falling back to self.rdfFormat), the resource URI from
    kw['__resource'] and the resources to replace from
    kw['resourceToReplace']. result and contextNode are not used here.
    '''
    rdfType = kw.get('parseType', self.rdfFormat)
    resourceUri = kw.get('__resource')
    if resourceUri:
        resourceUri = RxPath.StringValue(resourceUri)
    toReplace = kw.get('resourceToReplace')
    #returns None -- ok!?
    return kw['__server__'].updateStoreWithRDF(contents, rdfType,
                                               resourceUri, toReplace)
def SerializeRDF(context, resultset, type='rdfxml', nsMapString=None,
                 fixUp=None, fixUpPredicate=None):
    '''Returns a nodeset containing a RDF serialization of the
    RxPathDOM nodes contained in resultset parameter.

    nsMapString is a namespace dictionary encoded as a string in the form
    of "prefix^uri^prefix^uri...". When it is not given, the reverse
    namespace map of the first node's root node is used. If the first node
    is a document node, its children are serialized instead.
    '''
    uri2prefixMap = None
    if nsMapString:
        #even slots are prefixes, odd slots the uris they stand for
        tokens = StringValue(nsMapString).split('^')
        uri2prefixMap = dict(zip(tokens[1::2], tokens[0::2]))

    if resultset:
        head = resultset[0]
        if uri2prefixMap is None:
            uri2prefixMap = head.rootNode.nsRevMap
        if head.nodeName == '#document':
            resultset = head.childNodes

    stmts = []
    for node in resultset:
        asNodeset = [node]
        if RxPath.isResource(context, asNodeset):
            predicates = node.childNodes
        elif RxPath.isPredicate(context, asNodeset):
            predicates = asNodeset
        else:
            predicates = [] #error?
        for pred in predicates:
            stmts.extend(pred.getModelStatements())
            if not RxPath.isResource(context, pred.childNodes):
                continue
            if pred.firstChild.isCompound():
                #object is a list so add all the list items too
                stmts.extend(pred.firstChild.getModelStatements())
    return RxPath.serializeRDF(stmts, type, uri2prefixMap, fixUp,
                               fixUpPredicate)
def eval(l, node):
    '''
    Evaluate the enclosing xpath with node as the context node and append
    the outcome to l, which is also returned.

    A result that is not a list is turned into a unicode string (decoding
    as utf8 when needed) and converted with String2NodeSet first.
    '''
    #move the shared context on to this node
    mapContext.node = node
    mapContext.position += 1
    mapContext.varBindings[(RXWIKI_XPATH_EXT_NS, 'current')] = node

    value = RxPath.evalXPath(xpath, mapContext, expCache, queryCache)
    if type(value) != list:
        text = value
        if not isinstance(text, unicode):
            text = unicode(str(text), 'utf8')
        value = String2NodeSet(mapContext, text)
    l.extend(value)
    return l
def getResource(s, rxNSPrefix, nsMap, thisResource):
    '''
    Work out the resource URI (and optional class type) that the RxML
    element s stands for. Returns a (resource, typeName) pair.

    - a this-resource element yields thisResource (which must be set)
    - a resource element yields its 'id' attribute, or a new bnode when
      the attribute is empty
    - any other element with an rx id attribute (deprecated form) yields
      that id (or a new bnode when empty) plus the element name as type
    - otherwise the element name itself is the resource
    '''
    if matchName(s, rxNSPrefix, 'this-resource'):
        assert thisResource
        return thisResource, None

    if matchName(s, rxNSPrefix, 'resource'):
        uri = s.getAttributeNS(EMPTY_NAMESPACE, 'id')
        if not uri:
            uri = RxPath.generateBnode()
        return uri, None

    #deprecated: if the element has an id attribute treat it as the
    #resource URI ref and the element name as the class type
    uri = getAttributefromQName(s, rxNSPrefix, 'id')
    if uri is None:
        return getURIFromElementName(s, nsMap), None
    if not uri:
        uri = RxPath.generateBnode()
    return uri, getURIFromElementName(s, nsMap)
def rx2model(path, url=None, debug=0, namespaceAware=0, scope=''):
    '''
    Parse the RxML and return a (model, nsMap) pair: an RxPath.MemModel
    containing its statements and the namespace map produced while
    adding them.

    When url is given the document is read from the stream of that URI,
    otherwise path is handed to the parser directly. debug is not used.
    '''
    from xml.dom import expatbuilder
    source = path
    if url:
        source = InputSource.DefaultFactory.fromUri(url).stream
    rxDoc = expatbuilder.parse(source, namespaces=namespaceAware)

    model = RxPath.MemModel()
    namespaces = addRxdom2Model(rxDoc, model, thisResource='wikiwiki:',
                                scope=scope)
    return model, namespaces
def testCaching(self):
    '''
    Check the cache keys computed for an XPath expression and for an XSLT
    stylesheet that both call wf:get-metadata('url').
    '''
    root = raccoon.RequestProcessor(a='testMinimalApp.py', model_uri='test:')
    from rx import RxPath
    from Ft.Xml import XPath
    node = root.domStore.dom
    #request metadata: the 'url' entry is what wf:get-metadata('url') looks up
    kw = {'url': 'foo:', '__server__': root}
    vars, extFunMap = root.mapToXPathVars(kw)
    context = XPath.Context.Context(node, varBindings=vars, extFunctionMap=extFunMap, processorNss=raccoon.DefaultNsMap)
    xpath = "wf:get-metadata('url')"
    compExpr = RxPath._compileXPath(xpath, context)
    key = raccoon.getKeyFromXPathExp(compExpr, context, root.NOT_CACHEABLE_FUNCTIONS)
    #expected key: the expression text, the (None, u'url') name, its value
    #from kw and the key of the context node
    self.failUnless(key == ('wf:get-metadata("url")', (None, u'url'), 'foo:', node.getKey()))
    #the same metadata lookup, this time made from inside a stylesheet
    styleSheetContents = ''' <x:stylesheet version="1.0" xmlns:x="http://www.w3.org/1999/XSL/Transform" xmlns:wf='http://rx4rdf.sf.net/ns/raccoon/xpath-ext#'> <x:template match='/'> <x:variable name='url' select='wf:get-metadata("url")' /> </x:template></x:stylesheet> '''
    styleSheetKey = raccoon.getXsltCacheKeyPredicate(
        root.styleSheetCache, root.NOT_CACHEABLE_FUNCTIONS,
        styleSheetContents, '<root />', kw, node, styleSheetUri='test:')
    #expected key: stylesheet text, its uri, the source document, the node
    #key, then two entries for the (None, u'url') name
    self.failUnless(styleSheetKey == (styleSheetContents, 'test:', '<root />', node.getKey(), ((None, u'url'), False), ((None, u'url'), 'foo:')))
def getResourceNameFromURI(resNode):
    '''
    Return the ZML name for the resource node's rdf:about URI.

    When the URI splits into a namespace known to revNsMap plus a
    non-empty local part the result is "prefix:local" (or just "local"
    for an empty prefix); otherwise it is the whole URI wrapped in braces.
    If the enclosing fixUp template is set it is applied with the keys
    uri, encodeduri and res.
    '''
    namespaceURI = resNode.getAttributeNS(RDF_MS_BASE, 'about')
    assert namespaceURI
    prefixURI, rest = RxPath.splitUri(namespaceURI)

    if rest and revNsMap.has_key(prefixURI):
        #abbreviate using the known prefix
        prefix = revNsMap[prefixURI]
        if prefix:
            name = prefix + ':' + rest
        else:
            name = rest
        if fixUp:
            name = fixUp % utils.kw2dict(
                uri=namespaceURI,
                encodeduri=urllib.quote(namespaceURI),
                res=name)
        return name

    #no usable prefix: write out the full URI
    if fixUp:
        namespaceURI = fixUp % utils.kw2dict(
            uri=namespaceURI,
            encodeduri=urllib.quote(namespaceURI),
            res=namespaceURI)
    return '{' + namespaceURI + '}'
def addList2Model(model, subject, p, listID, scope, getObject=getObject):
    '''
    Add the children of element p to model as an rdf:List whose first
    node is listID.

    Comment nodes are skipped. Each remaining child becomes the rdf:first
    of a list node; every node after the first gets a generated bnode.
    The last node's rdf:rest is rdf:nil. subject is not used here.
    '''
    listClass = RDF_MS_BASE + 'List'

    def emit(node, localName, value, valueType=OBJECT_TYPE_RESOURCE):
        model.addStatement(
            Statement(node, RDF_MS_BASE + localName, value, valueType, scope))

    previous = None
    for child in p.childNodes:
        if child.nodeType == p.COMMENT_NODE:
            continue
        item, itemType = getObject(child)
        if previous:
            #close the previous node and chain it to a fresh one
            listID = RxPath.generateBnode()
            emit(previous, 'type', listClass)
            emit(previous, 'rest', listID)
        emit(listID, 'first', item, itemType)
        previous = listID

    #terminate the list
    emit(listID, 'type', listClass)
    emit(listID, 'rest', RDF_MS_BASE + 'nil')
def _to_sjson(self, root, depth=-1, exclude_blankids=False):
    """
    Convert an RxPathDom node (or an iterable of statements) to SJSON.

    If resource is a references more than once, just the string is output
    RDF lists and containers that are not SJSON sequences

    root may be an RxPathDom Document, DocumentFragment, Resource,
    predicate or Text node; anything that is not an RxPathDom.Node is
    loaded into a new DOM first. Returns the value of a Text root, a list
    of dicts, or -- when some resources ended up in the shared map -- a
    dict with the keys 'results' and 'shared'. Raises TypeError for any
    other kind of node. depth and exclude_blankids are not implemented.

    >>> r = Res("http://example.org/book#1"); r['v1'] = 'string'; r['v2'] = 1;
    >>> "http://example.org/book#2"
    >>> r['l'] = [1, 2, 3, 4, 5]; r['r'] = Res('o')
    >>> sjson().to_sjson(doc())
    """
    #XXX depth
    #XXX exclude_blankids (but if false, will need to add back if shared)
    #use RxPathDom, expensive but arranges as sorted tree, normalizes RDF collections et al.
    #and is schema aware
    from rx import RxPathDom
    if not isinstance(root, RxPathDom.Node):
        #assume doc is iterator of statements or quad tuples
        #note: order is not preserved
        root = RxPath.createDOM(RxPath.MemModel(root),
                                schemaClass=RxPath.BaseSchema)

    results = []
    seen = {}    #uri -> (parent container, key, value)
    shared = {}  #qname -> value of resources moved out of the tree
    if isinstance(root, (RxPathDom.Document, RxPathDom.DocumentFragment)):
        #XXX RxPathDom.DocumentFragment
        if isinstance(root, RxPathDom.Document):
            #filter out propseq resources and resources with no properties
            nodes = [n for n in root.childNodes
                     if n.childNodes and not n.matchName(JSON_BASE, 'propseqtype')]
        else:
            nodes = [n for n in root.childNodes]
        results = [{} for _ in nodes]
        todo = [(results, i, n) for i, n in enumerate(nodes)]
    elif isinstance(root, RxPathDom.Resource):
        results = [{}]
        todo = [(results, 0, root)]
    elif isinstance(root, RxPathDom.BasePredicate):
        #the object is the predicate's own child (this used to read an
        #unbound name and so always raised)
        obj = root.childNodes[0]
        key = self.QName(root.parentNode.uri)
        propmap = {self.PROPERTYMAP: self.QName(root.stmt.predicate)}
        if isinstance(obj, RxPathDom.Text):
            v = self._value(obj)
            todo = []
        else:
            v = {}
            todo = [(propmap, key, obj)]
        propmap[key] = v
        results = [propmap]
    elif isinstance(root, RxPathDom.Text):
        #return string value
        return self._value(root)
    else:
        raise TypeError('Unexpected root node')

    def setobj(obj, res, parent, key):
        #store the json value for obj (an object of res) at parent[key]
        if isinstance(obj, RxPathDom.Text):
            v = self._value(obj)
        #otherwise its a resource
        elif obj.uri == res.uri:
            #object is same as subject
            v = self.QName(obj.uri)
        elif obj.uri == RDF_MS_BASE + 'nil':
            v = [] #empty list
        else:
            uri = obj.uri
            #if an object appears in the tree more than once,
            #replace prior reference with uri
            #and add to shared
            prior = seen.get(uri)
            if prior:
                v = self.QName(uri)
                shared[v] = prior[0][prior[1]]
                prior[0][prior[1]] = v
            else:
                v = {}
                seen[uri] = (parent, key, v)
                todo.append((parent, key, obj))
        parent[key] = v

    def setPropSeq(propseq, res):
        #XXX what about empty lists?
        #NOTE(review): propbag stays unbound if propseq has no PROPBAG
        #predicate, which makes the return below raise -- confirm that
        #every propseq resource carries one
        childlist = []
        for p in propseq.childNodes:
            prop = p.stmt.predicate
            obj = p.childNodes[0]
            if prop == PROPBAG:
                propbag = obj.uri
            elif prop == RxPath.RDF_SCHEMA_BASE + u'member':
                childlist.append(0)
                setobj(obj, res, childlist, len(childlist) - 1)
        return propbag, childlist

    while todo:
        #res (the resource) has already been attached to its parent
        #now we need to assign its properties
        parent, key, res = todo.pop()
        if res.uri not in seen:
            seen[res.uri] = (parent, key, parent[key])
        else:
            parent[key] = res.uri #replace object with uri reference
            prior = seen[res.uri]
            key = self.QName(res.uri)
            if key in shared:
                continue
            #add to shared
            shared[key] = prior[2]
            parent = shared

        if res.childNodes:
            s = parent[key]
            s[self.ID] = res.uri
        else:
            #no properties, just write out the id, not a dict
            #and don't bother including it in shared
            #XXX but then we can't tell if its an id or just a string?
            if parent is shared:
                del parent[key]
            else:
                parent[key] = res.uri
            continue

        currentlist = []
        for p in res.childNodes:
            prop = p.stmt.predicate
            if prop == PROPSEQ:
                #this will replace sequences
                seqprop, childlist = setPropSeq(p.childNodes[0], res)
                s[self.QName(seqprop)] = childlist

        for p in res.childNodes:
            prop = p.stmt.predicate
            if prop == PROPSEQ:
                continue
            if self.QName(prop) in s:
                continue #must have be already handled by getPropSeq

            #consecutive predicates with the same property become a list
            nextMatches = p.nextSibling and p.nextSibling.stmt.predicate == prop
            #XXX Test empty and singleton rdf lists and containers
            if nextMatches or currentlist:
                parent = currentlist
                key = len(currentlist)
                currentlist.append(0)
            else:
                parent = s
                key = self.QName(prop)

            obj = p.childNodes[0]
            setobj(obj, res, parent, key)
            if currentlist and not nextMatches:
                s[self.QName(prop)] = currentlist
                currentlist = [] #list done

    if shared:
        results = {'results': results, 'shared': shared}
    #if self.nsmap:
    #    results['prefix'] = self.nsmap
    return results
def initModel(location, defaultModel):
    '''
    Build an RxPath.MemModel from location when that path exists on disk,
    and from defaultModel otherwise.
    '''
    source = defaultModel
    if os.path.exists(location):
        source = location
    return RxPath.MemModel(source)
def modelFromJson(model):
    '''
    Wrap model as the 'results' of an sjson document, convert it to RDF
    and return an RxPath.MemModel built from the outcome.
    '''
    converter = sjson.sjson()
    statements = converter.to_rdf({'results': model})
    return RxPath.MemModel(statements)
def _saveContents(self, filepath, contents, altfilename=None, indexURI=None, title='', previousRevisionDigest='', maxLiteral=None):
    '''
    Save contents either to a file or inside the model.

    this is kind of ugly; XUpdate doesn't have an eval() function, so we
    build up a string of xml and then parse it and then return the doc as
    a nodeset and use xupdate:copy-of on the nodeset

    If indexURI is given the contents are first added to the index.
    maxLiteral defaults to self.MAX_MODEL_LITERAL.

    Contents longer than maxLiteral (when filepath is set and maxLiteral
    is > -1) are written to filepath and, if altfilename and
    self.ALTSAVE_DIR are set, possibly to a second copy there; the result
    is a list holding the root element of an a:ContentLocation document.

    Otherwise the contents are returned unchanged, unless they are a str
    that cannot be decoded as utf8: then they are base64 encoded and the
    result is a list holding the root element of an a:ContentTransform
    document.
    '''
    if indexURI:
        self.addToIndex(indexURI, contents, title)
    if maxLiteral is None:
        maxLiteral = self.MAX_MODEL_LITERAL
    #print >>sys.stderr, 'sc', filepath, title, contents
    #NOTE: the xml templates below are filled in with "% locals()", so the
    #names of the local variables in this function are significant
    ns = '''xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#' xmlns:a="http://rx4rdf.sf.net/ns/archive#" xmlns:wiki="http://rx4rdf.sf.net/ns/wiki#"'''
    contentLength = len(contents)
    if filepath and maxLiteral > -1 and contentLength > maxLiteral:
        #save as file
        dir = os.path.split(filepath)[0]
        try:
            os.makedirs(dir)
        except OSError:
            pass #dir might already exist
        #the file is created through a factory joined to the server's txnSvc
        ff = TxnFileFactory(filepath)
        self.server.txnSvc.join(ff)
        f = ff.create('b')
        #f = file(filepath, 'wb')
        f.write(contents)
        f.close()
        digest = utils.shaDigestString(contents)
        if altfilename and self.ALTSAVE_DIR:
            #we save another copy of the last revision in a location that
            #that can be safely accessed and modified by external programs
            #without breaking diffs etc.
            altfilepath = os.path.join(self.ALTSAVE_DIR, altfilename)
            abspath = os.path.abspath(altfilepath)
            prefixlen = len(
                InputSource.DefaultFactory.resolver.getPrefix(abspath))
            assert prefixlen, ("filepath %s must be on Raccoon's PATH" % abspath)
            altPathURI = raccoon.SiteUriResolver.OsPathToPathUri(
                abspath[prefixlen + 1:])
            #if the altfilepath already exists compare its digest with the previous
            #revisions digest and don't overwrite this file if they don't match
            #-- instead add a wiki:save-conflict property to the new contentlocation.
            if os.path.exists(altfilepath):
                existingDigest = utils.shaDigest(altfilepath)
                if existingDigest == digest:
                    #identical to the contents, so no need to write
                    saveAltFile = False
                    conflict = False
                else:
                    conflict = True
                    if previousRevisionDigest:
                        #if these are equal, its ok to overwrite
                        saveAltFile = previousRevisionDigest == existingDigest
                    else:
                        saveAltFile = False
            else:
                #conflict is left unset here; it is only read when
                #saveAltFile is false
                saveAltFile = True
            altContents = ("<wiki:alt-contents><a:ContentLocation "
                           "rdf:about='%s' /></wiki:alt-contents>" % altPathURI)
            if saveAltFile:
                dir = os.path.split(altfilepath)[0]
                try:
                    os.makedirs(dir)
                except OSError:
                    pass #dir might already exist
                ff = TxnFileFactory(altfilepath)
                self.server.txnSvc.join(ff)
                f = ff.create('b')
                #f = file(altfilepath, 'wb')
                f.write(contents)
                f.close()
            elif conflict:
                self.log.warning(
                    "conflict trying to save revision to ALTSAVE_DIR: "
                    "unrecognized contents at %s" % altfilepath)
                #record the conflict instead of the alternative location
                altContents = ("<wiki:save-conflict><a:ContentLocation "
                               "rdf:about='%s' /></wiki:save-conflict>" % altPathURI)
        else:
            altContents = ''
        contentProps = ("<a:content-length>%u</a:content-length>"
                        "<a:sha1-digest>%s</a:sha1-digest>" % (contentLength, digest))
        abspath = os.path.abspath(filepath)
        prefixlen = len(
            InputSource.DefaultFactory.resolver.getPrefix(abspath))
        assert prefixlen, ("filepath %s must be on Raccoon's PATH" % abspath)
        filepathURI = raccoon.SiteUriResolver.OsPathToPathUri(
            abspath[prefixlen + 1:])
        #print >>sys.stderr, abspath, abspath[prefixlen+1:], prefixlen, filepathURI
        xml = ("<a:ContentLocation %(ns)s rdf:about='%(filepathURI)s'>"
               "%(contentProps)s%(altContents)s</a:ContentLocation>" % locals())
    else:
        #save the contents inside the model
        try:
            if isinstance(contents, str):
                #test to see if the string can be treated as utf8
                contents.decode('utf8')
            #note: this returns the contents themselves, not a nodeset
            return contents
            #contents = utils.htmlQuote(contents)
            #xml = '''<a:Content rdf:about='%(sha1urn)'>%(contentProps)s<a:contents>%(contents)s</a:contents></<a:Content>''' % locals()
        except UnicodeError:
            #could be binary, base64 encode
            encodedURI = RxPath.generateBnode()
            contents = base64.encodestring(contents)
            xml = (
                "<a:ContentTransform %(ns)s rdf:about='%(encodedURI)s'>"
                "<a:transformed-by>"
                "<rdf:Description rdf:about='http://www.w3.org/2000/09/xmldsig#base64'/>"
                "</a:transformed-by>"
                "<a:contents>%(contents)s</a:contents>"
                "</a:ContentTransform>" % locals())
    #print >>sys.stderr, 'sc', xml
    from Ft.Xml import Domlette
    #why can't InputSource accept unicode? lame (thus we don't support unicode filenames right now)
    isrc = InputSource.DefaultFactory.fromString(str(xml), 'file:')
    xmlDoc = Domlette.NonvalidatingReader.parse(isrc)
    #return a nodeset containing the root element of the doc
    #print >>sys.stderr, 'sc', xmlDoc.documentElement
    return [xmlDoc.documentElement]
def GenerateBnode(context, name=None):
    '''
    Return a bnode from RxPath.generateBnode. A name that is given is
    converted with StringValue first; context is not used.
    '''
    if name is None:
        return RxPath.generateBnode(name)
    return RxPath.generateBnode(StringValue(name))
def rxml2RxPathDOM(path, url=None, debug=0, namespaceAware=0):
    '''
    Parse an RxML document with rx2model and return an RxPath DOM for the
    resulting model, using the document's namespace map reversed
    (uri to prefix).
    '''
    outputModel, nsMap = rx2model(path, url, debug, namespaceAware)
    #todo: bug! revNsMap doesn't work with 2 prefixes one ns
    uriToPrefix = dict([(uri, prefix) for prefix, uri in nsMap.items()])
    return RxPath.createDOM(outputModel, uriToPrefix)
def outputPredicate(predNode, indent):
    '''
    Return the ZML text for one predicate node, written at the given
    indent, with its object on the same or the following lines.

    A namespace not yet in revNsMap is added to both nsMap and revNsMap
    under the node's own prefix. rdf:type is written as the rx "a"
    element. A literal object is written inline, or inside a literal
    element when it has a language or datatype; a list object is written
    item by item; any other resource is written by name.
    '''
    nsURI = predNode.namespaceURI
    if revNsMap.has_key(nsURI):
        prefix = revNsMap[nsURI]
    else:
        prefix = predNode.prefix
        nsMap[prefix] = nsURI
        revNsMap[nsURI] = prefix

    if nsURI == RDF_MS_BASE and predNode.localName == 'type':
        predicateString = rxPrefix + 'a' #use rx:a instead rdf:type
    elif prefix:
        predicateString = prefix + ':' + predNode.localName
    else:
        predicateString = predNode.localName
    if fixUpPredicate:
        predURI = RxPath.getURIFromElementName(predNode)
        quoted = urllib.quote(predURI)
        predicateString = fixUpPredicate % utils.kw2dict(
            uri=predURI, encodeduri=quoted, predicate=predicateString)

    out = indent + predicateString
    stmtId = predNode.getAttributeNS(RDF_MS_BASE, 'ID')
    if stmtId:
        out += ' ' + rxPrefix + RX_STMTID_ATTRIB + '="' + stmtId + '"'

    assert len(predNode.childNodes) == 1
    target = predNode.childNodes[0]

    if target.nodeType == predNode.TEXT_NODE:
        lang = predNode.getAttributeNS(XML_NAMESPACE, 'lang')
        datatype = predNode.getAttributeNS(RDF_MS_BASE, 'datatype')
        if lang or datatype:
            #the literal needs its own element to carry the attributes
            out += ': '
            out += NL
            indent += INDENT
            out += indent + rxPrefix + RX_LITERALELEM
            if lang:
                out += ' xml:lang="' + lang + '"'
            if datatype:
                #note we don't bother to check if its xml literal and parse and output as zml
                out += ' rdf:datatype="' + datatype + '"'
        out += ': '
        out += doQuote(target.nodeValue) + NL
        return out

    listKind = target.isCompound()
    if listKind:
        out += (' ' + rxPrefix + 'list="'
                + target.getAttributeNS(RDF_MS_BASE, 'about') + '"')
        listKind = listKind[len(RDF_MS_BASE):]
        if listKind != 'List':
            assert listKind in ['Alt', 'Seq', 'Bag'], 'isList should not be ' + listKind
            out += ' ' + rxPrefix + 'listType="rdf:' + listKind + '"'
    out += ': '
    out += NL
    indent += INDENT

    if not listKind:
        out += indent + getResourceNameFromURI(target) + NL
        return out

    #the object is a list resource: write out each of its items
    memberPredicates = [RDF_MS_BASE + 'first', RDF_SCHEMA_BASE + 'member']
    items = [member.childNodes[0] for member in target.childNodes
             if RxPath.getURIFromElementName(member) in memberPredicates]
    for li in items:
        if li.nodeType == li.TEXT_NODE:
            lang = li.parentNode.getAttributeNS(XML_NAMESPACE, 'lang')
            datatype = li.parentNode.getAttributeNS(RDF_MS_BASE, 'datatype')
            if lang:
                attr = ' xml:lang="' + lang + '"'
            elif datatype:
                #note we don't bother to check if its xml literal and parse and output as zml
                attr = ' rdf:datatype="' + datatype + '"'
            else:
                attr = ''
            out += (indent + rxPrefix + RX_LITERALELEM + attr + ':'
                    + doQuote(li.nodeValue) + NL)
        elif li.nodeType == li.ELEMENT_NODE:
            out += indent + getResourceNameFromURI(li) + NL
    return out
def getRXAsZMLFromNode(resourceNodes, nsMap=None, includeRoot=False, INDENT=' ', NL='\n', INITINDENT='', rescomment='', fixUp=None, fixUpPredicate=None):
    '''given a nodeset of RxPathDom nodes, return RxML serialization in ZML markup format

    resourceNodes is a node or a list/tuple of nodes; a predicate node is
    written under its parent resource. nsMap maps prefixes to namespace
    URIs (a default map is used when it is None) and gains an entry for
    every predicate namespace that was not in it yet. rescomment, if set,
    is appended as a comment to each resource line. fixUp and
    fixUpPredicate are optional %-format templates applied to resource
    names (keys uri, encodeduri, res) and predicate names (keys uri,
    encodeduri, predicate); when fixUp is set literals are html-quoted.
    The result is the optional header and root element, then the prefixes
    section, then the resources.
    '''
    def getResourceNameFromURI(resNode):
        #return "prefix:local" for a URI in a known namespace, otherwise
        #the whole URI in braces
        namespaceURI = resNode.getAttributeNS(RDF_MS_BASE, 'about')
        assert namespaceURI
        prefixURI, rest = RxPath.splitUri(namespaceURI)
        #print >>sys.stderr, 'spl %s %s %s' % (namespaceURI, prefixURI, rest)
        #print revNsMap
        if not rest:
            printResourceElem = True
        elif revNsMap.has_key(prefixURI):
            printResourceElem = False
        #elif resNode.ownerDocument.nsRevMap.has_key(prefixURI):
        #    prefix = resNode.ownerDocument.nsRevMap[prefixURI]
        #    nsMap[prefix] = prefixURI
        #    revNsMap[prefixURI] = prefix
        #    printResourceElem = False
        else:
            printResourceElem = True
        if not printResourceElem:
            prefix = revNsMap[prefixURI]
            if prefix:
                retVal = prefix + ':' + rest
            else:
                retVal = rest
            if fixUp:
                retVal = fixUp % utils.kw2dict(
                    uri=namespaceURI,
                    encodeduri=urllib.quote(namespaceURI),
                    res=retVal)
        else:
            if fixUp:
                namespaceURI = fixUp % utils.kw2dict(
                    uri=namespaceURI,
                    encodeduri=urllib.quote(namespaceURI),
                    res=namespaceURI)
            #retVal = rxPrefix + 'resource id="' + namespaceURI + '"'
            retVal = '{' + namespaceURI + '}'
        return retVal

    def outputPredicate(predNode, indent):
        #return the ZML text for one predicate node and its object
        if revNsMap.has_key(predNode.namespaceURI):
            prefix = revNsMap[predNode.namespaceURI]
        else:
            #unknown namespace: register it under the node's own prefix
            prefix = predNode.prefix
            nsMap[prefix] = predNode.namespaceURI
            revNsMap[predNode.namespaceURI] = prefix
        if predNode.namespaceURI == RDF_MS_BASE and predNode.localName == 'type':
            predicateString = rxPrefix + 'a' #use rx:a instead rdf:type
        elif prefix:
            predicateString = prefix + ':' + predNode.localName
        else:
            predicateString = predNode.localName
        if fixUpPredicate:
            predURI = RxPath.getURIFromElementName(predNode)
            eu = urllib.quote(predURI)
            predicateString = fixUpPredicate % utils.kw2dict(
                uri=predURI, encodeduri=eu, predicate=predicateString)
        line = indent + predicateString
        id = predNode.getAttributeNS(RDF_MS_BASE, 'ID')
        if id:
            line += ' ' + rxPrefix + RX_STMTID_ATTRIB + '="' + id + '"'
        assert len(predNode.childNodes) == 1
        if predNode.childNodes[0].nodeType == predNode.TEXT_NODE:
            #literal object
            lang = predNode.getAttributeNS(XML_NAMESPACE, 'lang')
            datatype = predNode.getAttributeNS(RDF_MS_BASE, 'datatype')
            if lang or datatype:
                #write the literal in its own element so it can carry
                #the language and datatype attributes
                line += ': '
                line += NL
                indent += INDENT
                line += indent + rxPrefix + RX_LITERALELEM
                if lang:
                    line += ' xml:lang="' + lang + '"'
                if datatype:
                    #note we don't bother to check if its xml literal and parse and output as zml
                    line += ' rdf:datatype="' + datatype + '"'
            line += ': '
            line += doQuote(predNode.childNodes[0].nodeValue) + NL
        else:
            #resource object
            object = predNode.childNodes[0]
            isList = object.isCompound()
            if isList:
                line += ' ' + rxPrefix + 'list="' + object.getAttributeNS(
                    RDF_MS_BASE, 'about') + '"'
                #strip the rdf namespace, leaving e.g. 'List' or 'Seq'
                isList = isList[len(RDF_MS_BASE):]
                if isList != 'List':
                    assert isList in ['Alt', 'Seq', 'Bag'], 'isList should not be ' + isList
                    line += ' ' + rxPrefix + 'listType="rdf:' + isList + '"'
            line += ': '
            line += NL
            indent += INDENT
            if isList: #is the object a list resource?
                for li in [
                        p.childNodes[0] for p in object.childNodes
                        if RxPath.getURIFromElementName(p) in
                        [RDF_MS_BASE + 'first', RDF_SCHEMA_BASE + 'member']
                ]:
                    if li.nodeType == li.TEXT_NODE:
                        lang = li.parentNode.getAttributeNS(
                            XML_NAMESPACE, 'lang')
                        datatype = li.parentNode.getAttributeNS(
                            RDF_MS_BASE, 'datatype')
                        if lang:
                            attr = ' xml:lang="' + lang + '"'
                        elif datatype:
                            #note we don't bother to check if its xml literal and parse and output as zml
                            attr = ' rdf:datatype="' + datatype + '"'
                        else:
                            attr = ''
                        line += indent + rxPrefix + RX_LITERALELEM + attr + ':' + doQuote(
                            li.nodeValue) + NL
                    elif li.nodeType == li.ELEMENT_NODE:
                        line += indent + getResourceNameFromURI(li) + NL
            else:
                line += indent + getResourceNameFromURI(object) + NL
        return line

    if fixUp: #if fixUp we assume we're outputing xml/html not zml
        doQuote = lambda s: '`' + utils.htmlQuote(s)
    else:
        doQuote = quoteString
    if nsMap is None:
        nsMap = {'bnode': BNODE_BASE, RX_META_DEFAULT: RX_NS}
    #uri -> prefix map; prefixes containing ':' and the two default
    #entries are left out
    revNsMap = dict([(x[1], x[0]) for x in nsMap.items()
                     if x[0] and ':' not in x[0]
                     and x[0] not in [RX_META_DEFAULT, RX_BASE_DEFAULT]])
    if nsMap.has_key(RX_META_DEFAULT):
        #the meta default namespace is written without a prefix
        revNsMap[nsMap[RX_META_DEFAULT]] = ''
    rxPrefix = revNsMap.get(RX_NS, 'rx')
    if rxPrefix:
        rxPrefix += ':'
    indent = INITINDENT
    line = prefixes = root = ''
    if includeRoot:
        indent += INDENT
        root += '#?zml0.7 markup' + NL
        root += INITINDENT + rxPrefix + 'rx:' + NL
    elif not fixUp: #if fixUp we assume we're outputing xml/html not zml
        root += '#?zml0.7 markup' + NL
    if not isinstance(resourceNodes, (list, tuple)):
        resourceNodes = [resourceNodes]
    for resourceNode in resourceNodes:
        if RxPath.isPredicate(None, [resourceNode]):
            #a lone predicate: write only it, under its parent resource
            predicateNodes = [resourceNode]
            resourceNode = resourceNode.parentNode
        else:
            predicateNodes = resourceNode.childNodes
        line += indent + getResourceNameFromURI(resourceNode) + ':'
        if rescomment:
            line += ' #' + rescomment
        line += NL
        for p in predicateNodes:
            line += outputPredicate(p, indent + INDENT)
        line += NL
    #built after the resources because outputPredicate may add to nsMap
    if nsMap:
        prefixes = indent + rxPrefix + 'prefixes:' + NL
        for prefix, ns in nsMap.items():
            prefixes += indent + INDENT + prefix + ': `' + ns + NL
        prefixes += NL
    return root + prefixes + line
def addResource(model, scope, resource, resourceElem, rxNSPrefix, nsMap,
                thisResource, noStmtIds=False):
    '''
    add the children of a RXML resource element to the model

    Every child element of resourceElem is a predicate of resource. Its
    object is, in order of precedence: the deprecated rx "res" attribute,
    a list or container (when a list/listType attribute is present or
    there is more than one non-blank child), an empty literal (no
    children), or the single child converted by getObject.

    Raises RxMLError for a statement id attribute (not yet supported, and
    illegal when noStmtIds is set) and for unexpected attributes on an
    empty predicate element.
    '''
    for p in resourceElem.childNodes:
        if p.nodeType != p.ELEMENT_NODE:
            continue

        if matchName(p, rxNSPrefix, 'resource'):
            predicate = p.getAttributeNS(EMPTY_NAMESPACE, 'id')
        elif matchName(p, rxNSPrefix, 'a'):
            #alias for rdf:type
            predicate = RDF_MS_BASE + 'type'
        else:
            predicate = getURIFromElementName(p, nsMap)

        stmtId = getAttributefromQName(p, rxNSPrefix, RX_STMTID_ATTRIB)
        if not stmtId:
            stmtId = p.getAttributeNS(EMPTY_NAMESPACE, RX_STMTID_ATTRIB)
        if stmtId and noStmtIds:
            raise RxMLError(RX_STMTID_ATTRIB +
                            ' attribute found at illegal location')
        if stmtId:
            raise RxMLError(RX_STMTID_ATTRIB + ' attribute not yet supported')

        object = getAttributefromQName(p, rxNSPrefix, 'res') #this is deprecated
        if object:
            objectType = OBJECT_TYPE_RESOURCE
        elif (getAttributefromQName(p, rxNSPrefix, 'list') is not None
              or getAttributefromQName(p, {'': EMPTY_NAMESPACE}, 'list') is not None
              or getAttributefromQName(p, rxNSPrefix, 'listType') is not None
              or getAttributefromQName(p, {'': EMPTY_NAMESPACE}, 'listType') is not None
              or len([c for c in p.childNodes
                      if c.nodeType != p.COMMENT_NODE
                      and c.nodeValue and c.nodeValue.strip()]) > 1):
            #the object of this predicate is a list
            listID = getAttributefromQName(p, rxNSPrefix, 'list')
            if not listID:
                listID = p.getAttributeNS(EMPTY_NAMESPACE, 'list')
            if not listID:
                listID = RxPath.generateBnode()
            model.addStatement(
                Statement(resource, predicate, listID,
                          OBJECT_TYPE_RESOURCE, scope))

            listType = getAttributefromQName(p, rxNSPrefix, 'listType')
            #fall back to the unprefixed attribute; this used to test
            #listID, which is always set by now, so it was never read
            if not listType:
                listType = p.getAttributeNS(EMPTY_NAMESPACE, 'listType')

            getObjectFunc = lambda elem: getObject(elem, rxNSPrefix, nsMap,
                                                   thisResource)
            if not listType or listType == 'rdf:List':
                addList2Model(model, resource, p, listID, scope,
                              getObjectFunc)
            else:
                addContainer2Model(model, resource, p, listID, scope,
                                   getObjectFunc, listType)
            #the list statements are already in the model
            continue
        else:
            #object is a a literal or resource
            childNodes = [child for child in p.childNodes
                          if child.nodeType != child.COMMENT_NODE]
            if not childNodes:
                #if predicate has no child we assume its an empty literal
                #this could be the result of the common error with ZML
                #where the ':' was missing after the predicate
                invalidAttrLocalNames = [
                    attName[1] for attName in p.attributes.keys()
                    if attName[1] not in [RX_STMTID_ATTRIB, 'id']]
                if invalidAttrLocalNames:
                    #there's an attribute that not either 'stmtid' or 'rdf:id'
                    raise RxMLError('invalid attribute ' +
                                    invalidAttrLocalNames[0] +
                                    ' on predicate element ' + p.localName +
                                    ' -- did you forget a ":"?')
                object, objectType = "", OBJECT_TYPE_LITERAL
            else:
                assert len(childNodes) == 1, p
                object, objectType = getObject(childNodes[0], rxNSPrefix,
                                               nsMap, thisResource)
        #print >>sys.stderr, 'adding ', repr(resource), repr(predicate), object,
        model.addStatement(
            Statement(resource, predicate, object, objectType, scope))
def to_rdf(self, json):
    '''
    Convert an sjson document to RDF statements.

    json may be a JSON string, a dict with a 'results' list (and
    optionally a 'shared' dict), or a list of dicts. Each dict becomes a
    resource whose id is its 'id' property, or a new blank id stored back
    into the dict when it has none. Nested dicts become resources of
    their own; lists produce one statement per item plus, unless the
    property is rdf:_n, rdf:first or rdfs:member, a property sequence
    resource recording the order.

    Returns the statements of the model that was built. Raises TypeError
    when the input is not one of the shapes above.
    '''
    scope = ''
    m = RxPath.MemModel()
    if isinstance(json, (str, unicode)):
        #the json parameter hides the module of the same name, so the
        #parser is imported here under a different name
        import json as jsonlib
        json = jsonlib.loads(json)
    if isinstance(json, dict):
        todo = [r for r in json.get('results', []) if isinstance(r, dict)]
        if 'shared' in json:
            todo.extend(json['shared'].values())
    else:
        todo = json
    if not isinstance(todo, list):
        raise TypeError('whats this?')

    #nsmapstack = [ self.nsmap.copy() ]
    nsmap = self.nsmap

    def getorsetid(obj):
        #return (id, idprop) for obj, assigning a blank id if it has none
        #nsmap = nsmapstack.pop()
        nsmapprop = _expandqname('nsmap', nsmap)
        nsmapval = obj.get(nsmapprop)
        if nsmapval is not None:
            pass #XXX update stack
        idprop = _expandqname('id', nsmap)
        id = obj.get(idprop)
        if id is None:
            id = self._blank() #XXX
            obj[idprop] = id
        return id, idprop

    while todo:
        obj = todo.pop()
        #XXX if obj.nsmap: push nsmap
        #XXX propmap
        #XXX idmap
        id, idprop = getorsetid(obj)
        for prop, val in obj.items():
            if prop == idprop:
                continue
            prop = _expandqname(prop, nsmap)
            if isinstance(val, dict):
                objid, idprop = getorsetid(val)
                m.addStatement(
                    Statement(id, prop, objid, OBJECT_TYPE_RESOURCE, scope))
                #lists have no push(); queue the nested resource
                todo.append(val)
            elif isinstance(val, list):
                #dont build a PROPSEQTYPE if prop in rdf:_ rdf:first rdfs:member
                specialprop = prop.startswith(RDF_MS_BASE + '_') or prop in [
                    RDF_MS_BASE + 'first', RDF_SCHEMA_BASE + 'member']
                #XXX special handling for prop == PROPSEQ ?
                if not specialprop:
                    if not val:
                        m.addStatement(
                            Statement(id, prop, RDF_MS_BASE + 'nil',
                                      OBJECT_TYPE_RESOURCE, scope))
                    else:
                        seq = self._blank()
                        m.addStatement(
                            Statement(seq, RDF_MS_BASE + 'type',
                                      RDF_MS_BASE + 'Seq',
                                      OBJECT_TYPE_RESOURCE, scope))
                        m.addStatement(
                            Statement(seq, RDF_MS_BASE + 'type', PROPSEQTYPE,
                                      OBJECT_TYPE_RESOURCE, scope))
                        m.addStatement(
                            Statement(seq, PROPBAG, prop,
                                      OBJECT_TYPE_RESOURCE, scope))
                        m.addStatement(
                            Statement(id, PROPSEQ, seq,
                                      OBJECT_TYPE_RESOURCE, scope))

                for i, item in enumerate(val):
                    if isinstance(item, dict):
                        #use the item itself here: the enclosing list has
                        #no id, and the sequence member is the item, not
                        #the subject
                        itemid, idprop = getorsetid(item)
                        m.addStatement(
                            Statement(id, prop, itemid,
                                      OBJECT_TYPE_RESOURCE, scope))
                        if not specialprop:
                            m.addStatement(
                                Statement(seq, RDF_MS_BASE + '_' + str(i + 1),
                                          itemid, OBJECT_TYPE_RESOURCE,
                                          scope))
                        todo.append(item)
                    elif isinstance(item, list):
                        pass #XXX nested lists: add JSONSEQ
                    else:
                        #simple type
                        if self.lookslikeUriOrQname(item):
                            objecttype = OBJECT_TYPE_RESOURCE
                        else:
                            objecttype = OBJECT_TYPE_LITERAL
                        m.addStatement(
                            Statement(id, prop, item, objecttype, scope))
                        if not specialprop:
                            m.addStatement(
                                Statement(seq, RDF_MS_BASE + '_' + str(i + 1),
                                          item, objecttype, scope))
            else:
                #simple type
                if self.lookslikeUriOrQname(val):
                    objecttype = OBJECT_TYPE_RESOURCE
                else:
                    objecttype = OBJECT_TYPE_LITERAL
                m.addStatement(Statement(id, prop, val, objecttype, scope))
    return m.getStatements()
def loadDom(self, requestProcessor):
    '''
    Build the model for this store and attach an RxPath DOM to self.dom.

    The main model comes from self.modelFactory, seeded with the
    statements of self.defaultTripleStream. Depending on configuration it
    is then combined with the application model (APPLICATION_MODEL),
    mirrored to a transaction log (transactionLog) and, when saveHistory
    is set, managed by a NamedGraphManager together with the version
    model. The store's triggers and the request processor's queryCache
    are attached to the new DOM.
    '''
    #imported unconditionally: it is needed for APPLICATION_MODEL even
    #when saveHistory is off, and a function-level import that is skipped
    #leaves the name unbound
    from rx import RxPathGraph

    self.log = logging.getLogger("domstore." + requestProcessor.appName)

    normalizeSource = getattr(self.modelFactory, 'normalizeSource',
                              DomStore._normalizeSource)
    source = normalizeSource(self, requestProcessor, self.STORAGE_PATH)

    modelUri = requestProcessor.MODEL_RESOURCE_URI
    if self.saveHistory:
        initCtxUri = RxPathGraph.getTxnContextUri(modelUri, 0)
    else:
        initCtxUri = ''
    defaultStmts = RxPath.NTriples2Statements(self.defaultTripleStream,
                                              initCtxUri)

    if self.VERSION_STORAGE_PATH:
        normalizeSource = getattr(self.versionModelFactory,
                                  'normalizeSource',
                                  DomStore._normalizeSource)
        versionStoreSource = normalizeSource(self, requestProcessor,
                                             self.VERSION_STORAGE_PATH)
        delmodel = self.versionModelFactory(source=versionStoreSource,
                                            defaultStatements=[])
    else:
        delmodel = None

    #note: to override loadNtriplesIncrementally, set this attribute
    #on your custom modelFactory function
    if self.saveHistory and getattr(self.modelFactory,
                                    'loadNtriplesIncrementally', False):
        if not delmodel:
            delmodel = RxPath.MemModel()
        dmc = RxPathGraph.DeletionModelCreator(delmodel)
        model = self.modelFactory(source=source,
                                  defaultStatements=defaultStmts,
                                  incrementHook=dmc)
        lastScope = dmc.lastScope
    else:
        model = self.modelFactory(source=source,
                                  defaultStatements=defaultStmts)
        lastScope = None

    if self.APPLICATION_MODEL:
        appTriples = StringIO.StringIO(self.APPLICATION_MODEL)
        stmtGen = RxPath.NTriples2Statements(appTriples, RxPathGraph.APPCTX)
        appmodel = RxPath.MemModel(stmtGen)
        model = RxPath.MultiModel(model, appmodel)

    if self.transactionLog:
        model = RxPath.MirrorModel(
            model,
            RxPath.IncrementalNTriplesFileModel(self.transactionLog, []))

    if self.saveHistory:
        graphManager = RxPathGraph.NamedGraphManager(model, delmodel,
                                                     lastScope)
    else:
        graphManager = None

    #reverse namespace map
    #todo: bug! revNsMap doesn't work with 2 prefixes one ns
    revNsMap = dict([(x[1], x[0]) for x in requestProcessor.nsMap.items()])
    self.dom = RxPath.createDOM(model, revNsMap, modelUri=modelUri,
                                schemaClass=self.schemaFactory,
                                graphManager=graphManager)
    self.dom.addTrigger = self.addTrigger
    self.dom.removeTrigger = self.removeTrigger
    self.dom.newResourceTrigger = self.newResourceTrigger
    #associate the queryCache with the DOM Document
    self.dom.queryCache = requestProcessor.queryCache
def evalXPath(self, xpath, context, expCache=None, queryCache=None):
    '''
    Log the expression at debug level, then evaluate it with
    RxPath.evalXPath and return the result.
    '''
    self.log.debug(xpath)
    outcome = RxPath.evalXPath(xpath, context, expCache, queryCache)
    return outcome