Ejemplo n.º 1
0
def rx2nt(path, url=None, debug=0, namespaceAware=0, scope=''):
    '''
    Convert an RxML document into a string of N-Triples.

    path is either a stream-like object or a string that is a file path.
    All arguments are handed unchanged to rx2statements(); the statements
    it yields are written with RxPath.writeTriples() into an in-memory
    buffer whose contents are returned.
    '''
    triples = rx2statements(path, url, debug, namespaceAware, scope)
    buf = StringIO.StringIO()
    RxPath.writeTriples(triples, buf)
    return buf.getvalue()
Ejemplo n.º 2
0
def ParseRDF(context, contents, type='unknown', uri=''):
    '''
    Parse RDF held in a string and return a one-item list containing the
    RxPath DOM built from its statements.

    contents, type and uri are first coerced with StringValue(). When uri
    is empty, a fresh 'urn:uuid:' URI is generated and used in its place.
    The namespace map and schema class are taken from the owner document of
    the context node when it defines them.
    '''
    contents = StringValue(contents)
    type = StringValue(type)
    uri = StringValue(uri)
    if not uri:
        from Ft.Lib import Uuid
        uri = 'urn:uuid:' + Uuid.UuidAsString(Uuid.GenerateUuid())
    ownerDoc = context.node.ownerDocument
    reverseNsMap = getattr(ownerDoc, 'nsRevMap', None)
    schema = getattr(ownerDoc, 'schemaClass', RxPath.defaultSchemaClass)
    statements = RxPath.parseRDFFromString(contents, uri, type)
    dom = RxPath.RxPathDOMFromStatements(statements, reverseNsMap, uri,
                                         schema)
    return [dom]
Ejemplo n.º 3
0
def main(argv=sys.argv, out=sys.stdout):
    if '-n' in argv:
        print >> out, rx2nt(argv[2])
    elif '-z' in argv:
        print >> out, zml2RDF_XML(file(argv[2]), addRootElement=False)
    elif '-r' in argv:
        model, db = RxPath.deserializeRDF(argv[2])
        nsMap = {'bnode': BNODE_BASE, 'rx': RX_NS}
        revNsMap = dict([(x[1], x[0]) for x in nsMap.items()
                         if x[0] and ':' not in x[0]])
        rdfDom = RxPath.createDOM(RxPath.FtModel(model), revNsMap)
        print >> out, getRXAsZMLFromNode(rdfDom.childNodes, nsMap)
    else:
        print >> out, '''        
Ejemplo n.º 4
0
 def applyXslt(self,
               xslStylesheet,
               topLevelParams=None,
               extFunctionMap=None,
               baseUri='file:',
               styleSheetCache=None):
     '''
     Apply xslStylesheet to this object's DOM (self.dom).

     Returns a (result, stylesheet) pair: the value returned by
     RxPath.applyXslt() and the stylesheet attribute of the
     RxSLTProcessor instance that was passed to it.
     '''
     xsltProcessor = RxPath.RxSLTProcessor()
     output = RxPath.applyXslt(self.dom, xslStylesheet, topLevelParams,
                               extFunctionMap, baseUri, styleSheetCache,
                               processor=xsltProcessor)
     # read the stylesheet only after the transform has run
     return output, xsltProcessor.stylesheet
Ejemplo n.º 5
0
 def testXPathSecurity(self):
     '''
     Test that we can't access insecure 4Suite extension functions
     after importing raccoon.

     Evaluating xf:env-var() must raise XPath.RuntimeException; if the
     expression evaluates without that error the test fails.
     '''
     from rx import RxPath
     from Ft.Xml import XPath
     node = None
     context = XPath.Context.Context(node,
                                     processorNss=raccoon.DefaultNsMap)
     # NOTE(review): presumably imported so 4Suite's built-in extension
     # module is loaded before the check -- confirm it is still needed.
     from Ft.Xml.XPath import BuiltInExtFunctions
     #print BuiltInExtFunctions.ExtFunctions[(XPath.FT_EXT_NAMESPACE, 'env-var')]
     try:
         RxPath.evalXPath('xf:env-var("foo")', context)
     except XPath.RuntimeException:
         pass  # expected: the extension function is not available
     else:
         # without this branch the test passed even when the insecure
         # function could be called
         self.fail('xf:env-var() was evaluated but should be inaccessible')
Ejemplo n.º 6
0
def If(context, cond, v1, v2=None):
    """
    Like Ft.Xml.XPath.BuiltInExtFunctions.If, except that the "then" (v1)
    and "else" (v2) arguments are strings holding XPath expressions that
    are evaluated dynamically. Only the selected expression is evaluated,
    giving the short-circuit logic you expect from an if expression.

    Returns the result of evaluating the chosen expression, or [] when
    cond is false and no v2 was supplied.
    """
    from Ft.Xml.XPath import Conversions
    from rx import raccoon
    queryCache = getattr(context.node.ownerDocument, 'queryCache', None)
    expCache = raccoon.RequestProcessor.expCache
    if Conversions.BooleanValue(cond):
        chosen = v1
    elif v2 is None:
        return []
    else:
        chosen = v2
    return RxPath.evalXPath(Conversions.StringValue(chosen), context,
                            expCache, queryCache)
Ejemplo n.º 7
0
 def processContents(self, result, kw, contextNode, contents):
     '''
     Hand contents to the server's updateStoreWithRDF().

     The server is read from kw['__server__'], the parse type from
     kw['parseType'] (falling back to self.rdfFormat), the optional
     resource URI from kw['__resource'] (converted with
     RxPath.StringValue when set) and the resources to replace from
     kw['resourceToReplace']. result and contextNode are not used.
     '''
     server = kw['__server__']
     rdfType = kw.get('parseType', self.rdfFormat)
     resourceUri = kw.get('__resource')
     if resourceUri:
         resourceUri = RxPath.StringValue(resourceUri)
     #returns None -- ok!?
     return server.updateStoreWithRDF(contents, rdfType, resourceUri,
                                      kw.get('resourceToReplace'))
Ejemplo n.º 8
0
def SerializeRDF(context,
                 resultset,
                 type='rdfxml',
                 nsMapString=None,
                 fixUp=None,
                 fixUpPredicate=None):
    '''Serialize the RxPathDOM nodes contained in resultset as RDF.

    The statements of every resource or predicate node in resultset are
    collected and passed to RxPath.serializeRDF(), whose return value is
    returned (the original author describes it as a nodeset containing
    the serialization).

    nsMapString is a namespace dictionary encoded as a string in the form
    of "prefix^uri^prefix^uri...". When it is not given, the nsRevMap of
    the first node's rootNode is used.
    '''
    statements = []
    uri2prefixMap = None
    if nsMapString:
        # even positions are prefixes, odd positions are the matching URIs
        parts = StringValue(nsMapString).split('^')
        uri2prefixMap = dict(zip(parts[1::2], parts[0::2]))
    if resultset:
        first = resultset[0]
        if uri2prefixMap is None:
            uri2prefixMap = first.rootNode.nsRevMap
        if first.nodeName == '#document':
            # a document stands for all of its children
            resultset = first.childNodes

        for node in resultset:
            single = [node]
            if RxPath.isResource(context, single):
                predicates = node.childNodes
            elif RxPath.isPredicate(context, single):
                predicates = single
            else:
                predicates = []  #error?

            for pred in predicates:
                statements.extend(pred.getModelStatements())
                if (RxPath.isResource(context, pred.childNodes)
                        and pred.firstChild.isCompound()):
                    #object is a list so add all the list items too
                    statements.extend(pred.firstChild.getModelStatements())

    return RxPath.serializeRDF(statements, type, uri2prefixMap, fixUp,
                               fixUpPredicate)
Ejemplo n.º 9
0
 def eval(l, node):
     '''
     Evaluate the enclosing scope's xpath with node as the context node
     and append the outcome to the list l, which is also returned.

     mapContext is updated in place: its node is replaced, its position
     is incremented and the 'current' variable is bound to node. A
     result that is not a plain list is converted to unicode (decoding
     as utf8 when needed) and passed through String2NodeSet first.
     '''
     mapContext.node = node
     mapContext.position += 1
     mapContext.varBindings[(RXWIKI_XPATH_EXT_NS, 'current')] = node
     nodes = RxPath.evalXPath(xpath, mapContext, expCache, queryCache)
     if type(nodes) != type([]):
         text = nodes
         if not isinstance(text, unicode):
             text = unicode(str(text), 'utf8')
         nodes = String2NodeSet(mapContext, text)
     l.extend(nodes)
     return l
Ejemplo n.º 10
0
def getResource(s, rxNSPrefix, nsMap, thisResource):
    '''
    Work out which resource the element s denotes.

    Returns a (resource, typeName) pair. typeName is None except in the
    deprecated form where the element carries an id attribute; there the
    element name gives the class type.
    '''
    if matchName(s, rxNSPrefix, 'this-resource'):
        assert thisResource
        return thisResource, None

    if matchName(s, rxNSPrefix, 'resource'):
        # an empty or missing id means an anonymous resource
        resource = (s.getAttributeNS(EMPTY_NAMESPACE, 'id')
                    or RxPath.generateBnode())
        return resource, None

    #deprecated if the element has an id element treat as the resource URI ref and the element name as the class type
    resourceId = getAttributefromQName(s, rxNSPrefix, 'id')
    if resourceId is None:
        return getURIFromElementName(s, nsMap), None
    resource = resourceId or RxPath.generateBnode()
    return resource, getURIFromElementName(s, nsMap)
Ejemplo n.º 11
0
def rx2model(path, url=None, debug=0, namespaceAware=0, scope=''):
    '''
    Parse the RxML and return a (model, nsMap) pair: an RxPath.MemModel
    containing its statements and the namespace map returned by
    addRxdom2Model().

    When url is given the document is read from that URI's input stream;
    otherwise path is handed to expatbuilder.parse() as is. debug is not
    used by this function.
    '''
    from xml.dom import expatbuilder

    source = path
    if url:
        # keep the input source object referenced while its stream is read
        inputSource = InputSource.DefaultFactory.fromUri(url)
        source = inputSource.stream
    rxDoc = expatbuilder.parse(source, namespaces=namespaceAware)

    model = RxPath.MemModel()
    namespaces = addRxdom2Model(rxDoc,
                                model,
                                thisResource='wikiwiki:',
                                scope=scope)
    return model, namespaces
Ejemplo n.º 12
0
    def testCaching(self):
        '''
        Check the cache keys raccoon computes for an XPath expression and
        for an XSLT stylesheet that both call wf:get-metadata('url').
        '''
        server = raccoon.RequestProcessor(a='testMinimalApp.py',
                                          model_uri='test:')
        from rx import RxPath
        from Ft.Xml import XPath
        domNode = server.domStore.dom
        kw = {'url': 'foo:', '__server__': server}
        xpathVars, extFunctions = server.mapToXPathVars(kw)
        context = XPath.Context.Context(domNode,
                                        varBindings=xpathVars,
                                        extFunctionMap=extFunctions,
                                        processorNss=raccoon.DefaultNsMap)

        # key for a single compiled XPath expression
        compiled = RxPath._compileXPath("wf:get-metadata('url')", context)
        exprKey = raccoon.getKeyFromXPathExp(compiled, context,
                                             server.NOT_CACHEABLE_FUNCTIONS)
        expectedExprKey = ('wf:get-metadata("url")', (None, u'url'), 'foo:',
                           domNode.getKey())
        self.failUnless(exprKey == expectedExprKey)

        styleSheetContents = '''
        <x:stylesheet version="1.0" xmlns:x="http://www.w3.org/1999/XSL/Transform"
                 xmlns:wf='http://rx4rdf.sf.net/ns/raccoon/xpath-ext#'>
        <x:template match='/'>
        <x:variable name='url' select='wf:get-metadata("url")' />
        </x:template></x:stylesheet>
        '''

        # key for a stylesheet that uses the same extension function
        styleSheetKey = raccoon.getXsltCacheKeyPredicate(
            server.styleSheetCache,
            server.NOT_CACHEABLE_FUNCTIONS,
            styleSheetContents,
            '<root />',
            kw,
            domNode,
            styleSheetUri='test:')
        expectedStyleSheetKey = (styleSheetContents, 'test:', '<root />',
                                 domNode.getKey(), ((None, u'url'), False),
                                 ((None, u'url'), 'foo:'))
        self.failUnless(styleSheetKey == expectedStyleSheetKey)
Ejemplo n.º 13
0
    def getResourceNameFromURI(resNode):
        '''
        Return the text used to refer to the resource element resNode.

        When the URI in its rdf:about attribute splits into a namespace
        found in revNsMap plus a non-empty local part, the result is
        "prefix:local" (or just the local part for an empty prefix).
        Otherwise it is the full URI wrapped in braces. fixUp, when set
        in the enclosing scope, is applied as a % template with the keys
        uri, encodeduri and res.
        '''
        resourceURI = resNode.getAttributeNS(RDF_MS_BASE, 'about')
        assert resourceURI
        prefixURI, localPart = RxPath.splitUri(resourceURI)

        # no local part, or a namespace without a known prefix: the name
        # cannot be abbreviated (a fallback to the owner document's
        # nsRevMap was left disabled by the original author)
        if not localPart or not revNsMap.has_key(prefixURI):
            if fixUp:
                resourceURI = fixUp % utils.kw2dict(
                    uri=resourceURI,
                    encodeduri=urllib.quote(resourceURI),
                    res=resourceURI)
            return '{' + resourceURI + '}'

        prefix = revNsMap[prefixURI]
        if prefix:
            name = prefix + ':' + localPart
        else:
            name = localPart
        if fixUp:
            name = fixUp % utils.kw2dict(
                uri=resourceURI,
                encodeduri=urllib.quote(resourceURI),
                res=name)
        return name
Ejemplo n.º 14
0
def addList2Model(model, subject, p, listID, scope, getObject=getObject):
    '''
    Add to model the statements of an RDF list whose items are the
    non-comment child nodes of p.

    listID names the first list node; each further item gets a node id
    from RxPath.generateBnode(). Every node receives an rdf:first
    statement for its item, an rdf:type rdf:List statement and an
    rdf:rest statement pointing at the next node (rdf:nil for the last).
    getObject(child) must return an (object, objectType) pair. subject is
    not used by this function.
    '''
    rdfType = RDF_MS_BASE + 'type'
    rdfList = RDF_MS_BASE + 'List'
    rdfRest = RDF_MS_BASE + 'rest'

    def closeNode(nodeID, nextID):
        #type nodeID as rdf:List and point its rdf:rest at nextID
        model.addStatement(
            Statement(nodeID, rdfType, rdfList, OBJECT_TYPE_RESOURCE, scope))
        model.addStatement(
            Statement(nodeID, rdfRest, nextID, OBJECT_TYPE_RESOURCE, scope))

    previousID = None
    for item in p.childNodes:
        if item.nodeType == p.COMMENT_NODE:
            continue
        value, valueType = getObject(item)
        if previousID:
            listID = RxPath.generateBnode()
            closeNode(previousID, listID)
        model.addStatement(
            Statement(listID, RDF_MS_BASE + 'first', value, valueType,
                      scope))
        previousID = listID
    closeNode(listID, RDF_MS_BASE + 'nil')
Ejemplo n.º 15
0
    def _to_sjson(self, root, depth=-1, exclude_blankids=False):
        """
        Convert RDF into the nested dict/list structure used for SJSON.

        root is either an RxPathDom node (Document, DocumentFragment,
        Resource, BasePredicate or Text) or anything else, in which case
        it is assumed to be an iterator of statements or quad tuples and
        is loaded into an RxPath DOM first (order is not preserved).

        Returns a list of dicts, one per top-level resource. If any
        resource had to be moved out because it is referenced more than
        once, the return value is instead a dict of the form
        {'results': <that list>, 'shared': <dict of moved resources>},
        and each moved resource is replaced by a string reference where
        it occurred. For a Text root the value of self._value(root) is
        returned directly.

        depth and exclude_blankids are accepted but not implemented.

        Raises TypeError when root is an RxPathDom node of an unexpected
        kind.

        Original author's notes and unfinished example:

        If resource is a references more than once, just the string is output
        RDF lists and containers that are not SJSON sequences

        >>> r = Res("http://example.org/book#1"); r['v1'] = 'string'; r['v2'] = 1;
        >>> "http://example.org/book#2"
        >>> r['l'] = [1, 2, 3, 4, 5]; r['r'] = Res('o')

        >>> sjson().to_sjson(doc())
        """
        #XXX depth
        #XXX exclude_blankids (but if false, will need to add back if shared)

        #use RxPathDom, expensive but arranges as sorted tree, normalizes RDF collections et al.
        #and is schema aware
        from rx import RxPathDom
        if not isinstance(root, RxPathDom.Node):
            #assume doc is iterator of statements or quad tuples
            #note: order is not preserved
            root = RxPath.createDOM(RxPath.MemModel(root), schemaClass=RxPath.BaseSchema)

        results = []
        #seen: resource uri -> (parent container, key, value placed there)
        seen = {}
        #shared: reference string -> value of a resource referenced more than once
        shared = {}
        #todo holds (parent container, key, node) entries whose properties
        #still have to be filled in
        if isinstance(root, (RxPathDom.Document, RxPathDom.DocumentFragment)): #XXX RxPathDom.DocumentFragment
            if isinstance(root, RxPathDom.Document):
                #filter out propseq resources and resources with no properties
                nodes = [n for n in root.childNodes if n.childNodes and
                            not n.matchName(JSON_BASE,'propseqtype')]
            else:
                nodes = list(root.childNodes)
            #from pprint import pprint
            #pprint(nodes)
            results = [{} for _ in nodes]
            todo = [(results, i, n) for i,n in enumerate(nodes)]
        elif isinstance(root, RxPathDom.Resource):
            results = [ {} ]
            todo = [(results, 0, root)]
        elif isinstance(root, RxPathDom.BasePredicate):
            #the object of the predicate is its first child; this used to
            #read from an unbound local named p and so always failed
            obj = root.childNodes[0]
            key = self.QName(root.parentNode.uri)
            propmap = { self.PROPERTYMAP : self.QName(root.stmt.predicate) }
            if isinstance(obj, RxPathDom.Text):
                v = self._value(obj)
                todo = []
            else:
                v = {}
                todo = [ (propmap, key, obj) ]
            propmap[key] = v
            results = [propmap]
        elif isinstance(root, RxPathDom.Text):
            #return string value
            return self._value(root)
        else:
            raise TypeError('Unexpected root node')

        def setobj(obj, res, parent, key):
            #store the value for object node obj (a property of res) at parent[key]
            if isinstance(obj, RxPathDom.Text):
                v = self._value(obj)
                #otherwise its a resource
            elif obj.uri == res.uri: #object is same as subject
                v = self.QName(obj.uri)
            elif obj.uri == RDF_MS_BASE + 'nil':
                v = [] #empty list
            else:
                uri = obj.uri
                #if an object appears in the tree more than once,
                #replace prior reference with uri
                #and add to shared
                prior = seen.get(uri)
                #print 'seen', uri, prior
                if prior:
                    v = self.QName(uri)
                    shared[v] = prior[0][ prior[1] ]
                    prior[0][ prior[1] ] = v
                else:
                    v = {}
                    #print 'add to seen', uri, type(uri)
                    seen[uri] = (parent, key, v)
                    todo.append( (parent, key, obj) )
            parent[ key ] = v

        def setPropSeq(propseq, res):
            #return (property uri, list of member values) for a propseq resource
            #XXX what about empty lists?
            #NOTE(review): propbag stays unbound if propseq has no PROPBAG
            #property, so the return below would raise -- confirm that
            #cannot happen
            childlist = []
            for p in propseq.childNodes:
                prop = p.stmt.predicate
                obj = p.childNodes[0]
                if prop == PROPBAG:
                    propbag = obj.uri
                elif prop == RxPath.RDF_SCHEMA_BASE+u'member':
                    #reserve the slot, then let setobj fill it in
                    childlist.append(0)
                    setobj(obj, res, childlist, len(childlist)-1)
            return propbag, childlist

        while todo:
            #res (the resource) has already been attached to its parent
            #now we need to assign its properties
            parent, key, res = todo.pop()
            if res.uri not in seen:
                #print 'add subject to seen', res.uri, type(res.uri)
                seen[res.uri] = (parent, key, parent[key])
            else:
                #print 'subject seen', res.uri
                parent[key] = res.uri #replace object with uri reference
                prior = seen[res.uri]
                key = self.QName(res.uri)
                if key in shared:
                    continue
                else:
                    #add to shared
                    shared[key] = prior[2]
                    parent = shared

            if res.childNodes:
                s = parent[key]
                s[self.ID] = res.uri
            else:
                #no properties, just write out the id, not a dict
                #and don't bother including it in shared
                #XXX but then we can't tell if its an id or just a string?
                if parent is shared:
                    del parent[key]
                else:
                    parent[key] = res.uri
                continue

            currentlist = []
            propseqs = {}

            #first pass: property sequences
            for p in res.childNodes:
                prop = p.stmt.predicate
                if prop == PROPSEQ:
                    #this will replace sequences
                    seqprop, childlist = setPropSeq(p.childNodes[0], res)
                    s[ self.QName(seqprop) ] = childlist

            #second pass: every other property; runs of the same
            #predicate on adjacent siblings are gathered into one list
            for p in res.childNodes:
                prop = p.stmt.predicate
                if prop == PROPSEQ:
                    continue
                if self.QName(prop) in s:
                    continue #must have be already handled by getPropSeq

                nextMatches = p.nextSibling and p.nextSibling.stmt.predicate == prop
                #XXX Test empty and singleton rdf lists and containers
                if nextMatches or currentlist:
                    parent = currentlist
                    key = len(currentlist)
                    currentlist.append(0)
                else:
                    parent = s
                    key = self.QName(prop)

                obj = p.childNodes[0]
                setobj(obj, res, parent, key)

                if currentlist and not nextMatches:
                    s[ self.QName(prop) ] = currentlist
                    currentlist = [] #list done

        if shared:
            results = { 'results':results, 'shared':shared}

        #if self.nsmap:
        #    results['prefix'] = self.nsmap

        return results
Ejemplo n.º 16
0
 def initModel(location, defaultModel):
     '''
     Build an RxPath.MemModel from location when that path exists,
     otherwise from defaultModel.
     '''
     source = defaultModel
     if os.path.exists(location):
         source = location
     return RxPath.MemModel(source)
Ejemplo n.º 17
0
def modelFromJson(model):
    '''
    Wrap model as {'results': model}, convert it with sjson's to_rdf()
    and load the outcome into a new RxPath.MemModel, which is returned.
    '''
    converter = sjson.sjson()
    rdf = converter.to_rdf({'results': model})
    return RxPath.MemModel(rdf)
Ejemplo n.º 18
0
    def _saveContents(self,
                      filepath,
                      contents,
                      altfilename=None,
                      indexURI=None,
                      title='',
                      previousRevisionDigest='',
                      maxLiteral=None):
        '''
        Save contents either to a file or inside the model.

        When indexURI is given the contents are first added to the index.
        maxLiteral defaults to self.MAX_MODEL_LITERAL; contents are written
        to filepath only when filepath is set, maxLiteral is greater than
        -1 and the contents are longer than maxLiteral. In that case an
        a:ContentLocation element describing the file (length, sha1 digest
        and, when altfilename and self.ALTSAVE_DIR are set, the alternate
        copy or a save conflict) is built.

        Otherwise the contents stay in the model: they are returned
        unchanged unless they are a str that cannot be decoded as utf8, in
        which case they are base64 encoded into an a:ContentTransform
        element.

        Returns either the unchanged contents or a one-item list holding
        the root element of the parsed XML.

        This is kind of ugly; XUpdate doesn't have an eval() function, so we
        build up a string of xml and then parse it and then return the doc as
        a nodeset and use xupdate:copy-of on the nodeset.
        '''
        if indexURI:
            self.addToIndex(indexURI, contents, title)
        if maxLiteral is None:
            maxLiteral = self.MAX_MODEL_LITERAL

        #print >>sys.stderr, 'sc', filepath, title, contents
        #namespace declarations spliced into the root element of the xml
        #built below (read through locals(), so the name matters)
        ns = '''xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'
            xmlns:a="http://rx4rdf.sf.net/ns/archive#"
            xmlns:wiki="http://rx4rdf.sf.net/ns/wiki#"'''
        contentLength = len(contents)
        #a maxLiteral below zero never saves to a file
        if filepath and maxLiteral > -1 and contentLength > maxLiteral:
            #save as file
            dir = os.path.split(filepath)[0]
            try:
                os.makedirs(dir)
            except OSError:
                pass  #dir might already exist

            #the write goes through a file factory joined to the server's
            #transaction service rather than a plain file object
            ff = TxnFileFactory(filepath)
            self.server.txnSvc.join(ff)
            #NOTE(review): 'b' presumably selects binary mode -- confirm
            f = ff.create('b')
            #f = file(filepath, 'wb')
            f.write(contents)
            f.close()

            digest = utils.shaDigestString(contents)

            if altfilename and self.ALTSAVE_DIR:
                #we save another copy of the last revision in a location that
                #that can be safely accessed and modified by external programs
                #without breaking diffs etc.
                altfilepath = os.path.join(self.ALTSAVE_DIR, altfilename)
                abspath = os.path.abspath(altfilepath)
                prefixlen = len(
                    InputSource.DefaultFactory.resolver.getPrefix(abspath))
                assert prefixlen, ("filepath %s must be on Raccoon's PATH" %
                                   abspath)
                #the part of the path after the prefix and one more
                #character (presumably the separator) becomes the URI
                altPathURI = raccoon.SiteUriResolver.OsPathToPathUri(
                    abspath[prefixlen + 1:])

                #if the altfilepath already exists compare its digest with the previous
                #revisions digest and don't overwrite this file if they don't match
                #-- instead add a wiki:save-conflict property to the new contentlocation.
                if os.path.exists(altfilepath):
                    existingDigest = utils.shaDigest(altfilepath)
                    if existingDigest == digest:
                        #identical to the contents, so no need to write
                        saveAltFile = False
                        conflict = False
                    else:
                        conflict = True
                        if previousRevisionDigest:
                            #if these are equal, its ok to overwrite
                            saveAltFile = previousRevisionDigest == existingDigest
                        else:
                            saveAltFile = False
                else:
                    #conflict is left unset here; it is only read below
                    #when saveAltFile is false
                    saveAltFile = True

                altContents = ("<wiki:alt-contents><a:ContentLocation "
                               "rdf:about='%s' /></wiki:alt-contents>" %
                               altPathURI)
                if saveAltFile:
                    dir = os.path.split(altfilepath)[0]
                    try:
                        os.makedirs(dir)
                    except OSError:
                        pass  #dir might already exist

                    ff = TxnFileFactory(altfilepath)
                    self.server.txnSvc.join(ff)
                    f = ff.create('b')
                    #f = file(altfilepath, 'wb')
                    f.write(contents)
                    f.close()
                elif conflict:
                    #the existing alternate file is left alone and the
                    #conflict is recorded instead of the alt-contents
                    self.log.warning(
                        "conflict trying to save revision to ALTSAVE_DIR: "
                        "unrecognized contents at %s" % altfilepath)
                    altContents = ("<wiki:save-conflict><a:ContentLocation "
                                   "rdf:about='%s' /></wiki:save-conflict>" %
                                   altPathURI)
            else:
                altContents = ''

            contentProps = ("<a:content-length>%u</a:content-length>"
                            "<a:sha1-digest>%s</a:sha1-digest>" %
                            (contentLength, digest))

            abspath = os.path.abspath(filepath)
            prefixlen = len(
                InputSource.DefaultFactory.resolver.getPrefix(abspath))
            assert prefixlen, ("filepath %s must be on Raccoon's PATH" %
                               abspath)
            filepathURI = raccoon.SiteUriResolver.OsPathToPathUri(
                abspath[prefixlen + 1:])
            #print >>sys.stderr, abspath, abspath[prefixlen+1:], prefixlen, filepathURI
            #the template is filled from the local variables above, so
            #renaming any of them would break the output
            xml = ("<a:ContentLocation %(ns)s rdf:about='%(filepathURI)s'>"
                   "%(contentProps)s%(altContents)s</a:ContentLocation>" %
                   locals())
        else:  #save the contents inside the model
            try:
                if isinstance(contents, str):
                    #test to see if the string can be treated as utf8
                    contents.decode('utf8')
                #early exit: the contents are handed back unchanged and
                #no xml is built for them
                return contents
                #contents = utils.htmlQuote(contents)
                #xml = '''<a:Content rdf:about='%(sha1urn)'>%(contentProps)s<a:contents>%(contents)s</a:contents></<a:Content>''' % locals()
            except UnicodeError:
                #could be binary, base64 encode
                encodedURI = RxPath.generateBnode()
                contents = base64.encodestring(contents)
                xml = (
                    "<a:ContentTransform %(ns)s rdf:about='%(encodedURI)s'>"
                    "<a:transformed-by>"
                    "<rdf:Description rdf:about='http://www.w3.org/2000/09/xmldsig#base64'/>"
                    "</a:transformed-by>"
                    "<a:contents>%(contents)s</a:contents>"
                    "</a:ContentTransform>" % locals())

        #print >>sys.stderr, 'sc', xml
        from Ft.Xml import Domlette
        #why can't InputSource accept unicode? lame (thus we don't support unicode filenames right now)
        isrc = InputSource.DefaultFactory.fromString(str(xml), 'file:')
        xmlDoc = Domlette.NonvalidatingReader.parse(isrc)
        #return a nodeset containing the root element of the doc
        #print >>sys.stderr, 'sc', xmlDoc.documentElement
        return [xmlDoc.documentElement]
Ejemplo n.º 19
0
def GenerateBnode(context, name=None):
    '''
    Thin wrapper around RxPath.generateBnode(). A name that is not None
    is converted with StringValue() before being passed on; context is
    not used.
    '''
    if name is None:
        return RxPath.generateBnode(name)
    return RxPath.generateBnode(StringValue(name))
Ejemplo n.º 20
0
def rxml2RxPathDOM(path, url=None, debug=0, namespaceAware=0):
    '''
    Parse RxML with rx2model() and return an RxPath DOM created from the
    resulting model together with a uri-to-prefix namespace map.
    '''
    model, prefix2uri = rx2model(path, url, debug, namespaceAware)
    #todo: bug! revNsMap doesn't work with 2 prefixes one ns
    uri2prefix = {}  #uri to prefix namespace map
    for prefix, uri in prefix2uri.items():
        uri2prefix[uri] = prefix
    return RxPath.createDOM(model, uri2prefix)
Ejemplo n.º 21
0
    def outputPredicate(predNode, indent):
        '''
        Return the ZML text for the predicate element predNode, starting
        at the given indent.

        The first line holds the predicate name (rx:a for rdf:type) and
        any statement id; the object follows as a quoted literal, a
        resource name or the items of a list. A namespace not yet in
        revNsMap is added to nsMap and revNsMap using the node's prefix.
        '''
        nsURI = predNode.namespaceURI
        if revNsMap.has_key(nsURI):
            prefix = revNsMap[nsURI]
        else:
            #first use of this namespace: remember the node's own prefix
            prefix = predNode.prefix
            nsMap[prefix] = nsURI
            revNsMap[nsURI] = prefix

        if nsURI == RDF_MS_BASE and predNode.localName == 'type':
            predicateString = rxPrefix + 'a'  #use rx:a instead rdf:type
        elif prefix:
            predicateString = prefix + ':' + predNode.localName
        else:
            predicateString = predNode.localName

        if fixUpPredicate:
            predURI = RxPath.getURIFromElementName(predNode)
            quotedURI = urllib.quote(predURI)
            predicateString = fixUpPredicate % utils.kw2dict(
                uri=predURI, encodeduri=quotedURI, predicate=predicateString)

        line = indent + predicateString

        stmtId = predNode.getAttributeNS(RDF_MS_BASE, 'ID')
        if stmtId:
            line += ' ' + rxPrefix + RX_STMTID_ATTRIB + '="' + stmtId + '"'

        assert len(predNode.childNodes) == 1
        child = predNode.childNodes[0]

        if child.nodeType == predNode.TEXT_NODE:
            #literal object
            lang = predNode.getAttributeNS(XML_NAMESPACE, 'lang')
            datatype = predNode.getAttributeNS(RDF_MS_BASE, 'datatype')
            if lang or datatype:
                #the literal goes on a line of its own so that it can
                #carry the attributes
                indent += INDENT
                line += ': ' + NL
                line += indent + rxPrefix + RX_LITERALELEM
                if lang:
                    line += ' xml:lang="' + lang + '"'
                if datatype:
                    #note we don't bother to check if its xml literal and parse and output as zml
                    line += ' rdf:datatype="' + datatype + '"'
            line += ': '
            line += doQuote(child.nodeValue) + NL
            return line

        #resource object, possibly a list or container
        compound = child.isCompound()
        listType = compound
        if compound:
            line += ' ' + rxPrefix + 'list="' + child.getAttributeNS(
                RDF_MS_BASE, 'about') + '"'
            listType = compound[len(RDF_MS_BASE):]
            if listType != 'List':
                assert listType in ['Alt', 'Seq',
                                    'Bag'], 'isList should not be ' + listType
                line += ' ' + rxPrefix + 'listType="rdf:' + listType + '"'

        line += ': ' + NL
        indent += INDENT

        if not listType:
            return line + indent + getResourceNameFromURI(child) + NL

        #is the object a list resource? then output each of its items
        items = [
            p.childNodes[0] for p in child.childNodes
            if RxPath.getURIFromElementName(p) in
            [RDF_MS_BASE + 'first', RDF_SCHEMA_BASE + 'member']
        ]
        for li in items:
            if li.nodeType == li.TEXT_NODE:
                itemPred = li.parentNode
                lang = itemPred.getAttributeNS(XML_NAMESPACE, 'lang')
                datatype = itemPred.getAttributeNS(RDF_MS_BASE, 'datatype')
                if lang:
                    attr = ' xml:lang="' + lang + '"'
                elif datatype:
                    #note we don't bother to check if its xml literal and parse and output as zml
                    attr = ' rdf:datatype="' + datatype + '"'
                else:
                    attr = ''
                line += (indent + rxPrefix + RX_LITERALELEM + attr + ':' +
                         doQuote(li.nodeValue) + NL)
            elif li.nodeType == li.ELEMENT_NODE:
                line += indent + getResourceNameFromURI(li) + NL
        return line
Ejemplo n.º 22
0
def getRXAsZMLFromNode(resourceNodes,
                       nsMap=None,
                       includeRoot=False,
                       INDENT='    ',
                       NL='\n',
                       INITINDENT='',
                       rescomment='',
                       fixUp=None,
                       fixUpPredicate=None):
    '''
    Given a nodeset of RxPathDom nodes, return their RxML serialization in
    ZML markup format.

    resourceNodes: a single node or a list/tuple of nodes. A node for which
        RxPath.isPredicate() is true is written alone under its parent
        resource; any other node is written with all of its child nodes as
        predicates.
    nsMap: dict mapping prefixes to namespace URIs. Defaults to the bnode
        and RX_META_DEFAULT entries. Note that it is updated in place with
        the prefix of any predicate whose namespace is not already mapped.
    includeRoot: if true, emit the ZML header plus a root rx element and
        indent everything else one level below it.
    INDENT, NL, INITINDENT: indentation unit, line terminator and the
        indentation the output starts at.
    rescomment: if set, appended as a '#' comment to every resource line.
    fixUp: optional %-format template applied to each resource name; it is
        formatted with the mapping utils.kw2dict builds from uri,
        encodeduri and res. When set the output is assumed to be xml/html
        rather than zml, so literals are html-quoted and the ZML header is
        omitted unless includeRoot is set.
    fixUpPredicate: optional %-format template applied to each predicate
        name; formatted with uri, encodeduri and predicate.
    '''
    if fixUp:  #if fixUp we assume we're outputing xml/html not zml
        doQuote = lambda s: '`' + utils.htmlQuote(s)
    else:
        doQuote = quoteString

    if nsMap is None:
        nsMap = {'bnode': BNODE_BASE, RX_META_DEFAULT: RX_NS}
    #reverse map (namespace URI -> prefix), leaving out the default
    #pseudo-prefixes and any key that can't be used as a prefix
    revNsMap = dict([(ns, prefix) for prefix, ns in nsMap.items()
                     if prefix and ':' not in prefix
                     and prefix not in [RX_META_DEFAULT, RX_BASE_DEFAULT]])
    if RX_META_DEFAULT in nsMap:
        #names in the default meta namespace are written without a prefix
        revNsMap[nsMap[RX_META_DEFAULT]] = ''

    rxPrefix = revNsMap.get(RX_NS, 'rx')
    if rxPrefix:
        rxPrefix += ':'

    def getResourceNameFromURI(resNode):
        '''Return the ZML name of a resource: a qname if its namespace has
        a known prefix, otherwise the full URI wrapped in {}.'''
        namespaceURI = resNode.getAttributeNS(RDF_MS_BASE, 'about')
        assert namespaceURI
        prefixURI, rest = RxPath.splitUri(namespaceURI)

        if rest and prefixURI in revNsMap:
            prefix = revNsMap[prefixURI]
            if prefix:
                retVal = prefix + ':' + rest
            else:
                retVal = rest
            if fixUp:
                retVal = fixUp % utils.kw2dict(
                    uri=namespaceURI,
                    encodeduri=urllib.quote(namespaceURI),
                    res=retVal)
            return retVal

        #no local name or no prefix for the namespace: write the whole URI
        if fixUp:
            namespaceURI = fixUp % utils.kw2dict(
                uri=namespaceURI,
                encodeduri=urllib.quote(namespaceURI),
                res=namespaceURI)
        return '{' + namespaceURI + '}'

    def literalAttributes(lang, datatype, both):
        '''Return the xml:lang / rdf:datatype attribute text for a literal.
        If both is false, datatype is only written when there's no lang.'''
        attrs = ''
        if lang:
            attrs += ' xml:lang="' + lang + '"'
        if datatype and (both or not lang):
            #note we don't bother to check if its xml literal and parse and output as zml
            attrs += ' rdf:datatype="' + datatype + '"'
        return attrs

    def outputPredicate(predNode, indent):
        '''Return the ZML text for one predicate element and its object.'''
        if predNode.namespaceURI in revNsMap:
            prefix = revNsMap[predNode.namespaceURI]
        else:
            #unknown namespace: register the element's own prefix so it
            #shows up in the prefixes section
            prefix = predNode.prefix
            nsMap[prefix] = predNode.namespaceURI
            revNsMap[predNode.namespaceURI] = prefix

        if predNode.namespaceURI == RDF_MS_BASE and predNode.localName == 'type':
            predicateString = rxPrefix + 'a'  #use rx:a instead rdf:type
        elif prefix:
            predicateString = prefix + ':' + predNode.localName
        else:
            predicateString = predNode.localName

        if fixUpPredicate:
            predURI = RxPath.getURIFromElementName(predNode)
            predicateString = fixUpPredicate % utils.kw2dict(
                uri=predURI,
                encodeduri=urllib.quote(predURI),
                predicate=predicateString)

        line = indent + predicateString

        stmtId = predNode.getAttributeNS(RDF_MS_BASE, 'ID')
        if stmtId:
            line += ' ' + rxPrefix + RX_STMTID_ATTRIB + '="' + stmtId + '"'

        assert len(predNode.childNodes) == 1
        objectNode = predNode.childNodes[0]
        if objectNode.nodeType == predNode.TEXT_NODE:
            lang = predNode.getAttributeNS(XML_NAMESPACE, 'lang')
            datatype = predNode.getAttributeNS(RDF_MS_BASE, 'datatype')
            if lang or datatype:
                #a qualified literal needs its own literal element
                #nested one level below the predicate
                line += ': ' + NL
                indent += INDENT
                line += indent + rxPrefix + RX_LITERALELEM
                line += literalAttributes(lang, datatype, True)
            line += ': ' + doQuote(objectNode.nodeValue) + NL
            return line

        #isCompound() returns the URI of the list/container type, if any
        compoundType = objectNode.isCompound()
        if compoundType:
            line += ' ' + rxPrefix + 'list="' + objectNode.getAttributeNS(
                RDF_MS_BASE, 'about') + '"'
            compoundType = compoundType[len(RDF_MS_BASE):]
            if compoundType != 'List':
                assert compoundType in [
                    'Alt', 'Seq', 'Bag'
                ], 'isList should not be ' + compoundType
                line += ' ' + rxPrefix + 'listType="rdf:' + compoundType + '"'

        line += ': ' + NL
        indent += INDENT

        if not compoundType:
            return line + indent + getResourceNameFromURI(objectNode) + NL

        #the object is a list resource: write each of its items
        itemPredicates = [RDF_MS_BASE + 'first', RDF_SCHEMA_BASE + 'member']
        items = [p.childNodes[0] for p in objectNode.childNodes
                 if RxPath.getURIFromElementName(p) in itemPredicates]
        for li in items:
            if li.nodeType == li.TEXT_NODE:
                attr = literalAttributes(
                    li.parentNode.getAttributeNS(XML_NAMESPACE, 'lang'),
                    li.parentNode.getAttributeNS(RDF_MS_BASE, 'datatype'),
                    False)
                line += (indent + rxPrefix + RX_LITERALELEM + attr + ':' +
                         doQuote(li.nodeValue) + NL)
            elif li.nodeType == li.ELEMENT_NODE:
                line += indent + getResourceNameFromURI(li) + NL
        return line

    indent = INITINDENT
    line = prefixes = root = ''

    if includeRoot:
        indent += INDENT
        root += '#?zml0.7 markup' + NL
        root += INITINDENT + rxPrefix + 'rx:' + NL
    elif not fixUp:  #if fixUp we assume we're outputing xml/html not zml
        root += '#?zml0.7 markup' + NL

    if not isinstance(resourceNodes, (list, tuple)):
        resourceNodes = [resourceNodes]

    for resourceNode in resourceNodes:
        if RxPath.isPredicate(None, [resourceNode]):
            predicateNodes = [resourceNode]
            resourceNode = resourceNode.parentNode
        else:
            predicateNodes = resourceNode.childNodes

        line += indent + getResourceNameFromURI(resourceNode) + ':'
        if rescomment:
            line += ' #' + rescomment
        line += NL
        for p in predicateNodes:
            line += outputPredicate(p, indent + INDENT)
        line += NL

    #built last because outputPredicate may have added prefixes to nsMap
    if nsMap:
        prefixes = indent + rxPrefix + 'prefixes:' + NL
        for prefix, ns in nsMap.items():
            prefixes += indent + INDENT + prefix + ': `' + ns + NL
        prefixes += NL

    return root + prefixes + line
Ejemplo n.º 23
0
def addResource(model,
                scope,
                resource,
                resourceElem,
                rxNSPrefix,
                nsMap,
                thisResource,
                noStmtIds=False):
    '''
    Add the children of a RxML resource element to the model.

    Each child element of resourceElem is treated as a predicate of
    `resource`; one statement per predicate is added to `model` in the
    given `scope`. If the predicate's object is a list, the predicate's
    statement points at the list resource and the list items are added by
    addList2Model() or addContainer2Model().

    rxNSPrefix and nsMap are passed through to the name matching and URI
    resolution helpers; thisResource is passed through to getObject().

    Raises RxMLError if a statement id attribute is present (they are not
    supported yet, and illegal if noStmtIds is set) or if a childless
    predicate element has unexpected attributes.
    '''
    for p in resourceElem.childNodes:
        if p.nodeType != p.ELEMENT_NODE:
            continue

        if matchName(p, rxNSPrefix, 'resource'):
            predicate = p.getAttributeNS(EMPTY_NAMESPACE, 'id')
        elif matchName(p, rxNSPrefix, 'a'):  #alias for rdf:type
            predicate = RDF_MS_BASE + 'type'
        else:
            predicate = getURIFromElementName(p, nsMap)

        stmtId = getAttributefromQName(p, rxNSPrefix, RX_STMTID_ATTRIB)
        if not stmtId:
            stmtId = p.getAttributeNS(EMPTY_NAMESPACE, RX_STMTID_ATTRIB)
        if stmtId and noStmtIds:
            raise RxMLError(RX_STMTID_ATTRIB +
                            ' attribute found at illegal location')
        if stmtId:
            raise RxMLError(RX_STMTID_ATTRIB + ' attribute not yet supported')

        objectValue = getAttributefromQName(p, rxNSPrefix,
                                            'res')  #this is deprecated
        if objectValue:
            objectType = OBJECT_TYPE_RESOURCE
        elif (getAttributefromQName(p, rxNSPrefix, 'list') is not None
              or getAttributefromQName(p, {'': EMPTY_NAMESPACE},
                                       'list') is not None
              or getAttributefromQName(p, rxNSPrefix, 'listType') is not None
              or getAttributefromQName(p, {'': EMPTY_NAMESPACE},
                                       'listType') is not None
              or len([
                  c for c in p.childNodes if c.nodeType != p.COMMENT_NODE
                  and c.nodeValue and c.nodeValue.strip()
              ]) > 1):
            #the object of this predicate is a list
            listID = getAttributefromQName(p, rxNSPrefix, 'list')
            if not listID:
                listID = p.getAttributeNS(EMPTY_NAMESPACE, 'list')
            if not listID:
                listID = RxPath.generateBnode()
            model.addStatement(
                Statement(resource, predicate, listID, OBJECT_TYPE_RESOURCE,
                          scope))

            listType = getAttributefromQName(p, rxNSPrefix, 'listType')
            #fall back to the unprefixed attribute, like listID above
            #(this used to test listID, which is always set by now, so the
            #unprefixed listType attribute was never looked at)
            if not listType:
                listType = p.getAttributeNS(EMPTY_NAMESPACE, 'listType')

            getObjectFunc = lambda elem: getObject(elem, rxNSPrefix, nsMap,
                                                   thisResource)
            if not listType or listType == 'rdf:List':
                addList2Model(model, resource, p, listID, scope, getObjectFunc)
            else:
                addContainer2Model(model, resource, p, listID, scope,
                                   getObjectFunc, listType)
            #the predicate's own statement was already added above
            continue
        else:  #object is a literal or resource
            childNodes = [
                child for child in p.childNodes
                if child.nodeType != child.COMMENT_NODE
            ]
            if not childNodes:
                #if predicate has no child we assume its an empty literal
                #this could be the result of the common error with ZML
                #where the ':' was missing after the predicate
                invalidAttrLocalNames = [
                    attName[1] for attName in p.attributes.keys()
                    if attName[1] not in [RX_STMTID_ATTRIB, 'id']
                ]
                if invalidAttrLocalNames:
                    #there's an attribute that's neither 'stmtid' nor 'rdf:id'
                    raise RxMLError('invalid attribute ' +
                                    invalidAttrLocalNames[0] +
                                    ' on predicate element ' + p.localName +
                                    ' -- did you forget a ":"?')
                objectValue, objectType = "", OBJECT_TYPE_LITERAL
            else:
                assert len(childNodes) == 1, p
                objectValue, objectType = getObject(childNodes[0], rxNSPrefix,
                                                    nsMap, thisResource)

        model.addStatement(
            Statement(resource, predicate, objectValue, objectType, scope))
Ejemplo n.º 24
0
    def to_rdf(self, json):
        '''
        Convert JSON into RDF statements and return them.

        json may be a JSON string (parsed first), a dict (the dicts in its
        'results' list plus the values of its 'shared' dict are converted)
        or a list of dicts. Anything else raises TypeError.

        Every dict becomes a resource, identified by its id property; one
        is generated with self._blank() and stored on the dict if missing.
        Nested dicts are converted too. Lists additionally get a sequence
        resource recording the order of their items, unless the property
        is one of rdf:_n, rdf:first or rdfs:member.

        Note: the input dicts are modified (missing ids are filled in).
        '''
        #the `json` argument hides the json module inside this method,
        #so the module has to be imported under another name
        import json as jsonlib

        scope = ''
        m = RxPath.MemModel()

        if isinstance(json, (str, unicode)):
            json = jsonlib.loads(json)
        if isinstance(json, dict):
            todo = [r for r in json.get('results', [])
                    if isinstance(r, dict)]
            if 'shared' in json:
                todo.extend(json['shared'].values())
        elif isinstance(json, list):
            #work on a copy so the caller's list isn't emptied
            todo = list(json)
        else:
            raise TypeError('whats this?')

        #nsmapstack = [ self.nsmap.copy() ]
        nsmap = self.nsmap

        def getorsetid(obj):
            '''return (id, idprop) for obj, assigning a new id if needed'''
            #nsmap = nsmapstack.pop()
            nsmapprop = _expandqname('nsmap', nsmap)
            nsmapval = obj.get(nsmapprop)
            if nsmapval is not None:
                pass #XXX update stack
            idprop = _expandqname('id', nsmap)
            id = obj.get(idprop)
            if id is None:
                id = self._blank() #XXX
                obj[idprop] = id
            return id, idprop

        def objecttypeof(value):
            '''simple values are resources if they look like a uri or qname'''
            if self.lookslikeUriOrQname(value):
                return OBJECT_TYPE_RESOURCE
            return OBJECT_TYPE_LITERAL

        while todo:
            obj = todo.pop()
            #XXX if obj.nsmap: push nsmap
            #XXX propmap
            #XXX idmap
            id, idprop = getorsetid(obj)

            for prop, val in obj.items():
                if prop == idprop:
                    continue
                prop = _expandqname(prop, nsmap)

                if isinstance(val, dict):
                    objid = getorsetid(val)[0]
                    m.addStatement( Statement(id, prop, objid,
                                        OBJECT_TYPE_RESOURCE, scope) )
                    todo.append(val)
                elif isinstance(val, list):
                    #dont build a PROPSEQTYPE if prop in rdf:_ rdf:first rdfs:member
                    specialprop = prop.startswith(RDF_MS_BASE+'_') or prop in [
                                  RDF_MS_BASE+'first', RDF_SCHEMA_BASE+'member']
                    #XXX special handling for prop == PROPSEQ ?
                    seq = None
                    if not specialprop:
                        if not val:
                            m.addStatement( Statement(id, prop, RDF_MS_BASE+'nil',
                                OBJECT_TYPE_RESOURCE, scope) )
                        else:
                            seq = self._blank()
                            m.addStatement( Statement(seq, RDF_MS_BASE+'type',
                                RDF_MS_BASE+'Seq', OBJECT_TYPE_RESOURCE, scope) )
                            m.addStatement( Statement(seq, RDF_MS_BASE+'type',
                                PROPSEQTYPE, OBJECT_TYPE_RESOURCE, scope) )
                            m.addStatement( Statement(seq, PROPBAG, prop,
                                OBJECT_TYPE_RESOURCE, scope) )
                            m.addStatement( Statement(id, PROPSEQ, seq,
                                OBJECT_TYPE_RESOURCE, scope) )
                    for i, item in enumerate(val):
                        if isinstance(item, dict):
                            #the id comes from the item, not the enclosing list
                            itemid = getorsetid(item)[0]
                            m.addStatement( Statement(id, prop, itemid,
                                OBJECT_TYPE_RESOURCE, scope) )
                            if not specialprop:
                                #the sequence member is the item itself
                                m.addStatement( Statement(seq,
                                    RDF_MS_BASE+'_'+str(i+1), itemid,
                                    OBJECT_TYPE_RESOURCE, scope) )
                            todo.append(item)
                        elif isinstance(item, list):
                            pass #XXX nested lists: add JSONSEQ
                        else:
                            #simple type
                            objecttype = objecttypeof(item)
                            m.addStatement( Statement(id, prop, item,
                                objecttype, scope) )
                            if not specialprop:
                                m.addStatement( Statement(seq,
                                    RDF_MS_BASE+'_'+str(i+1), item,
                                    objecttype, scope) )
                else: #simple type
                    m.addStatement( Statement(id, prop, val,
                                        objecttypeof(val), scope) )

        return m.getStatements()
Ejemplo n.º 25
0
    def loadDom(self, requestProcessor):
        '''
        Build the model for this store and create self.dom from it.

        The model comes from self.modelFactory and is then optionally
        combined with the application model (self.APPLICATION_MODEL) and
        mirrored to the transaction log (self.transactionLog). If
        self.saveHistory is set, a NamedGraphManager is created for it as
        well. Also sets self.log and hooks this store's triggers and the
        request processor's queryCache onto the new DOM.
        '''
        #Imported unconditionally: it is needed for the application model
        #below even when saveHistory is off. (When the import only ran if
        #saveHistory was set, that case raised an UnboundLocalError.)
        from rx import RxPathGraph

        self.log = logging.getLogger("domstore." + requestProcessor.appName)

        normalizeSource = getattr(self.modelFactory, 'normalizeSource',
                                  DomStore._normalizeSource)
        source = normalizeSource(self, requestProcessor, self.STORAGE_PATH)

        modelUri = requestProcessor.MODEL_RESOURCE_URI
        if self.saveHistory:
            initCtxUri = RxPathGraph.getTxnContextUri(modelUri, 0)
        else:
            initCtxUri = ''
        defaultStmts = RxPath.NTriples2Statements(self.defaultTripleStream,
                                                  initCtxUri)

        if self.VERSION_STORAGE_PATH:
            normalizeSource = getattr(self.versionModelFactory,
                                      'normalizeSource',
                                      DomStore._normalizeSource)
            versionStoreSource = normalizeSource(self, requestProcessor,
                                                 self.VERSION_STORAGE_PATH)
            delmodel = self.versionModelFactory(source=versionStoreSource,
                                                defaultStatements=[])
        else:
            delmodel = None

        #note: to override loadNtriplesIncrementally, set this attribute
        #on your custom modelFactory function
        if self.saveHistory and getattr(self.modelFactory,
                                        'loadNtriplesIncrementally', False):
            if not delmodel:
                delmodel = RxPath.MemModel()
            dmc = RxPathGraph.DeletionModelCreator(delmodel)
            model = self.modelFactory(source=source,
                                      defaultStatements=defaultStmts,
                                      incrementHook=dmc)
            lastScope = dmc.lastScope
        else:
            model = self.modelFactory(source=source,
                                      defaultStatements=defaultStmts)
            lastScope = None

        if self.APPLICATION_MODEL:
            appTriples = StringIO.StringIO(self.APPLICATION_MODEL)
            stmtGen = RxPath.NTriples2Statements(appTriples,
                                                 RxPathGraph.APPCTX)
            appmodel = RxPath.MemModel(stmtGen)
            model = RxPath.MultiModel(model, appmodel)

        if self.transactionLog:
            model = RxPath.MirrorModel(
                model,
                RxPath.IncrementalNTriplesFileModel(self.transactionLog, []))

        if self.saveHistory:
            graphManager = RxPathGraph.NamedGraphManager(
                model, delmodel, lastScope)
        else:
            graphManager = None

        #reverse namespace map #todo: bug! revNsMap doesn't work with 2 prefixes one ns
        revNsMap = dict([(ns, prefix)
                         for prefix, ns in requestProcessor.nsMap.items()])
        self.dom = RxPath.createDOM(model,
                                    revNsMap,
                                    modelUri=modelUri,
                                    schemaClass=self.schemaFactory,
                                    graphManager=graphManager)
        self.dom.addTrigger = self.addTrigger
        self.dom.removeTrigger = self.removeTrigger
        self.dom.newResourceTrigger = self.newResourceTrigger

        #associate the queryCache with the DOM Document
        self.dom.queryCache = requestProcessor.queryCache
Ejemplo n.º 26
0
 def evalXPath(self, xpath, context, expCache=None, queryCache=None):
     '''
     Log the XPath expression at debug level, then return the result of
     evaluating it with RxPath.evalXPath() using the given context and caches.
     '''
     self.log.debug(xpath)
     result = RxPath.evalXPath(xpath, context, expCache, queryCache)
     return result