def rx2model(path, url=None, debug=0, namespaceAware=0, scope=''):
    '''
    Parse an RxML document and load its statements into a new model.

    path: passed straight to expatbuilder.parse when no url is given.
    url: when truthy, the document is read from the stream of
        InputSource.DefaultFactory.fromUri(url) instead of path.
    debug: accepted but not used by this function.
    namespaceAware: forwarded to expatbuilder.parse as `namespaces`.
    scope: forwarded to addRxdom2Model.

    Returns a (model, nsMap) tuple: the freshly created RxPath.MemModel
    and the value returned by addRxdom2Model.
    '''
    from xml.dom import expatbuilder

    # A url, when supplied, takes precedence over the local path.
    source = path
    if url:
        source = InputSource.DefaultFactory.fromUri(url).stream

    rxmlDoc = expatbuilder.parse(source, namespaces=namespaceAware)

    model = RxPath.MemModel()
    prefixMap = addRxdom2Model(rxmlDoc, model,
                               thisResource='wikiwiki:', scope=scope)
    return model, prefixMap
def initModel(location, defaultModel):
    '''
    Build an RxPath.MemModel from `location` if that path exists on disk,
    otherwise from `defaultModel`.
    '''
    # Start from the fallback and switch to the stored model only if present.
    chosen = defaultModel
    if os.path.exists(location):
        chosen = location
    return RxPath.MemModel(chosen)
def modelFromJson(model):
    '''
    Convert `model` to RDF statements with sjson and wrap them in an
    RxPath.MemModel.

    `model` is handed to sjson.sjson().to_rdf as the value of a
    'results' key.
    '''
    wrapped = {'results': model}
    statements = sjson.sjson().to_rdf(wrapped)
    return RxPath.MemModel(statements)
def loadDom(self, requestProcessor):
    '''
    Create the model(s) for this store and build self.dom from them.

    Reads configuration from attributes on self (STORAGE_PATH,
    VERSION_STORAGE_PATH, APPLICATION_MODEL, transactionLog, saveHistory,
    modelFactory, versionModelFactory, schemaFactory, defaultTripleStream)
    and from requestProcessor (appName, MODEL_RESOURCE_URI, nsMap,
    queryCache).

    Side effects: sets self.log and self.dom, and attaches the trigger
    callbacks and the request processor's queryCache to the new DOM.
    '''
    # Import unconditionally: a function-level import makes RxPathGraph a
    # local name, so importing it only under `if self.saveHistory` left it
    # unbound (UnboundLocalError) when APPLICATION_MODEL was set without
    # saveHistory.
    from rx import RxPathGraph

    self.log = logging.getLogger("domstore." + requestProcessor.appName)

    normalizeSource = getattr(self.modelFactory, 'normalizeSource',
                              DomStore._normalizeSource)
    source = normalizeSource(self, requestProcessor, self.STORAGE_PATH)

    modelUri = requestProcessor.MODEL_RESOURCE_URI
    if self.saveHistory:
        initCtxUri = RxPathGraph.getTxnContextUri(modelUri, 0)
    else:
        initCtxUri = ''
    defaultStmts = RxPath.NTriples2Statements(self.defaultTripleStream,
                                              initCtxUri)

    if self.VERSION_STORAGE_PATH:
        normalizeSource = getattr(self.versionModelFactory,
                                  'normalizeSource',
                                  DomStore._normalizeSource)
        versionStoreSource = normalizeSource(self, requestProcessor,
                                             self.VERSION_STORAGE_PATH)
        delmodel = self.versionModelFactory(source=versionStoreSource,
                                            defaultStatements=[])
    else:
        delmodel = None

    #note: to override loadNtriplesIncrementally, set this attribute
    #on your custom modelFactory function
    if self.saveHistory and getattr(self.modelFactory,
                                    'loadNtriplesIncrementally', False):
        if not delmodel:
            delmodel = RxPath.MemModel()
        dmc = RxPathGraph.DeletionModelCreator(delmodel)
        model = self.modelFactory(source=source,
                                  defaultStatements=defaultStmts,
                                  incrementHook=dmc)
        lastScope = dmc.lastScope
    else:
        model = self.modelFactory(source=source,
                                  defaultStatements=defaultStmts)
        lastScope = None

    if self.APPLICATION_MODEL:
        # Application statements live in their own model, layered over the
        # main one via MultiModel.
        appTriples = StringIO.StringIO(self.APPLICATION_MODEL)
        stmtGen = RxPath.NTriples2Statements(appTriples, RxPathGraph.APPCTX)
        appmodel = RxPath.MemModel(stmtGen)
        model = RxPath.MultiModel(model, appmodel)

    if self.transactionLog:
        model = RxPath.MirrorModel(
            model,
            RxPath.IncrementalNTriplesFileModel(self.transactionLog, []))

    if self.saveHistory:
        graphManager = RxPathGraph.NamedGraphManager(model, delmodel,
                                                     lastScope)
    else:
        graphManager = None

    #reverse namespace map
    #todo: bug! revNsMap doesn't work with 2 prefixes one ns
    # (when two prefixes share a namespace only one of them survives here)
    revNsMap = dict([(x[1], x[0]) for x in requestProcessor.nsMap.items()])
    self.dom = RxPath.createDOM(model, revNsMap, modelUri=modelUri,
                                schemaClass=self.schemaFactory,
                                graphManager=graphManager)
    self.dom.addTrigger = self.addTrigger
    self.dom.removeTrigger = self.removeTrigger
    self.dom.newResourceTrigger = self.newResourceTrigger

    #associate the queryCache with the DOM Document
    self.dom.queryCache = requestProcessor.queryCache
def to_rdf(self, json):
    '''
    Convert an SJSON structure into RDF statements.

    json: one of
        - a JSON string, which is parsed first;
        - a dict, whose 'results' list (dict entries only) and optional
          'shared' dict values are the objects to convert;
        - a list of objects.

    Each object's properties become statements about the object's id.
    An object without an id is given a blank id from self._blank(), and
    that id is stored back on the object so every later visit of the same
    object reuses it. Nested dicts are queued and converted in turn.
    A list value additionally produces a sequence resource recording item
    order, unless the property is one of rdf:_n, rdf:first or rdfs:member.

    Returns the statements held by the temporary model (m.getStatements()).
    Raises TypeError if the input does not resolve to a list of objects.
    '''
    # The `json` parameter shadows the stdlib module, so bind the module
    # under a different local name.
    import json as jsonlib

    scope = ''
    m = RxPath.MemModel()

    if isinstance(json, (str, unicode)):
        json = jsonlib.loads(json)

    if isinstance(json, dict):
        todo = [r for r in json.get('results', []) if isinstance(r, dict)]
        if 'shared' in json:
            todo.extend(json['shared'].values())
    else:
        todo = json

    if not isinstance(todo, list):
        raise TypeError('whats this?')
    # Work on a copy: the loop below consumes the list and must not empty
    # a list owned by the caller.
    todo = list(todo)

    #nsmapstack = [ self.nsmap.copy() ]
    nsmap = self.nsmap

    def getorsetid(obj):
        # Return (id, idprop) for obj, assigning a blank id when missing.
        #nsmap = nsmapstack.pop()
        nsmapprop = _expandqname('nsmap', nsmap)
        nsmapval = obj.get(nsmapprop)
        if nsmapval is not None:
            pass #XXX update stack
        idprop = _expandqname('id', nsmap)
        resid = obj.get(idprop)
        if resid is None:
            resid = self._blank()
            # Remember the generated id; otherwise the statement pointing
            # at this object and the object's own statements would use two
            # different blank ids.
            obj[idprop] = resid
        return resid, idprop

    while todo:
        obj = todo.pop()
        #XXX if obj.nsmap: push nsmap
        #XXX propmap
        #XXX idmap
        subject, idprop = getorsetid(obj)
        for prop, val in obj.items():
            if prop == idprop:
                continue
            prop = _expandqname(prop, nsmap)
            if isinstance(val, dict):
                objid, idprop = getorsetid(val)
                m.addStatement(Statement(subject, prop, objid,
                                         OBJECT_TYPE_RESOURCE, scope))
                todo.append(val)
            elif isinstance(val, list):
                #dont build a PROPSEQTYPE if prop in rdf:_ rdf:first rdfs:member
                specialprop = prop.startswith(RDF_MS_BASE + '_') or prop in [
                    RDF_MS_BASE + 'first', RDF_SCHEMA_BASE + 'member']
                #XXX special handling for prop == PROPSEQ ?
                if not specialprop:
                    if not val:
                        # an empty list is represented as rdf:nil
                        m.addStatement(Statement(subject, prop,
                                                 RDF_MS_BASE + 'nil',
                                                 OBJECT_TYPE_RESOURCE, scope))
                    else:
                        seq = self._blank()
                        m.addStatement(Statement(seq, RDF_MS_BASE + 'type',
                                                 RDF_MS_BASE + 'Seq',
                                                 OBJECT_TYPE_RESOURCE, scope))
                        m.addStatement(Statement(seq, RDF_MS_BASE + 'type',
                                                 PROPSEQTYPE,
                                                 OBJECT_TYPE_RESOURCE, scope))
                        m.addStatement(Statement(seq, PROPBAG, prop,
                                                 OBJECT_TYPE_RESOURCE, scope))
                        m.addStatement(Statement(subject, PROPSEQ, seq,
                                                 OBJECT_TYPE_RESOURCE, scope))

                # `seq` is only read below when the list is non-empty and
                # the property is not special, i.e. when it was bound above.
                for i, item in enumerate(val):
                    if isinstance(item, dict):
                        itemid, idprop = getorsetid(item)
                        m.addStatement(Statement(subject, prop, itemid,
                                                 OBJECT_TYPE_RESOURCE, scope))
                        if not specialprop:
                            m.addStatement(Statement(
                                seq, RDF_MS_BASE + '_' + str(i + 1), itemid,
                                OBJECT_TYPE_RESOURCE, scope))
                        todo.append(item)
                    elif isinstance(item, list):
                        pass #XXX nested lists: add JSONSEQ
                    else: #simple type
                        if self.lookslikeUriOrQname(item):
                            objecttype = OBJECT_TYPE_RESOURCE
                        else:
                            objecttype = OBJECT_TYPE_LITERAL
                        m.addStatement(Statement(subject, prop, item,
                                                 objecttype, scope))
                        if not specialprop:
                            m.addStatement(Statement(
                                seq, RDF_MS_BASE + '_' + str(i + 1), item,
                                objecttype, scope))
            else: #simple type
                if self.lookslikeUriOrQname(val):
                    objecttype = OBJECT_TYPE_RESOURCE
                else:
                    objecttype = OBJECT_TYPE_LITERAL
                m.addStatement(Statement(subject, prop, val,
                                         objecttype, scope))

    return m.getStatements()
def _to_sjson(self, root, depth=-1, exclude_blankids=False):
    """
    Serialize an RxPath DOM node, or an iterable of statements, to SJSON.

    If a resource is referenced more than once, just its id string is
    output in place and its properties are emitted under 'shared'.
    RDF lists and containers that are not SJSON sequences
    (sentence left unfinished in the original notes).

    root: an RxPathDom node; anything else is assumed to be an iterable of
        statements and is loaded into a temporary DOM first.
    depth, exclude_blankids: accepted but not implemented yet (see XXX).

    Returns:
        - the node's value when root is a Text node;
        - otherwise a list of result objects, or, when any resource ended
          up shared, a dict {'results': [...], 'shared': {...}}.
    Raises TypeError for an unsupported kind of root node.

    Unfinished example sketch carried over from the original:

    >>> r = Res("http://example.org/book#1"); r['v1'] = 'string'; r['v2'] = 1;
    >>> "http://example.org/book#2"
    >>> r['l'] = [1, 2, 3, 4, 5]; r['r'] = Res('o')
    >>> sjson().to_sjson(doc())
    """
    #XXX depth
    #XXX exclude_blankids (but if false, will need to add back if shared)
    #use RxPathDom, expensive but arranges as sorted tree, normalizes RDF collections et al.
    #and is schema aware
    from rx import RxPathDom
    if not isinstance(root, RxPathDom.Node):
        #assume doc is iterator of statements or quad tuples
        #note: order is not preserved
        root = RxPath.createDOM(RxPath.MemModel(root),
                                schemaClass=RxPath.BaseSchema)

    results = []
    # uri -> (container, key, value) of the first place a resource was put
    seen = {}
    # qname -> object for resources moved out of the result tree
    shared = {}

    # todo holds (container, key, resource) triples whose properties still
    # have to be written into container[key].
    if isinstance(root, (RxPathDom.Document, RxPathDom.DocumentFragment)):
        #XXX RxPathDom.DocumentFragment
        if isinstance(root, RxPathDom.Document):
            #filter out propseq resources and resources with no properties
            nodes = [n for n in root.childNodes
                     if n.childNodes
                     and not n.matchName(JSON_BASE, 'propseqtype')]
        else:
            nodes = list(root.childNodes)
        #from pprint import pprint
        #pprint(nodes)
        results = [{} for _ in nodes]
        todo = [(results, i, n) for i, n in enumerate(nodes)]
    elif isinstance(root, RxPathDom.Resource):
        results = [{}]
        todo = [(results, 0, root)]
    elif isinstance(root, RxPathDom.BasePredicate):
        # The predicate's object is its first child. (This previously read
        # an unbound name `p`, which raised NameError.)
        obj = root.childNodes[0]
        key = self.QName(root.parentNode.uri)
        propmap = {self.PROPERTYMAP: self.QName(root.stmt.predicate)}
        if isinstance(obj, RxPathDom.Text):
            v = self._value(obj)
            todo = []
        else:
            v = {}
            todo = [(propmap, key, obj)]
        propmap[key] = v
        results = [propmap]
    elif isinstance(root, RxPathDom.Text):
        #return string value
        return self._value(root)
    else:
        raise TypeError('Unexpected root node')

    def setobj(obj, res, parent, key):
        # Store the SJSON form of obj (an object of subject res) at
        # parent[key], queueing new resources for later expansion.
        if isinstance(obj, RxPathDom.Text):
            v = self._value(obj)
        #otherwise its a resource
        elif obj.uri == res.uri:
            #object is same as subject
            v = self.QName(obj.uri)
        elif obj.uri == RDF_MS_BASE + 'nil':
            v = [] #empty list
        else:
            uri = obj.uri
            #if an object appears in the tree more than once,
            #replace prior reference with uri
            #and add to shared
            prior = seen.get(uri)
            #print 'seen', uri, prior
            if prior:
                v = self.QName(uri)
                shared[v] = prior[0][prior[1]]
                prior[0][prior[1]] = v
            else:
                v = {}
                #print 'add to seen', uri, type(uri)
                seen[uri] = (parent, key, v)
                todo.append((parent, key, obj))
        parent[key] = v

    def setPropSeq(propseq, res):
        # Return (property uri, ordered values) for a property sequence.
        # NOTE(review): propbag stays unbound if the sequence has no
        # PROPBAG statement, so the return below would raise.
        #XXX what about empty lists?
        childlist = []
        for p in propseq.childNodes:
            prop = p.stmt.predicate
            obj = p.childNodes[0]
            if prop == PROPBAG:
                propbag = obj.uri
            elif prop == RxPath.RDF_SCHEMA_BASE + u'member':
                # reserve the slot, then let setobj fill it in
                childlist.append(0)
                setobj(obj, res, childlist, len(childlist) - 1)
        return propbag, childlist

    while todo:
        #res (the resource) has already been attached to its parent
        #now we need to assign its properties
        parent, key, res = todo.pop()
        if res.uri not in seen:
            #print 'add subject to seen', res.uri, type(res.uri)
            seen[res.uri] = (parent, key, parent[key])
        else:
            #print 'subject seen', res.uri
            parent[key] = res.uri #replace object with uri reference
            prior = seen[res.uri]
            key = self.QName(res.uri)
            if key in shared:
                continue
            else:
                #add to shared
                shared[key] = prior[2]
                parent = shared

        if res.childNodes:
            s = parent[key]
            s[self.ID] = res.uri
        else:
            #no properties, just write out the id, not a dict
            #and don't bother including it in shared
            #XXX but then we can't tell if its an id or just a string?
            if parent is shared:
                del parent[key]
            else:
                parent[key] = res.uri
            continue

        currentlist = []
        # First pass: property sequences, which take precedence over the
        # plain statements for the same property.
        for p in res.childNodes:
            prop = p.stmt.predicate
            if prop == PROPSEQ:
                #this will replace sequences
                seqprop, childlist = setPropSeq(p.childNodes[0], res)
                s[self.QName(seqprop)] = childlist

        # Second pass: remaining properties; consecutive statements with
        # the same predicate are collected into one list.
        for p in res.childNodes:
            prop = p.stmt.predicate
            if prop == PROPSEQ:
                continue
            if self.QName(prop) in s:
                continue #must have be already handled by getPropSeq

            nextMatches = (p.nextSibling
                           and p.nextSibling.stmt.predicate == prop)
            #XXX Test empty and singleton rdf lists and containers
            if nextMatches or currentlist:
                parent = currentlist
                key = len(currentlist)
                currentlist.append(0)
            else:
                parent = s
                key = self.QName(prop)

            obj = p.childNodes[0]
            setobj(obj, res, parent, key)

            if currentlist and not nextMatches:
                s[self.QName(prop)] = currentlist
                currentlist = [] #list done

    if shared:
        results = {'results': results, 'shared': shared}
    #if self.nsmap:
    #    results['prefix'] = self.nsmap
    return results