def test_util_uniq(self):
    base = ["michel", "hates", "pizza"]
    r = util.uniq(base + base)
    self.assertEqual(sorted(r), sorted(base))
    base = ["michel", "hates", "pizza"]
    r = util.uniq(base + base, strip=True)
    self.assertEqual(sorted(r), sorted(base))
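# The snippets in this section all rely on a uniq() helper for removing
# duplicates. A minimal sketch consistent with the test above is shown here
# as an assumption only; the real util.uniq may be implemented differently.
def uniq(sequence, strip=False):
    # Optionally strip surrounding whitespace, then drop duplicates.
    # A list is returned so results can be concatenated and sorted,
    # as the serializers below do with uniq(...) + uniq(...).
    if strip:
        sequence = [s.strip() for s in sequence]
    return list(set(sequence))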
def serialize(self, stream, base=None, encoding=None, **args):
    self.__serialized = {}
    store = self.store
    self.base = base
    self.max_depth = args.get("max_depth", 3)
    assert self.max_depth > 0, "max_depth must be greater than 0"
    self.nm = nm = store.namespace_manager
    self.writer = writer = XMLWriter(stream, nm, encoding)
    namespaces = {}
    possible = uniq(store.predicates()) + uniq(store.objects(None, RDF.type))
    for predicate in possible:
        prefix, namespace, local = nm.compute_qname(predicate)
        namespaces[prefix] = namespace
    namespaces["rdf"] = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
    writer.push(RDF.RDF)
    writer.namespaces(namespaces.iteritems())
    # Write out subjects that can not be inline
    for subject in store.subjects():
        if (None, None, subject) in store:
            if (subject, None, subject) in store:
                self.subject(subject, 1)
        else:
            self.subject(subject, 1)
    # write out anything that has not yet been reached
    for subject in store.subjects():
        self.subject(subject, 1)
    writer.pop(RDF.RDF)
    # Set to None so that the memory can get garbage collected.
    self.__serialized = None
def serialize(self, stream, base=None, encoding=None, **args):
    self.__serialized = {}
    # "Internal" list elements should be forgotten...
    for l in self.list_heads:
        if l not in self.list_really_heads:
            self.__serialized[l] = 1
    store = self.store
    self.base = base
    #self.max_depth = args.get("max_depth", 3)
    self.max_depth = 8
    self.nm = nm = store.namespace_manager
    self.writer = writer = XMLWriter(stream, nm, encoding)
    writer.push(RDF.RDF)
    # This checks whether a predicate appears with an unallowed URI and creates
    # an artificial namespace if the answer is yes...
    for predicate in uniq(store.predicates()):
        prefix, namespace, name = nm.compute_qname(predicate)
    # The same for types, that can appear as XML element names
    for subj, types in uniq(store.subject_objects(RDF.type)):
        prefix, namespace, name = nm.compute_qname(types)
    ns_list = list(self.store.namespaces())
    ns_list.sort()
    writer.namespaces(ns_list)
    # Write out subjects that can not be inline
    for subject in store.subjects():
        if (None, None, subject) in store:
            if (subject, None, subject) in store:
                self.subject(subject, 1)
        else:
            self.subject(subject, 1)
    # write out anything that has not yet been reached
    for subject in store.subjects():
        self.subject(subject, 1)
    writer.pop(RDF.RDF)
    # Set to None so that the memory can get garbage collected.
    self.__serialized = None
def serialize(self, stream, base=None, encoding=None, **args):
    self.__serialized = {}
    store = self.store
    self.base = base
    self.max_depth = args.get("max_depth", 3)
    assert self.max_depth > 0, "max_depth must be greater than 0"
    self.nm = nm = store.namespace_manager
    self.writer = writer = XMLWriter(stream, nm, encoding)
    namespaces = {}
    possible = list(uniq(store.predicates())) + list(
        uniq(store.objects(None, RDF.type)))
    for predicate in possible:
        prefix, namespace, local = nm.compute_qname(predicate)
        namespaces[prefix] = namespace
    namespaces["rdf"] = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
    writer.push(RDF.RDF)
    writer.namespaces(namespaces.iteritems())
    # Write out subjects that can not be inline
    for subject in store.subjects():
        if (None, None, subject) in store:
            if (subject, None, subject) in store:
                self.subject(subject, 1)
        else:
            self.subject(subject, 1)
    # write out anything that has not yet been reached
    # write out BNodes last (to ensure they can be inlined where possible)
    bnodes = set()
    for subject in store.subjects():
        if isinstance(subject, BNode):
            bnodes.add(subject)
            continue
        self.subject(subject, 1)
    # now serialize only those BNodes that have not been serialized yet
    for bnode in bnodes:
        if bnode not in self.__serialized:
            # serialize the BNode itself (was `subject`, left over from
            # the previous loop)
            self.subject(bnode, 1)
    writer.pop(RDF.RDF)
    stream.write("\n".encode('utf-8'))
    # Set to None so that the memory can get garbage collected.
    self.__serialized = None
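# For context: serializer methods like the three variants above are normally
# reached through an rdflib Graph rather than called directly. This usage
# sketch is illustrative only; the input file name and the "pretty-xml"
# format choice are assumptions, not something the snippets themselves define.
from rdflib import Graph

g = Graph()
g.parse("data.rdf")                       # hypothetical input document
data = g.serialize(format="pretty-xml")   # dispatches to a registered XML serializer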
def setContext(self, context):
    # currently ContextContoller calls us
    self.context = context
    resources = uniq(self.redfoot.get_context(context).subjects(None, None))
    resources.sort()
    self.resources = resources
    self.resourcesTable.reloadData()
    self.resourcesTable.deselectAll_(self)
    self.resourcesTable.selectRowIndexes_byExtendingSelection_(
        NSIndexSet.indexSetWithIndex_(0), False)
def __bindings(self):
    store = self.store
    nm = store.namespace_manager
    bindings = {}
    for predicate in uniq(store.predicates()):
        prefix, namespace, name = nm.compute_qname(predicate)
        bindings[prefix] = URIRef(namespace)
    RDFNS = URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
    if "rdf" in bindings:
        assert bindings["rdf"] == RDFNS
    else:
        bindings["rdf"] = RDFNS
    for prefix, namespace in bindings.iteritems():
        yield prefix, namespace
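# Hypothetical, self-contained illustration of the qname computation that
# __bindings() and the serializers above depend on, using rdflib's
# NamespaceManager directly (the URI below is only an example).
from rdflib import Graph, URIRef

nm = Graph().namespace_manager
prefix, namespace, name = nm.compute_qname(
    URIRef("http://purl.org/dc/elements/1.1/title"))
# prefix is an existing or auto-generated short name (e.g. "ns1"),
# namespace is the vocabulary URI, and name is "title".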
def serialize(self, rem, page=-1):
    aggr = rem._aggregation_
    g = self.merge_graphs(rem)
    # make nsmap better
    nm = g.namespace_manager
    nsmap = {'atom': str(namespaces['atom'])}
    poss = uniq(g.predicates()) + uniq(g.objects(None, RDF.type))
    for pred in poss:
        pf, ns, l = nm.compute_qname(pred)
        nsmap[pf] = ns
    root = Element("{%s}entry" % namespaces['atom'], nsmap=nsmap)

    # entry/id == tag for entry == ReM dc:identifier
    # if not exist, generate Yet Another uuid
    e = SubElement(root, '{%s}id' % namespaces['atom'])
    if rem._dc.identifier:
        dcid = rem._dc.identifier[0]
        e.text = str(dcid)
        self.done_triples.append(
            (rem._uri_, namespaces['dc']['identifier'], dcid))
    else:
        e.text = "urn:uuid:%s" % gen_uuid()

    # entry/title == Aggr's dc:title
    title = aggr._dc.title
    tns = 'dc'
    if not title:
        title = aggr._dcterms.title
        tns = 'dcterms'
    if not title:
        raise OreException(
            "Atom Serialisation requires title on aggregation")
    else:
        e = SubElement(root, '{%s}title' % namespaces['atom'])
        dctit = title[0]
        e.text = str(dctit)
        self.done_triples.append(
            (aggr._uri_, namespaces[tns]['title'], dctit))

    # entry/author == Aggr's dcterms:creator
    for who in aggr._dcterms.creator:
        e = SubElement(root, '{%s}author' % namespaces['atom'])
        agent = aggr._agents_[who]
        self.make_agent(e, agent)
        self.done_triples.append(
            (aggr._uri_, namespaces['dcterms']['creator'], agent._uri_))

    # entry/contributor == Aggr's dcterms:contributor
    for bn in aggr._dcterms.contributor:
        e = SubElement(root, '{%s}contributor' % namespaces['atom'])
        # look up the contributor itself (was `who`, left over from the
        # creator loop above)
        agent = aggr._agents_[bn]
        self.make_agent(e, agent)
        self.done_triples.append(
            (aggr._uri_, namespaces['dcterms']['contributor'], agent._uri_))

    # entry/category[@scheme="(magic)"][@term="(datetime)"]
    for t in aggr._dcterms.created:
        t = t.strip()
        e = SubElement(
            root, '{%s}category' % namespaces['atom'], term=str(t),
            scheme="http://www.openarchives.org/ore/terms/datetime/created")
    for t in aggr._dcterms.modified:
        t = t.strip()
        e = SubElement(
            root, '{%s}category' % namespaces['atom'], term=str(t),
            scheme="http://www.openarchives.org/ore/terms/datetime/modified")

    # entry/category == Aggr's rdf:type
    for t in aggr._rdf.type:
        e = SubElement(root, '{%s}category' % namespaces['atom'], term=str(t))
        try:
            scheme = list(g.objects(t, namespaces['rdfs']['isDefinedBy']))[0]
            e.set('scheme', str(scheme))
            self.done_triples.append(
                (t, namespaces['rdfs']['isDefinedBy'], scheme))
        except:
            pass
        try:
            label = list(g.objects(t, namespaces['rdfs']['label']))[0]
            e.set('label', str(label))
            self.done_triples.append(
                (t, namespaces['rdfs']['label'], label))
        except:
            pass
        self.done_triples.append(
            (aggr._uri_, namespaces['rdf']['type'], t))

    # entry/summary
    if aggr._dc.description:
        e = SubElement(root, '{%s}summary' % namespaces['atom'])
        desc = aggr._dc.description[0]
        e.text = str(desc)
        self.done_triples.append(
            (aggr._uri_, namespaces['dc']['description'], desc))

    # All aggr links:
    done = [namespaces['rdf']['type'],
            namespaces['ore']['aggregates'],
            namespaces['dcterms']['creator'],
            namespaces['dcterms']['contributor'],
            namespaces['dc']['title'],
            namespaces['dc']['description']]
    for (p, o) in g.predicate_objects(aggr.uri):
        if p not in done:
            if isinstance(o, URIRef):
                self.make_link(root, p, o, g)
                self.done_triples.append((aggr._uri_, p, o))

    # entry/content // link[@rel="alternate"]
    # Do we have a splash page?
    altDone = 0
    atypes = aggr._rdf._type
    possAlts = []
    for (r, p) in aggr.resources:
        mytypes = r._rdf.type
        if namespaces['eurepo']['humanStartPage'] in mytypes:
            altDone = 1
            self.make_link(root, 'alternate', r.uri, g)
            break
        # check if share non Aggregation type
        # eg aggr == article and aggres == article, likely
        # to be good alternate
        for m in mytypes:
            if m != namespaces['ore']['Aggregation'] and m in atypes:
                possAlts.append(r.uri)
    if not altDone and possAlts:
        # XXX more intelligent algorithm here
        self.make_link(root, '{%s}alternate' % namespaces['atom'],
                       possAlts[0], g)
        altDone = 1
    if not altDone and build_html_atom_content:
        e = SubElement(root, '{%s}content' % namespaces['atom'])
        e.set('type', 'html')
        # make some representative html
        # this can get VERY LONG so default to not doing this
        html = ['<ul>']
        for (r, p) in aggr.resources:
            html.append('<li><a href="%s">%s</a></li>' % (r.uri, r.title[0]))
        html.append('</ul>')
        e.text = '\n'.join(html)
    else:
        e = SubElement(root, '{%s}content' % namespaces['atom'])
        e.set('type', 'html')
        e.text = "No Content"

    # entry/link[@rel='self'] == URI-R
    self.make_link(root, 'self', rem._uri_, g)
    # entry/link[@rel='ore:describes'] == URI-A
    self.make_link(root, namespaces['ore']['describes'], aggr._uri_, g)

    ### These are generated automatically in merge_graphs

    # entry/published == ReM's dcterms:created
    if rem._dcterms.created:
        e = SubElement(root, '{%s}published' % namespaces['atom'])
        c = rem._dcterms.created[0]
        md = str(c)
        if md.find('Z') == -1:
            # append Z
            md += "Z"
        e.text = md
        self.done_triples.append(
            (rem._uri_, namespaces['dcterms']['created'], c))

    # entry/updated == ReM's dcterms:modified
    e = SubElement(root, '{%s}updated' % namespaces['atom'])
    if rem._dcterms.modified:
        c = rem._dcterms.modified[0]
        md = str(c)
        if md.find('Z') == -1:
            # append Z
            md += "Z"
        e.text = str(md)
        self.done_triples.append(
            (rem._uri_, namespaces['dcterms']['modified'], c))
    else:
        e.text = now()

    # entry/rights == ReM's dc:rights
    if rem._dc.rights:
        e = SubElement(root, '{%s}rights' % namespaces['atom'])
        r = rem._dc.rights[0]
        e.text = str(r)
        self.done_triples.append(
            (rem._uri_, namespaces['dc']['rights'], r))

    # entry/source/author == ReM's dcterms:creator
    if rem._dcterms.creator:
        # Should at least be our generator! (right?)
        src = SubElement(root, '{%s}source' % namespaces['atom'])
        for who in rem._dcterms.creator:
            e = SubElement(src, '{%s}author' % namespaces['atom'])
            agent = rem._agents_[who]
            self.make_agent(e, agent)
            self.done_triples.append(
                (rem._uri_, namespaces['dcterms']['creator'], agent._uri_))
        for who in rem._dcterms.contributor:
            e = SubElement(src, '{%s}contributor' % namespaces['atom'])
            agent = rem._agents_[who]
            self.make_agent(e, agent)
            self.done_triples.append(
                (rem._uri_, namespaces['dcterms']['contributor'], agent._uri_))
        e = SubElement(src, '{%s}generator' % namespaces['atom'],
                       uri=str(libraryUri), version=str(libraryVersion))
        e.text = str(libraryName)

    # Remove aggregation, resource map props already done
    # All of agg res needs to be done
    for (r, p) in aggr.resources:
        self.make_link(root, namespaces['ore']['aggregates'], r.uri, g)
        self.done_triples.append(
            (aggr._uri_, namespaces['ore']['aggregates'], r._uri_))

    # Now create ore:triples
    # and populate with rdf/xml
    trips = SubElement(root, '{%s}triples' % namespaces['ore'])
    self.generate_rdf(trips, g)

    data = etree.tostring(root, pretty_print=True)
    #data = data.replace('\n', '')
    #data = self.spacesub.sub('', data)
    uri = str(rem._uri_)
    self.done_triples = []
    return ReMDocument(uri, data, format='atom', mimeType=self.mimeType)
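# A minimal usage sketch for the Atom serializer above, based only on its
# signature and return value. The class name, the pre-built ResourceMap
# `rem`, and the attribute names on the returned ReMDocument are assumptions
# for illustration, not part of the snippet itself.
srlzr = AtomSerializer()
remdoc = srlzr.serialize(rem)   # rem: an already populated ResourceMap
# remdoc wraps the generated <atom:entry> document together with its URI,
# format ('atom') and MIME type, per the ReMDocument(...) call above.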