def getOr(subject, predicate, *args, **kwargs):
    """ Fetch the metadata node linked to a subject, or build a new Metadata

    :param subject: Subject to which the metadata node should be connected
    :param predicate: Predicate by which the metadata node should be connected
    :param args: Positional arguments passed to Metadata when no node is found
    :param kwargs: Keyword arguments passed to Metadata when no node is found
    :return: Metadata wrapping the first object found for (subject, predicate),
        or a freshly built Metadata when the graph holds no such triple
    :rtype: Metadata
    """
    # Guard clause: nothing registered for this subject/predicate pair
    if (subject, predicate, None) not in get_graph():
        return Metadata(*args, **kwargs)

    # Only the first object yielded by the graph is used
    first_object = next(get_graph().objects(subject, predicate))
    return Metadata(node=first_object)
def __export__(self, output=None, context=False, namespace_manager=None, **kwargs):
    """ Export the citation set as a DTS JSON structure

    Only ``Mimetypes.JSON.DTS.Std`` is handled; any other output yields None.

    :param output: Mimetype to export to
    :param context: Whether an "@context" entry should be added to the result
    :param namespace_manager: Manager used to compute qualified names; the one
        of the graph returned by get_graph() is used when none is given
    :return: Dictionary describing the citation set, or None
    """
    if output != Mimetypes.JSON.DTS.Std:
        return None

    manager = namespace_manager
    if not manager:
        manager = get_graph().namespace_manager

    type_key = str(manager.qname(RDF_NAMESPACES.DTS.term("citeType")))
    structure_key = str(manager.qname(RDF_NAMESPACES.DTS.term("citeStructure")))

    exported = {type_key: self.name}

    if not self.is_empty():
        # Children never carry their own @context
        nested = []
        for cite in self.children:
            nested.append(
                cite.export(output, context=False, namespace_manager=manager)
            )
        exported[structure_key] = nested

    if context:
        # The prefix is whatever precedes ":" in the qualified citeType name
        prefix = type_key.split(":")[0]
        exported["@context"] = {prefix: str(RDF_NAMESPACES.DTS)}

    return exported
def __init__(self, node=None, *args, **kwargs):
    """ Bind the metadata object to a graph node

    :param node: Node to attach to; a new BNode is created when it is None
    :param args: Positional arguments forwarded to the parent initializer
    :param kwargs: Keyword arguments forwarded to the parent initializer
    """
    super(Metadata, self).__init__(*args, **kwargs)
    self.__graph__ = get_graph()
    self.__node__ = BNode() if node is None else node
def test_post_work_dispatching_active(self):
    """ Dispatching keeps working once editions are known: routing relies on the citation scheme """
    tic = CtsTextInventoryCollection()
    poetry = CtsTextInventoryMetadata("urn:perseus:poetry", parent=tic)
    prose = CtsTextInventoryMetadata("urn:perseus:prose", parent=tic)
    dispatcher = CollectionDispatcher(tic, default_inventory_name="urn:perseus:prose")

    @dispatcher.inventory("urn:perseus:poetry")
    def dispatchPoetry(collection, **kwargs):
        # Poetry is anything with at least one citation level named "line"
        return any(
            citation.name == "line"
            for readable in collection.readableDescendants
            for citation in readable.citation
        )

    resolver = CtsCapitainsLocalResolver(
        ["./tests/testing_data/latinLit2"],
        dispatcher=dispatcher
    )

    # Export everything to XML before the graph is emptied
    master_xml = resolver.getMetadata().export(Mimetypes.XML.CTS)
    poetry_xml = resolver.getMetadata("urn:perseus:poetry").export(Mimetypes.XML.CTS)
    prose_xml = resolver.getMetadata("urn:perseus:prose").export(Mimetypes.XML.CTS)

    get_graph().remove((None, None, None))
    del poetry, prose

    # Re-parse the exports and check the content of each inventory
    poetry = XmlCtsTextInventoryMetadata.parse(poetry_xml)
    prose = XmlCtsTextInventoryMetadata.parse(prose_xml)
    self.assertEqual(
        len(poetry.textgroups), 3,
        "There should be 3 textgroups in Poetry (Martial, Ovid and Juvenal)"
    )
    self.assertIsInstance(poetry, CtsTextInventoryMetadata, "should be textinventory")
    self.assertEqual(
        len(prose.textgroups), 1,
        "There should be one textgroup in Prose (Greek texts)"
    )

    get_graph().remove((None, None, None))
    del poetry, prose

    master = XmlCtsTextInventoryMetadata.parse(master_xml)
    self.assertEqual(
        len(master.readableDescendants), 26,
        "There should be all 26 readable descendants in the master collection"
    )
def __init__(self, identifier: str=None, metadata: Metadata=None):
    """ Register the object in the graph under a fresh blank node

    :param identifier: Identifier stored on the object and used, as a URIRef,
        as the object of the DTS ``implements`` statement
    :param metadata: Metadata to reuse; when falsy, a Metadata bound to
        ``self.asNode()`` is created instead
    """
    self._graph = get_graph()
    self._identifier = identifier
    self._node = BNode()

    if not metadata:
        metadata = Metadata(node=self.asNode())
    self._metadata = metadata

    implements_quad = (
        self._node,
        RDF_NAMESPACES.DTS.implements,
        URIRef(identifier),
        self._graph
    )
    # NOTE: only the "implements" quad is registered; a second quad linking
    # the node to its metadata through DTS.metadata was left disabled here.
    self._graph.addN([implements_quad])
def __init__(self, identifier="", *args, **kwargs):
    """ Build a collection whose graph node is the URIRef of its identifier

    :param identifier: Identifier of the collection
    :param args: Positional arguments forwarded to the parent initializer
    :param kwargs: Keyword arguments forwarded to the parent initializer
    """
    super(Collection, self).__init__(identifier, *args, **kwargs)

    self._graph = get_graph()
    self._node = URIRef(identifier)
    self._metadata = Metadata(node=self.asNode())

    # Declare the RDF type first, then the DTS model of the collection
    type_statement = (self.asNode(), RDF.type, self.TYPE_URI)
    self.graph.set(type_statement)
    model_statement = (self.asNode(), RDF_NAMESPACES.DTS.model, self.MODEL_URI)
    self.graph.set(model_statement)

    self._parent = None
    self._children = {}
def __init__(self, identifier="", *args, **kwargs):
    """ Build a collection, its metadata and its capabilities node

    :param identifier: Identifier of the collection, used as a URIRef node
    :param args: Positional arguments forwarded to the parent initializer
    :param kwargs: Keyword arguments forwarded to the parent initializer
    """
    super(Collection, self).__init__(identifier, *args, **kwargs)

    self.__graph__ = get_graph()
    self.__node__ = URIRef(identifier)
    self.__metadata__ = Metadata(node=self.asNode())
    # Reuse the capabilities node already in the graph when there is one
    self.__capabilities__ = Metadata.getOr(
        self.asNode(), RDF_NAMESPACES.DTS.capabilities
    )

    type_statement = (self.asNode(), RDF.type, self.TYPE_URI)
    self.graph.set(type_statement)
    model_statement = (self.asNode(), RDF_NAMESPACES.DTS.model, self.MODEL_URI)
    self.graph.set(model_statement)

    capabilities_quad = (
        self.asNode(),
        RDF_NAMESPACES.DTS.capabilities,
        self.capabilities.asNode(),
        self.graph
    )
    self.graph.addN([capabilities_quad])

    self.__parent__ = None
    self.__children__ = {}
def __export__(self, output=None, **kwargs):
    """ Export the citation as XML

    ``Mimetypes.XML.CTS`` produces a CTS citation node (children included),
    ``Mimetypes.XML.TEI`` produces a ``tei:cRefPattern`` element. Any other
    output yields None.

    :param output: Mimetype to export to
    :return: XML string; empty when the citation has nothing to export
        (no xpath, scope nor refsDecl for CTS; no refsDecl for TEI)
    """
    if output == Mimetypes.XML.CTS:
        if self.xpath is None and self.scope is None and self.refsDecl is None:
            return ""

        child = ""
        if isinstance(self.child, Citation):
            child = self.child.export(output=output)

        label = "" if self.name is None else self.name

        return make_xml_node(
            get_graph(), RDF_NAMESPACES.CTS.citation,
            attributes={
                "xpath": re.sub(Citation.escape, "'", self.xpath),
                "scope": re.sub(Citation.escape, "'", self.scope),
                "label": label
            },
            innerXML=child,
            complete=True
        )
    elif output == Mimetypes.XML.TEI:
        if self.refsDecl is None:
            return ""

        label = "" if self.name is None else self.name

        # One "(\w+)" group per "$" placeholder of the refsDecl, joined by an
        # escaped dot. Raw strings keep the regex text identical while avoiding
        # Python's invalid-escape-sequence warnings.
        regexp = r"\.".join([r"(\w+)"] * self.refsDecl.count("$"))

        return (
            "<tei:cRefPattern n=\"{label}\" matchPattern=\"{regexp}\" replacementPattern=\"#xpath({refsDecl})\">"
            "<tei:p>This pointer pattern extracts {label}</tei:p></tei:cRefPattern>"
        ).format(refsDecl=self.refsDecl, label=label, regexp=regexp)
def setUp(self):
    """ Remove every triple from the graph, then build the Nautilus resolver under test """
    everything = (None, None, None)
    get_graph().remove(everything)
    test_directories = ["./tests/testing_data/latinLit2"]
    self.resolver = NautilusCTSResolver(test_directories)
def setUp(self):
    """ Remove every triple from the graph, then build the local CapiTainS resolver under test """
    everything = (None, None, None)
    get_graph().remove(everything)
    test_directories = ["./tests/testing_data/latinLit2"]
    self.resolver = CtsCapitainsLocalResolver(test_directories)
def graph(self):
    """ Graph returned by get_graph()

    :return: The result of calling get_graph()
    """
    current_graph = get_graph()
    return current_graph
def __export__(self, output=None, domain=""):
    """ Export the collection item in the Mimetype required.

    ..note:: Handles Mimetypes.JSON.DTS.Std, Mimetypes.JSON.LD and \
    Mimetypes.XML.RDF; any other value yields None

    :param output: Mimetype to export to (Uses MyCapytain.common.utils.Mimetypes)
    :type output: str
    :param domain: Prefix prepended to member identifiers to build their URL
    :type domain: str
    :return: Object using a different representation
    """
    if output == Mimetypes.JSON.DTS.Std:
        manager = self.graph.namespace_manager

        # One @context binding per prefix used by a predicate of the graph
        context = {}
        for predicate in set(self.graph.predicates()):
            prefix, namespace, _ = manager.compute_qname(predicate)
            context[prefix] = str(URIRef(namespace))

        label_key = self.graph.qname(RDFS.label)
        type_key = self.graph.qname(RDF.type)

        store = Subgraph(get_graph().namespace_manager)
        store.graphiter(self.graph, self.metadata, ascendants=0, descendants=1)

        # Repeated predicates are gathered into a list, single ones stay bare
        metadata = {}
        for _, predicate, obj in store.graph:
            key = self.graph.qname(predicate)
            value = LiteralToDict(obj)
            if key not in metadata:
                metadata[key] = value
            elif isinstance(metadata[key], list):
                metadata[key].append(value)
            else:
                metadata[key] = [metadata[key], value]

        def describe(item):
            # Short description shared by the members and parents listings
            return {
                "@id": item.id,
                label_key: LiteralToDict(item.get_label()) or item.id,
                self.graph.qname(RDF_NAMESPACES.DTS.url): domain + item.id
            }

        document = {
            "@context": context,
            "@graph": {
                "@id": self.id,
                type_key: str(self.type),
                label_key: LiteralToDict(self.get_label()) or self.id,
                self.graph.qname(RDF_NAMESPACES.DTS.size): len(self.members),
                self.graph.qname(RDF_NAMESPACES.DTS.metadata): metadata
            }
        }
        body = document["@graph"]

        version = self.version
        if version is not None:
            body["version"] = str(version)

        if len(self.members):
            body[self.graph.qname(RDF_NAMESPACES.DTS.members)] = [
                describe(member) for member in self.members
            ]

        if self.parent:
            body[self.graph.qname(RDF_NAMESPACES.DTS.parents)] = [
                describe(ancestor) for ancestor in self.parents
            ]

        del store
        return document

    if output == Mimetypes.JSON.LD or output == Mimetypes.XML.RDF:
        # Serialization goes through a temporary sub-graph
        store = Subgraph(get_graph().namespace_manager)
        store.graphiter(self.graph, self.asNode(), ascendants=1, descendants=-1)
        serialized = store.serialize(
            format=RDFLIB_MAPPING[output], auto_compact=True, indent=""
        )
        del store
        return serialized

    return None
def __export__(self, output=None, namespace_manager=None):
    """ Export the collection item in the Mimetype required.

    ..note:: Handles Mimetypes.JSON.DTS.Std, Mimetypes.JSON.LD and \
    Mimetypes.XML.RDF; any other value yields None

    :param output: Mimetype to export to (Uses MyCapytain.common.utils.Mimetypes)
    :type output: str
    :param namespace_manager: Manager whose namespaces seed the export; when \
    falsy, namespaces are derived from the collection's own graph
    :return: Object using a different representation
    """
    if output == Mimetypes.JSON.DTS.Std:
        # Namespaces handed over to the derived graph
        if namespace_manager:
            nsm = namespace_manager.namespaces()
        else:
            nsm = {}
            for prefix, ns in self.graph.namespace_manager.namespaces():
                # These namespaces get a fixed prefix just below
                if str(ns) in [_ns_cap_str, _ns_cts_str, _ns_dts_str, _ns_dct_str, _ns_hydra_str]:
                    continue
                nsm[prefix] = ns
            nsm[""] = RDF_NAMESPACES.HYDRA
            nsm["cts"] = RDF_NAMESPACES.CTS
            nsm["dts"] = RDF_NAMESPACES.DTS
            nsm["dct"] = DCTERMS

        # Derived graph limited to the node and its direct descendants
        store = Subgraph(nsm)
        store.graphiter(self.graph, self.asNode(), ascendants=0, descendants=1)
        graph = store.graph
        nsm = store.graph.namespace_manager

        # JSON-LD @context: the first binding met for a prefix wins
        skipped_for_context = [_ns_cap_str, _ns_hydra_str, _ns_rdf_str, _ns_rdfs_str]
        bindings = {}
        for predicate in set(graph.predicates()):
            prefix, namespace, _ = nsm.compute_qname(predicate)
            if prefix in bindings:
                continue
            if str(namespace) in skipped_for_context:
                continue
            bindings[prefix] = str(URIRef(namespace))

        # Dublin Core terms and every other extension go to separate containers
        extensions = {}
        dublincore = {}
        root_namespaces = [_ns_cap_str, _ns_hydra_str, _ns_rdf_str, _ns_rdfs_str, _ns_dts_str]

        for _, predicate, obj in store.graph:
            key = graph.qname(predicate)
            _, namespace, _ = nsm.compute_qname(predicate)
            namespace = str(namespace)

            # Terms of these namespaces belong to the root DTS object
            if namespace in root_namespaces:
                continue

            container = dublincore if namespace == str(DCTERMS) else extensions

            if key not in container:
                # A first value exported as a dict is wrapped in a list
                value = literal_to_dict(obj)
                container[key] = [value] if isinstance(value, dict) else value
            elif isinstance(container[key], list):
                container[key].append(literal_to_dict(obj))
            else:
                container[key] = [container[key], literal_to_dict(obj)]

        document = {"@context": bindings}
        document.update(self.export_base_dts(graph, self, nsm))
        document["@context"]["@vocab"] = _ns_hydra_str

        if extensions:
            document[graph.qname(RDF_NAMESPACES.DTS.extensions)] = extensions

        if dublincore:
            document[graph.qname(RDF_NAMESPACES.DTS.dublincore)] = dublincore

        if self.size:
            members = []
            for member in self.members:
                members.append(self.export_base_dts(self.graph, member, nsm))
            document[graph.qname(RDF_NAMESPACES.HYDRA.member)] = members

        # Citation structure, only for objects exposing a BaseCitationSet
        if hasattr(self, "citation") and isinstance(self.citation, BaseCitationSet):
            if self.citation.depth:
                document[graph.qname(RDF_NAMESPACES.DTS.term("citeDepth"))] = self.citation.depth
            if not self.citation.is_empty():
                document[graph.qname(RDF_NAMESPACES.DTS.term("citeStructure"))] = self.citation.export(
                    Mimetypes.JSON.DTS.Std,
                    context=False,
                    namespace_manager=nsm
                )

        del store
        return document

    if output == Mimetypes.JSON.LD or output == Mimetypes.XML.RDF:
        # Serialization goes through a temporary sub-graph
        store = Subgraph(get_graph().namespace_manager)
        store.graphiter(self.graph, self.asNode(), ascendants=1, descendants=-1)
        serialized = store.serialize(
            format=RDFLIB_MAPPING[output], auto_compact=True, indent=""
        )
        del store
        return serialized

    return None
def setUp(self):
    """ Remove every triple from the graph returned by get_graph() before each test """
    everything = (None, None, None)
    get_graph().remove(everything)
from MyCapytain.common.reference import URN
from rdflib.namespace import DC, DCTERMS, Namespace
from lxml import etree
import re
from chetc import Epigraph2Markup
from jinja2 import Template
from os import makedirs, path

# Jinja templates, loaded once when the module is imported
with open('templates/template.jinja.xml') as f:
    template = Template(f.read())

with open('templates/template.textgroup.xml') as f:
    tgtemplate = Template(f.read())

# Prefixes registered on the graph returned by get_graph()
SAWS = Namespace("http://purl.org/saws/ontology#")
get_graph().bind("dc", DC)
get_graph().bind("dct", DCTERMS)
get_graph().bind("saws", SAWS)

# Regular expressions are raw strings so that backslashes reach the regex
# engine untouched, without Python's invalid-escape-sequence warnings.
# The compiled patterns match exactly what the non-raw versions matched.
PLACE = re.compile(r"<b>province:<\/b>(.*)<b>place:<\/b>")
EDCS = re.compile(r"<b>EDCS-ID:</b> EDCS-(\w+)")
CIL = re.compile(r"\s*CIL\s*04,\s*(p?\s*[\*a-zA-Z0-9]+)")
CIL_NUMBER = re.compile(r"^(\d+)\w?$")
TRISMEGISTOS = re.compile(
    r'http:\/\/db\.edcs\.eu\/epigr\/partner\.php\?param=.*(T\w+)"')
TRISMEGISTOS_PLACE = re.compile(
    r"http:\/\/www\.trismegistos\.org\/place\/(\w+)")
PUBLICATION = re.compile(r"publication:<\/b>([a-z,\-\+\/\*A-Z =\(\)0-9]+)<[ab]")
# NOTE(review): the character set begins with a literal "[", now escaped to
# silence the "possible nested set" FutureWarning; confirm that "[" is really
# meant to be an accepted character.
ORT = re.compile(
    r"ort='([\[a-zA-Z\s\/\-]+)'&latitude='(\d+\.\d+)'&longitude='(\d+\.\d+)'&"
)
def test_dispatching_output(self):
    """ Texts are dispatched to three inventories by URN namespace and survive an XML round trip """
    tic = CtsTextInventoryCollection()

    latin = CtsTextInventoryMetadata("urn:perseus:latinLit", parent=tic)
    latin.set_label("Classical Latin", "eng")

    farsi = CtsTextInventoryMetadata("urn:perseus:farsiLit", parent=tic)
    farsi.set_label("Farsi", "eng")

    gc = CtsTextInventoryMetadata("urn:perseus:greekLit", parent=tic)
    gc.set_label("Ancient Greek", "eng")
    gc.set_label("Grec Ancien", "fre")

    dispatcher = CollectionDispatcher(tic)

    @dispatcher.inventory("urn:perseus:latinLit")
    def dispatchLatinLit(collection, path=None, **kwargs):
        return collection.id.startswith("urn:cts:latinLit:")

    @dispatcher.inventory("urn:perseus:farsiLit")
    def dispatchfFarsiLit(collection, path=None, **kwargs):
        return collection.id.startswith("urn:cts:farsiLit:")

    @dispatcher.inventory("urn:perseus:greekLit")
    def dispatchGreekLit(collection, path=None, **kwargs):
        return collection.id.startswith("urn:cts:greekLit:")

    resolver = NautilusCTSResolver(
        ["./tests/testing_data/latinLit2"], dispatcher=dispatcher
    )
    resolver.logger.disabled = True
    resolver.REMOVE_EMPTY = False
    resolver.parse()

    # Export everything to XML before the graph is emptied
    master_xml = resolver.getMetadata().export(Mimetypes.XML.CTS)
    latin_xml = resolver.getMetadata("urn:perseus:latinLit").export(Mimetypes.XML.CTS)
    greek_xml = resolver.getMetadata("urn:perseus:greekLit").export(Mimetypes.XML.CTS)
    farsi_xml = resolver.getMetadata("urn:perseus:farsiLit").export(Mimetypes.XML.CTS)

    get_graph().remove((None, None, None))

    latin_stuff = XmlCtsTextInventoryMetadata.parse(latin_xml)
    greek_stuff = XmlCtsTextInventoryMetadata.parse(greek_xml)
    farsi_stuff = XmlCtsTextInventoryMetadata.parse(farsi_xml)

    self.assertEqual(
        len(latin_stuff.readableDescendants), 19,
        "There should be 19 readable descendants in Latin"
    )
    self.assertIsInstance(
        latin_stuff, CtsTextInventoryMetadata, "should be textinventory"
    )
    self.assertEqual(
        len(greek_stuff.readableDescendants), 6,
        "There should be 6 readable descendants in Greek [6 only in __cts__.xml]"
    )
    self.assertEqual(
        len(farsi_stuff.descendants), 0,
        "There should be nothing in FarsiLit"
    )
    self.assertEqual(
        greek_stuff.get_label("fre"), None,  # Text inventory have no label in CTS
        "Label should be correct"
    )

    get_graph().remove((None, None, None))

    master = XmlCtsTextInventoryMetadata.parse(master_xml)
    self.assertEqual(
        len(master.readableDescendants), 25,
        "There should be all 25 readable descendants in the master collection"
    )
def __export__(self, output=Mimetypes.JSON.Std, only=None, exclude=None, **kwargs):
    """ Export a set of Metadata

    :param output: Mimetype to export to
    :param only: Keeps only the terms listed, or belonging to the listed namespaces
    :param exclude: Drops the terms listed, or belonging to the listed namespaces
    :return: Formatted Export (None when the mimetype is not handled)

    .. warning:: exclude and only cannot be used together: when both are \
    given, only `only` is applied
    """
    graph = Graph()
    graph.namespace_manager = get_graph().namespace_manager
    node = self.asNode()

    # `selected` lists the predicates to copy; None means "copy everything"
    selected = None
    if only is not None:
        # Work on a copy: the caller's sequence must never be mutated, and
        # this also accepts tuples or sets.
        selected = list(only)
        wanted = [str(s) for s in only]
        for predicate in set(self.graph.predicates(subject=node)):
            if str(predicate) not in wanted:
                prefix, namespace, name = self.graph.compute_qname(predicate)
                if str(namespace) in wanted:
                    selected.append(predicate)
    elif exclude is not None:
        selected = []
        unwanted = [str(s) for s in exclude]
        for predicate in set(self.graph.predicates(subject=node)):
            prefix, namespace, name = self.graph.compute_qname(predicate)
            if str(predicate) not in unwanted and str(namespace) not in unwanted:
                selected.append(predicate)

    for predicate, value in self.graph[node]:
        if selected is None or predicate in selected:
            graph.add((node, predicate, value))

    if output == Mimetypes.JSON.Std:
        out = {}
        for _, predicate, value in graph:
            by_language = out.setdefault(str(predicate), {})
            # Only literals are exported; other objects leave an empty entry
            if isinstance(value, Literal):
                language = value.language
                # NOTE(review): .title() is the string method, so values are
                # title-cased on export - confirm this is intended.
                if language in by_language:
                    if isinstance(by_language[language], str):
                        by_language[language] = [by_language[language]]
                    by_language[language].append(value.title())
                else:
                    by_language[language] = value.title()
        return out
    elif output == Mimetypes.JSON.LD:
        return graph.serialize(format="json-ld", context={})
    elif output == Mimetypes.XML.RDF:
        return graph.serialize(format="xml")
    elif output == Mimetypes.XML.CapiTainS.CTS:
        strings = []
        for pred, obj in graph.predicate_objects(node):
            # Language and datatype, when present, become XML attributes
            attributes = {}
            if hasattr(obj, "language") and obj.language is not None:
                attributes["xml:lang"] = obj.language
            if hasattr(obj, "datatype") and obj.datatype is not None:
                attributes["rdf:type"] = obj.datatype
            strings.append(
                make_xml_node(graph, pred, text=obj, attributes=attributes, complete=True)
            )
        return "\n".join(strings)