def __urnitem__(self, key):
    """ Resolve a URN to this object or to one of its descendants

    :param key: Value accepted by the ``URN`` constructor
    :return: ``self`` when the URN equals this object's own URN, otherwise the
        resource reached by descending one URN level at a time
    :raises ValueError: When ``len(urn) <= 2`` or when the resource found at a
        level does not carry the expected URN
    """
    urn = URN(key)

    if len(urn) <= 2:
        raise ValueError("Not valid urn")
    if hasattr(self, "urn") and self.urn == urn:
        return self

    # Objects without a URN start the walk at index 2 (the text group level).
    level = len(self.urn) if hasattr(self, "urn") else 2

    # NOTE: no fallback branch on purpose, any other type leaves `children` unbound.
    if isinstance(self, TextInventory):
        children = self.textgroups
    elif isinstance(self, TextGroup):
        children = self.works
    elif isinstance(self, Work):
        children = self.texts

    order = ["", "", URN.TEXTGROUP, URN.WORK, URN.VERSION]
    labels = [
        "URN namespace",
        "CTS Namespace",
        "URN Textgroup",
        "URN Work",
        "URN Version"
    ]

    while level < len(urn):
        expected = urn.upTo(order[level])
        children = children[expected]
        # The fetched resource must itself be identified by the URN used to fetch it.
        if not (hasattr(children, "urn") and str(children.urn) == expected):
            raise ValueError("Unrecognized urn at " + labels[level])
        level += 1

    return children
def __urnitem__(self, key):
    """ Find the resource matching a URN, starting from the current object

    :param key: Value accepted by the ``URN`` constructor
    :return: The current object if its URN equals the requested one, else the
        descendant found by following text group, work and version levels
    :raises ValueError: When ``len(urn) <= 2`` or when a level of the URN
        does not match the resource stored under it
    """
    urn = URN(key)
    if len(urn) <= 2:
        raise ValueError("Not valid urn")

    has_urn = hasattr(self, "urn")
    if has_urn and self.urn == urn:
        return self

    # Without an own URN the descent starts at index 2, i.e. URN.TEXTGROUP below.
    if has_urn:
        start = len(self.urn)
    else:
        start = 2

    # NOTE: an object of any other type leaves `children` unbound, as before.
    if isinstance(self, TextInventory):
        children = self.textgroups
    elif isinstance(self, TextGroup):
        children = self.works
    elif isinstance(self, Work):
        children = self.texts

    order = ["", "", URN.TEXTGROUP, URN.WORK, URN.VERSION]
    level_names = [
        "URN namespace", "CTS Namespace", "URN Textgroup", "URN Work", "URN Version"
    ]

    for depth in range(start, len(urn)):
        partial = urn.upTo(order[depth])
        children = children[partial]
        if not hasattr(children, "urn") or str(children.urn) != partial:
            error = "Unrecognized urn at " + level_names[depth]
            raise ValueError(error)

    return children
def test_from_textgroup_emptiness(self):
    """ A URN stopping at the text group exposes no work, version or reference """
    textgroup = "urn:cts:greekLit:textgroup"
    urn = URN(textgroup)

    self.assertEqual(urn.upTo(URN.COMPLETE), textgroup)
    self.assertEqual(str(urn), textgroup)
    self.assertEqual(urn.upTo(URN.NAMESPACE), "urn:cts:greekLit")
    self.assertEqual(urn.upTo(URN.TEXTGROUP), textgroup)

    # Everything below the text group level must be empty
    self.assertIsNone(urn.work)
    self.assertIsNone(urn.version)
    self.assertIsNone(urn.reference)
def test_from_text_emptiness(self):
    """ A URN stopping at the version level resolves each level and has no reference """
    version = "urn:cts:greekLit:textgroup.work.text"
    urn = URN(version)

    self.assertEqual(str(urn), version)
    self.assertEqual(urn.upTo(URN.COMPLETE), version)
    self.assertEqual(urn.upTo(URN.NAMESPACE), "urn:cts:greekLit")
    self.assertEqual(urn.upTo(URN.TEXTGROUP), "urn:cts:greekLit:textgroup")
    self.assertEqual(urn.upTo(URN.WORK), "urn:cts:greekLit:textgroup.work")
    self.assertEqual(urn.upTo(URN.VERSION), version)

    # No passage was given
    self.assertIsNone(urn.reference)
def test_no_end_text_emptiness(self):
    """ A URN with a single passage (no range) has a reference without an end """
    version = "urn:cts:greekLit:textgroup.work.text"
    passage = "urn:cts:greekLit:textgroup.work.text:1"
    urn = URN(passage)

    self.assertEqual(str(urn), passage)
    self.assertEqual(urn.upTo(URN.COMPLETE), passage)
    self.assertEqual(urn.upTo(URN.NAMESPACE), "urn:cts:greekLit")
    self.assertEqual(urn.upTo(URN.TEXTGROUP), "urn:cts:greekLit:textgroup")
    self.assertEqual(urn.upTo(URN.WORK), "urn:cts:greekLit:textgroup.work")
    self.assertEqual(urn.upTo(URN.VERSION), version)
    self.assertEqual(urn.upTo(URN.PASSAGE), passage)
    self.assertEqual(urn.upTo(URN.NO_PASSAGE), version)

    self.assertEqual(urn.reference, Reference("1"))
    self.assertIsNone(urn.reference.end)
def __getText__(self, urn):
    """ Retrieve a textual resource and its metadata from a URN

    A URN of length 5 is looked up directly. A URN of length 4 is reduced to
    its work level and replaced by the first text of ``self.texts`` that is an
    ``XmlCtsEditionMetadata`` and whose id starts with that work URN.

    :param urn: URN of a text to retrieve
    :type urn: str, URN
    :return: Textual resource and metadata; the resource is None (and a
        warning is logged) when the file named by the metadata does not exist
    :rtype: (CapitainsCtsText, InventoryText)
    :raises UnknownObjectError: When no edition matches a URN of length 4
    :raises InvalidURN: When the URN length is neither 4 nor 5
    """
    if not isinstance(urn, URN):
        urn = URN(urn)

    if len(urn) != 5:
        if len(urn) != 4:
            raise InvalidURN
        # Fall back on the first edition registered under the work
        work_prefix = str(urn.upTo(URN.WORK))
        editions = [
            t.id
            for t in self.texts
            if t.id.startswith(work_prefix) and isinstance(t, XmlCtsEditionMetadata)
        ]
        if not editions:
            raise UnknownObjectError
        urn = URN(editions[0])

    text = self.inventory[str(urn)]

    if os.path.isfile(text.path):
        with io.open(text.path) as xml_file:
            resource = self.TEXT_CLASS(urn=urn, resource=self.xmlparse(xml_file))
    else:
        # Metadata without its file is reported, not raised: callers still get the metadata
        resource = None
        self.logger.warning('The file {} is mentioned in the metadata but does not exist'.format(text.path))

    return resource, text
def test_upTo(self):
    """ ``URN.upTo`` returns the URN truncated at each supported level

    The URN under test carries a passage range with subreferences
    (``1.1@Achilles-1.10@the[2]``) so that PASSAGE_START and PASSAGE_END can
    be told apart.
    """
    # The range literal is spelled out in full: start and end match the
    # PASSAGE_START / PASSAGE_END expectations below.
    a = URN("urn:cts:greekLit:tlg0012.tlg001.mth-01:1.1@Achilles-1.10@the[2]")
    self.assertEqual(a.upTo(URN.COMPLETE), "urn:cts:greekLit:tlg0012.tlg001.mth-01:1.1@Achilles-1.10@the[2]")
    self.assertEqual(a.upTo(URN.NAMESPACE), "urn:cts:greekLit")
    self.assertEqual(a.upTo(URN.TEXTGROUP), "urn:cts:greekLit:tlg0012")
    self.assertEqual(a.upTo(URN.WORK), "urn:cts:greekLit:tlg0012.tlg001")
    self.assertEqual(a.upTo(URN.VERSION), "urn:cts:greekLit:tlg0012.tlg001.mth-01")
    self.assertEqual(a.upTo(URN.PASSAGE), "urn:cts:greekLit:tlg0012.tlg001.mth-01:1.1@Achilles-1.10@the[2]")
    self.assertEqual(a.upTo(URN.PASSAGE_START), "urn:cts:greekLit:tlg0012.tlg001.mth-01:1.1@Achilles")
    self.assertEqual(a.upTo(URN.PASSAGE_END), "urn:cts:greekLit:tlg0012.tlg001.mth-01:1.10@the[2]")
def test_missing_text_in_passage_emptiness(self):
    """ A work-level URN may carry a passage range while its version stays empty """
    work = "urn:cts:greekLit:textgroup.work"
    ranged = "urn:cts:greekLit:textgroup.work:1-2"
    urn = URN(ranged)

    self.assertEqual(str(urn), ranged)
    self.assertEqual(urn.upTo(URN.COMPLETE), ranged)
    self.assertEqual(urn.upTo(URN.NAMESPACE), "urn:cts:greekLit")
    self.assertEqual(urn.upTo(URN.TEXTGROUP), "urn:cts:greekLit:textgroup")
    self.assertEqual(urn.upTo(URN.WORK), work)
    self.assertEqual(urn.upTo(URN.NO_PASSAGE), work)
    self.assertEqual(urn.upTo(URN.PASSAGE), ranged)
    self.assertEqual(urn.upTo(URN.PASSAGE_START), "urn:cts:greekLit:textgroup.work:1")
    self.assertEqual(urn.upTo(URN.PASSAGE_END), "urn:cts:greekLit:textgroup.work:2")

    # The range is exposed as a reference with both bounds
    self.assertEqual(urn.reference, Reference("1-2"))
    self.assertEqual(urn.reference.start, Reference("1"))
    self.assertEqual(urn.reference.end, Reference("2"))

    self.assertIsNone(urn.version)
def _r_GetFirstUrn(self, urn):
    """ Provisional route for GetFirstUrn request

    :param urn: URN to filter the resource
    :return: GetFirstUrn response as (rendered XML, 200, headers)
    """
    requested = URN(urn)
    text_id = requested.upTo(URN.NO_PASSAGE)

    # The resolver takes the passage separately from the text identifier
    if requested.reference is None:
        subreference = None
    else:
        subreference = str(requested.reference)

    node = self.resolver.getTextualNode(textId=text_id, subreference=subreference)
    first_id = node.firstId

    body = render_template(
        "cts/GetFirstUrn.xml",
        firstId=first_id,
        full_urn=text_id,
        request_urn=str(requested)
    )
    return body, 200, {"content-type": "application/xml"}
def _r_GetPrevNext(self, urn):
    """ Provisional route for GetPrevNext request

    :param urn: URN to filter the resource
    :return: GetPrevNext response as (rendered XML, 200, headers)
    """
    requested = URN(urn)
    text_id = requested.upTo(URN.NO_PASSAGE)

    # The resolver takes the passage separately from the text identifier
    if requested.reference is None:
        subreference = None
    else:
        subreference = str(requested.reference)

    siblings = self.resolver.getSiblings(textId=text_id, subreference=subreference)
    prev_id, next_id = siblings

    body = render_template(
        "cts/GetPrevNext.xml",
        prev_urn=prev_id,
        next_urn=next_id,
        urn=text_id,
        request_urn=str(requested)
    )
    return body, 200, {"content-type": "application/xml"}
def _r_GetValidReff(self, urn, level):
    """ Provisional route for GetValidReff request

    :param urn: URN to filter the resource
    :param level: Level passed through to the resolver and to the template
    :return: GetValidReff response as (rendered XML, 200, headers)
    """
    requested = URN(urn)
    text_id = requested.upTo(URN.NO_PASSAGE)

    # The resolver takes the passage separately from the text identifier
    if requested.reference is None:
        subreference = None
    else:
        subreference = str(requested.reference)

    valid_reffs = self.resolver.getReffs(
        textId=text_id,
        subreference=subreference,
        level=level
    )

    body = render_template(
        "cts/GetValidReff.xml",
        reffs=valid_reffs,
        urn=text_id,
        level=level,
        request_urn=str(requested)
    )
    return body, 200, {"content-type": "application/xml"}
def _r_GetPassagePlus(self, urn):
    """ Provisional route for GetPassagePlus request

    :param urn: URN to filter the resource
    :return: GetPassagePlus response as (rendered XML, 200, headers)
    :raises InvalidURN: When the URN has fewer than 4 components
    """
    requested = URN(urn)
    if len(requested) < 4:
        raise InvalidURN

    if requested.reference is None:
        subreference = None
    else:
        subreference = str(requested.reference)

    node = self.resolver.getTextualNode(
        textId=requested.upTo(URN.NO_PASSAGE),
        subreference=subreference
    )

    def localized(predicate):
        """ List (language, text) pairs of the node metadata for a predicate """
        return [
            (literal.language, str(literal))
            for literal in node.metadata.get(predicate)
        ]

    body = render_template(
        "cts/GetPassagePlus.xml",
        filters="urn={}".format(requested),
        request_urn=str(requested),
        full_urn=node.urn,
        prev_urn=node.prevId,
        next_urn=node.nextId,
        metadata={
            "groupname": localized(RDF_NAMESPACES.CTS.groupname),
            "title": localized(RDF_NAMESPACES.CTS.title),
            "description": localized(RDF_NAMESPACES.CTS.description),
            "label": localized(RDF_NAMESPACES.CTS.label)
        },
        citation=Markup(node.citation.export(Mimetypes.XML.CTS)),
        passage=Markup(node.export(Mimetypes.XML.TEI))
    )
    return body, 200, {"content-type": "application/xml"}
def _r_GetPassage(self, urn):
    """ Provisional route for GetPassage request

    :param urn: URN to filter the resource
    :return: GetPassage response as (rendered XML, 200, headers)
    :raises InvalidURN: When the URN has fewer than 4 components
    """
    requested = URN(urn)
    if len(requested) < 4:
        raise InvalidURN

    if requested.reference is None:
        subreference = None
    else:
        subreference = str(requested.reference)

    node = self.resolver.getTextualNode(
        textId=requested.upTo(URN.NO_PASSAGE),
        subreference=subreference
    )

    body = render_template(
        "cts/GetPassage.xml",
        filters="urn={}".format(requested),
        request_urn=str(requested),
        full_urn=node.urn,
        passage=Markup(node.export(Mimetypes.XML.TEI))
    )
    return body, 200, {"content-type": "application/xml"}
def __getText__(self, urn):
    """ Retrieve a textual resource and its metadata from a URN

    A URN of length 5 is looked up directly. A URN of length 4 is reduced to
    its work level and replaced by the first text of ``self.texts`` that is an
    ``Edition`` and whose id starts with that work URN.

    :param urn: URN of a text to retrieve
    :type urn: str, URN
    :return: Textual resource and metadata
    :rtype: (Text, InventoryText)
    :raises UnknownCollection: When no edition matches a URN of length 4, when
        the inventory does not know the URN, or when the file named by the
        metadata does not exist
    :raises InvalidURN: When the URN length is neither 4 nor 5
    """
    if not isinstance(urn, URN):
        urn = URN(urn)

    if len(urn) != 5:
        if len(urn) != 4:
            raise InvalidURN
        # Fall back on the first edition registered under the work
        work_prefix = str(urn.upTo(URN.WORK))
        editions = [
            t.id
            for t in self.texts
            if t.id.startswith(work_prefix) and isinstance(t, Edition)
        ]
        if not editions:
            raise UnknownCollection
        urn = URN(editions[0])

    try:
        text = self.inventory[str(urn)]
    except MyCapytain.errors.UnknownCollection as E:
        # Re-raised under the name callers of this module catch; any other
        # exception propagates untouched.
        raise UnknownCollection(str(E))

    if not os.path.isfile(text.path):
        raise UnknownCollection("File matching %s does not exist" % text.path)

    resource = self.read(identifier=urn, path=text.path)
    return resource, text
class Passage(MyCapytain.resources.texts.tei.Passage):
    """ Passage class for local texts which is fast but contains the minimum DOM.

    For design purposes, some people would prefer passage to be found quickly (Text indexing for example).
    Passage keeps only the node found through the xpath

    **Example** : for a text with a citation scheme with following refsDecl :
    `/TEI/text/body/div[@type='edition']/div[@n='$1']/div[@n='$2']/l[@n='$3']` and a passage 1.1.1,
    this class will build an XML tree looking like the following

    .. code-block:: xml

        <l n='1'>Lorem ipsum</l>

    :param urn: A URN identifier
    :type urn: URN
    :param resource: A resource
    :type resource: etree._Element
    :param parent: Parent of the current passage
    :type parent: Passage
    :param citation: Citation for children level
    :type citation: Citation
    :param reference: Identifier of the subreference without URN information
    :type reference: Reference, List

    .. warning:: This passage system does not accept range
    """

    def __init__(self, urn=None, resource=None, parent=None, citation=None, reference=None):
        super(Passage, self).__init__(resource=resource, parent=parent)
        # False means "not computed yet"; None is a valid computed value (no sibling)
        self.__next = False
        self.__prev = False

        # Anything that is not a Citation is ignored
        self.citation = None
        if isinstance(citation, Citation):
            self.citation = citation

        # The reference must exist before the urn/reference setters run,
        # because both setters read it
        self.__reference = Reference("")
        if urn:
            self.urn = urn
        if reference:
            self.reference = reference

        self.__children = OrderedDict()
        self.__parsed = False

    @property
    def reference(self):
        """ Subreference of the passage

        :returns: Representation of the passage subreference
        :rtype: Reference
        """
        return self.__reference

    @reference.setter
    def reference(self, value):
        """ Set the subreference of the passage

        :param value: Representation of the passage subreference
        :type value: list, tuple, basestring, Reference

        .. note:: Setting the reference also rebuilds the URN when one is already set.
            Values of any other type, or equal to the current reference, are ignored.
        """
        _value = None
        if isinstance(value, (list, tuple)):
            _value = Reference(".".join(value))
        elif isinstance(value, basestring):
            _value = Reference(value)
        elif isinstance(value, Reference):
            _value = value

        if _value and self.__reference != _value:
            self.__reference = _value
            # Keep the URN in sync with the new reference
            if self._URN and len(self._URN):
                if len(value):
                    self._URN = URN("{}:{}".format(
                        self._URN.upTo(URN.NO_PASSAGE), str(_value)))
                else:
                    self._URN = URN(self._URN["text"])

    @property
    def urn(self):
        """ URN Identifier of the object

        :rtype: URN
        """
        return self._URN

    @urn.setter
    def urn(self, value):
        """ Set the urn

        :param value: URN to be saved
        :type value: URN, basestring, str
        :raises: *TypeError* when the value is not URN compatible

        .. note:: Setting the URN also updates the reference when the URN carries one.
            When it does not and a reference is already set, the reference is appended to the URN.
        """
        a = self._URN
        if isinstance(value, basestring):
            value = URN(value)
        elif not isinstance(value, URN):
            raise TypeError()

        if str(a) != str(value):
            self._URN = value
            if value.reference and self.__reference != value.reference:
                self.__reference = value.reference
            elif not value.reference and self.__reference and len(self.__reference):
                self._URN = URN("{}:{}".format(str(value), str(self.__reference)))

    def get(self, key=None):
        """ Get a child or multiple children

        :param key: String identifying a passage, or position of the child
        :type key: basestring or int
        :raises KeyError: When key identifies a child unknown to this passage
        :raises IndexError: When key is an int outside the range of children
        :rtype: List.Passage
        :returns: List of passage identified by key. If key is None, returns all children

        .. note:: Call time depends on parsing status. If the passage was never parsed, then on first call
            citation is used to find children
        """
        if len(self.__children) == 0 and self.__parsed is False:
            self.__parse()

        if key is None:
            return list(self.__children.values())
        elif isinstance(key, int):
            keys = list(self.__children)
            return [self.__children[keys[key]]]
        elif key not in self.__children:
            raise KeyError()
        else:
            return [self.__children[key]]

    def __parse(self):
        """ Private method for parsing children

        Children are found with the xpath of the citation and registered under
        their dotted reference. Duplicated references raise a
        ``DuplicateReference`` warning; the last element with a given reference
        wins.
        """
        from collections import Counter

        if self.citation is None:
            self.__parsed = True
            return []

        elements = self.resource.xpath(
            "." + self.citation.fill(passage=None, xpath=True),
            namespaces=NS
        )
        ids = [self.reference.list + [element.get("n")] for element in elements]
        ns = [".".join(_id) for _id in ids]

        # Checking for duplicates (single counting pass instead of one
        # list.count() scan per element)
        duplicates = set(n for n, seen in Counter(ns).items() if seen > 1)
        if len(duplicates) > 0:
            message = ", ".join(duplicates)
            warnings.warn(message, DuplicateReference)

        for element, _id, n in zip(elements, ids, ns):
            self.__children[n] = Passage(
                resource=element,
                citation=self.citation.child,
                reference=_id,
                urn=self.urn,
                parent=self
            )

        self.__parsed = True

    @property
    def first(self):
        """ First child of current Passage

        :returns: None if current Passage has no children, first child passage if available
        :rtype: None, Passage
        """
        try:
            return self.get(0)[0]
        except Exception:
            # Best effort: any failure to find a child means "no first child".
            # KeyboardInterrupt / SystemExit are deliberately not swallowed.
            return None

    @property
    def last(self):
        """ Last child of current Passage

        :returns: None if current Passage has no children, last child passage if available
        :rtype: None, Passage
        """
        try:
            return self.get(-1)[0]
        except Exception:
            # Best effort: any failure to find a child means "no last child".
            # KeyboardInterrupt / SystemExit are deliberately not swallowed.
            return None

    @property
    def children(self):
        """ Children of the passage

        :returns: Dictionary of children, where keys are subreferences
        :rtype: OrderedDict
        """
        if len(self.__children) == 0 and self.__parsed is False:
            self.__parse()
        return self.__children

    @property
    def next(self):
        """ Next passage

        :returns: Next passage at same level
        :rtype: Passage
        """
        if self.__next is False:
            if self.parent is None:
                # When top of hierarchy is accessed, should return None
                self.__next = None
                return None

            keys = list(self.parent.children)
            current = keys.index(str(self.reference))
            if len(keys) - 1 > current:
                self.__next = self.parent.get(keys[current + 1])[0]
            else:
                # Last child of its parent: continue with the first child of
                # the parent's own next sibling
                n = self.parent.next
                if n is None:
                    self.__next = None
                else:
                    self.__next = n.first
        return self.__next

    @property
    def prev(self):
        """ Previous passage

        :returns: Previous passage at same level
        :rtype: Passage
        """
        if self.__prev is False:
            if self.parent is None:
                # When top of hierarchy is accessed, should return None
                self.__prev = None
                return None

            keys = list(self.parent.children)
            current = keys.index(str(self.reference))
            if current > 0:
                self.__prev = self.parent.get(keys[current - 1])[0]
            else:
                # First child of its parent: continue with the last child of
                # the parent's own previous sibling
                n = self.parent.prev
                if n is None:
                    self.__prev = None
                else:
                    self.__prev = n.last
        return self.__prev
def test_warning_on_empty(self):
    """ Too-short URNs are rejected and missing levels cannot be requested """
    # A URN reduced to "urn:cts" cannot be built at all
    with self.assertRaises(ValueError):
        URN("urn:cts")

    # A URN without a version cannot be truncated at the version level
    with self.assertRaises(KeyError):
        versionless = URN("urn:cts:ns:tg.work:1")
        versionless.upTo(URN.VERSION)