def __getText__(self, urn):
    """ Retrieve a textual resource together with its inventory metadata

    A URN of length 5 is looked up in the inventory as is. A URN of length 4
    is first cut down to ``URN.WORK`` and replaced by the id of the first
    ``XmlCtsEditionMetadata`` of ``self.texts`` whose id starts with that
    prefix.

    :param urn: URN of a text to retrieve
    :type urn: str, URN
    :return: Textual resource (None when the file does not exist) and metadata
    :rtype: (CapitainsCtsText, InventoryText)
    :raises UnknownObjectError: when no edition matches a URN of length 4
    :raises InvalidURN: when the URN length is neither 4 nor 5
    """
    if not isinstance(urn, URN):
        urn = URN(urn)

    if len(urn) != 5:
        if len(urn) != 4:
            raise InvalidURN
        # Computed once: the prefix does not change while scanning the texts
        work_prefix = str(urn.upTo(URN.WORK))
        editions = [
            t.id
            for t in self.texts
            if t.id.startswith(work_prefix) and isinstance(t, XmlCtsEditionMetadata)
        ]
        if not editions:
            raise UnknownObjectError
        urn = URN(editions[0])

    text = self.inventory[str(urn)]
    if os.path.isfile(text.path):
        with io.open(text.path) as __xml__:
            resource = self.TEXT_CLASS(urn=urn, resource=self.xmlparse(__xml__))
    else:
        # A missing file is only reported: the metadata is still returned
        resource = None
        self.logger.warning('The file {} is mentioned in the metadata but does not exist'.format(text.path))
    return resource, text
def __urnitem__(self, key):
    """ Retrieve the object identified by a URN, starting from this object

    :param key: URN (or value accepted by ``URN``) of the object to retrieve
    :return: ``self`` when the URN equals its own urn, otherwise the object
        reached by indexing the children with each successive URN prefix
    :raises ValueError: when the URN has a length of 2 or less, or when an
        object found on the way does not carry the expected urn
    """
    urn = URN(key)

    if len(urn) <= 2:
        raise ValueError("Not valid urn")
    if hasattr(self, "urn") and self.urn == urn:
        return self

    # The descent starts right below the level of the current object
    i = len(self.urn) if hasattr(self, "urn") else 2

    if isinstance(self, TextInventory):
        children = self.textgroups
    elif isinstance(self, TextGroup):
        children = self.works
    elif isinstance(self, Work):
        children = self.texts

    order = ["", "", URN.TEXTGROUP, URN.WORK, URN.VERSION]
    labels = [
        "URN namespace",
        "CTS Namespace",
        "URN Textgroup",
        "URN Work",
        "URN Version"
    ]

    while i < len(urn):
        expected = urn.upTo(order[i])
        children = children[expected]
        # The object found must identify itself with the prefix used as key
        if not (hasattr(children, "urn") and str(children.urn) == expected):
            raise ValueError("Unrecognized urn at " + labels[i])
        i += 1

    return children
def __urnitem__(self, key):
    """ Find the object matching a URN among this object and its descendants

    :param key: URN (or value accepted by ``URN``) to look for
    :return: The current object when the URN is its own, otherwise the
        descendant found by walking down one URN level at a time
    :raises ValueError: when the URN has a length of 2 or less, or when a
        level of the URN is not recognized
    """
    urn = URN(key)
    if len(urn) <= 2:
        raise ValueError("Not valid urn")
    if hasattr(self, "urn") and self.urn == urn:
        return self

    # First level to resolve: just under the current object's own level
    if hasattr(self, "urn"):
        first_level = len(self.urn)
    else:
        first_level = 2

    if isinstance(self, TextInventory):
        children = self.textgroups
    elif isinstance(self, TextGroup):
        children = self.works
    elif isinstance(self, Work):
        children = self.texts

    levels = ["", "", URN.TEXTGROUP, URN.WORK, URN.VERSION]
    level_names = ["URN namespace", "CTS Namespace", "URN Textgroup", "URN Work", "URN Version"]

    for depth in range(first_level, len(urn)):
        prefix = urn.upTo(levels[depth])
        children = children[prefix]
        recognized = hasattr(children, "urn") and str(children.urn) == prefix
        if not recognized:
            error = "Unrecognized urn at " + level_names[depth]
            raise ValueError(error)
    return children
def urn(self, value):
    """ Set the urn and keep it in sync with the stored reference

    :param value: URN to be saved
    :type value: URN, basestring, str
    :raises: *TypeError* when the value is neither a string nor a URN

    .. note:: Nothing is changed when the new value has the same string \
    form as the current URN
    """
    previous = self._URN

    if isinstance(value, basestring):
        value = URN(value)
    elif not isinstance(value, URN):
        raise TypeError()

    if str(previous) == str(value):
        return

    self._URN = value
    if value.reference and self.__reference != value.reference:
        # The new URN carries its own reference: adopt it
        self.__reference = value.reference
    elif not value.reference and self.__reference and len(self.__reference):
        # The new URN has no reference: append the one already stored
        completed = "{}:{}".format(str(value), str(self.__reference))
        self._URN = URN(completed)
def test_equality(self):
    # URNs built from the same string are equal; a different subreference
    # index makes them differ, and a URN never equals a plain string.
    # The passage is the range 1.1@Achilles to 1.10@the[2]; the literals
    # previously held an e-mail obfuscation placeholder instead of it.
    a = URN("urn:cts:greekLit:tlg0012.tlg001.mth-01:1.1@Achilles-1.10@the[2]")
    b = URN("urn:cts:greekLit:tlg0012.tlg001.mth-01:1.1@Achilles-1.10@the[2]")
    c = URN("urn:cts:greekLit:tlg0012.tlg001.mth-01:1.1@Achilles-1.10@the[3]")
    d = "urn:cts:greekLit:tlg0012.tlg001.mth-01:1.1@Achilles-1.10@the[2]"
    self.assertEqual(a, b)
    self.assertNotEqual(a, c)
    self.assertNotEqual(a, d)
def test_from_work_emptiness(self):
    # A URN stopping at the work has neither version nor reference
    work_urn = "urn:cts:greekLit:textgroup.work"
    a = URN(work_urn)

    self.assertEqual(str(a), work_urn)
    expectations = (
        (URN.COMPLETE, work_urn),
        (URN.NAMESPACE, "urn:cts:greekLit"),
        (URN.TEXTGROUP, "urn:cts:greekLit:textgroup"),
        (URN.WORK, work_urn),
    )
    for level, expected in expectations:
        self.assertEqual(a.upTo(level), expected)

    self.assertIsNone(a.version)
    self.assertIsNone(a.reference)
def test_full_emptiness(self):
    # A URN stopping at the namespace leaves every deeper part empty
    namespace_urn = "urn:cts:greekLit"
    a = URN(namespace_urn)

    self.assertEqual(str(a), namespace_urn)
    for level in (URN.COMPLETE, URN.NAMESPACE):
        self.assertEqual(a.upTo(level), namespace_urn)

    self.assertIsNone(a.textgroup)
    self.assertIsNone(a.work)
    self.assertIsNone(a.version)
    self.assertIsNone(a.reference)
def test_from_textgroup_emptiness(self):
    # A URN stopping at the textgroup has no work, version or reference
    textgroup_urn = "urn:cts:greekLit:textgroup"
    a = URN(textgroup_urn)

    self.assertEqual(a.upTo(URN.COMPLETE), textgroup_urn)
    self.assertEqual(str(a), textgroup_urn)
    expectations = (
        (URN.NAMESPACE, "urn:cts:greekLit"),
        (URN.TEXTGROUP, textgroup_urn),
    )
    for level, expected in expectations:
        self.assertEqual(a.upTo(level), expected)

    self.assertIsNone(a.work)
    self.assertIsNone(a.version)
    self.assertIsNone(a.reference)
def test_from_text_emptiness(self):
    # A URN stopping at the version has no reference
    version_urn = "urn:cts:greekLit:textgroup.work.text"
    a = URN(version_urn)

    self.assertEqual(str(a), version_urn)
    expectations = (
        (URN.COMPLETE, version_urn),
        (URN.NAMESPACE, "urn:cts:greekLit"),
        (URN.TEXTGROUP, "urn:cts:greekLit:textgroup"),
        (URN.WORK, "urn:cts:greekLit:textgroup.work"),
        (URN.VERSION, version_urn),
    )
    for level, expected in expectations:
        self.assertEqual(a.upTo(level), expected)

    self.assertIsNone(a.reference)
def test_properties(self):
    # Each part of the URN is exposed as a property, and urn_namespace can
    # be reassigned.
    # The passage is the range 1.1@Achilles to 1.10@the[2]; the literals
    # previously held an e-mail obfuscation placeholder instead of it.
    a = URN("urn:cts:greekLit:tlg0012.tlg001.mth-01:1.1@Achilles-1.10@the[2]")
    self.assertEqual(a.urn_namespace, "cts")
    a.urn_namespace = "dts"
    self.assertEqual(a.urn_namespace, "dts")
    self.assertEqual(a.namespace, "greekLit")
    self.assertEqual(a.textgroup, "tlg0012")
    self.assertEqual(a.work, "tlg001")
    self.assertEqual(a.version, "mth-01")
    self.assertEqual(a.reference, Reference("1.1@Achilles-1.10@the[2]"))
def test_resource_parser(self):
    """ Check that the resolver inventory is filled from the test directory """
    resolver = NautilusCTSResolver(["./tests/testing_data/farsiLit"])

    hafez = resolver.inventory["urn:cts:farsiLit:hafez"]
    self.assertEqual(hafez.urn, URN("urn:cts:farsiLit:hafez"), "Hafez is found")
    hafez = resolver.inventory["urn:cts:farsiLit:hafez"]
    self.assertEqual(len(hafez.works), 1, "Hafez has one child")

    divan = resolver.inventory["urn:cts:farsiLit:hafez.divan"]
    self.assertEqual(divan.urn, URN("urn:cts:farsiLit:hafez.divan"), "Divan is found")
    divan = resolver.inventory["urn:cts:farsiLit:hafez.divan"]
    self.assertEqual(len(divan.texts), 3, "Divan has 3 children")
def __init__(self, resource=None, urn=None, parents=None, subtype="Edition"):
    """ Initiate the object with empty values, then apply the given ones

    :param resource: Resource passed to ``setResource`` when not None
    :param urn: Identifier, converted to a URN when not None
    :param parents: Parents of the object; the language is read from the first one
    :param subtype: Subtype stored on the object
    """
    # Defaults first, so every attribute exists whatever the arguments are
    self.resource = None
    self.citation = None
    self.lang = None
    self.urn = None
    self.docname = None
    self.parents = []
    self.subtype = subtype
    self.validate = None
    metadata_keys = ["label", "description", "namespaceMapping"]
    self.metadata = Metadata(keys=metadata_keys)

    if urn is not None:
        self.urn = URN(urn)

    if parents is not None:
        self.parents = parents
        first_parent = self.parents[0]
        self.lang = first_parent.lang

    if resource is not None:
        self.setResource(resource)
def test_getpassage_variabletypes(self, requests):
    # getTextualNode must send the same kind of GetPassage request whether
    # the subreference is a Reference, a URN or a list of strings
    endpoint_url = "http://services.perseids.org/remote/cts"
    text = CtsText("urn:cts:latinLit:phi1294.phi002.perseus-lat2", self.endpoint, citation=self.citation)
    requests.return_value.text = GET_PASSAGE

    # Subreference given as a Reference
    _ = text.getTextualNode(subreference=Reference("1.1"))
    expected_params = {
        "request": "GetPassage",
        "urn": "urn:cts:latinLit:phi1294.phi002.perseus-lat2:1.1"
    }
    requests.assert_called_with(endpoint_url, params=expected_params)

    # Subreference given as a complete URN
    passage_urn = URN("urn:cts:latinLit:phi1294.phi002.perseus-lat2:1.2")
    _ = text.getTextualNode(subreference=passage_urn)
    expected_params = {
        "request": "GetPassage",
        "urn": "urn:cts:latinLit:phi1294.phi002.perseus-lat2:1.2"
    }
    requests.assert_called_with(endpoint_url, params=expected_params)

    # Subreference given as a list of levels
    _ = text.getTextualNode(subreference=["1", "1", "1"])
    expected_params = {
        "request": "GetPassage",
        "urn": "urn:cts:latinLit:phi1294.phi002.perseus-lat2:1.1.1"
    }
    requests.assert_called_with(endpoint_url, params=expected_params)
def __init__(self, urn="", parent=None):
    """ Initiate the textgroup metadata

    :param urn: Identifier of the textgroup, stored both as string \
    identifier (parent class) and as URN
    :param parent: Parent of the textgroup, only set when not None
    """
    identifier = str(urn)
    super(CtsTextgroupMetadata, self).__init__(identifier=identifier)
    self.__urn__ = URN(urn)
    # Unknown keys create an empty CtsWorkMetadata on first access
    self.__children__ = defaultdict(CtsWorkMetadata)

    if parent is None:
        return
    self.parent = parent
def test_upTo(self):
    # upTo() must return the URN truncated at each requested level.
    # The passage is the range 1.1@Achilles to 1.10@the[2], as the
    # PASSAGE_START and PASSAGE_END expectations below require; the full
    # literals previously held an e-mail obfuscation placeholder instead.
    a = URN("urn:cts:greekLit:tlg0012.tlg001.mth-01:1.1@Achilles-1.10@the[2]")
    self.assertEqual(a.upTo(URN.COMPLETE), "urn:cts:greekLit:tlg0012.tlg001.mth-01:1.1@Achilles-1.10@the[2]")
    self.assertEqual(a.upTo(URN.NAMESPACE), "urn:cts:greekLit")
    self.assertEqual(a.upTo(URN.TEXTGROUP), "urn:cts:greekLit:tlg0012")
    self.assertEqual(a.upTo(URN.WORK), "urn:cts:greekLit:tlg0012.tlg001")
    self.assertEqual(a.upTo(URN.VERSION), "urn:cts:greekLit:tlg0012.tlg001.mth-01")
    self.assertEqual(a.upTo(URN.PASSAGE), "urn:cts:greekLit:tlg0012.tlg001.mth-01:1.1@Achilles-1.10@the[2]")
    self.assertEqual(a.upTo(URN.PASSAGE_START), "urn:cts:greekLit:tlg0012.tlg001.mth-01:1.1@Achilles")
    self.assertEqual(a.upTo(URN.PASSAGE_END), "urn:cts:greekLit:tlg0012.tlg001.mth-01:1.10@the[2]")
def getTextualNode(self, subreference=None, simple=False):
    """ Retrieve a passage of the current text

    :param subreference: Identifier of the passage; a string or a list of \
    strings is converted to a CtsReference. Without it, the result of \
    ``_getSimplePassage()`` is returned.
    :type subreference: Union[list, CtsReference]
    :param simple: If set to true, the passage is built by \
    ``_getSimplePassage`` instead of being cut out of the whole document
    :type simple: boolean
    :rtype: CapitainsCtsPassage, ContextPassage
    :returns: Asked passage
    :raises CitationDepthError: when the start of the reference is longer \
    than the depth of the citation scheme
    """
    if subreference is None:
        return self._getSimplePassage()

    already_parsed = isinstance(subreference, CtsReference)
    if not already_parsed and isinstance(subreference, str):
        subreference = CtsReference(subreference)
    elif not already_parsed and isinstance(subreference, list):
        subreference = CtsReference(".".join(subreference))

    if len(subreference.start) > self.citation.root.depth:
        raise CitationDepthError("URN is deeper than citation scheme")

    if simple is True:
        return self._getSimplePassage(subreference)

    # A single reference is handled as a range starting and ending on itself
    if subreference.is_range():
        start_levels = subreference.start.list
        end_levels = subreference.end.list
    else:
        start_levels = end_levels = subreference.start.list

    # Citation level matching the depth of each end of the range
    citation_start = self.citation.root[len(start_levels) - 1]
    citation_end = self.citation.root[len(end_levels) - 1]

    start_xpath = citation_start.fill(passage=start_levels)
    end_xpath = citation_end.fill(passage=end_levels)
    start_steps = normalizeXpath(start_xpath.split("/")[2:])
    end_steps = normalizeXpath(end_xpath.split("/")[2:])

    xml = self.textObject.xml
    source_root = xml if isinstance(xml, etree._Element) else xml.getroot()
    root = copyNode(source_root)
    root = passageLoop(xml, root, start_steps, end_steps)

    urn = URN("{}:{}".format(self.urn, subreference)) if self.urn else None

    return CapitainsCtsPassage(
        urn=urn,
        resource=root,
        text=self,
        citation=citation_start,
        reference=subreference
    )
def test_missing_text_in_passage_emptiness(self):
    # A passage attached directly to a work: the version stays empty
    passage_urn = "urn:cts:greekLit:textgroup.work:1-2"
    work_urn = "urn:cts:greekLit:textgroup.work"
    a = URN(passage_urn)

    self.assertEqual(str(a), passage_urn)
    expectations = (
        (URN.COMPLETE, passage_urn),
        (URN.NAMESPACE, "urn:cts:greekLit"),
        (URN.TEXTGROUP, "urn:cts:greekLit:textgroup"),
        (URN.WORK, work_urn),
        (URN.NO_PASSAGE, work_urn),
        (URN.PASSAGE, passage_urn),
        (URN.PASSAGE_START, "urn:cts:greekLit:textgroup.work:1"),
        (URN.PASSAGE_END, "urn:cts:greekLit:textgroup.work:2"),
    )
    for level, expected in expectations:
        self.assertEqual(a.upTo(level), expected)

    self.assertEqual(a.reference, Reference("1-2"))
    self.assertEqual(a.reference.start, Reference("1"))
    self.assertEqual(a.reference.end, Reference("2"))
    self.assertIsNone(a.version)
def test_init(self):
    # The urn given at construction feeds both id and urn; a citation is
    # available and the resource can be assigned afterwards
    identifier = "urn:cts:latinLit:phi1294.phi002.perseus-lat2"
    a = PrototypeCtsNode(urn=identifier)

    self.assertEqual(a.id, identifier)
    self.assertEqual(a.urn, URN(identifier))
    self.assertIsInstance(a.citation, Citation)

    a.resource = True
    self.assertEqual(a.resource, True)
def __getText__(self, urn):
    """ Load a text and its metadata from a URN of length 5

    :param urn: URN of the text to retrieve
    :type urn: str, URN
    :return: Loaded text and its inventory metadata
    :raises InvalidURN: when the URN does not have a length of 5
    """
    urn = urn if isinstance(urn, URN) else URN(urn)

    if len(urn) != 5:
        # this is different from MyCapytain in that we don't need to look
        # the first passage. let's always assume we get the right thing.
        raise InvalidURN()

    metadata = self.inventory[str(urn)]
    text = self.load_text(metadata.path)
    return text, metadata
def _r_GetFirstUrn(self, urn):
    """ Provisional route for GetFirstUrn request

    :param urn: URN to filter the resource
    :return: GetFirstUrn response as a (body, status code, headers) tuple
    """
    urn = URN(urn)
    textId = urn.upTo(URN.NO_PASSAGE)
    # The passage part, when there is one, is handed over as a string
    subreference = None if urn.reference is None else str(urn.reference)

    node = self.resolver.getTextualNode(textId=textId, subreference=subreference)
    firstId = node.firstId

    body = render_template(
        "cts/GetFirstUrn.xml",
        firstId=firstId,
        full_urn=textId,
        request_urn=str(urn)
    )
    return body, 200, {"content-type": "application/xml"}
def _r_GetPrevNext(self, urn):
    """ Provisional route for GetPrevNext request

    :param urn: URN to filter the resource
    :return: GetPrevNext response as a (body, status code, headers) tuple
    """
    urn = URN(urn)
    textId = urn.upTo(URN.NO_PASSAGE)
    # The passage part, when there is one, is handed over as a string
    subreference = None if urn.reference is None else str(urn.reference)

    siblings = self.resolver.getSiblings(textId=textId, subreference=subreference)
    prev_urn, next_urn = siblings

    body = render_template(
        "cts/GetPrevNext.xml",
        prev_urn=prev_urn,
        next_urn=next_urn,
        urn=textId,
        request_urn=str(urn)
    )
    return body, 200, {"content-type": "application/xml"}
def test_deep_first(self):
    # Deep annotations: a range target given as a URN
    range_urn = URN("urn:cts:latinLit:phi1294.phi002.perseus-lat2:1.pr.1-1.pr.3")
    _hits, found = self.query.getAnnotations(range_urn)
    self.assertEqual(found[0], self.four, "Deepest node should match as well")

    # Same target given as a (text identifier, subreference) tuple
    range_tuple = ("urn:cts:latinLit:phi1294.phi002.perseus-lat2", "1.pr.1-1.pr.3")
    _hits, found = self.query.getAnnotations(range_tuple)
    self.assertEqual(found[0], self.four, "Deepest node should match as well")
def getPrevNextUrn(self, reference):
    """ Get the previous and next references of a reference of the text

    :param reference: Reference from which to find siblings
    :type reference: Union[Reference, str]
    :return: (Previous CapitainsCtsPassage Reference,Next CapitainsCtsPassage Reference)
    """
    # Rebuild the URN from the text identifier without passage, so that only
    # the given reference ends up in the request
    text_urn = URN(str(self.urn)).upTo(URN.NO_PASSAGE)
    passage_urn = "{}:{}".format(str(text_urn), str(reference))

    response = self.retriever.getPrevNextUrn(urn=passage_urn)
    _prev, _next = __SharedMethod__.prevnext(response)
    return _prev, _next
def _r_GetValidReff(self, urn, level):
    """ Provisional route for GetValidReff request

    :param urn: URN to filter the resource
    :param level: Level passed to the resolver and to the template
    :return: GetValidReff response as a (body, status code, headers) tuple
    """
    urn = URN(urn)
    textId = urn.upTo(URN.NO_PASSAGE)
    # The passage part, when there is one, is handed over as a string
    subreference = None if urn.reference is None else str(urn.reference)

    reffs = self.resolver.getReffs(
        textId=textId,
        subreference=subreference,
        level=level
    )
    body = render_template(
        "cts/GetValidReff.xml",
        reffs=reffs,
        urn=textId,
        level=level,
        request_urn=str(urn)
    )
    return body, 200, {"content-type": "application/xml"}
def urn(self, value: Union[URN, str]):
    """ Set the urn

    :param value: URN to be saved; a string is converted to a URN
    :raises: *TypeError* when the value is not URN compatible
    """
    if isinstance(value, str):
        value = URN(value)
    elif not isinstance(value, URN):
        # Report the URN class itself: type(URN) would name its metaclass
        raise TypeError("New urn must be string or {} instead of {}".format(URN, type(value)))
    self._urn = value
def test_set(self):
    # Every part of a URN can be reassigned, and each assignment must be
    # reflected in the string form of the URN
    a = URN("urn:cts:latinLit:phi1294.phi002.perseus-lat2")

    # (attribute to set, new value, expected string form afterwards)
    steps = (
        ("reference", Reference("1.1"), "urn:cts:latinLit:phi1294.phi002.perseus-lat2:1.1"),
        ("reference", "2.2", "urn:cts:latinLit:phi1294.phi002.perseus-lat2:2.2"),
        ("version", "perseus-eng2", "urn:cts:latinLit:phi1294.phi002.perseus-eng2:2.2"),
        ("work", "phi001", "urn:cts:latinLit:phi1294.phi001.perseus-eng2:2.2"),
        ("textgroup", "phi1293", "urn:cts:latinLit:phi1293.phi001.perseus-eng2:2.2"),
        ("namespace", "greekLit", "urn:cts:greekLit:phi1293.phi001.perseus-eng2:2.2"),
    )
    for attribute, new_value, expected in steps:
        setattr(a, attribute, new_value)
        self.assertEqual(str(a), expected)
def _r_GetPassagePlus(self, urn):
    """ Provisional route for GetPassagePlus request

    :param urn: URN to filter the resource
    :return: GetPassagePlus response as a (body, status code, headers) tuple
    :raises InvalidURN: when the URN has a length lower than 4
    """
    urn = URN(urn)
    if len(urn) < 4:
        raise InvalidURN

    # The passage part, when there is one, is handed over as a string
    subreference = None if urn.reference is None else str(urn.reference)
    node = self.resolver.getTextualNode(
        textId=urn.upTo(URN.NO_PASSAGE),
        subreference=subreference
    )

    def localized(predicate):
        # (language, text) pairs of the literals the node holds for a predicate
        return [
            (literal.language, str(literal))
            for literal in node.metadata.get(predicate)
        ]

    body = render_template(
        "cts/GetPassagePlus.xml",
        filters="urn={}".format(urn),
        request_urn=str(urn),
        full_urn=node.urn,
        prev_urn=node.prevId,
        next_urn=node.nextId,
        metadata={
            "groupname": localized(RDF_NAMESPACES.CTS.groupname),
            "title": localized(RDF_NAMESPACES.CTS.title),
            "description": localized(RDF_NAMESPACES.CTS.description),
            "label": localized(RDF_NAMESPACES.CTS.label)
        },
        citation=Markup(node.citation.export(Mimetypes.XML.CTS)),
        passage=Markup(node.export(Mimetypes.XML.TEI))
    )
    return body, 200, {"content-type": "application/xml"}
def setUp(self):
    # Builds the annotations, the resolvers and the query shared by the tests
    self.resolver = Resolver(LocalRetriever(path="./tests/test_data/"))

    def as_resource(triple):
        # Turn a (target, resource path, type URI) tuple into an AnnotationResource
        target, path, type_uri = triple
        return AnnotationResource(path, type_uri=type_uri, target=target, resolver=self.resolver)

    # Annotations described as (target, resource path, type URI) tuples
    self.one = (
        URN("urn:cts:latinLit:phi1294.phi002.perseus-lat2:6.1"),
        "interface/treebanks/treebank1.xml",
        "dc:treebank"
    )
    self.two = (
        URN("urn:cts:latinLit:phi1294.phi002.perseus-lat2:1.5"),
        "interface/treebanks/treebank2.xml",
        "dc:treebank"
    )
    self.three = (
        URN("urn:cts:latinLit:phi1294.phi002.perseus-lat2:6.1"),
        "interface/images/N0060308_TIFF_145_145.tif",
        "dc:image"
    )
    # The fourth annotation is given to the query as a ready-made resource
    self.four = AnnotationResource(
        "interface/researchobject/researchobject.json",
        type_uri="dc:researchobject",
        target=URN("urn:cts:latinLit:phi1294.phi002.perseus-lat2:1.pr.1"),
        resolver=self.resolver
    )

    self.one_anno = as_resource(self.one)
    self.two_anno = as_resource(self.two)
    self.three_anno = as_resource(self.three)
    # Same resource and type as the third one, but targeting a tuple
    self.fourth_anno = as_resource((
        ("urn:cts:latinLit:phi1294.phi002.perseus-lat2", "1-2"),
        self.three[1],
        self.three[2]
    ))

    self.app = Flask("app")
    quiet_logger = logging.getLogger('my-logger')
    quiet_logger.propagate = False
    self.nautilus = CtsCapitainsLocalResolver(
        ["tests/test_data/interface/latinLit"],
        logger=quiet_logger
    )
    self.nemo = Nemo(app=self.app, resolver=self.nautilus, base_url="")

    annotations = [self.one, self.two, self.three, self.four]  # List of annotations
    self.query = SimpleQuery(annotations, self.resolver)
    self.query.process(self.nemo)
def _r_GetPassage(self, urn):
    """ Provisional route for GetPassage request

    :param urn: URN to filter the resource
    :return: GetPassage response as a (body, status code, headers) tuple
    :raises InvalidURN: when the URN has a length lower than 4
    """
    urn = URN(urn)
    if len(urn) < 4:
        raise InvalidURN

    # The passage part, when there is one, is handed over as a string
    subreference = None if urn.reference is None else str(urn.reference)
    node = self.resolver.getTextualNode(
        textId=urn.upTo(URN.NO_PASSAGE),
        subreference=subreference
    )

    body = render_template(
        "cts/GetPassage.xml",
        filters="urn={}".format(urn),
        request_urn=str(urn),
        full_urn=node.urn,
        passage=Markup(node.export(Mimetypes.XML.TEI))
    )
    return body, 200, {"content-type": "application/xml"}
def __getText__(self, urn):
    """ Retrieve a textual resource together with its inventory metadata

    A URN of length 5 is looked up in the inventory as is. A URN of length 4
    is first cut down to ``URN.WORK`` and replaced by the id of the first
    ``Edition`` of ``self.texts`` whose id starts with that prefix.

    :param urn: URN of a text to retrieve
    :type urn: str, URN
    :return: Textual resource and metadata
    :rtype: (Text, InventoryText)
    :raises UnknownCollection: when no edition matches a URN of length 4, \
    when the inventory does not know the URN, or when the file is missing
    :raises InvalidURN: when the URN length is neither 4 nor 5
    """
    if not isinstance(urn, URN):
        urn = URN(urn)

    if len(urn) != 5:
        if len(urn) != 4:
            raise InvalidURN
        # Computed once: the prefix does not change while scanning the texts
        work_prefix = str(urn.upTo(URN.WORK))
        editions = [
            t.id
            for t in self.texts
            if t.id.startswith(work_prefix) and isinstance(t, Edition)
        ]
        if not editions:
            raise UnknownCollection
        urn = URN(editions[0])

    try:
        text = self.inventory[str(urn)]
    except MyCapytain.errors.UnknownCollection as E:
        # Re-raised under the UnknownCollection name used by this module;
        # any other exception propagates unchanged
        raise UnknownCollection(str(E))

    if not os.path.isfile(text.path):
        raise UnknownCollection("File matching %s does not exist" % text.path)

    resource = self.read(identifier=urn, path=text.path)
    return resource, text
def test_init(self):
    """ Check that the identifier is accepted as a string or as a URN """
    identifier = "urn:cts:latinLit:phi1294.phi002.perseus-lat2"

    from_string = self.alias(identifier)
    self.assertEqual(from_string.objectId, identifier, "String are taken as parameter")

    from_urn = self.alias(URN(identifier))
    self.assertEqual(from_urn.objectId, identifier, "URN are taken as parameter")
def urn(self, value):
    """ Set the urn

    :param value: URN to be saved; text is converted to a URN first
    :type value: URN
    :raises: *TypeError* when the value is neither text nor a URN
    """
    if not isinstance(value, (text_type, URN)):
        raise TypeError()
    self.__urn__ = URN(value) if isinstance(value, text_type) else value
def test_no_end_text_emptiness(self):
    # A passage made of a single reference: the reference has no end
    passage_urn = "urn:cts:greekLit:textgroup.work.text:1"
    version_urn = "urn:cts:greekLit:textgroup.work.text"
    a = URN(passage_urn)

    self.assertEqual(str(a), passage_urn)
    expectations = (
        (URN.COMPLETE, passage_urn),
        (URN.NAMESPACE, "urn:cts:greekLit"),
        (URN.TEXTGROUP, "urn:cts:greekLit:textgroup"),
        (URN.WORK, "urn:cts:greekLit:textgroup.work"),
        (URN.VERSION, version_urn),
        (URN.PASSAGE, passage_urn),
        (URN.NO_PASSAGE, version_urn),
    )
    for level, expected in expectations:
        self.assertEqual(a.upTo(level), expected)

    self.assertEqual(a.reference, Reference("1"))
    self.assertIsNone(a.reference.end)
def test_set(self):
    # Parts can be set on a URN that did not have them; both the property
    # and the string form must follow each assignment
    a = URN("urn:cts:greekLit:textgroup")

    # (attribute to set, new value, expected property value, expected string)
    steps = (
        ("textgroup", "tg", "tg", "urn:cts:greekLit:tg"),
        ("namespace", "ns", "ns", "urn:cts:ns:tg"),
        ("work", "wk", "wk", "urn:cts:ns:tg.wk"),
        ("reference", "1-2", Reference("1-2"), "urn:cts:ns:tg.wk:1-2"),
        ("version", "vs", "vs", "urn:cts:ns:tg.wk.vs:1-2"),
    )
    for attribute, new_value, expected_value, expected_urn in steps:
        setattr(a, attribute, new_value)
        self.assertEqual(getattr(a, attribute), expected_value)
        self.assertEqual(str(a), expected_urn)
def test_warning_on_empty(self):
    # A URN reduced to "urn:cts" cannot be built
    self.assertRaises(ValueError, URN, "urn:cts")

    # Asking for the version of a URN which has none fails with a KeyError
    with self.assertRaises(KeyError):
        versionless = URN("urn:cts:ns:tg.work:1")
        versionless.upTo(URN.VERSION)