def urn(self, value):
    """ Set the urn

    :param value: URN to be saved
    :type value: URN, basestring, str
    :raises: *TypeError* when the value is neither a string nor a URN

    .. note:: When the new URN differs from the stored one, the stored \
    reference is replaced by the URN's reference if it has one; otherwise \
    the already known reference is appended to the new URN.
    """
    previous = self._URN

    if isinstance(value, basestring):
        value = URN(value)
    elif not isinstance(value, URN):
        raise TypeError()

    # Same serialization as before: nothing to synchronise
    if str(previous) == str(value):
        return

    self._URN = value
    if value.reference and self.__reference != value.reference:
        # The new URN carries its own passage: it becomes the reference
        self.__reference = value.reference
    elif not value.reference and self.__reference and len(self.__reference):
        # The new URN has no passage: keep the known reference attached to it
        self._URN = URN("{}:{}".format(str(value), str(self.__reference)))
def __getText__(self, urn):
    """ Returns a CtsTextMetadata object

    :param urn: URN of a text to retrieve
    :type urn: str, URN
    :return: Textual resource and metadata. The resource is None (and a \
    warning is logged) when the file named in the metadata does not exist.
    :rtype: (CapitainsCtsText, InventoryText)
    :raises: *UnknownObjectError* when a URN of length 4 matches no edition
    :raises: *InvalidURN* when the URN length is neither 4 nor 5
    """
    if not isinstance(urn, URN):
        urn = URN(urn)

    if len(urn) != 5:
        if len(urn) != 4:
            raise InvalidURN
        # Fall back on the first edition whose id starts with the work URN
        work_urn = str(urn.upTo(URN.WORK))
        edition_id = next(
            (
                t.id for t in self.texts
                if t.id.startswith(work_urn)
                and isinstance(t, XmlCtsEditionMetadata)
            ),
            None
        )
        if edition_id is None:
            raise UnknownObjectError
        urn = URN(edition_id)

    text = self.inventory[str(urn)]

    if os.path.isfile(text.path):
        with io.open(text.path) as __xml__:
            resource = self.TEXT_CLASS(urn=urn, resource=self.xmlparse(__xml__))
    else:
        resource = None
        self.logger.warning('The file {} is mentioned in the metadata but does not exist'.format(text.path))

    return resource, text
def test_equality(self):
    """ Two URN built from the same string are equal; a URN differs from
    a URN with another passage and from a plain string """
    # The passage is a range with subreferences on both ends
    a = URN("urn:cts:greekLit:tlg0012.tlg001.mth-01:1.1@Achilles-1.10@the[2]")
    b = URN("urn:cts:greekLit:tlg0012.tlg001.mth-01:1.1@Achilles-1.10@the[2]")
    c = URN("urn:cts:greekLit:tlg0012.tlg001.mth-01:1.1@Achilles-1.10@the[3]")
    d = "urn:cts:greekLit:tlg0012.tlg001.mth-01:1.1@Achilles-1.10@the[2]"

    self.assertEqual(a, b)
    self.assertNotEqual(a, c)
    # A string holding the same value is not a URN
    self.assertNotEqual(a, d)
def test_resource_parser(self):
    """ Test that the initiation finds correctly the resources """
    resolver = NautilusCTSResolver(["./tests/testing_data/farsiLit"])

    # Textgroup level
    hafez = resolver.inventory["urn:cts:farsiLit:hafez"]
    self.assertEqual(hafez.urn, URN("urn:cts:farsiLit:hafez"), "Hafez is found")
    self.assertEqual(
        len(resolver.inventory["urn:cts:farsiLit:hafez"].works), 1,
        "Hafez has one child"
    )

    # Work level
    divan = resolver.inventory["urn:cts:farsiLit:hafez.divan"]
    self.assertEqual(divan.urn, URN("urn:cts:farsiLit:hafez.divan"), "Divan is found")
    self.assertEqual(
        len(resolver.inventory["urn:cts:farsiLit:hafez.divan"].texts), 3,
        "Divan has 3 children"
    )
def test_getpassage_variabletypes(self, requests):
    """ Check that getTextualNode accepts a Reference, a URN or a list as
    subreference and sends the matching GetPassage request

    :param requests: Object whose calls are recorded (presumably a patched \
    mock injected by a decorator outside this block)
    """
    text = CtsText("urn:cts:latinLit:phi1294.phi002.perseus-lat2", self.endpoint, citation=self.citation)
    requests.return_value.text = GET_PASSAGE

    cases = [
        # Subreference given as a Reference object
        (
            Reference("1.1"),
            "urn:cts:latinLit:phi1294.phi002.perseus-lat2:1.1"
        ),
        # Subreference given as a complete URN
        (
            URN("urn:cts:latinLit:phi1294.phi002.perseus-lat2:1.2"),
            "urn:cts:latinLit:phi1294.phi002.perseus-lat2:1.2"
        ),
        # Subreference given as a list of citation levels
        (
            ["1", "1", "1"],
            "urn:cts:latinLit:phi1294.phi002.perseus-lat2:1.1.1"
        ),
    ]

    for subreference, expected_urn in cases:
        _ = text.getTextualNode(subreference=subreference)
        requests.assert_called_with(
            "http://services.perseids.org/remote/cts",
            params={
                "request": "GetPassage",
                "urn": expected_urn
            }
        )
def __init__(self, urn="", parent=None):
    """ Initiate the textgroup metadata

    :param urn: Identifier of the textgroup
    :type urn: str, URN
    :param parent: Parent of the textgroup, only set when given
    """
    super(CtsTextgroupMetadata, self).__init__(identifier=str(urn))
    self.__urn__ = URN(urn)
    # Unknown keys create an empty CtsWorkMetadata on first access
    self.__children__ = defaultdict(CtsWorkMetadata)

    if parent is None:
        return
    self.parent = parent
def __urnitem__(self, key):
    """ Retrieve the object identified by a URN, starting from this one

    :param key: URN of the object to retrieve
    :type key: str, URN
    :return: The current object if its urn equals the key, otherwise the \
    descendant reached by walking down one URN level at a time
    :raises: *ValueError* when the URN has a length of 2 or less, or when \
    a level of the URN does not match any child
    """
    urn = URN(key)
    has_urn = hasattr(self, "urn")

    if len(urn) <= 2:
        raise ValueError("Not valid urn")
    if has_urn and self.urn == urn:
        return self

    # Level at which the descent starts: below the current object when it
    # has a urn, otherwise at the textgroup level
    start = len(self.urn) if has_urn else 2

    if isinstance(self, TextInventory):
        children = self.textgroups
    elif isinstance(self, TextGroup):
        children = self.works
    elif isinstance(self, Work):
        children = self.texts

    order = ["", "", URN.TEXTGROUP, URN.WORK, URN.VERSION]
    labels = [
        "URN namespace", "CTS Namespace", "URN Textgroup", "URN Work",
        "URN Version"
    ]

    for depth in range(start, len(urn)):
        partial = urn.upTo(order[depth])
        children = children[partial]
        if not hasattr(children, "urn") or str(children.urn) != partial:
            raise ValueError("Unrecognized urn at " + labels[depth])

    return children
def __init__(self, resource=None, urn=None, parents=None, subtype="Edition"):
    """ Initiate the object

    :param resource: Resource handed to setResource when given
    :param urn: Identifier of the text, converted to URN when given
    :type urn: str, URN
    :param parents: Parents of the text; the first one provides the language
    :type parents: list
    :param subtype: Subtype of the text
    :type subtype: str
    """
    # Defaults, overridden below by the parameters which were given
    self.resource = None
    self.citation = None
    self.lang = None
    self.urn = None
    self.docname = None
    self.parents = []
    self.subtype = subtype
    self.validate = None
    self.metadata = Metadata(
        keys=["label", "description", "namespaceMapping"]
    )

    if urn is not None:
        self.urn = URN(urn)

    if parents is not None:
        self.parents = parents
        # Language is inherited from the first parent
        self.lang = self.parents[0].lang

    if resource is not None:
        self.setResource(resource)
def test_from_textgroup_emptiness(self):
    """ A URN stopping at the textgroup has no work, version or reference """
    a = URN("urn:cts:greekLit:textgroup")
    self.assertEqual(str(a), "urn:cts:greekLit:textgroup")

    expectations = [
        (URN.COMPLETE, "urn:cts:greekLit:textgroup"),
        (URN.NAMESPACE, "urn:cts:greekLit"),
        (URN.TEXTGROUP, "urn:cts:greekLit:textgroup"),
    ]
    for level, serialized in expectations:
        self.assertEqual(a.upTo(level), serialized)

    # Everything below the textgroup is empty
    self.assertIsNone(a.work)
    self.assertIsNone(a.version)
    self.assertIsNone(a.reference)
def test_from_text_emptiness(self):
    """ A URN stopping at the version has no reference """
    a = URN("urn:cts:greekLit:textgroup.work.text")
    self.assertEqual(str(a), "urn:cts:greekLit:textgroup.work.text")

    expectations = [
        (URN.COMPLETE, "urn:cts:greekLit:textgroup.work.text"),
        (URN.NAMESPACE, "urn:cts:greekLit"),
        (URN.TEXTGROUP, "urn:cts:greekLit:textgroup"),
        (URN.WORK, "urn:cts:greekLit:textgroup.work"),
        (URN.VERSION, "urn:cts:greekLit:textgroup.work.text"),
    ]
    for level, serialized in expectations:
        self.assertEqual(a.upTo(level), serialized)

    self.assertIsNone(a.reference)
def getTextualNode(self, subreference=None, simple=False):
    """ Finds a passage in the current text

    :param subreference: Identifier of the subreference / passages. \
    Strings and lists are converted to CtsReference.
    :type subreference: Union[str, list, CtsReference]
    :param simple: If set to true, retrieves nodes up to the given one, cleaning non required siblings.
    :type simple: boolean
    :rtype: CapitainsCtsPassage, ContextPassage
    :returns: Asked passage
    :raises: *CitationDepthError* when the start of the reference has more \
    levels than the citation scheme
    """
    if subreference is None:
        return self._getSimplePassage()

    if not isinstance(subreference, CtsReference):
        if isinstance(subreference, str):
            subreference = CtsReference(subreference)
        elif isinstance(subreference, list):
            subreference = CtsReference(".".join(subreference))

    if len(subreference.start) > self.citation.root.depth:
        raise CitationDepthError("URN is deeper than citation scheme")

    if simple is True:
        return self._getSimplePassage(subreference)

    # A single passage is treated as a range starting and ending on itself
    if subreference.is_range():
        start, end = subreference.start.list, subreference.end.list
    else:
        start = end = subreference.start.list

    # Citation levels matching the depth of each end of the range
    citation_start = self.citation.root[len(start) - 1]
    citation_end = self.citation.root[len(end) - 1]

    start = citation_start.fill(passage=start)
    end = citation_end.fill(passage=end)

    # The first two "/"-separated steps of the filled paths are dropped
    start = normalizeXpath(start.split("/")[2:])
    end = normalizeXpath(end.split("/")[2:])

    xml = self.textObject.xml
    if isinstance(xml, etree._Element):
        root = copyNode(xml)
    else:
        root = copyNode(xml.getroot())

    root = passageLoop(xml, root, start, end)

    urn = URN("{}:{}".format(self.urn, subreference)) if self.urn else None

    return CapitainsCtsPassage(
        urn=urn,
        resource=root,
        text=self,
        citation=citation_start,
        reference=subreference
    )
def test_properties(self):
    """ Each component of a complete URN is exposed as a property """
    # The passage is a range with subreferences on both ends
    a = URN(
        "urn:cts:greekLit:tlg0012.tlg001.mth-01:1.1@Achilles-1.10@the[2]")

    self.assertEqual(a.urn_namespace, "cts")
    self.assertEqual(a.namespace, "greekLit")
    self.assertEqual(a.textgroup, "tlg0012")
    self.assertEqual(a.work, "tlg001")
    self.assertEqual(a.version, "mth-01")
    self.assertEqual(a.reference, Reference("1.1@Achilles-1.10@the[2]"))
def test_init(self):
    """ The node exposes its identifier, its URN, a Citation and a
    writable resource """
    identifier = "urn:cts:latinLit:phi1294.phi002.perseus-lat2"
    node = PrototypeCtsNode(urn=identifier)

    self.assertEqual(node.id, identifier)
    self.assertEqual(node.urn, URN(identifier))
    self.assertIsInstance(node.citation, Citation)

    # The resource can be assigned and read back
    node.resource = True
    self.assertEqual(node.resource, True)
def test_upTo(self):
    """ upTo serializes the URN up to the requested level """
    # The passage must be a real range so that PASSAGE_START and
    # PASSAGE_END yield its two ends
    a = URN("urn:cts:greekLit:tlg0012.tlg001.mth-01:1.1@Achilles-1.10@the[2]")

    self.assertEqual(a.upTo(URN.COMPLETE),
                     "urn:cts:greekLit:tlg0012.tlg001.mth-01:1.1@Achilles-1.10@the[2]")
    self.assertEqual(a.upTo(URN.NAMESPACE), "urn:cts:greekLit")
    self.assertEqual(a.upTo(URN.TEXTGROUP), "urn:cts:greekLit:tlg0012")
    self.assertEqual(a.upTo(URN.WORK), "urn:cts:greekLit:tlg0012.tlg001")
    self.assertEqual(a.upTo(URN.VERSION),
                     "urn:cts:greekLit:tlg0012.tlg001.mth-01")
    self.assertEqual(a.upTo(URN.PASSAGE),
                     "urn:cts:greekLit:tlg0012.tlg001.mth-01:1.1@Achilles-1.10@the[2]")
    self.assertEqual(a.upTo(URN.PASSAGE_START),
                     "urn:cts:greekLit:tlg0012.tlg001.mth-01:1.1@Achilles")
    self.assertEqual(a.upTo(URN.PASSAGE_END),
                     "urn:cts:greekLit:tlg0012.tlg001.mth-01:1.10@the[2]")
def __getText__(self, urn):
    """ Load a text and its metadata

    :param urn: URN of the text to retrieve
    :type urn: str, URN
    :return: Loaded text and the inventory entry it was loaded from
    :raises: *InvalidURN* when the length of the URN is not 5
    """
    urn = urn if isinstance(urn, URN) else URN(urn)

    # this is different from MyCapytain in that we don't need to look
    # the first passage. let's always assume we get the right thing.
    if len(urn) != 5:
        raise InvalidURN()

    metadata = self.inventory[str(urn)]
    return self.load_text(metadata.path), metadata
def test_deep_first(self):
    """ Deep annotations are returned first, whether the target is given
    as a URN or as a tuple """
    targets = (
        # Deep Annotations
        URN("urn:cts:latinLit:phi1294.phi002.perseus-lat2:1.pr.1-1.pr.3"),
        # Test with tuple
        ("urn:cts:latinLit:phi1294.phi002.perseus-lat2", "1.pr.1-1.pr.3"),
    )
    for target in targets:
        _, annotations = self.query.getAnnotations(target)
        self.assertEqual(annotations[0], self.four, "Deepest node should match as well")
def getPrevNextUrn(self, reference):
    """ Get the previous and next URN of a reference of the text

    :param reference: Reference from which to find siblings
    :type reference: Union[Reference, str]
    :return: (Previous CapitainsCtsPassage Reference,Next CapitainsCtsPassage Reference)
    """
    # Rebuild the passage URN from the text URN stripped of any passage
    text_urn = URN(str(self.urn)).upTo(URN.NO_PASSAGE)
    passage_urn = "{}:{}".format(str(text_urn), str(reference))

    response = self.retriever.getPrevNextUrn(urn=passage_urn)
    _prev, _next = __SharedMethod__.prevnext(response)
    return _prev, _next
def urn(self, value: Union[URN, str]):
    """ Set the urn

    :param value: URN to be saved; strings are converted to URN
    :raises: *TypeError* when the value is not URN compatible
    """
    if isinstance(value, str):
        value = URN(value)
    elif not isinstance(value, URN):
        # Report the URN class itself: type(URN) would be its metaclass
        raise TypeError("New urn must be string or {} instead of {}".format(URN, type(value)))
    self._urn = value
def test_no_end_text_emptiness(self):
    """ A URN with a single passage has a reference without end """
    a = URN("urn:cts:greekLit:textgroup.work.text:1")
    self.assertEqual(str(a), "urn:cts:greekLit:textgroup.work.text:1")

    expectations = [
        (URN.COMPLETE, "urn:cts:greekLit:textgroup.work.text:1"),
        (URN.NAMESPACE, "urn:cts:greekLit"),
        (URN.TEXTGROUP, "urn:cts:greekLit:textgroup"),
        (URN.WORK, "urn:cts:greekLit:textgroup.work"),
        (URN.VERSION, "urn:cts:greekLit:textgroup.work.text"),
        (URN.PASSAGE, "urn:cts:greekLit:textgroup.work.text:1"),
        (URN.NO_PASSAGE, "urn:cts:greekLit:textgroup.work.text"),
    ]
    for level, serialized in expectations:
        self.assertEqual(a.upTo(level), serialized)

    self.assertEqual(a.reference, Reference("1"))
    self.assertIsNone(a.reference.end)
def setUp(self):
    """ Build the resolver, the annotation fixtures, the Nemo application
    and the query under test """
    self.resolver = Resolver(LocalRetriever(path="./tests/test_data/"))

    # Raw annotations, described as (target, uri, type) tuples
    self.one = (
        URN("urn:cts:latinLit:phi1294.phi002.perseus-lat2:6.1"),
        "interface/treebanks/treebank1.xml",
        "dc:treebank"
    )
    self.two = (
        URN("urn:cts:latinLit:phi1294.phi002.perseus-lat2:1.5"),
        "interface/treebanks/treebank2.xml",
        "dc:treebank"
    )
    self.three = (
        URN("urn:cts:latinLit:phi1294.phi002.perseus-lat2:6.1"),
        "interface/images/N0060308_TIFF_145_145.tif",
        "dc:image"
    )
    self.four = AnnotationResource(
        "interface/researchobject/researchobject.json",
        type_uri="dc:researchobject",
        target=URN("urn:cts:latinLit:phi1294.phi002.perseus-lat2:1.pr.1"),
        resolver=self.resolver
    )

    def as_resource(annotation, target=None):
        """ Turn a (target, uri, type) tuple into an AnnotationResource,
        optionally overriding its target """
        own_target, uri, type_uri = annotation
        return AnnotationResource(
            uri,
            type_uri=type_uri,
            target=own_target if target is None else target,
            resolver=self.resolver
        )

    self.one_anno = as_resource(self.one)
    self.two_anno = as_resource(self.two)
    self.three_anno = as_resource(self.three)
    # Same resource as the third one, targeted through a (urn, passage) tuple
    self.fourth_anno = as_resource(
        self.three,
        target=("urn:cts:latinLit:phi1294.phi002.perseus-lat2", "1-2")
    )

    self.app = Flask("app")

    logger = logging.getLogger('my-logger')
    logger.propagate = False

    self.nautilus = CtsCapitainsLocalResolver(
        ["tests/test_data/interface/latinLit"],
        logger=logger
    )
    self.nemo = Nemo(app=self.app, resolver=self.nautilus, base_url="")

    annotations = [self.one, self.two, self.three, self.four]  # List of annotations
    self.query = SimpleQuery(annotations, self.resolver)
    self.query.process(self.nemo)
def test_init(self):
    """ Assert initiation takes into account params """
    cases = [
        (
            "urn:cts:latinLit:phi1294.phi002.perseus-lat2",
            "String are taken as parameter"
        ),
        (
            URN("urn:cts:latinLit:phi1294.phi002.perseus-lat2"),
            "URN are taken as parameter"
        ),
    ]
    for target, message in cases:
        self.assertEqual(
            self.alias(target).objectId,
            "urn:cts:latinLit:phi1294.phi002.perseus-lat2",
            message
        )
def __getText__(self, urn):
    """ Returns a PrototypeText object

    :param urn: URN of a text to retrieve
    :type urn: str, URN
    :return: Textual resource and metadata
    :rtype: (Text, InventoryText)
    :raises: *InvalidURN* when the URN length is neither 4 nor 5
    :raises: *UnknownCollection* when no edition matches a URN of length 4, \
    when the inventory does not know the URN, or when the file named in the \
    metadata does not exist
    """
    if not isinstance(urn, URN):
        urn = URN(urn)

    if len(urn) != 5:
        if len(urn) != 4:
            raise InvalidURN
        # Fall back on the first edition whose id starts with the work URN
        work_urn = str(urn.upTo(URN.WORK))
        edition_id = next(
            (
                t.id for t in self.texts
                if t.id.startswith(work_urn) and isinstance(t, Edition)
            ),
            None
        )
        if edition_id is None:
            raise UnknownCollection
        urn = URN(edition_id)

    try:
        text = self.inventory[str(urn)]
    except MyCapytain.errors.UnknownCollection as E:
        # Re-raise under the UnknownCollection name used by this module
        raise UnknownCollection(str(E))

    if not os.path.isfile(text.path):
        raise UnknownCollection("File matching %s does not exist" % text.path)

    resource = self.read(identifier=urn, path=text.path)
    return resource, text
def urn(self, value):
    """ Set the urn

    :param value: URN to be saved; text values are converted to URN
    :type value: URN
    :raises: *TypeError* when the value is not URN compatible
    """
    if isinstance(value, text_type):
        self.__urn__ = URN(value)
        return

    if not isinstance(value, URN):
        raise TypeError()

    self.__urn__ = value
def __makePassageKwargs__(urn, reference):
    """ Little helper used by CapitainsCtsPassage here to comply with parents args

    :param urn: URN of the text, or None
    :param reference: Reference String
    :return: Dictionary of arguments with URN based on identifier and \
    reference; empty when no urn is given
    """
    if urn is None:
        return {}

    if reference is None:
        return {"urn": urn}

    # Attach the reference to the version level of the URN
    return {"urn": URN("{}:{}".format(urn.upTo(URN.VERSION), reference))}
def test_set(self):
    """ Setting any component of a URN updates its serialization """
    a = URN("urn:cts:latinLit:phi1294.phi002.perseus-lat2")

    # (attribute, new value, serialization expected after the assignment)
    steps = [
        ("reference", Reference("1.1"),
         "urn:cts:latinLit:phi1294.phi002.perseus-lat2:1.1"),
        ("reference", "2.2",
         "urn:cts:latinLit:phi1294.phi002.perseus-lat2:2.2"),
        ("version", "perseus-eng2",
         "urn:cts:latinLit:phi1294.phi002.perseus-eng2:2.2"),
        ("work", "phi001",
         "urn:cts:latinLit:phi1294.phi001.perseus-eng2:2.2"),
        ("textgroup", "phi1293",
         "urn:cts:latinLit:phi1293.phi001.perseus-eng2:2.2"),
        ("namespace", "greekLit",
         "urn:cts:greekLit:phi1293.phi001.perseus-eng2:2.2"),
    ]
    for attribute, value, serialized in steps:
        setattr(a, attribute, value)
        self.assertEqual(str(a), serialized)
def setUp(self):
    """ Prepare a resolver and two sets of parameters, the second one
    using a dictionary as target """
    resolver = Resolver(RetrieverMock)
    self.resolver = resolver

    # Target given directly as a URN
    self.params_1 = [
        "http://localhost",
        URN("urn:cts:latinLit:phi1294.phi002.perseus-lat2"),
        "http://foo.bar/treebank",
        resolver,
        Target,
        "application/xml",
        "treebank",
    ]

    # Target given as a dictionary wrapping the URN
    self.params_2 = [
        "http://localhost1",
        {"urn": URN("urn:cts:latinLit:phi1294.phi002.perseus-lat2")},
        "http://foo.bar/treebank1",
        resolver,
        WTarget,
        "application/xml1",
        "treebank1",
    ]
def reference(self, value):
    """ Set up ID property

    :param value: Representation of the passage subreference
    :type value: list, tuple, basestring, Reference

    .. note:: When the reference changes and a URN is already set, the \
    URN is rebuilt so that it carries the new reference
    """
    if isinstance(value, (list, tuple)):
        parsed = Reference(".".join(value))
    elif isinstance(value, basestring):
        parsed = Reference(value)
    elif isinstance(value, Reference):
        parsed = value
    else:
        # Unsupported types leave the current state untouched
        parsed = None

    if parsed and self.__reference != parsed:
        self.__reference = parsed

        if self._URN and len(self._URN):
            if len(value):
                # Replace the passage part of the URN with the new reference
                rebuilt = "{}:{}".format(
                    self._URN.upTo(URN.NO_PASSAGE), str(parsed))
            else:
                rebuilt = self._URN["text"]
            self._URN = URN(rebuilt)
def __init__(self, urn="", parent=None, lang=None):
    """ Initiate the text metadata

    :param urn: Identifier of the text
    :type urn: str, URN
    :param parent: Parent of the text, only set when given
    :param lang: Language of the text; when omitted, it is taken from the \
    parent if there is one
    """
    self.__subtype__ = self.SUBTYPE
    super(CtsTextMetadata, self).__init__(identifier=str(urn))

    self.resource = self.citation = None
    self.__urn__ = URN(urn)
    self.docname = self.validate = None

    lang_given = lang is not None
    if lang_given:
        self.lang = lang

    if parent is None:
        return

    self.parent = parent
    if not lang_given:
        # No explicit language: inherit the one of the parent
        self.lang = self.parent.lang
def test_missing_text_in_passage_emptiness(self):
    """ A URN with a passage range but no version still exposes its
    passage and has no version """
    a = URN("urn:cts:greekLit:textgroup.work:1-2")
    self.assertEqual(str(a), "urn:cts:greekLit:textgroup.work:1-2")

    expectations = [
        (URN.COMPLETE, "urn:cts:greekLit:textgroup.work:1-2"),
        (URN.NAMESPACE, "urn:cts:greekLit"),
        (URN.TEXTGROUP, "urn:cts:greekLit:textgroup"),
        (URN.WORK, "urn:cts:greekLit:textgroup.work"),
        (URN.NO_PASSAGE, "urn:cts:greekLit:textgroup.work"),
        (URN.PASSAGE, "urn:cts:greekLit:textgroup.work:1-2"),
        (URN.PASSAGE_START, "urn:cts:greekLit:textgroup.work:1"),
        (URN.PASSAGE_END, "urn:cts:greekLit:textgroup.work:2"),
    ]
    for level, serialized in expectations:
        self.assertEqual(a.upTo(level), serialized)

    # The range and both of its ends are available as references
    self.assertEqual(a.reference, Reference("1-2"))
    self.assertEqual(a.reference.start, Reference("1"))
    self.assertEqual(a.reference.end, Reference("2"))

    self.assertIsNone(a.version)
def test_urn_access(self):
    """ Item access by URN returns the matching object or raises
    ValueError """
    # Limited to what's possible in proto...
    resource = Resource(resource="hello")
    resource.urn = URN("urn:cts:greekLit:tg")
    inventory = TIV(resource="hello")

    # A URN stopping at the namespace is rejected
    with six.assertRaisesRegex(self, ValueError, "Not valid urn"):
        resource["urn:cts:greekLit"]

    # Asking for its own URN returns the object itself
    self.assertEqual(resource["urn:cts:greekLit:tg"], resource)

    # An unknown textgroup is reported at the textgroup level
    with six.assertRaisesRegex(self, ValueError, "Unrecognized urn at URN Textgroup"):
        inventory["urn:cts:greekLit:tg2"]