def __init__(self, resource, name=None, logger=None, cache=None, dispatcher=None):
    """ Prepare the resolver state; nothing is parsed at this point

    :param resource: Resource(s) the resolver works on, stored untouched
    :param name: Name of the resolver, "repository" when not provided
    :type name: str
    :param logger: Logger to use, fetched through ``logging`` when not provided
    :param cache: Cache backend, a ``NullCache`` instance when not provided
    :param dispatcher: Collection dispatcher; when not provided, one is built \
    around a collection holding a single "default" inventory
    """
    if dispatcher is not None:
        self.dispatcher = dispatcher
    else:
        # No dispatcher supplied : fall back on one collection holding a
        # single inventory named "default"
        default_collection = TextInventoryCollection(identifier="defaultTic")
        default_inventory = TextInventory("default")
        default_inventory.parent = default_collection
        default_inventory.set_label("Default collection", "eng")
        self.dispatcher = CollectionDispatcher(default_collection)

    self.__inventory__ = None
    self.__texts__ = []

    # NOTE(review): the logger is looked up with the raw ``name`` argument,
    # before the "repository" fallback applies, so name=None ends up on
    # logging.getLogger(None) (the root logger) - confirm this is intended
    self.logger = logger if logger else logging.getLogger(name)
    self.name = name if name else "repository"

    self.__cache__ = NullCache() if cache is None else cache
    self.__resources__ = resource

    # Cache keys are derived from the resolved name of the resolver
    self.inventory_cache_key = _cache_key(
        "Nautilus", self.name, "Inventory", "Resources"
    )
    self.texts_parsed_cache_key = _cache_key(
        "Nautilus", self.name, "Inventory", "TextsParsed"
    )
def __init__(self, resource, name=None, logger=None, dispatcher=None):
    """ Prepare the resolver state, then parse the given resource

    :param resource: Resource(s) handed over to ``self.parse``
    :param name: Name of the resolver, "repository" when not provided
    :type name: str
    :param logger: Logger to use, fetched through ``logging`` when not provided
    :param dispatcher: Collection dispatcher; when not provided, one is built \
    around a collection holding a single "default" inventory
    """
    if dispatcher is not None:
        self.dispatcher = dispatcher
    else:
        # No dispatcher supplied : fall back on one collection holding a
        # single inventory named "default"
        default_collection = CtsTextInventoryCollection(identifier="defaultTic")
        default_inventory = XmlCtsTextInventoryMetadata("default")
        default_inventory.parent = default_collection
        default_inventory.set_label("Default collection", "eng")
        self.dispatcher = CollectionDispatcher(default_collection)

    # The inventory is whatever collection the dispatcher is built around
    self.__inventory__ = self.dispatcher.collection

    # NOTE(review): the logger is looked up with the raw ``name`` argument,
    # before the "repository" fallback applies, so name=None ends up on
    # logging.getLogger(None) (the root logger) - confirm this is intended
    self.logger = logger if logger else logging.getLogger(name)
    self.name = name if name else "repository"

    # Copy the class-level TEXT_CLASS onto the instance
    self.TEXT_CLASS = type(self).TEXT_CLASS
    self.works = []

    self.parse(resource)
def getMetadata(self, objectId=None, **filters):
    """ Retrieve the metadata of a text or of a collection

    Without identifier the whole inventory is returned. An identifier
    matching a direct child of the inventory returns that child. Any other
    identifier is resolved through ``__getTextMetadata__`` : a reduced
    inventory is rebuilt around the matching texts and the requested item
    is read from it.

    :param objectId: Object Identifier to filter on
    :type objectId: str
    :param filters: Kwargs parameters (not used by this implementation)
    :type filters: dict
    :return: Collection
    """
    if objectId is None:
        return self.inventory
    if objectId in self.inventory.children.keys():
        return self.inventory[objectId]

    texts, _, _ = self.__getTextMetadata__(urn=objectId)

    # When every text comes from the same inventory, its name is reused
    source_inventories = {text.parent.parent.parent.id for text in texts}
    if len(source_inventories) == 1:
        inventory = XmlCtsTextInventoryMetadata(name=next(iter(source_inventories)))
    else:
        inventory = XmlCtsTextInventoryMetadata()

    for text in texts:
        textgroup_urn = str(text.parent.parent.urn)
        work_urn = str(text.parent.urn)
        text_urn = str(text.urn)

        # Textgroup and work are created on demand; passing ``parent``
        # is what attaches them to the rebuilt tree
        if textgroup_urn not in inventory.textgroups:
            XmlCtsTextgroupMetadata(urn=textgroup_urn, parent=inventory)
        if work_urn not in inventory.textgroups[textgroup_urn].works:
            XmlCtsWorkMetadata(urn=work_urn, parent=inventory.textgroups[textgroup_urn])

        # Pick the class matching the kind of text; only translations and
        # commentaries carry their language over
        if isinstance(text, XmlCtsEditionMetadata):
            text_class, extra = XmlCtsEditionMetadata, {}
        elif isinstance(text, XmlCtsTranslationMetadata):
            text_class, extra = XmlCtsTranslationMetadata, {"lang": text.lang}
        elif isinstance(text, XmlCtsCommentaryMetadata):
            text_class, extra = XmlCtsCommentaryMetadata, {"lang": text.lang}
        else:
            # Any other kind of object is left out of the rebuilt inventory
            continue

        rebuilt = text_class(
            urn=text_urn,
            parent=inventory.textgroups[textgroup_urn].works[work_urn],
            **extra
        )
        rebuilt.citation = text.citation

    return inventory[objectId]
def test_post_work_dispatching_active(self):
    """ Dispatching can rely on data below the work level : here texts
    are routed according to their citation scheme """
    tic = CtsTextInventoryCollection()
    poetry = CtsTextInventoryMetadata("urn:perseus:poetry", parent=tic)
    prose = CtsTextInventoryMetadata("urn:perseus:prose", parent=tic)
    dispatcher = CollectionDispatcher(tic, default_inventory_name="urn:perseus:prose")

    @dispatcher.inventory("urn:perseus:poetry")
    def dispatchPoetry(collection, **kwargs):
        # A single citation level named "line" is enough to go to poetry
        return any(
            citation.name == "line"
            for readable in collection.readableDescendants
            for citation in readable.citation
        )

    resolver = CtsCapitainsLocalResolver(
        ["./tests/testing_data/latinLit2"],
        dispatcher=dispatcher
    )

    # Export everything to CTS XML before the graph gets emptied
    master_xml = resolver.getMetadata().export(Mimetypes.XML.CTS)
    poetry_xml = resolver.getMetadata("urn:perseus:poetry").export(Mimetypes.XML.CTS)
    prose_xml = resolver.getMetadata("urn:perseus:prose").export(Mimetypes.XML.CTS)

    # Presumably empties the shared graph so that the exports are parsed
    # back without leftovers from the resolver - TODO confirm
    get_graph().remove((None, None, None))
    del poetry, prose

    poetry = XmlCtsTextInventoryMetadata.parse(poetry_xml)
    prose = XmlCtsTextInventoryMetadata.parse(prose_xml)
    self.assertEqual(
        len(poetry.textgroups), 3,
        "There should be 3 textgroups in Poetry (Martial, Ovid and Juvenal)"
    )
    self.assertIsInstance(poetry, CtsTextInventoryMetadata, "should be textinventory")
    self.assertEqual(
        len(prose.textgroups), 1,
        "There should be one textgroup in Prose (Greek texts)"
    )

    get_graph().remove((None, None, None))
    del poetry, prose

    master = XmlCtsTextInventoryMetadata.parse(master_xml)
    self.assertEqual(
        len(master.readableDescendants), 26,
        "There should be all 26 readable descendants in the master collection"
    )
def load_text_inventory_metadata() -> cts.CtsTextInventoryMetadata:
    """ Load the CTS text inventory for the configured resolver

    ``settings.CTS_RESOLVER["type"]`` selects where the inventory comes from :

    - ``"api"`` : the inventory XML is read from the file named by
      ``settings.CTS_LOCAL_TEXT_INVENTORY`` when that setting exists and is
      not ``None``, otherwise it is requested through ``getCapabilities()``
      on the resolver endpoint. The XML is then parsed.
    - ``"local"`` : the ``"default"`` entry of the resolver metadata is used.

    :return: Text inventory metadata
    :raises ValueError: When the configured resolver type is neither \
    ``"api"`` nor ``"local"``
    """
    resolver_type = settings.CTS_RESOLVER["type"]
    resolver = default_resolver()

    if resolver_type == "api":
        local_inventory = getattr(settings, "CTS_LOCAL_TEXT_INVENTORY", None)
        if local_inventory is not None:
            # A local copy of the inventory takes precedence over the endpoint
            with open(local_inventory, "r") as fp:
                ti_xml = fp.read()
        else:
            ti_xml = resolver.endpoint.getCapabilities()
        return XmlCtsTextInventoryMetadata.parse(ti_xml)

    if resolver_type == "local":
        return resolver.getMetadata()["default"]

    # Previously an unknown type fell off the end and returned None, which
    # contradicts the declared return type and only failed later at the caller
    raise ValueError(
        "Unsupported CTS resolver type: {!r} (expected 'api' or 'local')".format(resolver_type)
    )
def test_types(self):
    """ Descendants of a parsed inventory are built with the expected classes """
    inventory = XmlCtsTextInventoryMetadata.parse(resource=SENECA)

    # All descendants : one textgroup, ten works and ten editions
    found_types = [type(node) for node in inventory.descendants]
    expected_types = (
        [XmlCtsTextgroupMetadata]
        + [XmlCtsWorkMetadata] * 10
        + [XmlCtsEditionMetadata] * 10
    )
    self.assertCountEqual(
        found_types,
        expected_types,
        "Descendant should be correctly parsed into correct types"
    )

    # Readable descendants : the ten editions only, no work
    readable_types = [type(node) for node in inventory.readableDescendants]
    self.assertCountEqual(
        readable_types,
        [XmlCtsEditionMetadata] * 10,
        "Descendant should be correctly parsed into correct types and filtered when readable"
    )
def test_get_capabilities(self):
    """ GetCapabilities returns an inventory that can be parsed and holds the expected edition """
    edition_urn = "urn:cts:latinLit:phi1294.phi002.perseus-lat2"

    response = self.app.get("/cts?request=GetCapabilities")
    inventory = TextInventory.parse(resource=response.data.decode())
    self.assertEqual(str(inventory[edition_urn].urn), edition_urn)

    # Cache check : only possible when a cache is configured, and it relies
    # on the private storage of the specific SIMPLE BACKEND
    if self.cache is None:
        return
    self.assertGreater(
        len(self.cache.cache._cache), 0,
        "There should be something cached"
    )
def getMetadata(self, objectId=None, **filters):
    """ Retrieve the metadata of a text or of a collection from the endpoint

    The identifier, when given, is forwarded to GetCapabilities as the
    ``urn`` filter; the response is parsed and the node whose id equals the
    identifier is returned. Without identifier the whole parsed inventory
    is returned.

    :param objectId: Object Identifier to filter on
    :type objectId: str
    :param filters: Kwargs parameters, forwarded to ``getCapabilities``
    :type filters: dict
    :return: Collection
    :raises IndexError: When no node of the response carries the identifier
    """
    if objectId is not None:
        filters["urn"] = objectId

    capabilities = self.endpoint.getCapabilities(**filters)
    inventory = XmlCtsTextInventoryMetadata.parse(capabilities)
    if not objectId:
        return inventory

    # The inventory itself is a candidate, ahead of its descendants
    candidates = [inventory] + inventory.descendants
    matching = [node for node in candidates if node.id == objectId]
    return matching[0]
def test_title(self):
    """ Every descendant of the parsed inventory exposes a label without a language being requested """
    inventory = XmlCtsTextInventoryMetadata.parse(resource=SENECA)
    found_labels = [str(node.get_label()) for node in inventory.descendants]

    textgroup_labels = ["Seneca, Lucius Annaeus"]
    work_labels = [
        "de Ira",
        "de Vita Beata",
        "de consolatione ad Helviam",
        "de Constantia",
        "de Tranquilitate Animi",
        "de Brevitate Vitae",
        "de consolatione ad Polybium",
        "de consolatione ad Marciam",
        "de Providentia",
        "de Otio Sapientis",
    ]
    edition_labels = [
        "de Ira, Moral essays Vol 2",
        "de Vita Beata, Moral essays Vol 2",
        "de consolatione ad Helviam, Moral essays Vol 2",
        "de Constantia, Moral essays Vol 2",
        "de Tranquilitate Animi, Moral essays Vol 2",
        "de Brevitate Vitae, Moral essays Vol 2",
        "de consolatione ad Polybium, Moral essays Vol 2",
        "de consolatione ad Marciam, Moral essays Vol 2",
        "de Providentia, Moral essays Vol 2",
        "de Otio Sapientis, Moral essays Vol 2",
    ]

    # assertCountEqual compares contents regardless of order
    self.assertCountEqual(
        found_labels,
        textgroup_labels + work_labels + edition_labels,
        "Title should be computed correctly : default should be set"
    )
self.getPassage_Capabilities = RequestPatchChained( [self.getCapabilities, self.getPassage]) with open("tests/test_data/getpassageplus.xml", "r") as f: self.getPassagePlus = RequestPatch(f) with open("tests/test_data/getprevnext.xml", "r") as f: self.getPrevNext = RequestPatch(f) self.getPassage_Route = RequestPatchChained( [self.getCapabilities, self.getPassage, self.getPrevNext]) self.nemo = Nemo(resolver=NemoResource.endpoint, app=Flask(__name__)) tic = CtsTextInventoryCollection() latin = XmlCtsTextInventoryMetadata("urn:perseus:latinLit") latin.parent = tic latin.set_label("Classical Latin", "eng") farsi = XmlCtsTextInventoryMetadata("urn:perseus:farsiLit") farsi.parent = tic farsi.set_label("Farsi", "eng") gc = XmlCtsTextInventoryMetadata("urn:perseus:greekLit") gc.parent = tic gc.set_label("Ancient Greek", "eng") gc.set_label("Grec Ancien", "fre") dispatcher = CollectionDispatcher(tic) @dispatcher.inventory("urn:perseus:latinLit") def dispatchLatinLit(collection, path=None, **kwargs):
def test_dispatching_output(self):
    """ Inventories filled through URN-prefix dispatching keep their
    content once exported to CTS XML and parsed back """
    tic = CtsTextInventoryCollection()
    latin = CtsTextInventoryMetadata("urn:perseus:latinLit", parent=tic)
    latin.set_label("Classical Latin", "eng")
    farsi = CtsTextInventoryMetadata("urn:perseus:farsiLit", parent=tic)
    farsi.set_label("Farsi", "eng")
    gc = CtsTextInventoryMetadata("urn:perseus:greekLit", parent=tic)
    gc.set_label("Ancient Greek", "eng")
    gc.set_label("Grec Ancien", "fre")

    dispatcher = CollectionDispatcher(tic)

    # One rule per inventory, each keyed on the CTS namespace of the URN
    @dispatcher.inventory("urn:perseus:latinLit")
    def dispatchLatinLit(collection, path=None, **kwargs):
        return collection.id.startswith("urn:cts:latinLit:")

    @dispatcher.inventory("urn:perseus:farsiLit")
    def dispatchfFarsiLit(collection, path=None, **kwargs):
        return collection.id.startswith("urn:cts:farsiLit:")

    @dispatcher.inventory("urn:perseus:greekLit")
    def dispatchGreekLit(collection, path=None, **kwargs):
        return collection.id.startswith("urn:cts:greekLit:")

    resolver = NautilusCTSResolver(
        ["./tests/testing_data/latinLit2"],
        dispatcher=dispatcher
    )
    resolver.logger.disabled = True
    resolver.REMOVE_EMPTY = False
    resolver.parse()

    # Export everything to CTS XML before the graph gets emptied
    master_xml = resolver.getMetadata().export(Mimetypes.XML.CTS)
    latin_xml = resolver.getMetadata("urn:perseus:latinLit").export(Mimetypes.XML.CTS)
    greek_xml = resolver.getMetadata("urn:perseus:greekLit").export(Mimetypes.XML.CTS)
    farsi_xml = resolver.getMetadata("urn:perseus:farsiLit").export(Mimetypes.XML.CTS)

    # Presumably empties the shared graph so that the exports are parsed
    # back without leftovers from the resolver - TODO confirm
    get_graph().remove((None, None, None))

    latin_stuff = XmlCtsTextInventoryMetadata.parse(latin_xml)
    greek_stuff = XmlCtsTextInventoryMetadata.parse(greek_xml)
    farsi_stuff = XmlCtsTextInventoryMetadata.parse(farsi_xml)

    self.assertEqual(
        len(latin_stuff.readableDescendants), 19,
        "There should be 19 readable descendants in Latin"
    )
    self.assertIsInstance(latin_stuff, CtsTextInventoryMetadata, "should be textinventory")
    self.assertEqual(
        len(greek_stuff.readableDescendants), 6,
        "There should be 6 readable descendants in Greek [6 only in __cts__.xml]"
    )
    self.assertEqual(
        len(farsi_stuff.descendants), 0,
        "There should be nothing in FarsiLit"
    )
    # Text inventory have no label in CTS, hence nothing comes back
    self.assertEqual(
        greek_stuff.get_label("fre"), None,
        "Label should be correct"
    )

    get_graph().remove((None, None, None))

    master = XmlCtsTextInventoryMetadata.parse(master_xml)
    self.assertEqual(
        len(master.readableDescendants), 25,
        "There should be all 25 readable descendants in the master collection"
    )
def test_dispatching_latin_greek(self):
    """ Texts are routed to the Latin, Greek or Farsi inventory according
    to the CTS namespace of their URN """
    tic = CtsTextInventoryCollection()
    latin = XmlCtsTextInventoryMetadata("urn:perseus:latinLit", parent=tic)
    latin.set_label("Classical Latin", "eng")
    farsi = XmlCtsTextInventoryMetadata("urn:perseus:farsiLit", parent=tic)
    farsi.set_label("Farsi", "eng")
    gc = XmlCtsTextInventoryMetadata("urn:perseus:greekLit", parent=tic)
    gc.set_label("Ancient Greek", "eng")
    gc.set_label("Grec Ancien", "fre")

    dispatcher = CollectionDispatcher(tic)

    # One rule per inventory, each keyed on the CTS namespace of the URN
    @dispatcher.inventory("urn:perseus:latinLit")
    def dispatchLatinLit(collection, path=None, **kwargs):
        return collection.id.startswith("urn:cts:latinLit:")

    @dispatcher.inventory("urn:perseus:farsiLit")
    def dispatchfFarsiLit(collection, path=None, **kwargs):
        return collection.id.startswith("urn:cts:farsiLit:")

    @dispatcher.inventory("urn:perseus:greekLit")
    def dispatchGreekLit(collection, path=None, **kwargs):
        return collection.id.startswith("urn:cts:greekLit:")

    resolver = NautilusCTSResolver(
        ["./tests/testing_data/latinLit2"],
        dispatcher=dispatcher
    )
    resolver.logger.disabled = True
    resolver.REMOVE_EMPTY = False
    resolver.parse()

    latin_stuff = resolver.getMetadata("urn:perseus:latinLit")
    greek_stuff = resolver.getMetadata("urn:perseus:greekLit")
    farsi_stuff = resolver.getMetadata("urn:perseus:farsiLit")

    self.assertEqual(
        len(latin_stuff.readableDescendants), 19,
        "There should be 19 readable descendants in Latin"
    )
    self.assertIsInstance(latin_stuff, CtsTextInventoryMetadata, "should be textinventory")
    self.assertEqual(
        len(greek_stuff.readableDescendants), 6,
        "There should be 6 readable descendants in Greek [6 only in __cts__.xml]"
    )
    self.assertEqual(
        len(farsi_stuff.descendants), 0,
        "There should be nothing in FarsiLit"
    )
    self.assertEqual(
        str(greek_stuff.get_label("fre")), "Grec Ancien",
        "Label should be correct"
    )

    # A Greek textgroup must not be reachable through the Latin inventory
    with self.assertRaises(KeyError):
        _ = latin_stuff["urn:cts:greekLit:tlg0003"]
def setUp(self):
    """ Run the parent set-up, then parse the text held by
    ``self.getCapabilities`` into ``self.inventory`` """
    super(TestChunkers, self).setUp()
    capabilities_xml = self.getCapabilities.text
    self.inventory = XmlCtsTextInventoryMetadata.parse(resource=capabilities_xml)
we are gonna query for data in the Leipzig CTS API We are gonna query for metadata about Seneca who is represented by urn:cts:latinLit:stoa0255 To retrieve data, we are gonna make a GetMetadata query to the CTS Retriever. """ retriever = HttpCtsRetriever("http://cts.dh.uni-leipzig.de/api/cts") # We store the response (Pure XML String) response = retriever.getMetadata(objectId="urn:cts:latinLit:stoa0255") """ From here, we actually have the necessary data, we can now play with collections. TextInventory is the main collection type that is needed to parse the whole response. """ inventory = XmlCtsTextInventoryMetadata.parse(resource=response) # What we are gonna do is print the title of each descendant : for descendant in inventory.descendants: # Metadatum resolve any non-existing language ("eng", "lat") to a default one # Putting default is just making that clear print(descendant.get_label()) """ You should see in there things such as - "Seneca, Lucius Annaeus" (The TextGroup or main object) - "de Ira" (The Work object) - "de Ira, Moral essays Vol 2" (The Edition specific Title) We can now see other functions, such as the export to JSON DTS. Collections have a unique feature built in : they allow for accessing an item using its key as if it were a dictionary : The identifier of a De Ira is urn:cts:latinLit:stoa0255.stoa0110
def test_new_object(self): """ When creating an object with same urn, we should retrieve the same metadata""" TI = XmlCtsTextInventoryMetadata.parse(resource=SENECA) a = TI["urn:cts:latinLit:stoa0255.stoa012.perseus-lat2"].metadata b = (CtsTextgroupMetadata("urn:cts:latinLit:stoa0255")).metadata