def __init__(self, resource, name=None, logger=None, dispatcher=None):
    """ Build the resolver and parse ``resource`` straight away

    :param resource: Folders handed over to ``self.parse``
    :param name: Name of the repository, "repository" when empty
    :param logger: Logging object, looked up with ``logging.getLogger(name)`` when empty
    :param dispatcher: Dispatcher to use; a default one is built when None
    """
    if dispatcher is not None:
        self.dispatcher = dispatcher
    else:
        # No dispatcher given: a single "default" inventory receives everything
        root_collection = CtsTextInventoryCollection(identifier="defaultTic")
        default_inventory = XmlCtsTextInventoryMetadata("default")
        default_inventory.parent = root_collection
        default_inventory.set_label("Default collection", "eng")
        self.dispatcher = CollectionDispatcher(root_collection)

    self.__inventory__ = self.dispatcher.collection

    # The logger is resolved with the name as passed in, before the
    # "repository" fallback is applied to self.name
    self.logger = logger if logger else logging.getLogger(name)
    self.name = name if name else "repository"

    self.TEXT_CLASS = type(self).TEXT_CLASS
    self.works = []
    self.parse(resource)
def test_dispatching_error(self):
    """ Undispatched texts raise only when RAISE_ON_UNDISPATCHED is switched on """
    tic = CtsTextInventoryCollection()
    latin = CtsTextInventoryMetadata("urn:perseus:latinLit", parent=tic)
    latin.set_label("Classical Latin", "eng")
    dispatcher = CollectionDispatcher(tic)
    # We remove default dispatcher
    dispatcher.__methods__ = []

    @dispatcher.inventory("urn:perseus:latinLit")
    def dispatchLatinLit(collection, path=None, **kwargs):
        if collection.id.startswith("urn:cts:latinLit:"):
            return True
        return False

    NautilusCTSResolver.RAISE_ON_UNDISPATCHED = True
    try:
        with self.assertRaises(Exception):
            resolver = NautilusCTSResolver(["./tests/testing_data/latinLit2"], dispatcher=dispatcher)
            resolver.logger.disabled = True
            resolver.parse()
    finally:
        # The flag lives on the class: reset it even when the assertion above
        # fails, otherwise it would stay enabled for every later test
        NautilusCTSResolver.RAISE_ON_UNDISPATCHED = False

    try:
        resolver = NautilusCTSResolver(["./tests/testing_data/latinLit2"], dispatcher=dispatcher)
        resolver.logger.disabled = True
        resolver.REMOVE_EMPTY = False
        resolver.parse()
    except UndispatchedTextError:
        self.fail("UndispatchedTextError should not have been raised")
def __init__(self, resource, name=None, logger=None, cache=None, dispatcher=None):
    """ Build the cached resolver; nothing is parsed at this point

    :param resource: Folders to parse later on, stored as given
    :param name: Name of the repository, "repository" when empty; part of the cache keys
    :param logger: Logging object, looked up with ``logging.getLogger(name)`` when empty
    :param cache: Cache object, a ``NullCache`` when None
    :param dispatcher: Dispatcher to use; a default one is built when None
    """
    if dispatcher is not None:
        self.dispatcher = dispatcher
    else:
        # No dispatcher given: a single "default" inventory receives everything
        root_collection = TextInventoryCollection(identifier="defaultTic")
        default_inventory = TextInventory("default")
        default_inventory.parent = root_collection
        default_inventory.set_label("Default collection", "eng")
        self.dispatcher = CollectionDispatcher(root_collection)

    self.__inventory__ = None
    self.__texts__ = []

    # The logger is resolved with the name as passed in, before the
    # "repository" fallback is applied to self.name
    self.logger = logger if logger else logging.getLogger(name)
    self.name = name if name else "repository"

    self.__cache__ = NullCache() if cache is None else cache
    self.__resources__ = resource

    # Both keys are derived from self.name, hence computed after the fallback
    self.inventory_cache_key = _cache_key(
        "Nautilus", self.name, "Inventory", "Resources"
    )
    self.texts_parsed_cache_key = _cache_key(
        "Nautilus", self.name, "Inventory", "TextsParsed"
    )
def test_dispatching_latin_greek(self):
    """ Textgroups are dispatched to the Latin, Farsi and Greek inventories by URN prefix """
    tic = CtsTextInventoryCollection()

    latin_inventory = CtsTextInventoryMetadata("urn:perseus:latinLit", parent=tic)
    latin_inventory.set_label("Classical Latin", "eng")

    farsi_inventory = CtsTextInventoryMetadata("urn:perseus:farsiLit", parent=tic)
    farsi_inventory.set_label("Farsi", "eng")

    greek_inventory = CtsTextInventoryMetadata("urn:perseus:greekLit", parent=tic)
    greek_inventory.set_label("Ancient Greek", "eng")
    greek_inventory.set_label("Grec Ancien", "fre")

    dispatcher = CollectionDispatcher(tic)

    @dispatcher.inventory("urn:perseus:latinLit")
    def dispatchLatinLit(collection, path=None, **kwargs):
        return collection.id.startswith("urn:cts:latinLit:")

    @dispatcher.inventory("urn:perseus:farsiLit")
    def dispatchfFarsiLit(collection, path=None, **kwargs):
        return collection.id.startswith("urn:cts:farsiLit:")

    @dispatcher.inventory("urn:perseus:greekLit")
    def dispatchGreekLit(collection, path=None, **kwargs):
        return collection.id.startswith("urn:cts:greekLit:")

    resolver = CtsCapitainsLocalResolver(
        ["./tests/testing_data/latinLit2"],
        dispatcher=dispatcher
    )
    latin_stuff = resolver.getMetadata("urn:perseus:latinLit")
    greek_stuff = resolver.getMetadata("urn:perseus:greekLit")
    farsi_stuff = resolver.getMetadata("urn:perseus:farsiLit")

    latin_count = len(latin_stuff.readableDescendants)
    self.assertEqual(latin_count, 20, "There should be 20 readable descendants in Latin")
    self.assertIsInstance(latin_stuff, CtsTextInventoryMetadata, "should be textinventory")

    greek_count = len(greek_stuff.readableDescendants)
    self.assertEqual(
        greek_count, 6,
        "There should be 6 readable descendants in Greek [6 only in __cts__.xml]"
    )

    farsi_count = len(farsi_stuff.descendants)
    self.assertEqual(farsi_count, 0, "There should be nothing in FarsiLit")

    french_label = str(greek_stuff.get_label("fre"))
    self.assertEqual(french_label, "Grec Ancien", "Label should be correct")

    # A Greek textgroup must not be reachable through the Latin inventory
    with self.assertRaises(KeyError):
        _ = latin_stuff["urn:cts:greekLit:tlg0003"]
def make_dispatcher():
    """ Build a dispatcher holding one Latin inventory fed by URN prefix

    :return: Dispatcher sending "urn:cts:latinLit:" collections to "urn:perseus:latinLit"
    """
    tic = CtsTextInventoryCollection()
    latin_inventory = CtsTextInventoryMetadata("urn:perseus:latinLit", parent=tic)
    latin_inventory.set_label("Classical Latin", "eng")
    latin_inventory.set_label("Latin Classique", "fre")

    dispatcher = CollectionDispatcher(tic)

    @dispatcher.inventory("urn:perseus:latinLit")
    def dispatchLatinLit(collection, path=None, **kwargs):
        return collection.id.startswith("urn:cts:latinLit:")

    return dispatcher
def test_post_work_dispatching_active(self):
    """ Dispatching happens after editions are known: texts are routed on their citation scheme """
    tic = CtsTextInventoryCollection()
    poetry = CtsTextInventoryMetadata("urn:perseus:poetry", parent=tic)
    prose = CtsTextInventoryMetadata("urn:perseus:prose", parent=tic)
    dispatcher = CollectionDispatcher(tic, default_inventory_name="urn:perseus:prose")

    @dispatcher.inventory("urn:perseus:poetry")
    def dispatchPoetry(collection, **kwargs):
        # Poetry as soon as one readable descendant cites by "line"
        return any(
            citation.name == "line"
            for readable in collection.readableDescendants
            for citation in readable.citation
        )

    resolver = CtsCapitainsLocalResolver(
        ["./tests/testing_data/latinLit2"],
        dispatcher=dispatcher
    )

    # Export everything first, the graph is wiped right after
    master_xml = resolver.getMetadata().export(Mimetypes.XML.CTS)
    poetry_xml = resolver.getMetadata("urn:perseus:poetry").export(Mimetypes.XML.CTS)
    prose_xml = resolver.getMetadata("urn:perseus:prose").export(Mimetypes.XML.CTS)

    get_graph().remove((None, None, None))
    del poetry, prose

    parsed_poetry = XmlCtsTextInventoryMetadata.parse(poetry_xml)
    parsed_prose = XmlCtsTextInventoryMetadata.parse(prose_xml)

    poetry_groups = len(parsed_poetry.textgroups)
    self.assertEqual(
        poetry_groups, 3,
        "There should be 3 textgroups in Poetry (Martial, Ovid and Juvenal)"
    )
    self.assertIsInstance(parsed_poetry, CtsTextInventoryMetadata, "should be textinventory")

    prose_groups = len(parsed_prose.textgroups)
    self.assertEqual(
        prose_groups, 1,
        "There should be one textgroup in Prose (Greek texts)"
    )

    get_graph().remove((None, None, None))
    del parsed_poetry, parsed_prose

    master = XmlCtsTextInventoryMetadata.parse(master_xml)
    self.assertEqual(
        len(master.readableDescendants), 26,
        "There should be all 26 readable descendants in the master collection"
    )
self.nemo = Nemo(resolver=NemoResource.endpoint, app=Flask(__name__)) tic = CtsTextInventoryCollection() latin = XmlCtsTextInventoryMetadata("urn:perseus:latinLit") latin.parent = tic latin.set_label("Classical Latin", "eng") farsi = XmlCtsTextInventoryMetadata("urn:perseus:farsiLit") farsi.parent = tic farsi.set_label("Farsi", "eng") gc = XmlCtsTextInventoryMetadata("urn:perseus:greekLit") gc.parent = tic gc.set_label("Ancient Greek", "eng") gc.set_label("Grec Ancien", "fre") dispatcher = CollectionDispatcher(tic) @dispatcher.inventory("urn:perseus:latinLit") def dispatchLatinLit(collection, path=None, **kwargs): if collection.id.startswith("urn:cts:latinLit:"): return True return False @dispatcher.inventory("urn:perseus:farsiLit") def dispatchfFarsiLit(collection, path=None, **kwargs): if collection.id.startswith("urn:cts:farsiLit:"): return True return False
def build_resolver(configuration_file):
    """ Build the dispatcher, the resolver and the cache described by an XML configuration file

    The file is read through the following XPaths:

    - ``//corpora/corpus/text()``: corpus folders, made relative to the configuration file
    - ``//collections/collection``: one inventory per node (``./identifier``, ``./name``
      labels with a ``lang`` attribute, ``default="true"`` marks the default inventory)
    - ``./filters`` in a collection: ``id-starts-with``, ``citation-contains`` and
      ``folder`` dispatching filters
    - ``//cache-folder/text()``: folder of a file system cache; the last one found wins

    :param configuration_file: Path to the XML configuration file
    :return: Organizer, Resolver and Cache handler
    """
    # Each factory binds its value at creation time. Lambdas written directly
    # in the loops below would all share the loop variable and therefore all
    # test the last value iterated over.
    def _prefix_filter(prefix):
        return lambda collection: str(collection.id).startswith(prefix)

    def _citation_filter(citation_name):
        return lambda collection: citation_contain_filter(collection, citation_name)

    def _directory_filter(target_directory):
        return lambda collection, path=None: path.startswith(
            relative_folder(configuration_file, target_directory))

    with open(configuration_file) as f:
        xml = etree.parse(f)

    directories = [
        # Compute path relative to the configuration files
        relative_folder(configuration_file, directory)
        for directory in xml.xpath("//corpora/corpus/text()")
    ]
    default_collection = None
    general_collection = CtsTextInventoryCollection()
    filters_to_register = []

    for collection in xml.xpath("//collections/collection"):
        identifier = collection.xpath("./identifier/text()")[0]
        if collection.get("default") == "true":
            default_collection = identifier

        current_collection = CtsTextInventoryMetadata(
            identifier,
            parent=general_collection)
        for name in collection.xpath("./name"):
            current_collection.set_label(name.text, name.get("lang"))

        # We look at dispatching filters in the collection
        for filters in collection.xpath("./filters"):
            # We register prefix filters
            prefix_filters = [
                _prefix_filter(prefix)
                for prefix in filters.xpath("./id-starts-with/text()")
            ]

            # We register citation filters
            citation_filters = [
                _citation_filter(citation_name)
                for citation_name in filters.xpath("./citation-contains/text()")
            ]

            # We register path based filters
            directory_filters = [
                _directory_filter(target_directory)
                for target_directory in filters.xpath("./folder/text()")
            ]

            filters_to_register += [
                (identifier,
                 collection_dispatcher_builder(collection, prefix_filters, citation_filters, directory_filters))
            ]

    # Create the dispatcher
    organizer = CollectionDispatcher(general_collection, default_inventory_name=default_collection)
    for destination_collection, anonymous_dispatching_function in filters_to_register:
        organizer.add(anonymous_dispatching_function, destination_collection)

    # Set-up the cache folder
    # ToDO : Add a system for redis ?
    cache = None
    for cache_folder in xml.xpath("//cache-folder/text()"):
        cache = FileSystemCache(cache_folder)
    if cache is None:
        cache = SimpleCache()

    resolver = NautilusCTSResolver(resource=directories, dispatcher=organizer, cache=cache)
    return organizer, resolver, cache
str(chapter_number), # First the reference for the URI as string "Pratum Spirituale " + str(chapter_number) # Then the readable format for humans )) return chapters # Setting up the collections general_collection = CtsTextInventoryCollection() greek_texts = CtsTextInventoryMetadata("greek_texts", parent=general_collection) greek_texts.set_label("Greek Texts", "eng") organizer = CollectionDispatcher(general_collection, default_inventory_name="id:misc") @organizer.inventory("greek_texts") def organize_my_meadow(collection, path=None, **kwargs): if collection.id.startswith("urn:cts:greekLit"): return True return False flask_app = Flask("Flask Application for Nemo") resolver = NautilusCTSResolver(["corpora/meadow"], dispatcher=organizer) resolver.parse() nautilus_api = FlaskNautilus(prefix="/api", app=flask_app, resolver=resolver)
def test_dispatching_output(self):
    """ Dispatched inventories survive a CTS XML export followed by a fresh parse """
    tic = CtsTextInventoryCollection()

    latin_inventory = CtsTextInventoryMetadata("urn:perseus:latinLit", parent=tic)
    latin_inventory.set_label("Classical Latin", "eng")

    farsi_inventory = CtsTextInventoryMetadata("urn:perseus:farsiLit", parent=tic)
    farsi_inventory.set_label("Farsi", "eng")

    greek_inventory = CtsTextInventoryMetadata("urn:perseus:greekLit", parent=tic)
    greek_inventory.set_label("Ancient Greek", "eng")
    greek_inventory.set_label("Grec Ancien", "fre")

    dispatcher = CollectionDispatcher(tic)

    @dispatcher.inventory("urn:perseus:latinLit")
    def dispatchLatinLit(collection, path=None, **kwargs):
        return collection.id.startswith("urn:cts:latinLit:")

    @dispatcher.inventory("urn:perseus:farsiLit")
    def dispatchfFarsiLit(collection, path=None, **kwargs):
        return collection.id.startswith("urn:cts:farsiLit:")

    @dispatcher.inventory("urn:perseus:greekLit")
    def dispatchGreekLit(collection, path=None, **kwargs):
        return collection.id.startswith("urn:cts:greekLit:")

    resolver = NautilusCTSResolver(["./tests/testing_data/latinLit2"], dispatcher=dispatcher)
    resolver.logger.disabled = True
    resolver.REMOVE_EMPTY = False
    resolver.parse()

    # Export everything first, the graph is wiped right after
    master_xml = resolver.getMetadata().export(Mimetypes.XML.CTS)
    latin_xml = resolver.getMetadata("urn:perseus:latinLit").export(Mimetypes.XML.CTS)
    greek_xml = resolver.getMetadata("urn:perseus:greekLit").export(Mimetypes.XML.CTS)
    farsi_xml = resolver.getMetadata("urn:perseus:farsiLit").export(Mimetypes.XML.CTS)

    get_graph().remove((None, None, None))

    latin_stuff = XmlCtsTextInventoryMetadata.parse(latin_xml)
    greek_stuff = XmlCtsTextInventoryMetadata.parse(greek_xml)
    farsi_stuff = XmlCtsTextInventoryMetadata.parse(farsi_xml)

    latin_count = len(latin_stuff.readableDescendants)
    self.assertEqual(latin_count, 19, "There should be 19 readable descendants in Latin")
    self.assertIsInstance(latin_stuff, CtsTextInventoryMetadata, "should be textinventory")

    greek_count = len(greek_stuff.readableDescendants)
    self.assertEqual(
        greek_count, 6,
        "There should be 6 readable descendants in Greek [6 only in __cts__.xml]"
    )

    farsi_count = len(farsi_stuff.descendants)
    self.assertEqual(farsi_count, 0, "There should be nothing in FarsiLit")

    # Text inventory have no label in CTS
    french_label = greek_stuff.get_label("fre")
    self.assertEqual(french_label, None, "Label should be correct")

    get_graph().remove((None, None, None))

    master = XmlCtsTextInventoryMetadata.parse(master_xml)
    self.assertEqual(
        len(master.readableDescendants), 25,
        "There should be all 25 readable descendants in the master collection"
    )
class CtsCapitainsLocalResolver(Resolver):
    """ XML Folder Based resolver. CtsTextMetadata and metadata resolver based on local directories

    The constructor parses the given folders immediately (it ends with ``self.parse(resource)``).

    :param resource: Resource should be a list of folders retaining data as Capitains Guidelines Repositories
    :type resource: [str]
    :param name: Key used to differentiate Repository and thus enabling different repo to be used
    :type name: str
    :param logger: Logging object
    :type logger: logging
    :param dispatcher: Dispatcher routing textgroups to inventories; a default one is built when None

    :cvar TEXT_CLASS: CtsTextMetadata Class [not instantiated] to be used to parse Texts. Can be changed to support Cache for example
    :type TEXT_CLASS: class
    :cvar DEFAULT_PAGE: Default Page to show
    :cvar PER_PAGE: Tuple representing the minimal number of texts returned, the default number and the maximum number of texts returned
    :cvar RAISE_ON_UNDISPATCHED: When True, ``parse`` re-raises UndispatchedTextError instead of only logging it
    """
    TEXT_CLASS = CapitainsCtsText
    DEFAULT_PAGE = 1
    PER_PAGE = (1, 10, 100)  # Min, Default, Max
    RAISE_ON_UNDISPATCHED = False

    @property
    def inventory(self):
        """ Root collection of the resolver (the dispatcher's collection, set in ``__init__``) """
        return self.__inventory__

    @property
    def texts(self):
        """ Readable descendants of the inventory """
        return self.inventory.readableDescendants

    def __init__(self, resource, name=None, logger=None, dispatcher=None):
        """ Initiate the XMLResolver and parse ``resource``

        :param resource: List of folders, passed to ``self.parse``
        :param name: Name of the repository, "repository" when empty
        :param logger: Logging object, ``logging.getLogger(name)`` when empty
        :param dispatcher: Dispatcher to use; a default one with a single "default" inventory is built when None
        """
        if dispatcher is None:
            inventory_collection = CtsTextInventoryCollection(identifier="defaultTic")
            ti = XmlCtsTextInventoryMetadata("default")
            ti.parent = inventory_collection
            ti.set_label("Default collection", "eng")
            self.dispatcher = CollectionDispatcher(inventory_collection)
        else:
            self.dispatcher = dispatcher

        self.__inventory__ = self.dispatcher.collection
        self.name = name

        self.logger = logger
        if not logger:
            # Uses the name as given: the "repository" fallback is only applied afterwards
            self.logger = logging.getLogger(name)

        if not name:
            self.name = "repository"

        self.TEXT_CLASS = type(self).TEXT_CLASS
        self.works = []

        self.parse(resource)

    def xmlparse(self, file):
        """ Parse a XML file

        :param file: Opened File
        :return: Tree
        """
        return xmlparser(file)

    def parse(self, resource):
        """ Parse a list of directories and read them into a collection

        For each folder, every ``data/*/__cts__.xml`` is read as a textgroup and every
        ``data/*/*/__cts__.xml`` below it as a work. Each text of a work gets a ``path``
        and, when that file exists, a citation scheme read from the file itself.
        Errors are logged and parsing goes on with the next file; only
        UndispatchedTextError is re-raised, and only when RAISE_ON_UNDISPATCHED is True.

        :param resource: List of folders
        :return: An inventory resource and a list of CtsTextMetadata metadata-objects
        """
        for folder in resource:
            textgroups = glob("{base_folder}/data/*/__cts__.xml".format(base_folder=folder))
            for __cts__ in textgroups:
                try:
                    with io.open(__cts__) as __xml__:
                        textgroup = XmlCtsTextgroupMetadata.parse(
                            resource=__xml__
                        )
                        tg_urn = str(textgroup.urn)
                    # A textgroup already known is merged, a new one goes through the dispatcher
                    if tg_urn in self.inventory:
                        self.inventory[tg_urn].update(textgroup)
                    else:
                        self.dispatcher.dispatch(textgroup, path=__cts__)

                    for __subcts__ in glob("{parent}/*/__cts__.xml".format(parent=os.path.dirname(__cts__))):
                        with io.open(__subcts__) as __xml__:
                            work = XmlCtsWorkMetadata.parse(
                                resource=__xml__,
                                parent=self.inventory[tg_urn]
                            )
                            work_urn = str(work.urn)
                            if work_urn in self.inventory[tg_urn].works:
                                self.inventory[work_urn].update(work)

                        for __textkey__ in work.texts:
                            __text__ = self.inventory[__textkey__]
                            # Expected file: <work folder>/<textgroup>.<work>.<version>.xml
                            __text__.path = "{directory}/{textgroup}.{work}.{version}.xml".format(
                                directory=os.path.dirname(__subcts__),
                                textgroup=__text__.urn.textgroup,
                                work=__text__.urn.work,
                                version=__text__.urn.version
                            )
                            if os.path.isfile(__text__.path):
                                try:
                                    with io.open(__text__.path) as f:
                                        t = CapitainsCtsText(resource=self.xmlparse(f))
                                        # Rebuild the citation chain from the last level up:
                                        # each new level takes the previously built one as child,
                                        # so cites[-1] is the top level once the loop is over
                                        cites = list()
                                        for cite in [c for c in t.citation][::-1]:
                                            if len(cites) >= 1:
                                                cites.append(XmlCtsCitation(
                                                    xpath=cite.xpath.replace("'", '"'),
                                                    scope=cite.scope.replace("'", '"'),
                                                    name=cite.name,
                                                    child=cites[-1]
                                                ))
                                            else:
                                                cites.append(XmlCtsCitation(
                                                    xpath=cite.xpath.replace("'", '"'),
                                                    scope=cite.scope.replace("'", '"'),
                                                    name=cite.name
                                                ))
                                    del t
                                    __text__.citation = cites[-1]
                                    self.logger.info("%s has been parsed ", __text__.path)
                                    if __text__.citation.isEmpty() is False:
                                        # NOTE(review): appends to the value returned by the `texts`
                                        # property (inventory.readableDescendants); whether this has a
                                        # lasting effect depends on that attribute - confirm
                                        self.texts.append(__text__)
                                    else:
                                        self.logger.error("%s has no passages", __text__.path)
                                except Exception:
                                    # Any failure while reading the text file is logged, not raised
                                    self.logger.error(
                                        "%s does not accept parsing at some level (most probably citation) ",
                                        __text__.path
                                    )
                            else:
                                self.logger.error("%s is not present", __text__.path)
                except UndispatchedTextError as E:
                    self.logger.error("Error dispatching %s ", __cts__)
                    if self.RAISE_ON_UNDISPATCHED is True:
                        raise E
                except Exception as E:
                    # Any other failure on this textgroup is logged and the loop goes on
                    self.logger.error("Error parsing %s ", __cts__)

        return self.inventory, self.texts

    def __getText__(self, urn):
        """ Returns a CtsTextMetadata object

        A URN of length 4 is cut at work level and replaced by the first known edition
        whose id starts with it; any length other than 4 or 5 is rejected.

        :param urn: URN of a text to retrieve
        :type urn: str, URN
        :return: Textual resource and metadata; the resource is None when the file is missing
        :rtype: (CapitainsCtsText, InventoryText)
        :raises UnknownObjectError: When no edition matches a length-4 URN
        :raises InvalidURN: When the URN length is neither 4 nor 5
        """
        if not isinstance(urn, URN):
            urn = URN(urn)
        if len(urn) != 5:
            if len(urn) == 4:
                # `reference` is extracted here but not used afterwards
                urn, reference = urn.upTo(URN.WORK), str(urn.reference)
                urn = [
                    t.id
                    for t in self.texts
                    if t.id.startswith(str(urn)) and isinstance(t, XmlCtsEditionMetadata)
                ]
                if len(urn) > 0:
                    urn = URN(urn[0])
                else:
                    raise UnknownObjectError
            else:
                raise InvalidURN

        text = self.inventory[str(urn)]

        if os.path.isfile(text.path):
            with io.open(text.path) as __xml__:
                resource = self.TEXT_CLASS(urn=urn, resource=self.xmlparse(__xml__))
        else:
            # Missing file: only a warning, callers receive None as resource
            resource = None
            self.logger.warning('The file {} is mentioned in the metadata but does not exist'.format(text.path))

        return resource, text

    def __getTextMetadata__(self,
                            urn=None, page=None, limit=None,
                            lang=None, category=None, pagination=False
                            ):
        """ Retrieve a slice of the inventory filtered by given arguments

        :param urn: Partial URN to use to filter out resources
        :type urn: str
        :param page: Page to show
        :type page: int
        :param limit: Item Per Page
        :type limit: int
        :param lang: Language to filter on
        :type lang: str
        :param category: Type of elements to show; only "edition", "translation" and "commentary" filter anything
        :type category: str
        :param pagination: Activate pagination
        :type pagination: bool
        :return: ([Matches], Page, Count); without pagination Page is 0 and Count the number of matches
        :rtype: ([CtsTextMetadata], int, int)
        """
        __PART = None
        if urn is not None:
            if isinstance(urn, URN):
                _urn = urn
            else:
                _urn = URN(urn)
            # The length of the URN selects the level texts are truncated to before comparison
            __PART = [None, None, URN.NAMESPACE, URN.TEXTGROUP, URN.WORK, URN.VERSION, URN.COMPLETE][len(_urn)]

        matches = [
            text
            for text in self.texts
            if (lang is None or (lang is not None and lang == text.lang)) and
            (urn is None or (urn is not None and text.urn.upTo(__PART) == urn)) and
            (text.citation is not None) and
            (
                category not in ["edition", "translation", "commentary"] or
                (category in ["edition", "translation", "commentary"] and category.lower() == text.subtype.lower())
            )
        ]
        if pagination:
            start_index, end_index, page, count = type(self).pagination(page, limit, len(matches))
        else:
            start_index, end_index, page, count = None, None, 0, len(matches)

        return matches[start_index:end_index], page, count

    @staticmethod
    def pagination(page, limit, length):
        """ Help for pagination

        A missing page or limit falls back to DEFAULT_PAGE and PER_PAGE[1]; a limit outside
        PER_PAGE[0]..PER_PAGE[2] is also replaced by PER_PAGE[1].

        :param page: Provided Page
        :param limit: Number of item to show
        :param length: Length of the list to paginate
        :return: (Start Index, End Index, Page Number, Item Count). Page Number is ``page``
            as it was passed in (None included), unless the start index is beyond ``length``,
            in which case it is recomputed from ``length`` and ``limit``
        """
        realpage = page
        page = page or CtsCapitainsLocalResolver.DEFAULT_PAGE
        limit = limit or CtsCapitainsLocalResolver.PER_PAGE[1]

        if limit < CtsCapitainsLocalResolver.PER_PAGE[0] or limit > CtsCapitainsLocalResolver.PER_PAGE[2]:
            limit = CtsCapitainsLocalResolver.PER_PAGE[1]

        # From here on `page` is the index of the first item, no longer a page number
        page = (page - 1) * limit

        if page > length:
            realpage = int(ceil(length / limit))
            page = limit * (realpage - 1)
            count = length - 1
        elif limit - 1 + page < length:
            count = limit - 1 + page
        else:
            count = length - 1

        # `count` holds the index of the last item, hence the +1 for the slice end
        return page, count + 1, realpage, count - page + 1

    def getMetadata(self, objectId=None, **filters):
        """ Request metadata about a text or a collection

        Without identifier the whole inventory is returned; an identifier found among the
        inventory's children is returned as is. Otherwise a new inventory is built from
        the texts matching the identifier and the object is looked up in it.

        :param objectId: Object Identifier to filter on
        :type objectId: str
        :param filters: Kwargs parameters. Not read by this implementation
        :type filters: dict
        :return: Collection
        """
        if objectId is None:
            return self.inventory
        elif objectId in self.inventory.children.keys():
            return self.inventory[objectId]
        texts, _, _ = self.__getTextMetadata__(urn=objectId)

        # We store inventory names and if there is only one we recreate the inventory
        inv_names = [text.parent.parent.parent.id for text in texts]
        if len(set(inv_names)) == 1:
            inventory = XmlCtsTextInventoryMetadata(name=inv_names[0])
        else:
            inventory = XmlCtsTextInventoryMetadata()

        # For each text we found using the filter
        for text in texts:
            tg_urn = str(text.parent.parent.urn)
            wk_urn = str(text.parent.urn)
            txt_urn = str(text.urn)
            # If we need to generate a textgroup object
            if tg_urn not in inventory.textgroups:
                XmlCtsTextgroupMetadata(urn=tg_urn, parent=inventory)
            # If we need to generate a work object
            if wk_urn not in inventory.textgroups[tg_urn].works:
                XmlCtsWorkMetadata(urn=wk_urn, parent=inventory.textgroups[tg_urn])

            # Copy the text under the new work, keeping its citation scheme
            if isinstance(text, XmlCtsEditionMetadata):
                x = XmlCtsEditionMetadata(urn=txt_urn, parent=inventory.textgroups[tg_urn].works[wk_urn])
                x.citation = text.citation
            elif isinstance(text, XmlCtsTranslationMetadata):
                x = XmlCtsTranslationMetadata(urn=txt_urn, parent=inventory.textgroups[tg_urn].works[wk_urn], lang=text.lang)
                x.citation = text.citation
            elif isinstance(text, XmlCtsCommentaryMetadata):
                x = XmlCtsCommentaryMetadata(urn=txt_urn, parent=inventory.textgroups[tg_urn].works[wk_urn], lang=text.lang)
                x.citation = text.citation

        return inventory[objectId]

    def getTextualNode(self, textId, subreference=None, prevnext=False, metadata=False):
        """ Retrieve a text node from the API

        :param textId: CtsTextMetadata Identifier
        :type textId: str
        :param subreference: CapitainsCtsPassage Reference
        :type subreference: str
        :param prevnext: Retrieve graph representing previous and next passage (not read by this implementation)
        :type prevnext: boolean
        :param metadata: Retrieve metadata about the passage and the text
        :type metadata: boolean
        :return: CapitainsCtsPassage
        :rtype: CapitainsCtsPassage
        """
        text, text_metadata = self.__getText__(textId)
        if subreference is not None:
            subreference = Reference(subreference)
        passage = text.getTextualNode(subreference)
        if metadata:
            passage.set_metadata_from_collection(text_metadata)
        return passage

    def getSiblings(self, textId, subreference):
        """ Retrieve the siblings of a textual node

        :param textId: CtsTextMetadata Identifier
        :type textId: str
        :param subreference: CapitainsCtsPassage Reference
        :type subreference: str
        :return: Tuple of references
        :rtype: (str, str)
        """
        text, inventory = self.__getText__(textId)
        passage = text.getTextualNode(Reference(subreference))
        return passage.siblingsId

    def getReffs(self, textId, level=1, subreference=None):
        """ Retrieve the references available in a text or in one of its passages

        :param textId: CtsTextMetadata Identifier
        :type textId: str
        :param level: Depth for retrieval
        :type level: int
        :param subreference: CapitainsCtsPassage Reference
        :type subreference: str
        :return: List of references
        :rtype: [str]
        """
        passage, inventory = self.__getText__(textId)
        if subreference:
            passage = passage.getTextualNode(subreference)
        # `subreference` is passed on again even when the passage was already narrowed to it
        return passage.getReffs(level=level, subreference=subreference)
# One inventory per kind of text, each labelled in German, English and French
formulae = CtsTextInventoryMetadata("formulae_collection", parent=general_collection)
formulae.set_label("Formulae", "ger")
formulae.set_label("Formulae", "eng")
formulae.set_label("Formulae", "fre")

chartae = CtsTextInventoryMetadata("other_collection", parent=general_collection)
chartae.set_label("Andere Texte", "ger")
chartae.set_label("Other Texts", "eng")
chartae.set_label("Autres Textes", "fre")

elexicon = CtsTextInventoryMetadata("lexicon_entries", parent=general_collection)
elexicon.set_label("Lexikon", "ger")
elexicon.set_label("Lexicon", "eng")
elexicon.set_label("Lexique", "fre")

# "other_collection" is the default inventory name given to the dispatcher
organizer = CollectionDispatcher(
    general_collection,
    default_inventory_name="other_collection"
)


@organizer.inventory("formulae_collection")
def organize_formulae(collection, path=None, **kwargs):
    """ Accept collections whose id starts with the Andecavensis prefix """
    return collection.id.startswith("urn:cts:formulae:andecavensis")


@organizer.inventory("lexicon_entries")
def organize_elexicon(collection, path=None, **kwargs):
    """ Accept collections whose id starts with the e-lexicon prefix """
    return collection.id.startswith("urn:cts:formulae:elexicon")
class NautilusCTSResolver(CtsCapitainsLocalResolver):
    """ XML Folder Based resolver backed by a cache.

    Unlike its parent, the constructor does not parse anything: the inventory is
    parsed (or read from the cache) the first time the ``inventory`` property is used.

    :param resource: Resource should be a list of folders retaining data as Capitains Guidelines Repositories
    :type resource: [str]
    :param name: Key used to make cache key
    :param cache: Cache object to be used for the inventory
    :type cache: BaseCache
    :param logger: Logging object
    :type logger: logging.logger
    :param dispatcher: Dispatcher routing textgroups to inventories; a default one is built when None

    :cvar TIMEOUT: Timeout passed to ``cache.set`` by ``get_or`` and by the ``inventory`` setter
    :cvar REMOVE_EMPTY: When True, ``parse`` removes descendants without readable descendants
    :cvar CACHE_FULL_TEI: When True, parsed XML trees and text objects are cached as well
    :ivar inventory_cache_key: Werkzeug Cache key to get or set cache for the TextInventory
    :ivar texts_parsed_cache_key: Werkzeug Cache key prefix to get or set cache for parsed texts objects
    :ivar texts: List of Text Metadata objects

    .. warning :: This resolver does not support inventories
    """
    TIMEOUT = 0
    # NOTE(review): class attribute carrying the class's own name; it is not read
    # anywhere in this class - confirm whether it is needed
    NautilusCTSResolver = False
    REMOVE_EMPTY = True
    CACHE_FULL_TEI = False

    def __init__(self, resource, name=None, logger=None, cache=None, dispatcher=None):
        """ Initiate the XMLResolver

        The parent constructor is not called, so nothing is parsed here.

        :param resource: List of folders, stored for a later ``parse``
        :param name: Name of the repository, "repository" when empty; part of the cache keys
        :param logger: Logging object, ``logging.getLogger(name)`` when empty
        :param cache: Cache object, a ``NullCache`` when None
        :param dispatcher: Dispatcher to use; a default one with a single "default" inventory is built when None
        """
        if dispatcher is None:
            inventory_collection = TextInventoryCollection(identifier="defaultTic")
            ti = TextInventory("default")
            ti.parent = inventory_collection
            ti.set_label("Default collection", "eng")
            self.dispatcher = CollectionDispatcher(inventory_collection)
        else:
            self.dispatcher = dispatcher
        self.__inventory__ = None
        self.__texts__ = []
        self.name = name

        self.logger = logger
        if not logger:
            # Uses the name as given: the "repository" fallback is only applied afterwards
            self.logger = logging.getLogger(name)

        if not name:
            self.name = "repository"

        if cache is None:
            cache = NullCache()

        self.__cache__ = cache
        self.__resources__ = resource

        self.inventory_cache_key = _cache_key("Nautilus", self.name, "Inventory", "Resources")
        self.texts_parsed_cache_key = _cache_key("Nautilus", self.name, "Inventory", "TextsParsed")

    @property
    def cache(self):
        """ Cache object given to (or created by) the constructor """
        return self.__cache__

    @property
    def inventory(self):
        """ Inventory of the resolver

        When no inventory is loaded yet, or the loaded one has no readable descendant,
        it is fetched from the cache or built by ``parse`` through ``get_or``.
        """
        if self.__inventory__ is None or len(self.__inventory__.readableDescendants) == 0:
            self.__inventory__ = self.get_or(self.inventory_cache_key, self.parse, self.__resources__)
            set_graph(self.__inventory__.graph)
        return self.__inventory__

    @inventory.setter
    def inventory(self, value):
        """ Store the inventory on the instance and in the cache """
        self.__inventory__ = value
        self.cache.set(self.inventory_cache_key, value, self.TIMEOUT)

    @property
    def texts(self):
        """ List of text known

        :rtype: list
        """
        return self.inventory.readableDescendants

    def xmlparse(self, file):
        """ Parse a XML file

        With CACHE_FULL_TEI the tree is cached under a key built from ``file.name``.

        :param file: Opened File
        :return: Tree
        """
        if self.CACHE_FULL_TEI is True:
            return self.get_or(
                _cache_key("Nautilus", self.name, "File", "Tree", file.name),
                super(NautilusCTSResolver, self).xmlparse, file
            )
        return super(NautilusCTSResolver, self).xmlparse(file)

    def get_or(self, cache_key, callback, *args, **kwargs):
        """ Get or set the cache using callback and arguments

        :param cache_key: Cache key for given resource
        :param callback: Callback if object does not exist
        :param args: Ordered Argument for the callback
        :param kwargs: Keyword argument for the callback
        :return: Cached value when there is one, output of the callback otherwise
        :raises UnknownCollection: When the callback raises MyCapytain's UnknownCollection
        """
        cached = self.cache.get(cache_key)
        # A cached None cannot be told apart from a miss: the callback runs again
        if cached is not None:
            return cached
        else:
            try:
                output = callback(*args, **kwargs)
            except MyCapytain.errors.UnknownCollection as E:
                # Translate the library error into the UnknownCollection in scope here
                raise UnknownCollection(str(E))
            except Exception as E:
                raise E
            self.cache.set(cache_key, output, self.TIMEOUT)
            return output

    def read(self, identifier, path=None):
        """ Read a text object given an identifier and a path

        With CACHE_FULL_TEI the text object is looked up in the cache first and
        stored in it after being read from ``path``.

        :param identifier: Identifier of the text
        :param path: Path of the text files
        :return: Text
        """
        if self.CACHE_FULL_TEI is True:
            o = self.cache.get(_cache_key(self.texts_parsed_cache_key, identifier))
            if o is not None:
                return o
            else:
                with open(path) as f:
                    o = Text(urn=identifier, resource=self.xmlparse(f))
                    # NOTE(review): no timeout is passed here while get_or passes
                    # self.TIMEOUT - confirm this is intended
                    self.cache.set(_cache_key(self.texts_parsed_cache_key, identifier), o)
        else:
            with open(path) as f:
                o = Text(urn=identifier, resource=self.xmlparse(f))
        return o

    def parse(self, resource=None):
        """ Parse a list of directories and store the result as the inventory

        For each folder, every ``data/*/__cts__.xml`` is read as a textgroup and every
        ``data/*/*/__cts__.xml`` below it as a work. Texts whose file is missing, cannot
        be parsed or has an empty citation are removed from the collection afterwards;
        with REMOVE_EMPTY, descendants left without readable descendants are removed too.
        Errors are logged and parsing goes on; only an undispatched text raises, and only
        when RAISE_ON_UNDISPATCHED is True.

        :param resource: List of folders, the ones given to the constructor when None
        :return: The inventory
        :raises UndispatchedTextError: When a textgroup cannot be dispatched and RAISE_ON_UNDISPATCHED is True
        """
        if resource is None:
            resource = self.__resources__

        # Keys of the texts to drop once every folder has been read
        removing = []
        for folder in resource:
            textgroups = glob("{base_folder}/data/*/__cts__.xml".format(base_folder=folder))
            for __cts__ in textgroups:
                try:
                    with open(__cts__) as __xml__:
                        textgroup = TextGroup.parse(
                            resource=__xml__
                        )
                        tg_urn = str(textgroup.urn)
                    # A textgroup already known is merged, a new one goes through the dispatcher
                    if tg_urn in self.dispatcher.collection:
                        self.dispatcher.collection[tg_urn].update(textgroup)
                    else:
                        self.dispatcher.dispatch(textgroup, path=__cts__)

                    for __subcts__ in glob("{parent}/*/__cts__.xml".format(parent=os.path.dirname(__cts__))):
                        with open(__subcts__) as __xml__:
                            work = Work.parse(
                                resource=__xml__,
                                parent=self.dispatcher.collection[tg_urn]
                            )
                            work_urn = str(work.urn)
                            if work_urn in self.dispatcher.collection[tg_urn].works:
                                self.dispatcher.collection[work_urn].update(work)

                        for __textkey__ in work.texts:
                            __text__ = self.dispatcher.collection[__textkey__]
                            # Expected file: <work folder>/<textgroup>.<work>.<version>.xml
                            __text__.path = "{directory}/{textgroup}.{work}.{version}.xml".format(
                                directory=os.path.dirname(__subcts__),
                                textgroup=__text__.urn.textgroup,
                                work=__text__.urn.work,
                                version=__text__.urn.version
                            )
                            if os.path.isfile(__text__.path):
                                try:
                                    t = self.read(__textkey__, __text__.path)
                                    # Rebuild the citation chain from the last level up:
                                    # each new level takes the previously built one as child,
                                    # so cites[-1] is the top level once the loop is over
                                    cites = list()
                                    for cite in [c for c in t.citation][::-1]:
                                        if len(cites) >= 1:
                                            cites.append(Citation(
                                                xpath=cite.xpath.replace("'", '"'),
                                                scope=cite.scope.replace("'", '"'),
                                                name=cite.name,
                                                child=cites[-1]
                                            ))
                                        else:
                                            cites.append(Citation(
                                                xpath=cite.xpath.replace("'", '"'),
                                                scope=cite.scope.replace("'", '"'),
                                                name=cite.name
                                            ))
                                    del t
                                    __text__.citation = cites[-1]
                                    self.logger.info("%s has been parsed ", __text__.path)
                                    if __text__.citation.isEmpty() is True:
                                        removing.append(__textkey__)
                                        self.logger.error("%s has no passages", __text__.path)
                                except Exception as E:
                                    # Any failure while reading the text is logged and the text dropped
                                    removing.append(__textkey__)
                                    self.logger.error(
                                        "%s does not accept parsing at some level (most probably citation) ",
                                        __text__.path
                                    )
                            else:
                                removing.append(__textkey__)
                                self.logger.error("%s is not present", __text__.path)
                except MyCapytain.errors.UndispatchedTextError as E:
                    self.logger.error("Error dispatching %s ", __cts__)
                    if self.RAISE_ON_UNDISPATCHED is True:
                        raise UndispatchedTextError(E)
                except Exception as E:
                    # Any other failure on this textgroup is logged and the loop goes on
                    self.logger.error("Error parsing %s ", __cts__)

        for removable in removing:
            del self.dispatcher.collection[removable]

        removing = []

        if self.REMOVE_EMPTY is True:
            # Find resource with no readable descendants
            for item in self.dispatcher.collection.descendants:
                if item.readable != True and len(item.readableDescendants) == 0:
                    removing.append(item.id)

            # Remove them only if they have not been removed before
            for removable in removing:
                if removable in self.dispatcher.collection:
                    del self.dispatcher.collection[removable]

        # Goes through the property setter, which also writes the inventory to the cache
        self.inventory = self.dispatcher.collection
        return self.inventory

    def __getText__(self, urn):
        """ Returns a PrototypeText object

        A URN of length 4 is cut at work level and replaced by the first known edition
        whose id starts with it; any length other than 4 or 5 is rejected.

        :param urn: URN of a text to retrieve
        :type urn: str, URN
        :return: Textual resource and metadata
        :rtype: (Text, InventoryText)
        :raises UnknownCollection: When no edition matches, the text is unknown or its file is missing
        :raises InvalidURN: When the URN length is neither 4 nor 5
        """
        if not isinstance(urn, URN):
            urn = URN(urn)
        if len(urn) != 5:
            if len(urn) == 4:
                # `reference` is extracted here but not used afterwards
                urn, reference = urn.upTo(URN.WORK), str(urn.reference)
                urn = [
                    t.id
                    for t in self.texts
                    if t.id.startswith(str(urn)) and isinstance(t, Edition)
                ]
                if len(urn) > 0:
                    urn = URN(urn[0])
                else:
                    raise UnknownCollection
            else:
                raise InvalidURN

        try:
            text = self.inventory[str(urn)]
        except MyCapytain.errors.UnknownCollection as E:
            # Translate the library error into the UnknownCollection in scope here
            raise UnknownCollection(str(E))
        except Exception as E:
            raise E

        if os.path.isfile(text.path):
            resource = self.read(identifier=urn, path=text.path)
        else:
            # The assignment has no effect: the raise below leaves the method
            resource = None
            raise UnknownCollection("File matching %s does not exist" % text.path)

        return resource, text

    def getMetadata(self, objectId=None, **filters):
        """ Request metadata about a text or a collection, through the cache

        :param objectId: Object Identifier to filter on
        :type objectId: str
        :param filters: Kwargs parameters. Not forwarded to the parent implementation
        :type filters: dict
        :return: Collection
        """
        return self.get_or(
            _cache_key("Nautilus", self.name, "GetMetadata", objectId),
            super(NautilusCTSResolver, self).getMetadata, objectId
        )

    def getReffs(self, textId, level=1, subreference=None):
        """ Retrieve the references available in a text or in one of its passages, through the cache

        :param textId: PrototypeText Identifier
        :type textId: str
        :param level: Depth for retrieval
        :type level: int
        :param subreference: Passage Reference
        :type subreference: str
        :return: List of references
        :rtype: [str]
        """
        return self.get_or(
            self.__cache_key_reffs__(textId, level, subreference),
            super(NautilusCTSResolver, self).getReffs, textId, level, subreference
        )

    def __cache_key_reffs__(self, textId, level, subreference):
        """ Cache key used by ``getReffs`` for the given arguments """
        return _cache_key("Nautilus", self.name, "getReffs", textId, level, subreference)

    def getTextualNode(self, textId, subreference=None, prevnext=False, metadata=False):
        """ Retrieve a text node from the API, through the cache

        :param textId: PrototypeText Identifier
        :type textId: str
        :param subreference: Passage Reference
        :type subreference: str
        :param prevnext: Retrieve graph representing previous and next passage (not read by this implementation)
        :type prevnext: boolean
        :param metadata: Retrieve metadata about the passage and the text (not read by this
            implementation: metadata are always attached)
        :type metadata: boolean
        :return: Passage
        :rtype: Passage
        """
        # The key only depends on the text and the reference, not on prevnext or metadata
        key = _cache_key("Nautilus", self.name, "Passage", textId, subreference)
        o = self.cache.get(key)
        if o is not None:
            return o
        text, text_metadata = self.__getText__(textId)
        if subreference is not None:
            subreference = Reference(subreference)

        passage = text.getTextualNode(subreference)
        passage.set_metadata_from_collection(text_metadata)
        # NOTE(review): no timeout is passed here while get_or passes self.TIMEOUT - confirm
        self.cache.set(key, passage)
        return passage

    def getSiblings(self, textId, subreference):
        """ Retrieve the siblings of a textual node, through the cache

        :param textId: PrototypeText Identifier
        :type textId: str
        :param subreference: Passage Reference
        :type subreference: str
        :return: Tuple of references
        :rtype: (str, str)
        """
        key = _cache_key("Nautilus", self.name, "Siblings", textId, subreference)
        o = self.cache.get(key)
        if o is not None:
            return o
        passage = self.getTextualNode(textId, subreference, prevnext=True)
        siblings = passage.siblingsId
        # NOTE(review): no timeout is passed here while get_or passes self.TIMEOUT - confirm
        self.cache.set(key, siblings)
        return siblings