Ejemplo n.º 1
0
    def __init__(self, resource, name=None, logger=None, dispatcher=None):
        """ Set up the resolver and parse ``resource`` right away.

        :param resource: Value handed unchanged to ``self.parse``
        :param name: Name of the resolver; falls back to ``"repository"`` when falsy
        :param logger: Logger to use; when falsy, ``logging.getLogger(name)`` is used
        :param dispatcher: Dispatcher to use; when ``None``, one is built around a
            "defaultTic" collection holding a single "default" inventory
        """
        if dispatcher is not None:
            self.dispatcher = dispatcher
        else:
            # No dispatcher supplied: build one around a single default inventory
            root_collection = CtsTextInventoryCollection(identifier="defaultTic")
            fallback_inventory = XmlCtsTextInventoryMetadata("default")
            fallback_inventory.parent = root_collection
            fallback_inventory.set_label("Default collection", "eng")
            self.dispatcher = CollectionDispatcher(root_collection)
        self.__inventory__ = self.dispatcher.collection

        self.name = name if name else "repository"
        # The logger is looked up with the raw ``name`` argument, not the
        # "repository" fallback stored on the instance.
        self.logger = logger if logger else logging.getLogger(name)

        self.TEXT_CLASS = type(self).TEXT_CLASS
        self.works = []

        self.parse(resource)
Ejemplo n.º 2
0
    def test_dispatching_error(self):
        """ Undispatched texts only raise when ``RAISE_ON_UNDISPATCHED`` is set.

        The dispatcher only knows how to route ``urn:cts:latinLit:`` texts and
        its registered methods are emptied beforehand, so parsing the
        ``latinLit2`` test corpus is expected to raise while the flag is
        ``True``, and must not raise ``UndispatchedTextError`` while it is
        ``False``.
        """
        tic = CtsTextInventoryCollection()
        latin = CtsTextInventoryMetadata("urn:perseus:latinLit", parent=tic)
        latin.set_label("Classical Latin", "eng")
        dispatcher = CollectionDispatcher(tic)
        # We remove default dispatcher
        dispatcher.__methods__ = []

        @dispatcher.inventory("urn:perseus:latinLit")
        def dispatchLatinLit(collection, path=None, **kwargs):
            if collection.id.startswith("urn:cts:latinLit:"):
                return True
            return False

        # RAISE_ON_UNDISPATCHED lives on the class and is therefore shared
        # with every other test: remember its value and restore it even when
        # an assertion below fails, so a failure here cannot leak into others.
        previous_flag = NautilusCTSResolver.RAISE_ON_UNDISPATCHED
        try:
            NautilusCTSResolver.RAISE_ON_UNDISPATCHED = True
            with self.assertRaises(Exception):
                resolver = NautilusCTSResolver(["./tests/testing_data/latinLit2"],
                                               dispatcher=dispatcher)
                resolver.logger.disabled = True
                resolver.parse()

            NautilusCTSResolver.RAISE_ON_UNDISPATCHED = False
            try:
                resolver = NautilusCTSResolver(["./tests/testing_data/latinLit2"],
                                               dispatcher=dispatcher)
                resolver.logger.disabled = True
                resolver.REMOVE_EMPTY = False
                resolver.parse()
            except UndispatchedTextError:
                self.fail("UndispatchedTextError should not have been raised")
        finally:
            NautilusCTSResolver.RAISE_ON_UNDISPATCHED = previous_flag
Ejemplo n.º 3
0
    def __init__(self, resource, name=None, logger=None, cache=None, dispatcher=None):
        """ Set up the resolver state; nothing is parsed here.

        :param resource: Stored as-is in ``self.__resources__``
        :param name: Name of the resolver; falls back to ``"repository"`` when falsy
        :param logger: Logger to use; when falsy, ``logging.getLogger(name)`` is used
        :param cache: Cache object; a ``NullCache`` is created when ``None``
        :param dispatcher: Dispatcher to use; when ``None``, one is built around a
            "defaultTic" collection holding a single "default" inventory
        """
        if dispatcher is not None:
            self.dispatcher = dispatcher
        else:
            # No dispatcher supplied: build one around a single default inventory
            root_collection = TextInventoryCollection(identifier="defaultTic")
            fallback_inventory = TextInventory("default")
            fallback_inventory.parent = root_collection
            fallback_inventory.set_label("Default collection", "eng")
            self.dispatcher = CollectionDispatcher(root_collection)

        self.__inventory__ = None
        self.__texts__ = []

        self.name = name if name else "repository"
        # The logger is looked up with the raw ``name`` argument, not the
        # "repository" fallback stored on the instance.
        self.logger = logger if logger else logging.getLogger(name)

        self.__cache__ = NullCache() if cache is None else cache
        self.__resources__ = resource

        # Both cache keys share the same leading components
        key_prefix = ("Nautilus", self.name, "Inventory")
        self.inventory_cache_key = _cache_key(*key_prefix, "Resources")
        self.texts_parsed_cache_key = _cache_key(*key_prefix, "TextsParsed")
Ejemplo n.º 4
0
    def test_dispatching_latin_greek(self):
        """ Texts are routed to the Latin, Farsi and Greek inventories by URN prefix """
        root = CtsTextInventoryCollection()
        latin_inventory = CtsTextInventoryMetadata("urn:perseus:latinLit", parent=root)
        latin_inventory.set_label("Classical Latin", "eng")
        farsi_inventory = CtsTextInventoryMetadata("urn:perseus:farsiLit", parent=root)
        farsi_inventory.set_label("Farsi", "eng")
        greek_inventory = CtsTextInventoryMetadata("urn:perseus:greekLit", parent=root)
        greek_inventory.set_label("Ancient Greek", "eng")
        greek_inventory.set_label("Grec Ancien", "fre")

        dispatcher = CollectionDispatcher(root)

        # One dispatching function per inventory, each keyed on a URN prefix
        @dispatcher.inventory("urn:perseus:latinLit")
        def dispatchLatinLit(collection, path=None, **kwargs):
            return collection.id.startswith("urn:cts:latinLit:")

        @dispatcher.inventory("urn:perseus:farsiLit")
        def dispatchfFarsiLit(collection, path=None, **kwargs):
            return collection.id.startswith("urn:cts:farsiLit:")

        @dispatcher.inventory("urn:perseus:greekLit")
        def dispatchGreekLit(collection, path=None, **kwargs):
            return collection.id.startswith("urn:cts:greekLit:")

        resolver = CtsCapitainsLocalResolver(
            ["./tests/testing_data/latinLit2"], dispatcher=dispatcher
        )
        latin_stuff = resolver.getMetadata("urn:perseus:latinLit")
        greek_stuff = resolver.getMetadata("urn:perseus:greekLit")
        farsi_stuff = resolver.getMetadata("urn:perseus:farsiLit")

        latin_count = len(latin_stuff.readableDescendants)
        self.assertEqual(latin_count, 20, "There should be 20 readable descendants in Latin")
        self.assertIsInstance(latin_stuff, CtsTextInventoryMetadata, "should be textinventory")

        greek_count = len(greek_stuff.readableDescendants)
        self.assertEqual(
            greek_count, 6,
            "There should be 6 readable descendants in Greek [6 only in __cts__.xml]"
        )

        farsi_count = len(farsi_stuff.descendants)
        self.assertEqual(farsi_count, 0, "There should be nothing in FarsiLit")

        french_label = str(greek_stuff.get_label("fre"))
        self.assertEqual(french_label, "Grec Ancien", "Label should be correct")

        # A Greek textgroup must not be reachable through the Latin inventory
        with self.assertRaises(KeyError):
            latin_stuff["urn:cts:greekLit:tlg0003"]
Ejemplo n.º 5
0
def make_dispatcher():
    """Build a dispatcher whose collection holds one Latin inventory.

    The inventory ``urn:perseus:latinLit`` gets an English and a French label,
    and a dispatching function is registered for it that accepts collections
    whose id starts with ``urn:cts:latinLit:``.

    :return: The configured ``CollectionDispatcher``
    """
    root = CtsTextInventoryCollection()
    latin_inventory = CtsTextInventoryMetadata("urn:perseus:latinLit", parent=root)
    for label, lang in (("Classical Latin", "eng"), ("Latin Classique", "fre")):
        latin_inventory.set_label(label, lang)

    latin_dispatcher = CollectionDispatcher(root)

    @latin_dispatcher.inventory("urn:perseus:latinLit")
    def dispatchLatinLit(collection, path=None, **kwargs):
        return collection.id.startswith("urn:cts:latinLit:")

    return latin_dispatcher
Ejemplo n.º 6
0
    def test_post_work_dispatching_active(self):
        """ Dispatching based on the citation scheme works once editions are known

        Collections having a citation level named "line" go to the poetry
        inventory; the prose inventory is passed as the dispatcher default.
        """
        root = CtsTextInventoryCollection()
        poetry = CtsTextInventoryMetadata("urn:perseus:poetry", parent=root)
        prose = CtsTextInventoryMetadata("urn:perseus:prose", parent=root)

        dispatcher = CollectionDispatcher(root, default_inventory_name="urn:perseus:prose")

        @dispatcher.inventory("urn:perseus:poetry")
        def dispatchPoetry(collection, **kwargs):
            # True as soon as one readable descendant has a "line" citation
            return any(
                citation.name == "line"
                for readable in collection.readableDescendants
                for citation in readable.citation
            )

        resolver = CtsCapitainsLocalResolver(
            ["./tests/testing_data/latinLit2"], dispatcher=dispatcher
        )

        # Export everything to CTS XML first, then re-parse from the exports
        master_xml = resolver.getMetadata().export(Mimetypes.XML.CTS)
        poetry_xml = resolver.getMetadata("urn:perseus:poetry").export(Mimetypes.XML.CTS)
        prose_xml = resolver.getMetadata("urn:perseus:prose").export(Mimetypes.XML.CTS)

        get_graph().remove((None, None, None))
        del poetry, prose
        poetry = XmlCtsTextInventoryMetadata.parse(poetry_xml)
        prose = XmlCtsTextInventoryMetadata.parse(prose_xml)

        poetry_groups = len(poetry.textgroups)
        self.assertEqual(
            poetry_groups, 3,
            "There should be 3 textgroups in Poetry (Martial, Ovid and Juvenal)"
        )
        self.assertIsInstance(poetry, CtsTextInventoryMetadata, "should be textinventory")
        prose_groups = len(prose.textgroups)
        self.assertEqual(
            prose_groups, 1,
            "There should be one textgroup in Prose (Greek texts)"
        )

        get_graph().remove((None, None, None))
        del poetry, prose
        master = XmlCtsTextInventoryMetadata.parse(master_xml)
        self.assertEqual(
            len(master.readableDescendants), 26,
            "There should be all 26 readable descendants in the master collection"
        )
Ejemplo n.º 7
0
        self.nemo = Nemo(resolver=NemoResource.endpoint, app=Flask(__name__))


# Module-level fixture: one root collection with three inventories attached
# to it (Latin, Farsi, Greek), then a dispatcher built on that root.
tic = CtsTextInventoryCollection()
# Latin inventory, labelled in English
latin = XmlCtsTextInventoryMetadata("urn:perseus:latinLit")
latin.parent = tic
latin.set_label("Classical Latin", "eng")
# Farsi inventory, labelled in English
farsi = XmlCtsTextInventoryMetadata("urn:perseus:farsiLit")
farsi.parent = tic
farsi.set_label("Farsi", "eng")
# Greek inventory, labelled in English and French
gc = XmlCtsTextInventoryMetadata("urn:perseus:greekLit")
gc.parent = tic
gc.set_label("Ancient Greek", "eng")
gc.set_label("Grec Ancien", "fre")

# The dispatching functions below are registered on this object
dispatcher = CollectionDispatcher(tic)


@dispatcher.inventory("urn:perseus:latinLit")
def dispatchLatinLit(collection, path=None, **kwargs):
    """Tell whether ``collection.id`` starts with ``urn:cts:latinLit:``.

    ``path`` and any extra keyword arguments are accepted but not used.
    """
    return collection.id.startswith("urn:cts:latinLit:")


@dispatcher.inventory("urn:perseus:farsiLit")
def dispatchfFarsiLit(collection, path=None, **kwargs):
    """Tell whether ``collection.id`` starts with ``urn:cts:farsiLit:``.

    ``path`` and any extra keyword arguments are accepted but not used.
    """
    return collection.id.startswith("urn:cts:farsiLit:")
Ejemplo n.º 8
0
def build_resolver(configuration_file):
    """Build a dispatcher, a resolver and a cache from an XML configuration file.

    The file is read for:

    - ``//corpora/corpus``: corpus directories, resolved relative to the
      configuration file;
    - ``//collections/collection``: one inventory per element, with its
      ``identifier``, its ``name`` labels and optional dispatching ``filters``
      (``id-starts-with``, ``citation-contains``, ``folder``);
    - ``//cache-folder``: folder of a file system cache; a ``SimpleCache`` is
      used when the element is absent.

    :param configuration_file: Path to the XML configuration file
    :return: Organizer, Resolver and Cache handler
    """

    # Every filter is produced by a small factory so that it captures the
    # value of the loop iteration that created it. A lambda written directly
    # inside the loop looks the loop variable up when it is *called*, so all
    # the filters of a collection would silently use the last value only.
    def prefix_filter(prefix):
        return lambda collection: str(collection.id).startswith(prefix)

    def citation_filter(citation_name):
        return lambda collection: citation_contain_filter(
            collection, citation_name)

    def directory_filter(target_directory):
        def is_in_directory(collection, path=None):
            # Without a path there is nothing to compare to the folder; the
            # original lambda raised AttributeError on its own default value.
            if path is None:
                return False
            return path.startswith(
                relative_folder(configuration_file, target_directory))
        return is_in_directory

    with open(configuration_file) as f:
        xml = etree.parse(f)

    directories = [
        # Compute path relative to the configuration files
        relative_folder(configuration_file, directory)
        for directory in xml.xpath("//corpora/corpus/text()")
    ]
    default_collection = None
    general_collection = CtsTextInventoryCollection()
    filters_to_register = []

    for collection in xml.xpath("//collections/collection"):
        identifier = collection.xpath("./identifier/text()")[0]
        if collection.get("default") == "true":
            default_collection = identifier

        current_collection = CtsTextInventoryMetadata(
            identifier, parent=general_collection)
        for name in collection.xpath("./name"):
            current_collection.set_label(name.text, name.get("lang"))

        # We look at dispatching filters in the collection
        for filters in collection.xpath("./filters"):
            # We register prefix filters
            prefix_filters = [
                prefix_filter(prefix)
                for prefix in filters.xpath("./id-starts-with/text()")
            ]

            # We register citation filters
            citation_filters = [
                citation_filter(citation_name)
                for citation_name in filters.xpath("./citation-contains/text()")
            ]

            # We register path based filters
            directory_filters = [
                directory_filter(target_directory)
                for target_directory in filters.xpath("./folder/text()")
            ]

            filters_to_register.append(
                (identifier,
                 collection_dispatcher_builder(collection, prefix_filters,
                                               citation_filters,
                                               directory_filters))
            )

    # Create the dispatcher
    organizer = CollectionDispatcher(general_collection,
                                     default_inventory_name=default_collection)

    for destination_collection, anonymous_dispatching_function in filters_to_register:
        organizer.add(anonymous_dispatching_function, destination_collection)

    # Set-up the cache folder
    # TODO: add a system for redis?
    cache = None
    # When several cache folders are declared, the last one wins
    for cache_folder in xml.xpath("//cache-folder/text()"):
        cache = FileSystemCache(cache_folder)
    if cache is None:
        cache = SimpleCache()

    resolver = NautilusCTSResolver(resource=directories,
                                   dispatcher=organizer,
                                   cache=cache)

    return organizer, resolver, cache
Ejemplo n.º 9
0
Archivo: app.py Proyecto: KASanders/ps
            str(chapter_number),  # First the reference for the URI as string
            "Pratum Spirituale " +
            str(chapter_number)  # Then the readable format for humans
        ))
    return chapters


# Setting up the collections

# Root collection; the inventory below is attached to it
general_collection = CtsTextInventoryCollection()

# Single inventory "greek_texts", labelled in English
greek_texts = CtsTextInventoryMetadata("greek_texts",
                                       parent=general_collection)
greek_texts.set_label("Greek Texts", "eng")

# NOTE(review): no inventory with the identifier "id:misc" is created in the
# lines shown here — confirm that it exists elsewhere, or that the dispatcher
# tolerates a default inventory name that is not part of the collection.
organizer = CollectionDispatcher(general_collection,
                                 default_inventory_name="id:misc")

@organizer.inventory("greek_texts")
def organize_my_meadow(collection, path=None, **kwargs):
    """Tell whether ``collection.id`` starts with ``urn:cts:greekLit``.

    ``path`` and any extra keyword arguments are accepted but not used.
    """
    return collection.id.startswith("urn:cts:greekLit")


# Application wiring, executed at import time: create the Flask app, build
# the resolver over the "corpora/meadow" folder with the dispatcher defined
# above, and parse it immediately.
flask_app = Flask("Flask Application for Nemo")
resolver = NautilusCTSResolver(["corpora/meadow"], dispatcher=organizer)
resolver.parse()

# Attach the Nautilus extension to the Flask app under the "/api" prefix
nautilus_api = FlaskNautilus(prefix="/api", app=flask_app, resolver=resolver)
Ejemplo n.º 10
0
    def test_dispatching_output(self):
        """ Dispatching results are checked after a CTS XML export and re-parse """
        root = CtsTextInventoryCollection()
        latin = CtsTextInventoryMetadata("urn:perseus:latinLit", parent=root)
        latin.set_label("Classical Latin", "eng")
        farsi = CtsTextInventoryMetadata("urn:perseus:farsiLit", parent=root)
        farsi.set_label("Farsi", "eng")
        greek = CtsTextInventoryMetadata("urn:perseus:greekLit", parent=root)
        greek.set_label("Ancient Greek", "eng")
        greek.set_label("Grec Ancien", "fre")

        dispatcher = CollectionDispatcher(root)

        # One dispatching function per inventory, each keyed on a URN prefix
        @dispatcher.inventory("urn:perseus:latinLit")
        def dispatchLatinLit(collection, path=None, **kwargs):
            return collection.id.startswith("urn:cts:latinLit:")

        @dispatcher.inventory("urn:perseus:farsiLit")
        def dispatchfFarsiLit(collection, path=None, **kwargs):
            return collection.id.startswith("urn:cts:farsiLit:")

        @dispatcher.inventory("urn:perseus:greekLit")
        def dispatchGreekLit(collection, path=None, **kwargs):
            return collection.id.startswith("urn:cts:greekLit:")

        resolver = NautilusCTSResolver(
            ["./tests/testing_data/latinLit2"], dispatcher=dispatcher
        )
        resolver.logger.disabled = True
        resolver.REMOVE_EMPTY = False
        resolver.parse()

        # Export everything to CTS XML first, then re-parse from the exports
        master_xml = resolver.getMetadata().export(Mimetypes.XML.CTS)
        latin_xml = resolver.getMetadata("urn:perseus:latinLit").export(Mimetypes.XML.CTS)
        greek_xml = resolver.getMetadata("urn:perseus:greekLit").export(Mimetypes.XML.CTS)
        farsi_xml = resolver.getMetadata("urn:perseus:farsiLit").export(Mimetypes.XML.CTS)

        get_graph().remove((None, None, None))
        latin_stuff = XmlCtsTextInventoryMetadata.parse(latin_xml)
        greek_stuff = XmlCtsTextInventoryMetadata.parse(greek_xml)
        farsi_stuff = XmlCtsTextInventoryMetadata.parse(farsi_xml)

        latin_count = len(latin_stuff.readableDescendants)
        self.assertEqual(latin_count, 19, "There should be 19 readable descendants in Latin")
        self.assertIsInstance(latin_stuff, CtsTextInventoryMetadata, "should be textinventory")

        greek_count = len(greek_stuff.readableDescendants)
        self.assertEqual(
            greek_count, 6,
            "There should be 6 readable descendants in Greek [6 only in __cts__.xml]"
        )

        farsi_count = len(farsi_stuff.descendants)
        self.assertEqual(farsi_count, 0, "There should be nothing in FarsiLit")

        # Text inventory have no label in CTS
        french_label = greek_stuff.get_label("fre")
        self.assertEqual(french_label, None, "Label should be correct")

        get_graph().remove((None, None, None))
        master = XmlCtsTextInventoryMetadata.parse(master_xml)
        self.assertEqual(
            len(master.readableDescendants), 25,
            "There should be all 25 readable descendants in the master collection"
        )
Ejemplo n.º 11
0
# Three inventories attached to ``general_collection`` (defined outside the
# lines shown here), each labelled in German, English and French.

# Inventory "formulae_collection"
formulae = CtsTextInventoryMetadata('formulae_collection',
                                    parent=general_collection)
formulae.set_label('Formulae', 'ger')
formulae.set_label('Formulae', 'eng')
formulae.set_label('Formulae', 'fre')
# Inventory "other_collection"
chartae = CtsTextInventoryMetadata('other_collection',
                                   parent=general_collection)
chartae.set_label('Andere Texte', 'ger')
chartae.set_label('Other Texts', 'eng')
chartae.set_label('Autres Textes', 'fre')
# Inventory "lexicon_entries"
elexicon = CtsTextInventoryMetadata('lexicon_entries',
                                    parent=general_collection)
elexicon.set_label('Lexikon', 'ger')
elexicon.set_label('Lexicon', 'eng')
elexicon.set_label('Lexique', 'fre')
# "other_collection" is passed as the default inventory name; presumably it
# receives what no dispatching function claims — verify against
# CollectionDispatcher.
organizer = CollectionDispatcher(general_collection,
                                 default_inventory_name='other_collection')


@organizer.inventory("formulae_collection")
def organize_formulae(collection, path=None, **kwargs):
    """Tell whether ``collection.id`` starts with ``urn:cts:formulae:andecavensis``.

    ``path`` and any extra keyword arguments are accepted but not used.
    """
    return collection.id.startswith('urn:cts:formulae:andecavensis')


@organizer.inventory("lexicon_entries")
def organize_elexicon(collection, path=None, **kwargs):
    """Tell whether ``collection.id`` starts with ``urn:cts:formulae:elexicon``.

    ``path`` and any extra keyword arguments are accepted but not used.
    """
    return collection.id.startswith('urn:cts:formulae:elexicon')