예제 #1
0
 def test_get_capabilities_nocites(self):
     """ Check that the first element returned by __getTextMetadata__ for
     stoa0045.stoa008.perseus-lat2 (latinLit test data) is empty """
     resolver = NautilusCTSResolver(["./tests/testing_data/latinLit"])
     result = resolver.__getTextMetadata__(
         urn="urn:cts:latinLit:stoa0045.stoa008.perseus-lat2")
     matching_texts = result[0]
     self.assertEqual(len(matching_texts), 0,
                      "Texts without citations were ignored")
예제 #2
0
    def test_dispatching_error(self):
        """ Check resolver behaviour for both values of RAISE_ON_UNDISPATCHED

        With the flag set to True, building and parsing the resolver must
        raise. With the flag set to False, no UndispatchedTextError may escape.
        The flag is a class attribute, so its previous value is restored
        through a cleanup, even when an assertion fails halfway.
        """
        tic = CtsTextInventoryCollection()
        latin = CtsTextInventoryMetadata("urn:perseus:latinLit", parent=tic)
        latin.set_label("Classical Latin", "eng")
        dispatcher = CollectionDispatcher(tic)
        # We remove default dispatcher
        dispatcher.__methods__ = []

        @dispatcher.inventory("urn:perseus:latinLit")
        def dispatchLatinLit(collection, path=None, **kwargs):
            if collection.id.startswith("urn:cts:latinLit:"):
                return True
            return False

        # Class-level state: restore whatever was there before this test ran,
        # so that a failure below cannot leak the flag into other tests.
        previous_flag = getattr(NautilusCTSResolver, "RAISE_ON_UNDISPATCHED",
                                False)
        self.addCleanup(setattr, NautilusCTSResolver, "RAISE_ON_UNDISPATCHED",
                        previous_flag)

        NautilusCTSResolver.RAISE_ON_UNDISPATCHED = True
        with self.assertRaises(Exception):
            resolver = NautilusCTSResolver(["./tests/testing_data/latinLit2"],
                                           dispatcher=dispatcher)
            resolver.logger.disabled = True
            resolver.parse()

        NautilusCTSResolver.RAISE_ON_UNDISPATCHED = False
        try:
            resolver = NautilusCTSResolver(["./tests/testing_data/latinLit2"],
                                           dispatcher=dispatcher)
            resolver.logger.disabled = True
            resolver.REMOVE_EMPTY = False
            resolver.parse()
        except UndispatchedTextError:
            self.fail("UndispatchedTextError should not have been raised")
예제 #3
0
 def test_get_shared_textgroup_cross_repo(self):
     """ Check that two versions of phi1294.phi002 are both found when the
     resolver is built over the farsiLit and latinLit2 directories """
     Repository = NautilusCTSResolver([
         "./tests/testing_data/farsiLit", "./tests/testing_data/latinLit2"
     ])
     self.assertIsNotNone(
         Repository.__getText__(
             "urn:cts:latinLit:phi1294.phi002.perseus-lat2"),
         "We should find perseus-lat2")
     # The failure message now names the version that is actually looked up
     self.assertIsNotNone(
         Repository.__getText__("urn:cts:latinLit:phi1294.phi002.opp-lat2"),
         "We should find opp-lat2")
예제 #4
0
 def test_text_resource(self):
     """ Fetch a text through __getText__ and run further queries on it """
     resolver = NautilusCTSResolver(["./tests/testing_data/farsiLit"])
     text, _metadata = resolver.__getText__(
         "urn:cts:farsiLit:hafez.divan.perseus-eng1")

     citation_length = len(text.citation)
     self.assertEqual(citation_length, 4,
                      "Object has a citation property of length 4")

     node = text.getTextualNode(Reference("1.1.1.1"))
     plain_text = node.export(output=Mimetypes.PLAINTEXT)
     self.assertEqual(
         plain_text,
         "Ho ! Saki, pass around and offer the bowl (of love for God) : ### ",
         "It should be possible to retrieve text")
예제 #5
0
    def setUp(self):
        """ Run the cache-building script, then attach a resolver to the
        resulting cache and make any call to its parse() raise """
        exit_code = call([python, "./tests/cts/run_cache.py"], cwd=cwd)
        if exit_code != 0:
            raise Exception("Creating cache failed")

        self.cache = FileSystemCache(subprocess_cache_dir)
        self.resolver = NautilusCTSResolver(
            resource=subprocess_repository,
            cache=self.cache,
        )
        self.resolver.logger.disabled = True

        def forbidden_parse(*args, **kwargs):
            # Tests of this class expect the data to come from the cache
            raise Exception("Parse should not be called")

        self.resolver.parse = forbidden_parse
예제 #6
0
 def test_pagination(self):
     """ Check the tuples returned by NautilusCTSResolver.pagination for a
     series of (page, limit, total) style arguments """
     array_limits = " Pagination should return Array limits "
     # (positional arguments, expected tuple, failure message)
     cases = (
         ((2, 30, 150), (30, 60, 2, 30), array_limits),
         ((4, 40, 150), (120, 150, 4, 30), array_limits),
         ((5, 40, 150), (120, 150, 4, 30), array_limits),
         ((5, 100, 150), (100, 150, 2, 50),
          " Pagination should give corrected page and correct count"),
         ((5, 110, 150), (40, 50, 5, 10),
          " Pagination should use default limit (10) when getting too much "),
     )
     for arguments, expected, message in cases:
         self.assertEqual(NautilusCTSResolver.pagination(*arguments),
                          expected, message)
예제 #7
0
    def setUp(self):
        """ Build resolver, Flask application, HTTP cache, Nautilus extension
        and test client, then make any call to the resolver's parse() raise """
        # Full creation of app
        self.cache = FileSystemCache(subprocess_cache_dir, default_timeout=0)
        self.resolver = NautilusCTSResolver(
            subprocess_repository,
            dispatcher=make_dispatcher(),
            cache=self.cache,
        )
        self.__app__ = Flask("Nautilus")
        # NOTE(review): self.app is read below while only self.__app__ is
        # assigned here - presumably the class exposes it through a property;
        # confirm against the class definition.
        http_cache_config = {
            'CACHE_TYPE': "filesystem",
            "CACHE_DIR": http_cache_dir,
            "CACHE_DEFAULT_TIMEOUT": 0,
        }
        self.http_cache = Cache(self.app, config=http_cache_config)
        self.nautilus = FlaskNautilus(
            app=self.app,
            prefix="/api",
            name="nautilus",
            resolver=self.resolver,
            flask_caching=self.http_cache,
        )

        self.test_client = self.app.test_client()

        # Option to ensure cache works
        self.former_parse = self.resolver.parse

        def forbidden_parse(*args, **kwargs):
            raise self.ParsingCalled("Parse should not be called")

        self.resolver.parse = forbidden_parse
예제 #8
0
class TestCache(TestCase):
    """ Metadata retrieval through a resolver attached to a cache that was
    built beforehand by ./tests/cts/run_cache.py """

    def setUp(self):
        """ Run the cache-building script and attach a non-parsing resolver """
        exit_code = call([python, "./tests/cts/run_cache.py"], cwd=cwd)
        if exit_code != 0:
            raise Exception("Creating cache failed")

        self.cache = FileSystemCache(subprocess_cache_dir)
        self.resolver = NautilusCTSResolver(
            resource=subprocess_repository,
            cache=self.cache,
        )
        self.resolver.logger.disabled = True

        def forbidden_parse(*args, **kwargs):
            raise Exception("Parse should not be called")

        self.resolver.parse = forbidden_parse

    def tearDown(self):
        """ Clear the cache after each test """
        self.cache.clear()

    def _check_inventory(self, inventory, readable_count):
        """ Assert the CTS XML export mentions the English Divān and that the
        number of readable descendants equals readable_count """
        exported = inventory.export(Mimetypes.XML.CTS)
        self.assertIn("Divān (English)", exported, "Metadata are there")
        self.assertEqual(len(inventory.readableDescendants), readable_count)

    def test_argumentless_metadata(self):
        """ getMetadata() without argument """
        self._check_inventory(self.resolver.getMetadata(), 4)

    def test_first_child(self):
        """ getMetadata() on the first child key of the root metadata """
        children = self.resolver.getMetadata().children
        first_key = list(children.keys())[0]
        self._check_inventory(self.resolver.getMetadata(first_key), 4)

    def test_textgroup(self):
        """ Found to fail originally because of different GRAPH constant used across modules
        (one from the cache vs. the world) """
        self._check_inventory(
            self.resolver.getMetadata("urn:cts:farsiLit:hafez"), 3)
예제 #9
0
 def test_restricted_cors(self):
     """ Check that per-route CORS settings end up in the response headers """
     app = Flask("Nautilus")
     allowed_methods = {
         "r_cts": "OPTIONS",
         "r_dts_collection": "OPTIONS",
         "r_dts_collections": "OPTIONS",
     }
     allowed_origins = {
         "r_cts": "foo.bar",
         "r_dts_collection": "*",
         "r_dts_collections": "*",
     }
     FlaskNautilus(
         app=app,
         resolver=NautilusCTSResolver(["./tests/test_data/latinLit"]),
         access_Control_Allow_Methods=allowed_methods,
         access_Control_Allow_Origin=allowed_origins
     )
     client = app.test_client()
     endpoint = "/cts?request=GetCapabilities"

     # One request per header check, as two independent responses
     origin_header = client.get(endpoint).headers["Access-Control-Allow-Origin"]
     self.assertEqual(origin_header, "foo.bar")
     methods_header = client.get(endpoint).headers["Access-Control-Allow-Methods"]
     self.assertEqual(methods_header, "OPTIONS")
예제 #10
0
    def create_app(self):
        """ Create the application from TestConfig, attach a NemoFormulae
        instance to it (kept as self.nemo) and return the application """
        app = create_app(TestConfig)

        corpus_resolver = NautilusCTSResolver(app.config['CORPUS_FOLDERS'])
        template_folders = {
            "main": "templates/main",
            "errors": "templates/errors",
            "auth": "templates/auth",
            "search": "templates/search",
        }
        self.nemo = NemoFormulae(
            name="InstanceNemo",
            resolver=corpus_resolver,
            app=app,
            base_url="",
            templates=template_folders,
            css=["assets/css/theme.css"],
            js=["assets/js/empty.js"],
            static_folder="./assets/",
        )

        return app
예제 #11
0
    def test_dispatching_latin_greek(self):
        """ Parse latinLit2 with a dispatcher holding Latin, Farsi and Greek
        inventories and check what each inventory contains afterwards """
        tic = CtsTextInventoryCollection()

        latin_inventory = XmlCtsTextInventoryMetadata("urn:perseus:latinLit",
                                                      parent=tic)
        latin_inventory.set_label("Classical Latin", "eng")

        farsi_inventory = XmlCtsTextInventoryMetadata("urn:perseus:farsiLit",
                                                      parent=tic)
        farsi_inventory.set_label("Farsi", "eng")

        greek_inventory = XmlCtsTextInventoryMetadata("urn:perseus:greekLit",
                                                      parent=tic)
        greek_inventory.set_label("Ancient Greek", "eng")
        greek_inventory.set_label("Grec Ancien", "fre")

        dispatcher = CollectionDispatcher(tic)

        # Each rule claims the collections whose id carries its CTS namespace
        @dispatcher.inventory("urn:perseus:latinLit")
        def dispatchLatinLit(collection, path=None, **kwargs):
            return True if collection.id.startswith("urn:cts:latinLit:") else False

        @dispatcher.inventory("urn:perseus:farsiLit")
        def dispatchfFarsiLit(collection, path=None, **kwargs):
            return True if collection.id.startswith("urn:cts:farsiLit:") else False

        @dispatcher.inventory("urn:perseus:greekLit")
        def dispatchGreekLit(collection, path=None, **kwargs):
            return True if collection.id.startswith("urn:cts:greekLit:") else False

        resolver = NautilusCTSResolver(
            ["./tests/testing_data/latinLit2"],
            dispatcher=dispatcher,
        )
        resolver.logger.disabled = True
        resolver.REMOVE_EMPTY = False
        resolver.parse()

        latin_stuff = resolver.getMetadata("urn:perseus:latinLit")
        greek_stuff = resolver.getMetadata("urn:perseus:greekLit")
        farsi_stuff = resolver.getMetadata("urn:perseus:farsiLit")

        latin_readable = len(latin_stuff.readableDescendants)
        self.assertEqual(latin_readable, 19,
                         "There should be 19 readable descendants in Latin")
        self.assertIsInstance(latin_stuff, CtsTextInventoryMetadata,
                              "should be textinventory")

        greek_readable = len(greek_stuff.readableDescendants)
        self.assertEqual(
            greek_readable, 6,
            "There should be 6 readable descendants in Greek [6 only in __cts__.xml]"
        )

        farsi_total = len(farsi_stuff.descendants)
        self.assertEqual(farsi_total, 0,
                         "There should be nothing in FarsiLit")

        french_label = str(greek_stuff.get_label("fre"))
        self.assertEqual(french_label, "Grec Ancien",
                         "Label should be correct")

        # A Greek textgroup must not be reachable from the Latin inventory
        with self.assertRaises(KeyError):
            _ = latin_stuff["urn:cts:greekLit:tlg0003"]
예제 #12
0
파일: cmd.py 프로젝트: rillian/Nautilus
def _commandline(repositories,
                 port=8000,
                 host="127.0.0.1",
                 debug=False,
                 cache=None,
                 cache_path="./cache",
                 redis=None):
    """ Run a CTS API from command line.

    .. warning:: This function should not be used in the production context

    :param repositories: Resource handed to the resolver (``resource=``)
    :param port: Port the server listens on
    :param host: Address the server binds to
    :param debug: When true, INFO logging is enabled and Flask's own server \
    is used (``app.run``); otherwise the app is served through Tornado
    :param cache: Name of the cache backend: "redis" or "filesystem"; any \
    other value selects a NullCache
    :param cache_path: Directory given to FileSystemCache when ``cache`` is \
    "filesystem"
    :param redis: Argument given to RedisCache when ``cache`` is "redis"
    :return: Nothing is returned explicitly; the call serves until stopped
    """

    if cache == "redis":
        nautilus_cache = RedisCache(redis)
    elif cache == "filesystem":
        nautilus_cache = FileSystemCache(cache_path)
    else:
        nautilus_cache = NullCache()

    app = Flask("Nautilus")
    if debug:
        app.logger.setLevel(logging.INFO)

    # The selected backend must reach the resolver: it used to be built and
    # then dropped, which made the cache options silently ineffective.
    resolver = NautilusCTSResolver(resource=repositories, cache=nautilus_cache)
    nautilus = FlaskNautilus(app=app, resolver=resolver)
    nautilus.resolver.parse()
    if debug:
        app.run(debug=debug, port=port, host=host)
    else:
        app.debug = debug
        http_server = HTTPServer(WSGIContainer(app))
        http_server.bind(port=port, address=host)
        # 0 is passed as the number of processes to start
        http_server.start(0)
        IOLoop.current().start()
예제 #13
0
 def test_resource_parser(self):
     """ Check that the farsiLit textgroup and work are present in the
     inventory right after the resolver is created """
     resolver = NautilusCTSResolver(["./tests/testing_data/farsiLit"])
     hafez_id = "urn:cts:farsiLit:hafez"
     divan_id = "urn:cts:farsiLit:hafez.divan"

     self.assertEqual(resolver.inventory[hafez_id].urn, URN(hafez_id),
                      "Hafez is found")
     self.assertEqual(len(resolver.inventory[hafez_id].works), 1,
                      "Hafez has one child")
     self.assertEqual(resolver.inventory[divan_id].urn, URN(divan_id),
                      "Divan is found")
     self.assertEqual(len(resolver.inventory[divan_id].texts), 3,
                      "Divan has 3 children")
def make_resolver(directories=None, cache_directory=None):
    """ Build a NautilusCTSResolver over corpus directories

    :param directories: Resource given to the resolver; when None, the result
        of globbing "data/raw/corpora/**/**" is used
    :param cache_directory: When truthy, a FileSystemCache is created on this
        directory, cleared, and given to the resolver
    :return: The resolver
    """
    resource = (glob.glob("data/raw/corpora/**/**")
                if directories is None else directories)

    # Only CRITICAL records of this module's logger are let through
    quiet_logger = logging.getLogger(__name__)
    quiet_logger.setLevel(logging.CRITICAL)

    options = {"resource": resource, "logger": quiet_logger}
    if cache_directory:
        file_cache = FileSystemCache(cache_directory)
        print("Clearing cache")
        file_cache.clear()
        options["cache"] = file_cache

    return NautilusCTSResolver(**options)
예제 #15
0
 def setUp(self):
     """ Set up a dummy application with a manager """
     # Start every test from a cleared file system cache
     self.cache_manager = FileSystemCache("cache_dir")
     self.cache_manager.clear()

     flask_app = Flask("Nautilus")
     self.resolver = NautilusCTSResolver(
         ["./tests/test_data/latinLit"],
         cache=self.cache_manager,
         logger=logger,
     )
     self.nautilus = FlaskNautilus(
         app=flask_app,
         resolver=self.resolver,
         flask_caching=Cache(config={'CACHE_TYPE': 'filesystem'}),
         logger=logger,
     )
     self.resolver.logger.disabled = True
     self.manager = FlaskNautilusManager(self.resolver, self.nautilus)
예제 #16
0
    def setUp(self):
        """ Build a Flask/Nautilus application and an HTTP CTS resolver whose
        retriever is rerouted to the Flask test client

        Every call made through the retriever is recorded in
        ``self.parent.called``.
        """
        # No RedisCache is created here anymore: the instance was never used
        # and only made the set up depend on a Redis client.
        app = Flask("Nautilus")
        self.cache = Cache(config={'CACHE_TYPE': 'simple'})
        self.nautilus = FlaskNautilus(
            app=app,
            resolver=NautilusCTSResolver(["./tests/test_data/latinLit"]),
            flask_caching=self.cache,
            logger=logger
        )
        app.debug = True
        self.cache.init_app(app)
        self.app = app.test_client()
        self.parent = HttpCtsRetriever("/cts")
        self.resolver = HttpCtsResolver(endpoint=self.parent)
        logassert.setup(self, self.nautilus.logger.name)
        self.nautilus.logger.disabled = True

        def call(this, parameters=None):
            """ Call an endpoint given the parameters

            :param parameters: Dictionary of parameters
            :type parameters: dict
            :rtype: text
            """
            # None instead of a shared mutable default dictionary
            if parameters is None:
                parameters = {}

            # Parameters set to None are dropped, the others are stringified
            parameters = {
                key: str(parameters[key]) for key in parameters if parameters[key] is not None
            }
            if this.inventory is not None and "inv" not in parameters:
                parameters["inv"] = this.inventory

            query_string = "&".join(
                "{}={}".format(key, value) for key, value in parameters.items()
            )
            request = self.app.get("/cts?{}".format(query_string))
            self.parent.called.append(parameters)
            return request.data.decode()

        self.parent.called = []
        self.parent.call = lambda x: call(self.parent, x)
예제 #17
0
 def test_missing_text_resource(self):
     """ Test to make sure an UnknownCollection error is raised when a text is missing """
     resolver = NautilusCTSResolver(["./tests/test_data/missing_text"])
     missing_urn = "urn:cts:farsiLit:hafez.divan.missing_text"
     with self.assertRaises(UnknownCollection):
         _text, _metadata = resolver.__getText__(missing_urn)
예제 #18
0
    latin.set_label("Latin Classique", "fre")
    dispatcher = CollectionDispatcher(tic)

    @dispatcher.inventory("urn:perseus:latinLit")
    def dispatchLatinLit(collection, path=None, **kwargs):
        if collection.id.startswith("urn:cts:latinLit:"):
            return True
        return False

    return dispatcher


# Cache given to the resolver (default_timeout=0)
nautilus_cache = FileSystemCache(subprocess_cache_dir, default_timeout=0)

resolver = NautilusCTSResolver(
    subprocess_repository,
    dispatcher=make_dispatcher(),
    cache=nautilus_cache,
)

# Flask application with a file system HTTP cache, exposing Nautilus on /api
app = Flask("Nautilus")
http_cache = Cache(
    app,
    config=dict(
        CACHE_TYPE="filesystem",
        CACHE_DIR=http_cache_dir,
        CACHE_DEFAULT_TIMEOUT=0,
    ),
)
nautilus = FlaskNautilus(
    app=app,
    prefix="/api",
    name="nautilus",
    resolver=resolver,
    flask_caching=http_cache,
)
예제 #19
0
    'urn:cts:greekLit:tlg0020.tlg002.alpheios-text-grc1',
    'urn:cts:greekLit:tlg0020.tlg001.alpheios-text-grc1',
    'urn:cts:greekLit:tlg0011.tlg003.alpheios-text-grc1',
]

excluded_editions = ['urn:cts:greekLit:tlg0011.tlg003.perseus-grc2']


def _is_served_edition(text):
    """ Tell whether a text passes the resolver filter

    Only editions are kept. An edition listed in excluded_editions is always
    rejected; otherwise it is kept when its namespace, textgroup, work or
    version is in the corresponding allow-list.
    """
    if text.__subtype__ != 'edition':
        return False
    if text.urn.upTo(URN.VERSION) in excluded_editions:
        return False
    if str(text.urn.namespace) in unfiltered_collections:
        return True
    if str(text.urn.textgroup) in allowed_textgroups:
        return True
    if text.urn.upTo(URN.WORK) in allowed_works:
        return True
    return text.urn.upTo(URN.VERSION) in allowed_editions


resolver = NautilusCTSResolver(
    # Every directory found directly under d
    [
        path
        for path in (os.path.join(d, entry) for entry in os.listdir(d))
        if os.path.isdir(path)
    ],
    dispatcher=dispatcher,
    filter=_is_served_edition,
    cache=None)

app = Flask("Nautilus")
# Settings come from the environment; the second argument of each get() is
# the placeholder used when the variable is not set.
app.secret_key = os.environ.get('ALPHEIOS_NEMO_APPKEY', 'appsecret')
client_id = os.environ.get('ALPHEIOS_NEMO_AUTH0_CLIENTID', 'clientidhere')
client_secret = os.environ.get(
    'ALPHEIOS_NEMO_AUTH0_CLIENTSECRET', 'clientsecrethere')
proxy_base = os.environ.get(
    'ALPHEIOS_NEMO_PROXYBASE', 'http://dev.alpheios.net:5000')
auth_max_age_override = os.environ.get('ALPHEIOS_NEMO_AUTH0_MAX_AGE_SECONDS')
예제 #20
0
 def setUp(self):
     """ Remove (None, None, None) from the graph returned by get_graph(),
     then build a resolver over the latinLit2 test data """
     # presumably a wildcard pattern matching every triple - TODO confirm
     any_triple = (None, None, None)
     get_graph().remove(any_triple)
     self.resolver = NautilusCTSResolver(["./tests/testing_data/latinLit2"])
예제 #21
0
class TextXMLFolderResolver(TestCase):
    """ Ensure working state of resolver """
    def setUp(self):
        """ Remove (None, None, None) from the graph returned by get_graph(),
        then build a resolver over the latinLit2 test data """
        # presumably a wildcard pattern matching every triple - TODO confirm
        any_triple = (None, None, None)
        get_graph().remove(any_triple)
        self.resolver = NautilusCTSResolver(["./tests/testing_data/latinLit2"])

    def test_getPassage_full(self):
        """ Test that we can get a full text """
        text_urn = "urn:cts:latinLit:phi1294.phi002.perseus-lat2"
        passage = self.resolver.getTextualNode(text_urn)
        self.assertIsInstance(
            passage, Passage,
            "GetPassage should always return passages objects")

        # We check the passage is able to perform further requests and is well instantiated
        references = list(passage.getReffs())
        self.assertEqual(references[0], '1',
                         "Resource should be string identifiers")

        plain_text = passage.export(output=Mimetypes.PLAINTEXT)
        self.assertIn("Hic est quem legis ille, quem requiris,", plain_text,
                      "Export PrototypeText should work correctly")

        tree = passage.export(output=Mimetypes.PYTHON.ETREE)
        first_line = tree.xpath(
            ".//tei:div[@n='1']/tei:div[@n='1']/tei:l[@n='1']/text()",
            namespaces=XPATH_NAMESPACES,
            magic_string=False)
        self.assertEqual(
            first_line, ["Hic est quem legis ille, quem requiris, "],
            "Export to Etree should give an Etree or Etree like object")

    def test_getPassage_no_canonical(self):
        """ Test that we can get a subreference text passage where no canonical exists"""
        passage = self.resolver.getTextualNode(
            "urn:cts:latinLit:phi0959.phi010.perseus-eng2", "2")
        exported = passage.export(Mimetypes.PLAINTEXT)
        self.assertEqual(exported, "Omne fuit Musae carmen inerme meae; ",
                         "Passage should resolve if directly asked")

        # Work-level and textgroup-level identifiers are expected to fail,
        # each with its own exception type
        failing_requests = (
            (UnknownCollection, "urn:cts:latinLit:phi0959.phi010"),
            (InvalidURN, "urn:cts:latinLit:phi0959"),
        )
        for expected_error, identifier in failing_requests:
            with self.assertRaises(expected_error):
                self.resolver.getTextualNode(identifier, "2")

    def test_getPassage_subreference(self):
        """ Test that we can get a subreference text passage"""
        reference = "1.1"
        passage = self.resolver.getTextualNode(
            "urn:cts:latinLit:phi1294.phi002.perseus-lat2", reference)

        # We check we made a reroute to GetPassage request
        self.assertIsInstance(
            passage, Passage,
            "GetPassage should always return passages objects")

        references = list(passage.getReffs())
        self.assertEqual(references[0], '1.1.1',
                         "Resource should be string identifiers")

        self.assertIn("Hic est quem legis ille, quem requiris,",
                      passage.export(output=Mimetypes.PLAINTEXT),
                      "Export PrototypeText should work correctly")

        # The same passage requested through the work identifier
        canonical = self.resolver.getTextualNode(
            "urn:cts:latinLit:phi1294.phi002", reference)
        passage_text = passage.export(output=Mimetypes.PLAINTEXT)
        canonical_text = canonical.export(output=Mimetypes.PLAINTEXT)
        self.assertEqual(passage_text, canonical_text,
                         "Canonical text should work")

        tree = passage.export(output=Mimetypes.PYTHON.ETREE)
        first_line = tree.xpath(".//tei:l[@n='1']/text()",
                                namespaces=XPATH_NAMESPACES,
                                magic_string=False)
        self.assertEqual(
            first_line, ["Hic est quem legis ille, quem requiris, "],
            "Export to Etree should give an Etree or Etree like object")

    def test_getPassage_full_metadata(self):
        """ Test that we can get a full text with its metadata"""
        passage = self.resolver.getTextualNode(
            "urn:cts:latinLit:phi1294.phi002.perseus-lat2", metadata=True)

        self.assertIsInstance(
            passage, Passage,
            "GetPassage should always return passages objects")

        aggregated = "Local Inventory Files should be parsed and aggregated correctly"
        # (CTS term, expected English value)
        expected_metadata = (
            ("title", "Epigrammata"),
            ("groupname", "Martial"),
            ("label", "Epigrams"),
            ("description",
             "M. Valerii Martialis Epigrammaton libri / recognovit W. Heraeus"),
        )
        for term, expected in expected_metadata:
            value = passage.metadata[RDF_NAMESPACES.CTS.term(term), "eng"]
            self.assertEqual(str(value), expected, aggregated)

        self.assertEqual(passage.citation.name, "book", aggregated)
        self.assertEqual(len(passage.citation), 3, aggregated)

        # We check the passage is able to perform further requests and is well instantiated
        references = list(passage.getReffs(level=3))
        self.assertEqual(references[0], '1.pr.1',
                         "Resource should be string identifiers")

        plain_text = passage.export(output=Mimetypes.PLAINTEXT)
        self.assertIn("Hic est quem legis ille, quem requiris,", plain_text,
                      "Export PrototypeText should work correctly")

        tree = passage.export(output=Mimetypes.PYTHON.ETREE)
        first_line = tree.xpath(
            ".//tei:div[@n='1']/tei:div[@n='1']/tei:l[@n='1']/text()",
            namespaces=XPATH_NAMESPACES,
            magic_string=False)
        self.assertEqual(
            first_line, ["Hic est quem legis ille, quem requiris, "],
            "Export to Etree should give an Etree or Etree like object")

    def test_getPassage_prevnext(self):
        """ Test that passage 1.1 exposes its previous and next passages """
        passage = self.resolver.getTextualNode(
            "urn:cts:latinLit:phi1294.phi002.perseus-lat2",
            subreference="1.1",
            metadata=True)

        self.assertIsInstance(
            passage, Passage,
            "GetPassage should always return passages objects")
        self.assertEqual(passage.prevId, "1.pr",
                         "Previous Passage ID should be parsed")
        self.assertEqual(passage.nextId, "1.2",
                         "Next Passage ID should be parsed")

        references = list(passage.getReffs())

        # Ensure navigability
        navigation = "Left and Right Navigation should be available"
        previous_text = passage.prev.export(output=Mimetypes.PLAINTEXT)
        self.assertIn(
            "verentia ludant; quae adeo antiquis auctoribus defuit, ut",
            previous_text, navigation)
        next_text = passage.next.export(output=Mimetypes.PLAINTEXT)
        self.assertIn("Qui tecum cupis esse meos ubicumque libellos ",
                      next_text, navigation)

        # We check the passage is able to perform further requests and is well instantiated
        self.assertEqual(references[0], '1.1.1',
                         "Resource should be string identifiers")

        plain_text = passage.export(output=Mimetypes.PLAINTEXT)
        self.assertIn("Hic est quem legis ille, quem requiris,", plain_text,
                      "Export PrototypeText should work correctly")

        tree = passage.export(output=Mimetypes.PYTHON.ETREE)
        first_line = tree.xpath(".//tei:l[@n='1']/text()",
                                namespaces=XPATH_NAMESPACES,
                                magic_string=False)
        self.assertEqual(
            first_line, ["Hic est quem legis ille, quem requiris, "],
            "Export to Etree should give an Etree or Etree like object")

    def test_getPassage_metadata_prevnext(self):
        """ Test that passage 1.1, requested with metadata=True and
        prevnext=True, carries both its metadata and its siblings """
        passage = self.resolver.getTextualNode(
            "urn:cts:latinLit:phi1294.phi002.perseus-lat2",
            subreference="1.1",
            metadata=True,
            prevnext=True)
        self.assertIsInstance(
            passage, Passage,
            "GetPassage should always return passages objects")

        aggregated = "Local Inventory Files should be parsed and aggregated correctly"
        # (CTS term, expected English value)
        expected_metadata = (
            ("title", "Epigrammata"),
            ("groupname", "Martial"),
            ("label", "Epigrams"),
            ("description",
             "M. Valerii Martialis Epigrammaton libri / recognovit W. Heraeus"),
        )
        for term, expected in expected_metadata:
            value = passage.metadata[RDF_NAMESPACES.CTS.term(term), "eng"]
            self.assertEqual(str(value), expected, aggregated)

        self.assertEqual(passage.citation.name, "book", aggregated)
        self.assertEqual(len(passage.citation), 3, aggregated)

        self.assertEqual(passage.prevId, "1.pr",
                         "Previous Passage ID should be parsed")
        self.assertEqual(passage.nextId, "1.2",
                         "Next Passage ID should be parsed")
        references = list(passage.getReffs())

        # Ensure navigability
        navigation = "Left and Right Navigation should be available"
        previous_text = passage.prev.export(output=Mimetypes.PLAINTEXT)
        self.assertIn(
            "verentia ludant; quae adeo antiquis auctoribus defuit, ut",
            previous_text, navigation)
        next_text = passage.next.export(output=Mimetypes.PLAINTEXT)
        self.assertIn("Qui tecum cupis esse meos ubicumque libellos ",
                      next_text, navigation)

        # We check the passage is able to perform further requests and is well instantiated
        self.assertEqual(references[0], '1.1.1',
                         "Resource should be string identifiers")

        plain_text = passage.export(output=Mimetypes.PLAINTEXT)
        self.assertIn("Hic est quem legis ille, quem requiris,", plain_text,
                      "Export PrototypeText should work correctly")

        tree = passage.export(output=Mimetypes.PYTHON.ETREE)
        first_line = tree.xpath(".//tei:l[@n='1']/text()",
                                namespaces=XPATH_NAMESPACES,
                                magic_string=False)
        self.assertEqual(
            first_line, ["Hic est quem legis ille, quem requiris, "],
            "Export to Etree should give an Etree or Etree like object")

    def test_getMetadata_full(self):
        """ Check the metadata returned by getMetadata() without argument """
        metadata = self.resolver.getMetadata()
        self.assertIsInstance(metadata, Collection,
                              "Resolver should return a collection object")

        first_member = metadata.members[0]
        self.assertIsInstance(first_member, Collection,
                              "Members of Inventory should be TextGroups")

        descendant_count = len(metadata.descendants)
        self.assertEqual(
            descendant_count, 43,
            "There should be as many descendants as there is edition, translation, works and textgroup + 1 for "
            "default inventory")

        readable_count = len(metadata.readableDescendants)
        self.assertEqual(
            readable_count, 25,
            "There should be as many readable descendants as there is edition, translation(25 ed+tr)"
        )

        text_count = sum(
            1 for descendant in metadata.readableDescendants
            if isinstance(descendant, CtsTextMetadata))
        self.assertEqual(
            text_count, 25,
            "There should be 24 editions + 1 translations in readableDescendants"
        )

        tree = metadata.export(output=Mimetypes.PYTHON.ETREE)
        lat2_nodes = tree.xpath(
            "//ti:edition[@urn='urn:cts:latinLit:phi1294.phi002.perseus-lat2']",
            namespaces=XPATH_NAMESPACES)
        self.assertEqual(
            len(lat2_nodes), 1,
            "There should be one node in exported format corresponding to lat2"
        )

        # Member order is not checked, only the set of identifiers
        dts_export = metadata.export(output=Mimetypes.JSON.DTS.Std)
        member_ids = [
            member["@id"] for member in dts_export["@graph"]["dts:members"]
        ]
        expected_ids = [
            "urn:cts:latinLit:phi1294", "urn:cts:latinLit:phi0959",
            "urn:cts:greekLit:tlg0003", "urn:cts:latinLit:phi1276"
        ]
        self.assertCountEqual(member_ids, expected_ids,
                              "There should be 4 Members in DTS JSON")

    def test_getMetadata_subset(self):
        """ Fetch the metadata of a single work, then of a single translation """
        work = self.resolver.getMetadata(
            objectId="urn:cts:latinLit:phi1294.phi002")
        self.assertIsInstance(work, Collection,
                              "Resolver should return a collection object")
        first_member = work.members[0]
        self.assertIsInstance(first_member, CtsTextMetadata,
                              "Members of PrototypeWork should be Texts")

        descendant_count = len(work.descendants)
        self.assertEqual(
            descendant_count, 1,
            "There should be as many descendants as there is edition, translation"
        )
        readable_count = len(work.readableDescendants)
        self.assertEqual(readable_count, 1,
                         "There should be 1 edition in readableDescendants")
        # Same collection again, this time counting only CtsTextMetadata instances
        text_count = sum(1 for node in work.readableDescendants
                         if isinstance(node, CtsTextMetadata))
        self.assertEqual(text_count, 1,
                         "There should be 1 edition in readableDescendants")

        # The direct parent and the first entry of the parents list are both checked
        direct_parent = work.parent
        self.assertIsInstance(direct_parent, CtsTextgroupMetadata,
                              "First parent should be PrototypeTextGroup")
        first_ancestor = work.parents[0]
        self.assertIsInstance(first_ancestor, CtsTextgroupMetadata,
                              "First parent should be PrototypeTextGroup")

        etree_export = work.export(output=Mimetypes.PYTHON.ETREE)
        lat2_nodes = etree_export.xpath(
            "//ti:edition[@urn='urn:cts:latinLit:phi1294.phi002.perseus-lat2']",
            namespaces=XPATH_NAMESPACES)
        self.assertEqual(
            len(lat2_nodes), 1,
            "There should be one node in exported format corresponding to lat2"
        )

        dts_export = work.export(output=Mimetypes.JSON.DTS.Std)
        member_ids = [
            member["@id"] for member in dts_export["@graph"]["dts:members"]
        ]
        self.assertEqual(member_ids,
                         ["urn:cts:latinLit:phi1294.phi002.perseus-lat2"],
                         "There should be one member in DTS JSON")

        # Second lookup: a translation identified by its full URN
        translation = self.resolver.getMetadata(
            objectId="urn:cts:greekLit:tlg0003.tlg001.opp-fre1")
        self.assertIsInstance(translation, CtsTranslationMetadata,
                              "Metadata should be translation")
        description = translation.get_description("eng")
        self.assertIn("Histoire de la Guerre du Péloponnése", description,
                      "Description should be the right one")

    def test_getSiblings(self):
        """ Ensure getSiblings returns both neighbours of passage 1.1 """
        previous, nextious = self.resolver.getSiblings(
            textId="urn:cts:latinLit:phi1294.phi002.perseus-lat2",
            subreference="1.1")
        self.assertEqual(previous, "1.pr", "Previous should be well computed")
        # The message names the sibling under test, so a failure points at the right value
        self.assertEqual(nextious, "1.2", "Next should be well computed")

    def test_getSiblings_nextOnly(self):
        """ A passage without predecessor yields None as previous and a real next reference """
        siblings = self.resolver.getSiblings(
            textId="urn:cts:latinLit:phi1294.phi002.perseus-lat2",
            subreference="1.pr")
        before, after = siblings
        self.assertEqual(before, None, "Previous Should not exist")
        self.assertEqual(after, "1.1", "Next should be well computed")

    def test_getSiblings_prevOnly(self):
        """ A passage without successor yields a real previous reference and None as next """
        siblings = self.resolver.getSiblings(
            textId="urn:cts:latinLit:phi1294.phi002.perseus-lat2",
            subreference="14.223")
        before, after = siblings
        self.assertEqual(before, "14.222", "Previous should be well computed")
        self.assertEqual(after, None, "Next should not exist")

    def test_getReffs_full(self):
        """ Check getReffs at level 1, at level 2 and below a given subreference """
        text_id = "urn:cts:latinLit:phi1294.phi002.perseus-lat2"

        books = self.resolver.getReffs(textId=text_id, level=1)
        self.assertEqual(len(books), 14, "There should be 14 books")
        self.assertEqual(books[0], "1")

        poems = self.resolver.getReffs(textId=text_id, level=2)
        self.assertEqual(len(poems), 1527, "There should be 1527 poems")
        self.assertEqual(poems[0], "1.pr")

        # With a subreference, the requested level applies below passage 1.1
        children = self.resolver.getReffs(textId=text_id,
                                          subreference="1.1",
                                          level=1)
        self.assertEqual(len(children), 6, "There should be 6 references")
        self.assertEqual(children[0], "1.1.1")
예제 #22
0
 def test_get_capabilities(self):
     """ Run each metadata filter against the farsiLit test data and check the result size """
     repo = NautilusCTSResolver(["./tests/testing_data/farsiLit"])
     repo.parse()

     # (keyword filters, expected size of the first returned element, failure message),
     # evaluated in order; the first failing row stops the test.
     expectations = [
         ({}, 4, "General no filter works"),
         ({"category": "edition"}, 2, "Type filter works"),
         ({"lang": "ger"}, 1, "Filtering on language works"),
         ({"category": "edition", "lang": "ger"}, 0,
          "Type filter + lang works"),
         ({"category": "translation", "lang": "ger"}, 1,
          "Type filter + lang works"),
         ({"page": 1, "limit": 2, "pagination": True}, 2,
          "Pagination works without other filters"),
         ({"page": 2, "limit": 2, "pagination": True}, 2,
          "Pagination works without other filters at list end"),
         ({"urn": "urn:cts:farsiLit"}, 3, "URN Filtering works"),
         ({"urn": "urn:cts:latinLit"}, 1, "URN Filtering works"),
         ({"urn": "urn:cts:farsiLit:hafez.divan.perseus-eng1"}, 1,
          "Complete URN filtering works"),
     ]
     for filters, expected, message in expectations:
         found = repo.__getTextMetadata__(**filters)[0]
         self.assertEqual(len(found), expected, message)
예제 #23
0
import os

from capitains_nautilus.cts.resolver import NautilusCTSResolver
from werkzeug.contrib.cache import FileSystemCache

# Base directory holding both the corpora ("data") and the on-disk cache ("cache").
root_path = "/var/lib/nautilus"
data_path = os.path.join(root_path, "data")
cache_path = os.path.join(root_path, "cache")

# makedirs(exist_ok=True) creates a missing root_path as well and is not subject
# to the check-then-create race of ``if not exists: mkdir``.
os.makedirs(data_path, exist_ok=True)
os.makedirs(cache_path, exist_ok=True)

cache = FileSystemCache(cache_path)

# Every immediate sub-directory of data_path is handed to the resolver as one
# resource; plain files found there are ignored.
resolver = NautilusCTSResolver(
    [
        os.path.join(data_path, entry) for entry in os.listdir(data_path)
        if os.path.isdir(os.path.join(data_path, entry))
    ],
    cache=cache)


def preload():
    """ Ask the module-level resolver for its metadata without any object filter

    The returned value is discarded.
    NOTE(review): presumably called to parse the corpora / warm the cache before
    the first request — confirm against the caller.
    """
    resolver.getMetadata(objectId=None)
예제 #24
0
파일: app.py 프로젝트: KASanders/ps
                                       parent=general_collection)
# English label of the "greek_texts" inventory
greek_texts.set_label("Greek Texts", "eng")

# Dispatcher working on general_collection.
# NOTE(review): "id:misc" is presumably the inventory receiving collections that
# no registered filter claims — confirm against CollectionDispatcher.
organizer = CollectionDispatcher(general_collection,
                                 default_inventory_name="id:misc")


@organizer.inventory("greek_texts")
def organize_my_meadow(collection, path=None, **kwargs):
    """ Dispatching filter registered for the "greek_texts" inventory

    :param collection: Collection being dispatched; only its ``id`` is read
    :param path: Unused
    :return: True when the collection id starts with "urn:cts:greekLit"
    """
    return collection.id.startswith("urn:cts:greekLit")


# Flask application shared by the Nautilus API and the Nemo front-end
flask_app = Flask("Flask Application for Nemo")
# Resolver over the local "corpora/meadow" folder, dispatched by `organizer`;
# parse() is called right away, at import time.
resolver = NautilusCTSResolver(["corpora/meadow"], dispatcher=organizer)
resolver.parse()

# Nautilus extension registered on the application under the /api prefix
nautilus_api = FlaskNautilus(prefix="/api", app=flask_app, resolver=resolver)

# Nemo front-end on the same application and resolver. The asset, XSL and
# template paths are relative; `meadow_chunker` (defined outside this excerpt)
# is registered for one specific text URN.
nemo = Nemo(
    name="InstanceNemo",
    app=flask_app,
    resolver=resolver,
    base_url="",
    css=["assets/css/theme.css"],
    js=["assets/js/alpheios.js"],
    statics=["assets/images/logo.jpg"],
    transform={"default": "components/main.xsl"},
    templates={"main": "templates/main"},
    chunker={"urn:cts:greekLit:tlg2856.tlg001.1st1K-grc1": meadow_chunker})
예제 #25
0
    def test_dispatching_output(self):
        """ Dispatch latinLit2 into three inventories and check each CTS XML export

        Three inventories (Latin, Farsi, Greek) are declared, one prefix-based
        dispatching function is registered for each, and the exports of every
        inventory and of the whole collection are parsed back and counted.
        """
        tic = CtsTextInventoryCollection()
        latin = CtsTextInventoryMetadata("urn:perseus:latinLit", parent=tic)
        latin.set_label("Classical Latin", "eng")
        farsi = CtsTextInventoryMetadata("urn:perseus:farsiLit", parent=tic)
        farsi.set_label("Farsi", "eng")
        gc = CtsTextInventoryMetadata("urn:perseus:greekLit", parent=tic)
        gc.set_label("Ancient Greek", "eng")
        gc.set_label("Grec Ancien", "fre")

        dispatcher = CollectionDispatcher(tic)

        @dispatcher.inventory("urn:perseus:latinLit")
        def dispatchLatinLit(collection, path=None, **kwargs):
            if collection.id.startswith("urn:cts:latinLit:"):
                return True
            return False

        @dispatcher.inventory("urn:perseus:farsiLit")
        def dispatchfFarsiLit(collection, path=None, **kwargs):
            if collection.id.startswith("urn:cts:farsiLit:"):
                return True
            return False

        @dispatcher.inventory("urn:perseus:greekLit")
        def dispatchGreekLit(collection, path=None, **kwargs):
            if collection.id.startswith("urn:cts:greekLit:"):
                return True
            return False

        resolver = NautilusCTSResolver(["./tests/testing_data/latinLit2"],
                                       dispatcher=dispatcher)
        resolver.logger.disabled = True
        resolver.REMOVE_EMPTY = False
        resolver.parse()

        # Export everything first; the graph is emptied before parsing the
        # exports back.  The master export is kept under a name that does not
        # shadow the builtin ``all``.
        full_inventory = resolver.getMetadata().export(Mimetypes.XML.CTS)
        latin_stuff = resolver.getMetadata("urn:perseus:latinLit").export(
            Mimetypes.XML.CTS)
        greek_stuff = resolver.getMetadata("urn:perseus:greekLit").export(
            Mimetypes.XML.CTS)
        farsi_stuff = resolver.getMetadata("urn:perseus:farsiLit").export(
            Mimetypes.XML.CTS)
        get_graph().remove((None, None, None))

        latin_stuff = XmlCtsTextInventoryMetadata.parse(latin_stuff)
        greek_stuff = XmlCtsTextInventoryMetadata.parse(greek_stuff)
        farsi_stuff = XmlCtsTextInventoryMetadata.parse(farsi_stuff)
        self.assertEqual(len(latin_stuff.readableDescendants), 19,
                         "There should be 19 readable descendants in Latin")
        self.assertIsInstance(latin_stuff, CtsTextInventoryMetadata,
                              "should be textinventory")
        self.assertEqual(
            len(greek_stuff.readableDescendants), 6,
            "There should be 6 readable descendants in Greek [6 only in __cts__.xml]"
        )
        self.assertEqual(len(farsi_stuff.descendants), 0,
                         "There should be nothing in FarsiLit")
        self.assertEqual(
            greek_stuff.get_label("fre"),
            None,  # Text inventory have no label in CTS
            "Label should be correct")

        # Empty the graph again before parsing the master export
        get_graph().remove((None, None, None))
        full_inventory = XmlCtsTextInventoryMetadata.parse(full_inventory)
        self.assertEqual(
            len(full_inventory.readableDescendants), 25,
            "There should be all 25 readable descendants in the master collection"
        )
예제 #26
0
# -*- coding: utf-8 -*-
from flask import Flask
from capitains_nautilus.cts.resolver import NautilusCTSResolver
from capitains_nautilus.flask_ext import FlaskNautilus
from fullnemo import FullNemo
#from flask_nemo.fullnemo import FullNemo
from dispatcher import dispatcher

# Flask application shared by the Nautilus API and the Nemo front-end
flask_app = Flask("Flask Application for Nemo")
# Resolver over the corpus folder, dispatched by the imported `dispatcher`
resolver = NautilusCTSResolver(["/usr/share/dh-data/theses"], dispatcher=dispatcher)
# Explicit parsing at import time is disabled here:
#resolver.parse()

# Nautilus extension registered on the application under the /nemo/api prefix
nautilus_api = FlaskNautilus(prefix="/nemo/api", app=flask_app, resolver=resolver)

# Nemo front-end (FullNemo subclass) served under /nemo with its own assets and XSL
nemo = FullNemo(
    name="Positions de thèse",
    app=flask_app,
    resolver=resolver,
    base_url="/nemo",
    css=["assets/css/html.css", "assets/css/postprod.css"],
    js=["assets/js/Tree.js", "assets/js/postprod.js"],
    statics=["assets/images/logo.png"],
    transform={"default": "assets/xsl/tei2html.xsl", "common" : "assets/xsl/common.xsl"},
    templates={"main": "templates/main"}
)

if __name__ == "__main__":
    # NOTE(review): debug=True turns on Flask's debugger and reloader; only
    # suitable for running this file directly during development.
    flask_app.run(debug=True)
예제 #27
0
from werkzeug.contrib.cache import FileSystemCache
from capitains_nautilus.cts.resolver import NautilusCTSResolver
from capitains_nautilus.flask_ext import FlaskNautilus
from . import create_app
from .nemo import NemoFormulae
from .dispatcher_builder import organizer

# Application instance built by the package factory
flask_app = create_app()
# Resolver over the folders listed under the CORPUS_FOLDERS configuration key,
# dispatched by `organizer`; the file-system cache is currently disabled.
resolver = NautilusCTSResolver(
    flask_app.config['CORPUS_FOLDERS'],
    dispatcher=organizer,
    # cache=FileSystemCache(flask_app.config['CACHE_DIRECTORY'])
)
# nautilus_api = FlaskNautilus(prefix="/api", resolver=resolver, app=flask_app)

nemo = NemoFormulae(name="InstanceNemo",
                    app=flask_app,
                    resolver=resolver,
                    base_url="",
                    css=["assets/css/theme.css"],
                    js=["assets/js/empty.js"],
                    static_folder="./assets/",
                    transform={
                        "default": "components/epidoc.xsl",
                        "notes": "components/extract_notes.xsl"
                    },
                    templates={
                        "main": "templates/main",
                        "errors": "templates/errors",
                        "auth": "templates/auth",
                        "search": "templates/search"
예제 #28
0
fro = PrototypeTextInventory("urn:geste", parent=tic) # Unrelated to CTS identifiers: this is a project identifier
fro.set_label("Corpus de chansons de geste", "fro")

# Dispatcher working on the `tic` collection
dispatcher = CollectionDispatcher(tic)

@dispatcher.inventory("urn:geste")
def dispatchGeste(collection, path=None, **kwargs):
    """ Dispatching filter registered for the "urn:geste" inventory

    :param collection: Collection being dispatched; only its ``id`` is read
    :param path: Unused
    :return: True when the collection id starts with "urn:cts:froLit"
    """
    # Unlike the inventory name, this prefix is a real CTS URN prefix
    return collection.id.startswith("urn:cts:froLit")

cache = Cache()

# Resolver over the current working directory, dispatched by `dispatcher`.
# NOTE(review): `cache` above is not passed to this resolver — confirm whether
# that is intended.
NautilusDummy = NautilusCTSResolver(
    [
        "."
    ],
    dispatcher=dispatcher
)
# Only errors and above are emitted by the resolver's logger
NautilusDummy.logger.setLevel(logging.ERROR)

def scheme_grouper(text, getreffs):
    level = len(text.citation)
    groupby = 100 
    types = [citation.name for citation in text.citation]

    if 'word' in types:
        types = types[:types.index("word")]
    if str(text.id) == "urn:cts:latinLit:stoa0040.stoa062.opp-lat1":
        level, groupby = 1, 2
    elif types == ["vers", "mot"]:
        level, groupby = 1, 100
예제 #29
0
def build_resolver(configuration_file):
    """ Build the dispatcher, the resolver and the cache described by an XML configuration file

    The file is read for:

    - ``//corpora/corpus`` : corpus folders, resolved with ``relative_folder``
    - ``//collections/collection`` : one inventory each, with its ``identifier``,
      its ``name`` labels (``lang`` attribute) and an optional ``default="true"``
    - ``./filters`` inside a collection : ``id-starts-with``, ``citation-contains``
      and ``folder`` dispatching rules
    - ``//cache-folder`` : folder of a file-system cache; the last one found wins
      and a ``SimpleCache`` is used when there is none

    :param configuration_file: Path to the XML configuration file
    :return: Organizer, Resolver and Cache handler
    """

    # Each factory receives the loop value as an argument so that the returned
    # filter keeps *that* value.  A lambda written directly in the ``for`` body
    # looks its variable up when called, i.e. after the loop has finished, and
    # every filter would then test the last value only.
    def make_prefix_filter(prefix):
        return lambda collection: str(collection.id).startswith(prefix)

    def make_citation_filter(citation_name):
        return lambda collection: citation_contain_filter(
            collection, citation_name)

    def make_directory_filter(target_directory):
        # A missing path cannot be inside the folder: answer False rather than
        # failing on ``None.startswith``.
        return lambda collection, path=None: (
            path is not None and path.startswith(
                relative_folder(configuration_file, target_directory)))

    with open(configuration_file) as f:
        xml = etree.parse(f)

    directories = [
        # Compute path relative to the configuration files
        relative_folder(configuration_file, directory)
        for directory in xml.xpath("//corpora/corpus/text()")
    ]
    default_collection = None
    general_collection = CtsTextInventoryCollection()
    filters_to_register = []

    for collection in xml.xpath("//collections/collection"):
        identifier = collection.xpath("./identifier/text()")[0]
        if collection.get("default") == "true":
            default_collection = identifier

        current_collection = CtsTextInventoryMetadata(
            identifier, parent=general_collection)
        for name in collection.xpath("./name"):
            current_collection.set_label(name.text, name.get("lang"))

        # We look at dispatching filters in the collection
        for filters in collection.xpath("./filters"):
            # We register prefix filters
            prefix_filters = [
                make_prefix_filter(prefix)
                for prefix in filters.xpath("./id-starts-with/text()")
            ]

            # We register citation filters
            citation_filters = [
                make_citation_filter(citation_name)
                for citation_name in filters.xpath(
                    "./citation-contains/text()")
            ]

            # We register path based filters
            directory_filters = [
                make_directory_filter(target_directory)
                for target_directory in filters.xpath("./folder/text()")
            ]

            filters_to_register.append(
                (identifier,
                 collection_dispatcher_builder(collection, prefix_filters,
                                               citation_filters,
                                               directory_filters)))

    # Create the dispatcher
    organizer = CollectionDispatcher(general_collection,
                                     default_inventory_name=default_collection)

    for destination_collection, anonymous_dispatching_function in filters_to_register:
        organizer.add(anonymous_dispatching_function, destination_collection)

    # Set-up the cache folder
    # ToDO : Add a system for redis ?
    cache = None
    for cache_folder in xml.xpath("//cache-folder/text()"):
        cache = FileSystemCache(cache_folder)
    if cache is None:
        cache = SimpleCache()

    resolver = NautilusCTSResolver(resource=directories,
                                   dispatcher=organizer,
                                   cache=cache)

    return organizer, resolver, cache
예제 #30
0
import os

import flask
from flask_nemo import Nemo
from capitains_nautilus.cts.resolver import NautilusCTSResolver
from capitains_nautilus.flask_ext import FlaskNautilus

app = flask.Flask("CTS webserver demo (nemo)")

# Optional local configuration, loaded only when the file is present.
# NOTE(review): the existence check is relative to the current working
# directory; confirm it matches the location from_pyfile() reads from.
LOCAL_CONFIG = 'config.py'
if os.path.exists(LOCAL_CONFIG):
    app.config.from_pyfile(LOCAL_CONFIG)

# Every immediate sub-directory of ./corpora is one corpus for the resolver
corpora = [entry.path for entry in os.scandir('corpora') if entry.is_dir()]
print('app: resolver given', corpora)
resolver = NautilusCTSResolver(corpora)
# Corpora are parsed right away, at import time
resolver.parse()

# Nautilus extension registered on the application under /api
nautilus = FlaskNautilus(prefix='/api', app=app, resolver=resolver)

# Nemo browser on the same application and resolver, under /nemo
nemo = Nemo(name='Nemo', app=app, resolver=resolver, base_url='/nemo')


@app.route('/')
def home():
    '''Placeholder home page to help visitors.

    We're mainly here to serve the nemo browser and api endoints,
    but provide a simple landing page in case we're serving the
    whole domain.'''
    return flask.render_template('index.html',