def test_get_capabilities_nocites(self):
    """ Query the latinLit fixtures for one specific version URN and expect no match """
    resolver = NautilusCTSResolver(["./tests/testing_data/latinLit"])
    result = resolver.__getTextMetadata__(
        urn="urn:cts:latinLit:stoa0045.stoa008.perseus-lat2"
    )
    # First element of the result is the collection of matching texts
    matching_texts = result[0]
    self.assertEqual(
        len(matching_texts), 0, "Texts without citations were ignored"
    )
def test_dispatching_error(self):
    """ Check how the resolver reacts to texts that no dispatching rule accepts

    With ``RAISE_ON_UNDISPATCHED`` set to True, building and parsing a
    resolver over the latinLit2 fixtures must raise. With the flag set back
    to False, the same operation must not let ``UndispatchedTextError``
    escape.
    """
    tic = CtsTextInventoryCollection()
    latin = CtsTextInventoryMetadata("urn:perseus:latinLit", parent=tic)
    latin.set_label("Classical Latin", "eng")
    dispatcher = CollectionDispatcher(tic)
    # We remove default dispatcher
    dispatcher.__methods__ = []

    @dispatcher.inventory("urn:perseus:latinLit")
    def dispatchLatinLit(collection, path=None, **kwargs):
        return collection.id.startswith("urn:cts:latinLit:")

    NautilusCTSResolver.RAISE_ON_UNDISPATCHED = True
    try:
        with self.assertRaises(Exception):
            resolver = NautilusCTSResolver(
                ["./tests/testing_data/latinLit2"], dispatcher=dispatcher
            )
            resolver.logger.disabled = True
            resolver.parse()
    finally:
        # The flag lives on the class: reset it even when the assertion
        # above fails, otherwise it would leak into every later test.
        NautilusCTSResolver.RAISE_ON_UNDISPATCHED = False

    try:
        resolver = NautilusCTSResolver(
            ["./tests/testing_data/latinLit2"], dispatcher=dispatcher
        )
        resolver.logger.disabled = True
        resolver.REMOVE_EMPTY = False
        resolver.parse()
    except UndispatchedTextError:
        self.fail("UndispatchedTextError should not have been raised")
def test_get_shared_textgroup_cross_repo(self):
    """ Check that two versions of the same work are both found when the
    farsiLit and latinLit2 fixture folders are loaded together
    """
    Repository = NautilusCTSResolver([
        "./tests/testing_data/farsiLit",
        "./tests/testing_data/latinLit2"
    ])
    self.assertIsNotNone(
        Repository.__getText__(
            "urn:cts:latinLit:phi1294.phi002.perseus-lat2"),
        "We should find perseus-lat2")
    # The failure message now names the version actually being looked up
    self.assertIsNotNone(
        Repository.__getText__("urn:cts:latinLit:phi1294.phi002.opp-lat2"),
        "We should find opp-lat2")
def test_text_resource(self):
    """ Fetch a text object from the resolver and run further queries on it """
    resolver = NautilusCTSResolver(["./tests/testing_data/farsiLit"])
    text, _metadata = resolver.__getText__(
        "urn:cts:farsiLit:hafez.divan.perseus-eng1"
    )

    citation_depth = len(text.citation)
    self.assertEqual(
        citation_depth, 4, "Object has a citation property of length 4"
    )

    node = text.getTextualNode(Reference("1.1.1.1"))
    plain_text = node.export(output=Mimetypes.PLAINTEXT)
    self.assertEqual(
        plain_text,
        "Ho ! Saki, pass around and offer the bowl (of love for God) : ### ",
        "It should be possible to retrieve text"
    )
def setUp(self):
    """ Run the cache-building script, then create a resolver on that cache
    whose ``parse`` raises if it is ever invoked
    """
    exit_status = call([python, "./tests/cts/run_cache.py"], cwd=cwd)
    if exit_status != 0:
        raise Exception("Creating cache failed")

    self.cache = FileSystemCache(subprocess_cache_dir)
    self.resolver = NautilusCTSResolver(
        resource=subprocess_repository,
        cache=self.cache
    )
    self.resolver.logger.disabled = True

    def forbidden_parse(*args, **kwargs):
        # Any call to parse() makes the running test error out
        raise Exception("Parse should not be called")

    self.resolver.parse = forbidden_parse
def test_pagination(self):
    """ Run ``NautilusCTSResolver.pagination`` against a table of
    (arguments, expected tuple, failure message) cases
    """
    array_limits = " Pagination should return Array limits "
    cases = (
        ((2, 30, 150), (30, 60, 2, 30), array_limits),
        ((4, 40, 150), (120, 150, 4, 30), array_limits),
        ((5, 40, 150), (120, 150, 4, 30), array_limits),
        ((5, 100, 150), (100, 150, 2, 50),
         " Pagination should give corrected page and correct count"),
        ((5, 110, 150), (40, 50, 5, 10),
         " Pagination should use default limit (10) when getting too much "),
    )
    for arguments, expected, message in cases:
        self.assertEqual(
            NautilusCTSResolver.pagination(*arguments), expected, message
        )
def setUp(self):
    """ Build the resolver, the Flask application and its HTTP cache, then
    replace ``parse`` by a function that raises ``self.ParsingCalled``
    """
    # Resolver with its own file-system cache
    self.cache = FileSystemCache(subprocess_cache_dir, default_timeout=0)
    self.resolver = NautilusCTSResolver(
        subprocess_repository,
        dispatcher=make_dispatcher(),
        cache=self.cache
    )

    # Flask application, HTTP-level cache and the Nautilus extension
    self.__app__ = Flask("Nautilus")
    http_cache_config = {
        'CACHE_TYPE': "filesystem",
        "CACHE_DIR": http_cache_dir,
        "CACHE_DEFAULT_TIMEOUT": 0
    }
    self.http_cache = Cache(self.app, config=http_cache_config)
    self.nautilus = FlaskNautilus(
        app=self.app,
        prefix="/api",
        name="nautilus",
        resolver=self.resolver,
        flask_caching=self.http_cache
    )
    self.test_client = self.app.test_client()

    # Keep the real parse around and install a trap in its place, so a test
    # can tell whether parsing was triggered
    self.former_parse = self.resolver.parse

    def parse_trap(*args, **kwargs):
        raise self.ParsingCalled("Parse should not be called")

    self.resolver.parse = parse_trap
class TestCache(TestCase):
    """ Metadata retrieval through a resolver that must never call parse() """

    def setUp(self):
        """ Run the cache-building script and create a resolver on that cache """
        exit_status = call([python, "./tests/cts/run_cache.py"], cwd=cwd)
        if exit_status != 0:
            raise Exception("Creating cache failed")

        self.cache = FileSystemCache(subprocess_cache_dir)
        self.resolver = NautilusCTSResolver(
            resource=subprocess_repository,
            cache=self.cache
        )
        self.resolver.logger.disabled = True

        def forbidden_parse(*args, **kwargs):
            raise Exception("Parse should not be called")

        self.resolver.parse = forbidden_parse

    def tearDown(self):
        """ Empty the cache after each test """
        self.cache.clear()

    def _assert_divan_inventory(self, inventory, readable_count):
        """ Shared checks: the CTS XML export mentions the English Divān and
        the collection has the expected number of readable descendants

        :param inventory: Collection returned by ``getMetadata``
        :param readable_count: Expected length of ``readableDescendants``
        """
        exported = inventory.export(Mimetypes.XML.CTS)
        self.assertIn("Divān (English)", exported, "Metadata are there")
        self.assertEqual(len(inventory.readableDescendants), readable_count)

    def test_argumentless_metadata(self):
        """ getMetadata() without argument """
        self._assert_divan_inventory(self.resolver.getMetadata(), 4)

    def test_first_child(self):
        """ getMetadata() on the first child key of the root collection """
        child_keys = list(self.resolver.getMetadata().children.keys())
        first_key = child_keys[0]
        self._assert_divan_inventory(self.resolver.getMetadata(first_key), 4)

    def test_textgroup(self):
        """ getMetadata() on a textgroup URN

        Found to fail originally because of different GRAPH constant used
        across modules (one from the cache vs. the world)
        """
        self._assert_divan_inventory(
            self.resolver.getMetadata("urn:cts:farsiLit:hafez"), 3
        )
def test_restricted_cors(self):
    """ Check that per-route CORS settings end up in the /cts response headers """
    app = Flask("Nautilus")
    resolver = NautilusCTSResolver(["./tests/test_data/latinLit"])
    allowed_methods = {
        "r_cts": "OPTIONS",
        "r_dts_collection": "OPTIONS",
        "r_dts_collections": "OPTIONS"
    }
    allowed_origins = {
        "r_cts": "foo.bar",
        "r_dts_collection": "*",
        "r_dts_collections": "*"
    }
    FlaskNautilus(
        app=app,
        resolver=resolver,
        access_Control_Allow_Methods=allowed_methods,
        access_Control_Allow_Origin=allowed_origins
    )
    client = app.test_client()

    expected_headers = (
        ("Access-Control-Allow-Origin", "foo.bar"),
        ("Access-Control-Allow-Methods", "OPTIONS"),
    )
    # One request per header, issued in the same order as the expectations
    for header_name, expected_value in expected_headers:
        response = client.get("/cts?request=GetCapabilities")
        self.assertEqual(response.headers[header_name], expected_value)
def create_app(self):
    """ Create the application from ``TestConfig`` and attach a
    ``NemoFormulae`` instance (kept on ``self.nemo``) to it

    :return: The created application
    """
    app = create_app(TestConfig)
    corpus_resolver = NautilusCTSResolver(app.config['CORPUS_FOLDERS'])
    template_folders = {
        "main": "templates/main",
        "errors": "templates/errors",
        "auth": "templates/auth",
        "search": "templates/search"
    }
    self.nemo = NemoFormulae(
        name="InstanceNemo",
        resolver=corpus_resolver,
        app=app,
        base_url="",
        templates=template_folders,
        css=["assets/css/theme.css"],
        js=["assets/js/empty.js"],
        static_folder="./assets/"
    )
    return app
def test_dispatching_latin_greek(self):
    """ Dispatch the latinLit2 fixtures over three inventories (Latin, Farsi,
    Greek) by URN prefix and check what each inventory ends up holding
    """
    tic = CtsTextInventoryCollection()
    latin = XmlCtsTextInventoryMetadata("urn:perseus:latinLit", parent=tic)
    latin.set_label("Classical Latin", "eng")
    farsi = XmlCtsTextInventoryMetadata("urn:perseus:farsiLit", parent=tic)
    farsi.set_label("Farsi", "eng")
    greek = XmlCtsTextInventoryMetadata("urn:perseus:greekLit", parent=tic)
    greek.set_label("Ancient Greek", "eng")
    greek.set_label("Grec Ancien", "fre")

    dispatcher = CollectionDispatcher(tic)

    def urn_prefix_rule(prefix):
        """ Build a dispatching rule accepting collections whose id starts with prefix """
        def rule(collection, path=None, **kwargs):
            return collection.id.startswith(prefix)
        return rule

    # Same registrations, same order, as three decorated functions would give
    rules = (
        ("urn:perseus:latinLit", "urn:cts:latinLit:"),
        ("urn:perseus:farsiLit", "urn:cts:farsiLit:"),
        ("urn:perseus:greekLit", "urn:cts:greekLit:"),
    )
    for inventory_id, urn_prefix in rules:
        dispatcher.inventory(inventory_id)(urn_prefix_rule(urn_prefix))

    resolver = NautilusCTSResolver(
        ["./tests/testing_data/latinLit2"],
        dispatcher=dispatcher
    )
    resolver.logger.disabled = True
    resolver.REMOVE_EMPTY = False
    resolver.parse()

    latin_inventory = resolver.getMetadata("urn:perseus:latinLit")
    greek_inventory = resolver.getMetadata("urn:perseus:greekLit")
    farsi_inventory = resolver.getMetadata("urn:perseus:farsiLit")

    self.assertEqual(
        len(latin_inventory.readableDescendants), 19,
        "There should be 19 readable descendants in Latin"
    )
    self.assertIsInstance(
        latin_inventory, CtsTextInventoryMetadata,
        "should be textinventory"
    )
    self.assertEqual(
        len(greek_inventory.readableDescendants), 6,
        "There should be 6 readable descendants in Greek [6 only in __cts__.xml]"
    )
    self.assertEqual(
        len(farsi_inventory.descendants), 0,
        "There should be nothing in FarsiLit"
    )
    french_label = str(greek_inventory.get_label("fre"))
    self.assertEqual(french_label, "Grec Ancien", "Label should be correct")

    # A Greek textgroup must not be reachable through the Latin inventory
    with self.assertRaises(KeyError):
        _ = latin_inventory["urn:cts:greekLit:tlg0003"]
def _commandline(repositories, port=8000, host="127.0.0.1", debug=False,
                 cache=None, cache_path="./cache", redis=None):
    """ Run a CTS API from command line.

    .. warning:: This function should not be used in the production context

    :param repositories: Resource(s) given to NautilusCTSResolver as ``resource``
    :param port: Port the server listens on
    :param host: Address the server binds to
    :param debug: If true, set the app logger to INFO and run ``app.run`` in \
    debug mode; otherwise serve through HTTPServer and IOLoop
    :param cache: "redis" or "filesystem" to select a cache; any other value \
    selects a NullCache
    :param cache_path: Path given to FileSystemCache when cache is "filesystem"
    :param redis: Argument given to RedisCache when cache is "redis"
    :return: None
    """
    if cache == "redis":
        nautilus_cache = RedisCache(redis)
    elif cache == "filesystem":
        nautilus_cache = FileSystemCache(cache_path)
    else:
        nautilus_cache = NullCache()

    app = Flask("Nautilus")
    if debug:
        app.logger.setLevel(logging.INFO)

    # The selected cache used to be built and then discarded, which made the
    # ``cache`` / ``cache_path`` / ``redis`` options no-ops. It is now given
    # to the resolver.
    resolver = NautilusCTSResolver(resource=repositories, cache=nautilus_cache)
    nautilus = FlaskNautilus(
        app=app,
        resolver=resolver
    )
    nautilus.resolver.parse()

    if debug:
        app.run(debug=debug, port=port, host=host)
    else:
        app.debug = debug
        http_server = HTTPServer(WSGIContainer(app))
        http_server.bind(port=port, address=host)
        http_server.start(0)
        IOLoop.current().start()
def test_resource_parser(self):
    """ Check that the resolver's inventory exposes the Hafez textgroup and
    its Divan work with the expected number of children
    """
    resolver = NautilusCTSResolver(["./tests/testing_data/farsiLit"])

    hafez = resolver.inventory["urn:cts:farsiLit:hafez"]
    self.assertEqual(
        hafez.urn, URN("urn:cts:farsiLit:hafez"), "Hafez is found"
    )
    self.assertEqual(len(hafez.works), 1, "Hafez has one child")

    divan = resolver.inventory["urn:cts:farsiLit:hafez.divan"]
    self.assertEqual(
        divan.urn, URN("urn:cts:farsiLit:hafez.divan"), "Divan is found"
    )
    self.assertEqual(len(divan.texts), 3, "Divan has 3 children")
def make_resolver(directories=None, cache_directory=None):
    """ Build a NautilusCTSResolver with a logger set to CRITICAL

    :param directories: Resource folders; when None, the result of globbing \
    "data/raw/corpora/**/**" is used
    :param cache_directory: When truthy, a FileSystemCache on this directory \
    is created, cleared and given to the resolver
    :return: The resolver
    """
    if directories is None:
        directories = glob.glob("data/raw/corpora/**/**")

    quiet_logger = logging.getLogger(__name__)
    quiet_logger.setLevel(logging.CRITICAL)

    options = {"resource": directories, "logger": quiet_logger}
    if cache_directory:
        file_cache = FileSystemCache(cache_directory)
        options["cache"] = file_cache
        print("Clearing cache")
        file_cache.clear()

    return NautilusCTSResolver(**options)
def setUp(self):
    """ Create a Flask app, a resolver on a freshly cleared file-system cache
    and the FlaskNautilusManager that ties them together
    """
    self.cache_manager = FileSystemCache("cache_dir")
    self.cache_manager.clear()

    app = Flask("Nautilus")
    self.resolver = NautilusCTSResolver(
        ["./tests/test_data/latinLit"],
        cache=self.cache_manager,
        logger=logger
    )
    http_cache = Cache(config={'CACHE_TYPE': 'filesystem'})
    self.nautilus = FlaskNautilus(
        app=app,
        resolver=self.resolver,
        flask_caching=http_cache,
        logger=logger
    )
    self.resolver.logger.disabled = True
    self.manager = FlaskNautilusManager(self.resolver, self.nautilus)
def setUp(self):
    """ Plug an HttpCtsResolver onto an in-process Flask test client

    A FlaskNautilus app is created over the latinLit fixtures. The
    retriever's ``call`` is then replaced so that each request goes through
    ``self.app`` (the Flask test client) and the parameters sent are
    recorded in ``self.parent.called``.
    """
    # The original built an unused RedisCache() here; it was never passed to
    # anything, so it has been dropped.
    app = Flask("Nautilus")
    self.cache = Cache(config={'CACHE_TYPE': 'simple'})
    self.nautilus = FlaskNautilus(
        app=app,
        resolver=NautilusCTSResolver(["./tests/test_data/latinLit"]),
        flask_caching=self.cache,
        logger=logger
    )
    app.debug = True
    self.cache.init_app(app)
    self.app = app.test_client()
    self.parent = HttpCtsRetriever("/cts")
    self.resolver = HttpCtsResolver(endpoint=self.parent)
    logassert.setup(self, self.nautilus.logger.name)
    self.nautilus.logger.disabled = True

    def call(this, parameters=None):
        """ Call an endpoint given the parameters

        :param parameters: Dictionary of parameters
        :type parameters: dict
        :rtype: text
        """
        # None instead of a shared mutable ``{}`` default
        if parameters is None:
            parameters = {}
        # Drop unset parameters and stringify the rest
        parameters = {
            key: str(value)
            for key, value in parameters.items()
            if value is not None
        }
        if this.inventory is not None and "inv" not in parameters:
            parameters["inv"] = this.inventory
        query = "&".join(
            "{}={}".format(key, value) for key, value in parameters.items()
        )
        response = self.app.get("/cts?{}".format(query))
        self.parent.called.append(parameters)
        return response.data.decode()

    self.parent.called = []
    self.parent.call = lambda x: call(self.parent, x)
def test_missing_text_resource(self):
    """ Asking for a text of the missing_text fixtures must raise UnknownCollection """
    resolver = NautilusCTSResolver(["./tests/test_data/missing_text"])
    missing_urn = "urn:cts:farsiLit:hafez.divan.missing_text"
    with self.assertRaises(UnknownCollection):
        _text, _metadata = resolver.__getText__(missing_urn)
latin.set_label("Latin Classique", "fre") dispatcher = CollectionDispatcher(tic) @dispatcher.inventory("urn:perseus:latinLit") def dispatchLatinLit(collection, path=None, **kwargs): if collection.id.startswith("urn:cts:latinLit:"): return True return False return dispatcher nautilus_cache = FileSystemCache(subprocess_cache_dir, default_timeout=0) resolver = NautilusCTSResolver(subprocess_repository, dispatcher=make_dispatcher(), cache=nautilus_cache) app = Flask("Nautilus") http_cache = Cache(app, config={ 'CACHE_TYPE': "filesystem", "CACHE_DIR": http_cache_dir, "CACHE_DEFAULT_TIMEOUT": 0 }) nautilus = FlaskNautilus(app=app, prefix="/api", name="nautilus", resolver=resolver, flask_caching=http_cache)
'urn:cts:greekLit:tlg0020.tlg002.alpheios-text-grc1', 'urn:cts:greekLit:tlg0020.tlg001.alpheios-text-grc1', 'urn:cts:greekLit:tlg0011.tlg003.alpheios-text-grc1', ] excluded_editions = [ 'urn:cts:greekLit:tlg0011.tlg003.perseus-grc2', ] resolver = NautilusCTSResolver( [ os.path.join(d, o) for o in os.listdir(d) if os.path.isdir(os.path.join(d, o)) ], dispatcher=dispatcher, filter=lambda t: (t.__subtype__ == 'edition' and t.urn.upTo(URN.VERSION ) not in excluded_editions and (str(t.urn.namespace) in unfiltered_collections or str( t.urn.textgroup) in allowed_textgroups or t.urn.upTo(URN.WORK) in allowed_works or t.urn.upTo(URN.VERSION) in allowed_editions)), cache=None) app = Flask("Nautilus") app.secret_key = os.environ.get('ALPHEIOS_NEMO_APPKEY', 'appsecret') client_id = os.environ.get('ALPHEIOS_NEMO_AUTH0_CLIENTID', 'clientidhere') client_secret = os.environ.get('ALPHEIOS_NEMO_AUTH0_CLIENTSECRET', 'clientsecrethere') proxy_base = os.environ.get('ALPHEIOS_NEMO_PROXYBASE', 'http://dev.alpheios.net:5000') auth_max_age_override = os.environ.get('ALPHEIOS_NEMO_AUTH0_MAX_AGE_SECONDS')
def setUp(self):
    """ Remove everything matching the (None, None, None) pattern from the
    graph returned by get_graph(), then create a resolver on latinLit2
    """
    graph = get_graph()
    graph.remove((None, None, None))
    fixtures = ["./tests/testing_data/latinLit2"]
    self.resolver = NautilusCTSResolver(fixtures)
class TextXMLFolderResolver(TestCase):
    """ Ensure working state of resolver

    Every test runs against a resolver built over the latinLit2 fixtures
    and exercises one of its public entry points: getTextualNode,
    getMetadata, getSiblings and getReffs.
    """

    def setUp(self):
        """ Empty the graph returned by get_graph() and build a fresh resolver """
        get_graph().remove((None, None, None))
        self.resolver = NautilusCTSResolver(["./tests/testing_data/latinLit2"])

    def test_getPassage_full(self):
        """ Test that we can get a full text """
        passage = self.resolver.getTextualNode(
            "urn:cts:latinLit:phi1294.phi002.perseus-lat2")
        self.assertIsInstance(
            passage, Passage,
            "GetPassage should always return passages objects")

        children = list(passage.getReffs())

        # We check the passage is able to perform further requests and is well instantiated
        self.assertEqual(children[0], '1',
                         "Resource should be string identifiers")

        self.assertIn("Hic est quem legis ille, quem requiris,",
                      passage.export(output=Mimetypes.PLAINTEXT),
                      "Export PrototypeText should work correctly")

        self.assertEqual(
            passage.export(output=Mimetypes.PYTHON.ETREE).xpath(
                ".//tei:div[@n='1']/tei:div[@n='1']/tei:l[@n='1']/text()",
                namespaces=XPATH_NAMESPACES,
                magic_string=False),
            ["Hic est quem legis ille, quem requiris, "],
            "Export to Etree should give an Etree or Etree like object")

    def test_getPassage_no_canonical(self):
        """ Test that we can get a subreference text passage where no canonical exists"""
        passage = self.resolver.getTextualNode(
            "urn:cts:latinLit:phi0959.phi010.perseus-eng2", "2")
        self.assertEqual(passage.export(Mimetypes.PLAINTEXT),
                         "Omne fuit Musae carmen inerme meae; ",
                         "Passage should resolve if directly asked")
        # Work-level and textgroup-level URNs must be refused; the return
        # value is irrelevant, so it is not bound to a name
        with self.assertRaises(UnknownCollection):
            self.resolver.getTextualNode(
                "urn:cts:latinLit:phi0959.phi010", "2")
        with self.assertRaises(InvalidURN):
            self.resolver.getTextualNode("urn:cts:latinLit:phi0959", "2")

    def test_getPassage_subreference(self):
        """ Test that we can get a subreference text passage"""
        passage = self.resolver.getTextualNode(
            "urn:cts:latinLit:phi1294.phi002.perseus-lat2", "1.1")

        # We check we made a reroute to GetPassage request
        self.assertIsInstance(
            passage, Passage,
            "GetPassage should always return passages objects")

        children = list(passage.getReffs())

        self.assertEqual(children[0], '1.1.1',
                         "Resource should be string identifiers")

        self.assertIn("Hic est quem legis ille, quem requiris,",
                      passage.export(output=Mimetypes.PLAINTEXT),
                      "Export PrototypeText should work correctly")

        # The work-level URN must give the same text as the version URN
        canonical = self.resolver.getTextualNode(
            "urn:cts:latinLit:phi1294.phi002", "1.1")
        self.assertEqual(passage.export(output=Mimetypes.PLAINTEXT),
                         canonical.export(output=Mimetypes.PLAINTEXT),
                         "Canonical text should work")

        self.assertEqual(
            passage.export(output=Mimetypes.PYTHON.ETREE).xpath(
                ".//tei:l[@n='1']/text()",
                namespaces=XPATH_NAMESPACES,
                magic_string=False),
            ["Hic est quem legis ille, quem requiris, "],
            "Export to Etree should give an Etree or Etree like object")

    def test_getPassage_full_metadata(self):
        """ Test that we can get a full text with its metadata"""
        passage = self.resolver.getTextualNode(
            "urn:cts:latinLit:phi1294.phi002.perseus-lat2", metadata=True)

        self.assertIsInstance(
            passage, Passage,
            "GetPassage should always return passages objects")
        self.assertEqual(
            str(passage.metadata[RDF_NAMESPACES.CTS.term("title"), "eng"]),
            "Epigrammata",
            "Local Inventory Files should be parsed and aggregated correctly")
        self.assertEqual(
            str(passage.metadata[RDF_NAMESPACES.CTS.term("groupname"), "eng"]),
            "Martial",
            "Local Inventory Files should be parsed and aggregated correctly")
        self.assertEqual(
            str(passage.metadata[RDF_NAMESPACES.CTS.term("label"), "eng"]),
            "Epigrams",
            "Local Inventory Files should be parsed and aggregated correctly")
        self.assertEqual(
            str(passage.metadata[RDF_NAMESPACES.CTS.term("description"), "eng"]),
            "M. Valerii Martialis Epigrammaton libri / recognovit W. Heraeus",
            "Local Inventory Files should be parsed and aggregated correctly")
        self.assertEqual(
            passage.citation.name, "book",
            "Local Inventory Files should be parsed and aggregated correctly")
        self.assertEqual(
            len(passage.citation), 3,
            "Local Inventory Files should be parsed and aggregated correctly")

        children = list(passage.getReffs(level=3))
        # We check the passage is able to perform further requests and is well instantiated
        self.assertEqual(children[0], '1.pr.1',
                         "Resource should be string identifiers")

        self.assertIn("Hic est quem legis ille, quem requiris,",
                      passage.export(output=Mimetypes.PLAINTEXT),
                      "Export PrototypeText should work correctly")

        self.assertEqual(
            passage.export(output=Mimetypes.PYTHON.ETREE).xpath(
                ".//tei:div[@n='1']/tei:div[@n='1']/tei:l[@n='1']/text()",
                namespaces=XPATH_NAMESPACES,
                magic_string=False),
            ["Hic est quem legis ille, quem requiris, "],
            "Export to Etree should give an Etree or Etree like object")

    def test_getPassage_prevnext(self):
        """ Test that a passage exposes its previous and next siblings"""
        passage = self.resolver.getTextualNode(
            "urn:cts:latinLit:phi1294.phi002.perseus-lat2",
            subreference="1.1",
            metadata=True)

        self.assertIsInstance(
            passage, Passage,
            "GetPassage should always return passages objects")
        self.assertEqual(passage.prevId, "1.pr",
                         "Previous Passage ID should be parsed")
        self.assertEqual(passage.nextId, "1.2",
                         "Next Passage ID should be parsed")

        children = list(passage.getReffs())
        # Ensure navigability
        self.assertIn(
            "verentia ludant; quae adeo antiquis auctoribus defuit, ut",
            passage.prev.export(output=Mimetypes.PLAINTEXT),
            "Left and Right Navigation should be available")
        self.assertIn("Qui tecum cupis esse meos ubicumque libellos ",
                      passage.next.export(output=Mimetypes.PLAINTEXT),
                      "Left and Right Navigation should be available")

        # We check the passage is able to perform further requests and is well instantiated
        self.assertEqual(children[0], '1.1.1',
                         "Resource should be string identifiers")

        self.assertIn("Hic est quem legis ille, quem requiris,",
                      passage.export(output=Mimetypes.PLAINTEXT),
                      "Export PrototypeText should work correctly")

        self.assertEqual(
            passage.export(output=Mimetypes.PYTHON.ETREE).xpath(
                ".//tei:l[@n='1']/text()",
                namespaces=XPATH_NAMESPACES,
                magic_string=False),
            ["Hic est quem legis ille, quem requiris, "],
            "Export to Etree should give an Etree or Etree like object")

    def test_getPassage_metadata_prevnext(self):
        """ Test that a passage asked with metadata and prevnext carries both"""
        passage = self.resolver.getTextualNode(
            "urn:cts:latinLit:phi1294.phi002.perseus-lat2",
            subreference="1.1",
            metadata=True,
            prevnext=True)

        self.assertIsInstance(
            passage, Passage,
            "GetPassage should always return passages objects")
        self.assertEqual(
            str(passage.metadata[RDF_NAMESPACES.CTS.term("title"), "eng"]),
            "Epigrammata",
            "Local Inventory Files should be parsed and aggregated correctly")
        self.assertEqual(
            str(passage.metadata[RDF_NAMESPACES.CTS.term("groupname"), "eng"]),
            "Martial",
            "Local Inventory Files should be parsed and aggregated correctly")
        self.assertEqual(
            str(passage.metadata[RDF_NAMESPACES.CTS.term("label"), "eng"]),
            "Epigrams",
            "Local Inventory Files should be parsed and aggregated correctly")
        self.assertEqual(
            str(passage.metadata[RDF_NAMESPACES.CTS.term("description"), "eng"]),
            "M. Valerii Martialis Epigrammaton libri / recognovit W. Heraeus",
            "Local Inventory Files should be parsed and aggregated correctly")
        self.assertEqual(
            passage.citation.name, "book",
            "Local Inventory Files should be parsed and aggregated correctly")
        self.assertEqual(
            len(passage.citation), 3,
            "Local Inventory Files should be parsed and aggregated correctly")
        self.assertEqual(passage.prevId, "1.pr",
                         "Previous Passage ID should be parsed")
        self.assertEqual(passage.nextId, "1.2",
                         "Next Passage ID should be parsed")

        children = list(passage.getReffs())
        # Ensure navigability
        self.assertIn(
            "verentia ludant; quae adeo antiquis auctoribus defuit, ut",
            passage.prev.export(output=Mimetypes.PLAINTEXT),
            "Left and Right Navigation should be available")
        self.assertIn("Qui tecum cupis esse meos ubicumque libellos ",
                      passage.next.export(output=Mimetypes.PLAINTEXT),
                      "Left and Right Navigation should be available")

        # We check the passage is able to perform further requests and is well instantiated
        self.assertEqual(children[0], '1.1.1',
                         "Resource should be string identifiers")

        self.assertIn("Hic est quem legis ille, quem requiris,",
                      passage.export(output=Mimetypes.PLAINTEXT),
                      "Export PrototypeText should work correctly")

        self.assertEqual(
            passage.export(output=Mimetypes.PYTHON.ETREE).xpath(
                ".//tei:l[@n='1']/text()",
                namespaces=XPATH_NAMESPACES,
                magic_string=False),
            ["Hic est quem legis ille, quem requiris, "],
            "Export to Etree should give an Etree or Etree like object")

    def test_getMetadata_full(self):
        """ Checks retrieval of Metadata information """
        metadata = self.resolver.getMetadata()
        self.assertIsInstance(metadata, Collection,
                              "Resolver should return a collection object")
        self.assertIsInstance(metadata.members[0], Collection,
                              "Members of Inventory should be TextGroups")
        self.assertEqual(
            len(metadata.descendants), 43,
            "There should be as many descendants as there is edition, translation, works and textgroup + 1 for "
            "default inventory")
        self.assertEqual(
            len(metadata.readableDescendants), 25,
            "There should be as many readable descendants as there is edition, translation(25 ed+tr)"
        )
        self.assertEqual(
            len([
                x for x in metadata.readableDescendants
                if isinstance(x, CtsTextMetadata)
            ]), 25,
            "There should be 24 editions + 1 translations in readableDescendants"
        )
        self.assertEqual(
            len(
                metadata.export(output=Mimetypes.PYTHON.ETREE).xpath(
                    "//ti:edition[@urn='urn:cts:latinLit:phi1294.phi002.perseus-lat2']",
                    namespaces=XPATH_NAMESPACES)), 1,
            "There should be one node in exported format corresponding to lat2"
        )
        self.assertCountEqual([
            x["@id"] for x in metadata.export(
                output=Mimetypes.JSON.DTS.Std)["@graph"]["dts:members"]
        ], [
            "urn:cts:latinLit:phi1294", "urn:cts:latinLit:phi0959",
            "urn:cts:greekLit:tlg0003", "urn:cts:latinLit:phi1276"
        ], "There should be 4 Members in DTS JSON")

    def test_getMetadata_subset(self):
        """ Checks retrieval of Metadata information """
        metadata = self.resolver.getMetadata(
            objectId="urn:cts:latinLit:phi1294.phi002")
        self.assertIsInstance(metadata, Collection,
                              "Resolver should return a collection object")
        self.assertIsInstance(metadata.members[0], CtsTextMetadata,
                              "Members of PrototypeWork should be Texts")
        self.assertEqual(
            len(metadata.descendants), 1,
            "There should be as many descendants as there is edition, translation"
        )
        self.assertEqual(len(metadata.readableDescendants), 1,
                         "There should be 1 edition in readableDescendants")
        self.assertEqual(
            len([
                x for x in metadata.readableDescendants
                if isinstance(x, CtsTextMetadata)
            ]), 1, "There should be 1 edition in readableDescendants")
        self.assertIsInstance(metadata.parent, CtsTextgroupMetadata,
                              "First parent should be PrototypeTextGroup")
        self.assertIsInstance(metadata.parents[0], CtsTextgroupMetadata,
                              "First parent should be PrototypeTextGroup")
        self.assertEqual(
            len(
                metadata.export(output=Mimetypes.PYTHON.ETREE).xpath(
                    "//ti:edition[@urn='urn:cts:latinLit:phi1294.phi002.perseus-lat2']",
                    namespaces=XPATH_NAMESPACES)), 1,
            "There should be one node in exported format corresponding to lat2"
        )
        self.assertEqual([
            x["@id"] for x in metadata.export(
                output=Mimetypes.JSON.DTS.Std)["@graph"]["dts:members"]
        ], ["urn:cts:latinLit:phi1294.phi002.perseus-lat2"],
            "There should be one member in DTS JSON")

        tr = self.resolver.getMetadata(
            objectId="urn:cts:greekLit:tlg0003.tlg001.opp-fre1")
        self.assertIsInstance(tr, CtsTranslationMetadata,
                              "Metadata should be translation")
        self.assertIn("Histoire de la Guerre du Péloponnése",
                      tr.get_description("eng"),
                      "Description should be the right one")

    def test_getSiblings(self):
        """ Ensure getSiblings works well """
        previous, nextious = self.resolver.getSiblings(
            textId="urn:cts:latinLit:phi1294.phi002.perseus-lat2",
            subreference="1.1")
        self.assertEqual(previous, "1.pr", "Previous should be well computed")
        # Message used to say "Previous" for the next sibling as well
        self.assertEqual(nextious, "1.2", "Next should be well computed")

    def test_getSiblings_nextOnly(self):
        """ Ensure getSiblings works well when there is only the next passage"""
        previous, nextious = self.resolver.getSiblings(
            textId="urn:cts:latinLit:phi1294.phi002.perseus-lat2",
            subreference="1.pr")
        self.assertEqual(previous, None, "Previous Should not exist")
        self.assertEqual(nextious, "1.1", "Next should be well computed")

    def test_getSiblings_prevOnly(self):
        """ Ensure getSiblings works well when there is only the previous passage"""
        previous, nextious = self.resolver.getSiblings(
            textId="urn:cts:latinLit:phi1294.phi002.perseus-lat2",
            subreference="14.223")
        self.assertEqual(previous, "14.222",
                         "Previous should be well computed")
        self.assertEqual(nextious, None, "Next should not exist")

    def test_getReffs_full(self):
        """ Ensure getReffs works well """
        reffs = self.resolver.getReffs(
            textId="urn:cts:latinLit:phi1294.phi002.perseus-lat2", level=1)
        self.assertEqual(len(reffs), 14, "There should be 14 books")
        self.assertEqual(reffs[0], "1")

        reffs = self.resolver.getReffs(
            textId="urn:cts:latinLit:phi1294.phi002.perseus-lat2", level=2)
        self.assertEqual(len(reffs), 1527, "There should be 1527 poems")
        self.assertEqual(reffs[0], "1.pr")

        reffs = self.resolver.getReffs(
            textId="urn:cts:latinLit:phi1294.phi002.perseus-lat2",
            subreference="1.1",
            level=1)
        self.assertEqual(len(reffs), 6, "There should be 6 references")
        self.assertEqual(reffs[0], "1.1.1")
def test_get_capabilities(self):
    """ Run ``__getTextMetadata__`` with a table of filters on the farsiLit
    fixtures and check the number of texts returned for each
    """
    resolver = NautilusCTSResolver(["./tests/testing_data/farsiLit"])
    resolver.parse()

    # (keyword arguments, expected number of texts, failure message)
    checks = (
        ({}, 4, "General no filter works"),
        ({"category": "edition"}, 2, "Type filter works"),
        ({"lang": "ger"}, 1, "Filtering on language works"),
        ({"category": "edition", "lang": "ger"}, 0,
         "Type filter + lang works"),
        ({"category": "translation", "lang": "ger"}, 1,
         "Type filter + lang works"),
        ({"page": 1, "limit": 2, "pagination": True}, 2,
         "Pagination works without other filters"),
        ({"page": 2, "limit": 2, "pagination": True}, 2,
         "Pagination works without other filters at list end"),
        ({"urn": "urn:cts:farsiLit"}, 3, "URN Filtering works"),
        ({"urn": "urn:cts:latinLit"}, 1, "URN Filtering works"),
        ({"urn": "urn:cts:farsiLit:hafez.divan.perseus-eng1"}, 1,
         "Complete URN filtering works"),
    )
    for filters, expected_count, message in checks:
        texts = resolver.__getTextMetadata__(**filters)[0]
        self.assertEqual(len(texts), expected_count, message)
import os

from capitains_nautilus.cts.resolver import NautilusCTSResolver
# NOTE(review): werkzeug.contrib was removed from recent Werkzeug releases;
# confirm the pinned Werkzeug version still ships this module.
from werkzeug.contrib.cache import FileSystemCache

# Layout: <root>/data holds one sub-directory per corpus, <root>/cache holds
# the resolver's file-system cache.
root_path = "/var/lib/nautilus"
data_path = os.path.join(root_path, "data")
cache_path = os.path.join(root_path, "cache")

# makedirs(exist_ok=True) replaces the exists()/mkdir() pair: no window
# between the check and the creation, and a missing root_path is created too.
os.makedirs(data_path, exist_ok=True)
os.makedirs(cache_path, exist_ok=True)

cache = FileSystemCache(cache_path)

# Every directory directly under data_path is given to the resolver
corpus_paths = [
    os.path.join(data_path, entry) for entry in os.listdir(data_path)
]
resolver = NautilusCTSResolver(
    [path for path in corpus_paths if os.path.isdir(path)],
    cache=cache
)


def preload():
    """ Ask the resolver for its root metadata (``objectId=None``)

    NOTE(review): presumably meant to warm the cache before serving; verify
    against the caller.
    """
    resolver.getMetadata(objectId=None)
parent=general_collection) greek_texts.set_label("Greek Texts", "eng") organizer = CollectionDispatcher(general_collection, default_inventory_name="id:misc") @organizer.inventory("greek_texts") def organize_my_meadow(collection, path=None, **kwargs): if collection.id.startswith("urn:cts:greekLit"): return True return False flask_app = Flask("Flask Application for Nemo") resolver = NautilusCTSResolver(["corpora/meadow"], dispatcher=organizer) resolver.parse() nautilus_api = FlaskNautilus(prefix="/api", app=flask_app, resolver=resolver) nemo = Nemo( name="InstanceNemo", app=flask_app, resolver=resolver, base_url="", css=["assets/css/theme.css"], js=["assets/js/alpheios.js"], statics=["assets/images/logo.jpg"], transform={"default": "components/main.xsl"}, templates={"main": "templates/main"}, chunker={"urn:cts:greekLit:tlg2856.tlg001.1st1K-grc1": meadow_chunker})
def test_dispatching_output(self):
    """ Dispatch the latinLit2 data into three inventories and check the CTS exports """
    tic = CtsTextInventoryCollection()

    latin = CtsTextInventoryMetadata("urn:perseus:latinLit", parent=tic)
    latin.set_label("Classical Latin", "eng")

    farsi = CtsTextInventoryMetadata("urn:perseus:farsiLit", parent=tic)
    farsi.set_label("Farsi", "eng")

    gc = CtsTextInventoryMetadata("urn:perseus:greekLit", parent=tic)
    gc.set_label("Ancient Greek", "eng")
    gc.set_label("Grec Ancien", "fre")

    dispatcher = CollectionDispatcher(tic)

    # One predicate per inventory, each keyed on the CTS namespace prefix
    @dispatcher.inventory("urn:perseus:latinLit")
    def dispatchLatinLit(collection, path=None, **kwargs):
        return collection.id.startswith("urn:cts:latinLit:")

    @dispatcher.inventory("urn:perseus:farsiLit")
    def dispatchfFarsiLit(collection, path=None, **kwargs):
        return collection.id.startswith("urn:cts:farsiLit:")

    @dispatcher.inventory("urn:perseus:greekLit")
    def dispatchGreekLit(collection, path=None, **kwargs):
        return collection.id.startswith("urn:cts:greekLit:")

    resolver = NautilusCTSResolver(
        ["./tests/testing_data/latinLit2"],
        dispatcher=dispatcher
    )
    resolver.logger.disabled = True
    resolver.REMOVE_EMPTY = False
    resolver.parse()

    # Export everything first, then each inventory on its own
    cts_xml = Mimetypes.XML.CTS
    everything_xml = resolver.getMetadata().export(cts_xml)
    latin_xml = resolver.getMetadata("urn:perseus:latinLit").export(cts_xml)
    greek_xml = resolver.getMetadata("urn:perseus:greekLit").export(cts_xml)
    farsi_xml = resolver.getMetadata("urn:perseus:farsiLit").export(cts_xml)

    # Empty the graph before parsing the exported XML back
    get_graph().remove((None, None, None))
    latin_stuff = XmlCtsTextInventoryMetadata.parse(latin_xml)
    greek_stuff = XmlCtsTextInventoryMetadata.parse(greek_xml)
    farsi_stuff = XmlCtsTextInventoryMetadata.parse(farsi_xml)

    self.assertEqual(
        len(latin_stuff.readableDescendants), 19,
        "There should be 19 readable descendants in Latin"
    )
    self.assertIsInstance(
        latin_stuff, CtsTextInventoryMetadata,
        "should be textinventory"
    )
    self.assertEqual(
        len(greek_stuff.readableDescendants), 6,
        "There should be 6 readable descendants in Greek [6 only in __cts__.xml]"
    )
    self.assertEqual(
        len(farsi_stuff.descendants), 0,
        "There should be nothing in FarsiLit"
    )
    # Text inventories carry no label in CTS, so none comes back after export
    self.assertEqual(
        greek_stuff.get_label("fre"), None,
        "Label should be correct"
    )

    # Same check on the export of the whole collection, from an empty graph
    get_graph().remove((None, None, None))
    everything = XmlCtsTextInventoryMetadata.parse(everything_xml)
    self.assertEqual(
        len(everything.readableDescendants), 25,
        "There should be all 25 readable descendants in the master collection"
    )
# -*- coding: utf-8 -*-
from flask import Flask
from capitains_nautilus.cts.resolver import NautilusCTSResolver
from capitains_nautilus.flask_ext import FlaskNautilus
# FullNemo comes from the local ``fullnemo`` module rather than from
# ``flask_nemo.fullnemo``.
from fullnemo import FullNemo
from dispatcher import dispatcher

flask_app = Flask("Flask Application for Nemo")

# The resolver is built on the theses corpus; resolver.parse() is not
# invoked here.
resolver = NautilusCTSResolver(
    ["/usr/share/dh-data/theses"],
    dispatcher=dispatcher,
)

# CTS API, mounted under /nemo/api
nautilus_api = FlaskNautilus(
    prefix="/nemo/api",
    app=flask_app,
    resolver=resolver,
)

# Browsing interface, mounted under /nemo
nemo = FullNemo(
    name="Positions de thèse",
    app=flask_app,
    resolver=resolver,
    base_url="/nemo",
    css=[
        "assets/css/html.css",
        "assets/css/postprod.css",
    ],
    js=[
        "assets/js/Tree.js",
        "assets/js/postprod.js",
    ],
    statics=["assets/images/logo.png"],
    transform={
        "default": "assets/xsl/tei2html.xsl",
        "common": "assets/xsl/common.xsl",
    },
    templates={"main": "templates/main"},
)

if __name__ == "__main__":
    # Run Flask's built-in server in debug mode when executed as a script
    flask_app.run(debug=True)
from werkzeug.contrib.cache import FileSystemCache from capitains_nautilus.cts.resolver import NautilusCTSResolver from capitains_nautilus.flask_ext import FlaskNautilus from . import create_app from .nemo import NemoFormulae from .dispatcher_builder import organizer flask_app = create_app() resolver = NautilusCTSResolver( flask_app.config['CORPUS_FOLDERS'], dispatcher=organizer, # cache=FileSystemCache(flask_app.config['CACHE_DIRECTORY']) ) # nautilus_api = FlaskNautilus(prefix="/api", resolver=resolver, app=flask_app) nemo = NemoFormulae(name="InstanceNemo", app=flask_app, resolver=resolver, base_url="", css=["assets/css/theme.css"], js=["assets/js/empty.js"], static_folder="./assets/", transform={ "default": "components/epidoc.xsl", "notes": "components/extract_notes.xsl" }, templates={ "main": "templates/main", "errors": "templates/errors", "auth": "templates/auth", "search": "templates/search"
fro = PrototypeTextInventory("urn:geste", parent=tic) #Rien à voir avec les identifiants cts, c'est un identifiant de projet fro.set_label("Corpus de chansons de geste", "fro") dispatcher = CollectionDispatcher(tic) @dispatcher.inventory("urn:geste") def dispatchGeste(collection, path=None, **kwargs): if collection.id.startswith("urn:cts:froLit"): #et cette fois, c'est bien du cts et on file le début des chemins de citation. return True return False cache = Cache() NautilusDummy = NautilusCTSResolver( [ "." ], dispatcher=dispatcher ) NautilusDummy.logger.setLevel(logging.ERROR) def scheme_grouper(text, getreffs): level = len(text.citation) groupby = 100 types = [citation.name for citation in text.citation] if 'word' in types: types = types[:types.index("word")] if str(text.id) == "urn:cts:latinLit:stoa0040.stoa062.opp-lat1": level, groupby = 1, 2 elif types == ["vers", "mot"]: level, groupby = 1, 100
def build_resolver(configuration_file):
    """ Build the dispatcher, the resolver and the cache described by an XML configuration file

    The file is read for ``//corpora/corpus`` (corpus directories, resolved
    with ``relative_folder`` against the configuration file),
    ``//collections/collection`` (inventories, their ``name`` labels and
    their ``filters``) and ``//cache-folder`` (file system cache location).

    :param configuration_file: Path of the XML configuration file
    :return: Organizer, Resolver and Cache handler
    """

    # The factories below bind their argument when the filter is created.
    # A lambda written directly inside the ``for`` loops would look the loop
    # variable up only when called, so every filter would end up testing the
    # last value seen by the loop.
    def make_prefix_filter(prefix):
        return lambda collection: str(collection.id).startswith(prefix)

    def make_citation_filter(citation_name):
        return lambda collection: citation_contain_filter(
            collection, citation_name)

    def make_directory_filter(target_directory):
        return lambda collection, path=None: path.startswith(
            relative_folder(configuration_file, target_directory))

    with open(configuration_file) as f:
        xml = etree.parse(f)

    directories = [
        # Compute path relative to the configuration files
        relative_folder(configuration_file, directory)
        for directory in xml.xpath("//corpora/corpus/text()")
    ]

    default_collection = None
    general_collection = CtsTextInventoryCollection()
    filters_to_register = []

    for collection in xml.xpath("//collections/collection"):
        identifier = collection.xpath("./identifier/text()")[0]
        if collection.get("default") == "true":
            default_collection = identifier

        current_collection = CtsTextInventoryMetadata(
            identifier, parent=general_collection)
        for name in collection.xpath("./name"):
            current_collection.set_label(name.text, name.get("lang"))

        # We look at dispatching filters in the collection
        for filters in collection.xpath("./filters"):
            # We register prefix filters
            prefix_filters = [
                make_prefix_filter(prefix)
                for prefix in filters.xpath("./id-starts-with/text()")
            ]

            # We register citation filters
            citation_filters = [
                make_citation_filter(citation_name)
                for citation_name in filters.xpath("./citation-contains/text()")
            ]

            # We register path based filters
            directory_filters = [
                make_directory_filter(target_directory)
                for target_directory in filters.xpath("./folder/text()")
            ]

            filters_to_register.append((
                identifier,
                collection_dispatcher_builder(
                    collection, prefix_filters, citation_filters,
                    directory_filters)
            ))

    # Create the dispatcher
    organizer = CollectionDispatcher(
        general_collection, default_inventory_name=default_collection)
    for destination_collection, anonymous_dispatching_function in filters_to_register:
        organizer.add(anonymous_dispatching_function, destination_collection)

    # Set-up the cache folder
    # TODO: Add a system for redis?
    cache = None
    # When several cache-folder nodes exist, the last one is the one kept
    for cache_folder in xml.xpath("//cache-folder/text()"):
        cache = FileSystemCache(cache_folder)
    if cache is None:
        # No cache folder configured: fall back to SimpleCache
        cache = SimpleCache()

    resolver = NautilusCTSResolver(
        resource=directories, dispatcher=organizer, cache=cache)

    return organizer, resolver, cache
import os import flask from flask_nemo import Nemo from capitains_nautilus.cts.resolver import NautilusCTSResolver from capitains_nautilus.flask_ext import FlaskNautilus app = flask.Flask("CTS webserver demo (nemo)") LOCAL_CONFIG = 'config.py' if os.path.exists(LOCAL_CONFIG): app.config.from_pyfile(LOCAL_CONFIG) corpora = [entry.path for entry in os.scandir('corpora') if entry.is_dir()] print('app: resolver given', corpora) resolver = NautilusCTSResolver(corpora) resolver.parse() nautilus = FlaskNautilus(prefix='/api', app=app, resolver=resolver) nemo = Nemo(name='Nemo', app=app, resolver=resolver, base_url='/nemo') @app.route('/') def home(): '''Placeholder home page to help visitors. We're mainly here to serve the nemo browser and api endoints, but provide a simple landing page in case we're serving the whole domain.''' return flask.render_template('index.html',