def test_dispatching_error(self):
    """ Check how parsing reacts to RAISE_ON_UNDISPATCHED.

    With the flag set to True on the resolver class, parsing latinLit2 with
    a dispatcher that only knows the latinLit inventory is expected to raise.
    With the flag set to False, the same parsing must not raise
    UndispatchedTextError.
    """
    tic = CtsTextInventoryCollection()
    latin = CtsTextInventoryMetadata("urn:perseus:latinLit", parent=tic)
    latin.set_label("Classical Latin", "eng")
    dispatcher = CollectionDispatcher(tic)
    # We remove default dispatcher
    dispatcher.__methods__ = []

    @dispatcher.inventory("urn:perseus:latinLit")
    def dispatchLatinLit(collection, path=None, **kwargs):
        if collection.id.startswith("urn:cts:latinLit:"):
            return True
        return False

    NautilusCTSResolver.RAISE_ON_UNDISPATCHED = True
    try:
        with self.assertRaises(Exception):
            resolver = NautilusCTSResolver(["./tests/testing_data/latinLit2"], dispatcher=dispatcher)
            resolver.logger.disabled = True
            resolver.parse()
    finally:
        # The flag is set on the class, not on an instance: reset it even
        # when the assertion above fails so it cannot leak into other tests.
        NautilusCTSResolver.RAISE_ON_UNDISPATCHED = False

    try:
        resolver = NautilusCTSResolver(["./tests/testing_data/latinLit2"], dispatcher=dispatcher)
        resolver.logger.disabled = True
        resolver.REMOVE_EMPTY = False
        resolver.parse()
    except UndispatchedTextError:
        self.fail("UndispatchedTextError should not have been raised")
def setUp(self):
    """ Build the whole application on top of the existing caches.

    Once everything is wired, ``resolver.parse`` is swapped for a function
    that raises ``self.ParsingCalled``; the original method is kept in
    ``self.former_parse``.
    """
    # Full creation of app
    self.cache = FileSystemCache(subprocess_cache_dir, default_timeout=0)
    self.resolver = NautilusCTSResolver(
        subprocess_repository,
        dispatcher=make_dispatcher(),
        cache=self.cache
    )
    self.__app__ = Flask("Nautilus")

    http_cache_settings = {
        'CACHE_TYPE': "filesystem",
        "CACHE_DIR": http_cache_dir,
        "CACHE_DEFAULT_TIMEOUT": 0
    }
    self.http_cache = Cache(self.app, config=http_cache_settings)
    self.nautilus = FlaskNautilus(
        app=self.app,
        prefix="/api",
        name="nautilus",
        resolver=self.resolver,
        flask_caching=self.http_cache
    )
    self.test_client = self.app.test_client()

    # Option to ensure cache works
    self.former_parse = self.resolver.parse

    def forbidden_parse(*args, **kwargs):
        raise self.ParsingCalled("Parse should not be called")

    self.resolver.parse = forbidden_parse
def test_get_capabilities_nocites(self):
    """ Filtering the latinLit data on stoa0045.stoa008.perseus-lat2 yields no text """
    resolver = NautilusCTSResolver(["./tests/testing_data/latinLit"])
    matches = resolver.__getTextMetadata__(
        urn="urn:cts:latinLit:stoa0045.stoa008.perseus-lat2"
    )[0]
    self.assertEqual(
        len(matches), 0,
        "Texts without citations were ignored"
    )
def test_get_shared_textgroup_cross_repo(self):
    """ Check that two versions of phi1294.phi002 are both retrievable
    from a resolver built on the farsiLit and latinLit2 directories
    """
    Repository = NautilusCTSResolver([
        "./tests/testing_data/farsiLit",
        "./tests/testing_data/latinLit2"
    ])
    self.assertIsNotNone(
        Repository.__getText__(
            "urn:cts:latinLit:phi1294.phi002.perseus-lat2"),
        "We should find perseus-lat2")
    # The failure message previously named perseus-lat2 here as well,
    # which pointed at the wrong text when this assertion failed.
    self.assertIsNotNone(
        Repository.__getText__("urn:cts:latinLit:phi1294.phi002.opp-lat2"),
        "We should find opp-lat2")
def test_restricted_cors(self):
    """ Per-route CORS settings must show up in the /cts response headers """
    app = Flask("Nautilus")
    allowed_methods = {
        "r_cts": "OPTIONS",
        "r_dts_collection": "OPTIONS",
        "r_dts_collections": "OPTIONS"
    }
    allowed_origins = {
        "r_cts": "foo.bar",
        "r_dts_collection": "*",
        "r_dts_collections": "*"
    }
    FlaskNautilus(
        app=app,
        resolver=NautilusCTSResolver(["./tests/test_data/latinLit"]),
        access_Control_Allow_Methods=allowed_methods,
        access_Control_Allow_Origin=allowed_origins
    )
    client = app.test_client()

    # One request per checked header, as two independent calls
    origin_header = client.get("/cts?request=GetCapabilities").headers["Access-Control-Allow-Origin"]
    self.assertEqual(origin_header, "foo.bar")

    methods_header = client.get("/cts?request=GetCapabilities").headers["Access-Control-Allow-Methods"]
    self.assertEqual(methods_header, "OPTIONS")
def test_text_resource(self):
    """ Retrieve a text object and read one passage out of it """
    resolver = NautilusCTSResolver(["./tests/testing_data/farsiLit"])
    text, metadata = resolver.__getText__(
        "urn:cts:farsiLit:hafez.divan.perseus-eng1"
    )
    citation_depth = len(text.citation)
    self.assertEqual(citation_depth, 4,
                     "Object has a citation property of length 4")

    passage = text.getTextualNode(Reference("1.1.1.1"))
    plain_text = passage.export(output=Mimetypes.PLAINTEXT)
    self.assertEqual(
        plain_text,
        "Ho ! Saki, pass around and offer the bowl (of love for God) : ### ",
        "It should be possible to retrieve text"
    )
def create_app(self):
    """ Create the app from TestConfig, attach a NemoFormulae instance
    (stored on ``self.nemo``) and return the app
    """
    app = create_app(TestConfig)
    resolver = NautilusCTSResolver(app.config['CORPUS_FOLDERS'])
    template_folders = {
        "main": "templates/main",
        "errors": "templates/errors",
        "auth": "templates/auth",
        "search": "templates/search"
    }
    self.nemo = NemoFormulae(
        name="InstanceNemo",
        resolver=resolver,
        app=app,
        base_url="",
        templates=template_folders,
        css=["assets/css/theme.css"],
        js=["assets/js/empty.js"],
        static_folder="./assets/"
    )
    return app
def test_dispatching_latin_greek(self):
    """ Dispatch latinLit2 over three inventories and check what lands where """
    tic = CtsTextInventoryCollection()
    # Inventories are created and labelled in declaration order
    inventories = (
        ("urn:perseus:latinLit", (("Classical Latin", "eng"),)),
        ("urn:perseus:farsiLit", (("Farsi", "eng"),)),
        ("urn:perseus:greekLit", (("Ancient Greek", "eng"), ("Grec Ancien", "fre"))),
    )
    for identifier, labels in inventories:
        inventory = XmlCtsTextInventoryMetadata(identifier, parent=tic)
        for label, lang in labels:
            inventory.set_label(label, lang)

    dispatcher = CollectionDispatcher(tic)

    @dispatcher.inventory("urn:perseus:latinLit")
    def dispatchLatinLit(collection, path=None, **kwargs):
        return collection.id.startswith("urn:cts:latinLit:")

    @dispatcher.inventory("urn:perseus:farsiLit")
    def dispatchfFarsiLit(collection, path=None, **kwargs):
        return collection.id.startswith("urn:cts:farsiLit:")

    @dispatcher.inventory("urn:perseus:greekLit")
    def dispatchGreekLit(collection, path=None, **kwargs):
        return collection.id.startswith("urn:cts:greekLit:")

    resolver = NautilusCTSResolver(
        ["./tests/testing_data/latinLit2"],
        dispatcher=dispatcher
    )
    resolver.logger.disabled = True
    resolver.REMOVE_EMPTY = False
    resolver.parse()

    latin_stuff = resolver.getMetadata("urn:perseus:latinLit")
    greek_stuff = resolver.getMetadata("urn:perseus:greekLit")
    farsi_stuff = resolver.getMetadata("urn:perseus:farsiLit")

    latin_readable = len(latin_stuff.readableDescendants)
    self.assertEqual(latin_readable, 19,
                     "There should be 19 readable descendants in Latin")
    self.assertIsInstance(latin_stuff, CtsTextInventoryMetadata,
                          "should be textinventory")

    greek_readable = len(greek_stuff.readableDescendants)
    self.assertEqual(
        greek_readable, 6,
        "There should be 6 readable descendants in Greek [6 only in __cts__.xml]"
    )

    farsi_count = len(farsi_stuff.descendants)
    self.assertEqual(farsi_count, 0,
                     "There should be nothing in FarsiLit")

    french_label = str(greek_stuff.get_label("fre"))
    self.assertEqual(french_label, "Grec Ancien",
                     "Label should be correct")

    # A Greek textgroup must not be reachable through the Latin inventory
    with self.assertRaises(KeyError):
        _ = latin_stuff["urn:cts:greekLit:tlg0003"]
def setUp(self):
    """ Build the cache through the run_cache.py script, then create a
    resolver on that cache whose ``parse`` raises when called
    """
    exit_code = call([python, "./tests/cts/run_cache.py"], cwd=cwd)
    if exit_code != 0:
        raise Exception("Creating cache failed")

    self.cache = FileSystemCache(subprocess_cache_dir)
    self.resolver = NautilusCTSResolver(
        resource=subprocess_repository,
        cache=self.cache
    )
    self.resolver.logger.disabled = True

    def refuse_parse(*args, **kwargs):
        raise Exception("Parse should not be called")

    self.resolver.parse = refuse_parse
def test_resource_parser(self):
    """ The farsiLit directory exposes the Hafez textgroup and its Divan work """
    resolver = NautilusCTSResolver(["./tests/testing_data/farsiLit"])

    hafez_urn = resolver.inventory["urn:cts:farsiLit:hafez"].urn
    self.assertEqual(hafez_urn, URN("urn:cts:farsiLit:hafez"),
                     "Hafez is found")

    hafez_works = resolver.inventory["urn:cts:farsiLit:hafez"].works
    self.assertEqual(len(hafez_works), 1,
                     "Hafez has one child")

    divan_urn = resolver.inventory["urn:cts:farsiLit:hafez.divan"].urn
    self.assertEqual(divan_urn, URN("urn:cts:farsiLit:hafez.divan"),
                     "Divan is found")

    divan_texts = resolver.inventory["urn:cts:farsiLit:hafez.divan"].texts
    self.assertEqual(len(divan_texts), 3,
                     "Divan has 3 children")
def _commandline(repositories, port=8000, host="127.0.0.1", debug=False, cache=None, cache_path="./cache", redis=None):
    """ Run a CTS API from command line.

    .. warning:: This function should not be used in the production context

    :param repositories: Resource handed to NautilusCTSResolver as ``resource``
    :param port: Port the server binds to
    :param host: Address the server binds to
    :param debug: If true, run Flask's own server in debug mode with INFO \
    logging; otherwise serve the app through HTTPServer/IOLoop
    :param cache: "redis" or "filesystem" to pick a cache backend; any other \
    value uses NullCache
    :param cache_path: Directory given to FileSystemCache when cache is \
    "filesystem"
    :param redis: Value passed as first argument to RedisCache when cache is \
    "redis"
    :return: None (the call blocks while the server runs)
    """
    if cache == "redis":
        nautilus_cache = RedisCache(redis)
    elif cache == "filesystem":
        nautilus_cache = FileSystemCache(cache_path)
    else:
        nautilus_cache = NullCache()

    app = Flask("Nautilus")
    if debug:
        app.logger.setLevel(logging.INFO)

    # The selected cache used to be built and then dropped; hand it to the
    # resolver so the cache options actually have an effect.
    resolver = NautilusCTSResolver(resource=repositories, cache=nautilus_cache)
    nautilus = FlaskNautilus(
        app=app,
        resolver=resolver
    )
    nautilus.resolver.parse()

    if debug:
        app.run(debug=debug, port=port, host=host)
    else:
        app.debug = debug
        http_server = HTTPServer(WSGIContainer(app))
        http_server.bind(port=port, address=host)
        http_server.start(0)
        IOLoop.current().start()
def make_resolver(directories=None, cache_directory=None):
    """ Build a NautilusCTSResolver over the given corpus directories.

    :param directories: Corpus directories; when None, they are globbed \
    from "data/raw/corpora/**/**"
    :param cache_directory: When truthy, a FileSystemCache is created on \
    that directory, cleared, and given to the resolver
    :return: The resolver
    """
    if directories is None:
        directories = glob.glob("data/raw/corpora/**/**")

    logger = logging.getLogger(__name__)
    logger.setLevel(logging.CRITICAL)

    if not cache_directory:
        return NautilusCTSResolver(resource=directories, logger=logger)

    cache = FileSystemCache(cache_directory)
    print("Clearing cache")
    cache.clear()
    return NautilusCTSResolver(resource=directories, logger=logger, cache=cache)
def setUp(self):
    """ Create a throwaway application and the manager under test """
    parser_cache = FileSystemCache("cache_dir")
    parser_cache.clear()

    app = Flask("Nautilus")
    resolver = NautilusCTSResolver(
        ["./tests/test_data/latinLit"],
        cache=parser_cache,
        logger=logger
    )
    http_cache = Cache(config={'CACHE_TYPE': 'filesystem'})
    flask_nautilus = FlaskNautilus(
        app=app,
        resolver=resolver,
        flask_caching=http_cache,
        logger=logger
    )

    self.cache_manager = parser_cache
    self.nautilus = flask_nautilus
    self.resolver = resolver
    self.resolver.logger.disabled = True
    self.manager = FlaskNautilusManager(resolver, flask_nautilus)
def test_get_capabilities(self):
    """ Check the number of texts returned for a series of metadata filters """
    resolver = NautilusCTSResolver(["./tests/testing_data/farsiLit"])
    resolver.parse()

    # (filters, expected number of texts, failure message), checked in order
    cases = [
        ({}, 4, "General no filter works"),
        ({"category": "edition"}, 2, "Type filter works"),
        ({"lang": "ger"}, 1, "Filtering on language works"),
        ({"category": "edition", "lang": "ger"}, 0,
         "Type filter + lang works"),
        ({"category": "translation", "lang": "ger"}, 1,
         "Type filter + lang works"),
        ({"page": 1, "limit": 2, "pagination": True}, 2,
         "Pagination works without other filters"),
        ({"page": 2, "limit": 2, "pagination": True}, 2,
         "Pagination works without other filters at list end"),
        ({"urn": "urn:cts:farsiLit"}, 3, "URN Filtering works"),
        ({"urn": "urn:cts:latinLit"}, 1, "URN Filtering works"),
        ({"urn": "urn:cts:farsiLit:hafez.divan.perseus-eng1"}, 1,
         "Complete URN filtering works"),
    ]
    for filters, expected, message in cases:
        texts = resolver.__getTextMetadata__(**filters)[0]
        self.assertEqual(len(texts), expected, message)
def setUp(self):
    """ Create a Nautilus Flask app and route an HttpCtsRetriever through it.

    ``self.parent.call`` is replaced by a function that sends the request to
    the Flask test client instead of a remote endpoint, and records the
    parameters of every call in ``self.parent.called``.
    """
    # No RedisCache is built here anymore: the instance was never used and
    # only made the test depend on a redis client being available.
    app = Flask("Nautilus")
    self.cache = Cache(config={'CACHE_TYPE': 'simple'})
    self.nautilus = FlaskNautilus(
        app=app,
        resolver=NautilusCTSResolver(["./tests/test_data/latinLit"]),
        flask_caching=self.cache,
        logger=logger
    )
    app.debug = True
    self.cache.init_app(app)
    self.app = app.test_client()
    self.parent = HttpCtsRetriever("/cts")
    self.resolver = HttpCtsResolver(endpoint=self.parent)
    logassert.setup(self, self.nautilus.logger.name)
    self.nautilus.logger.disabled = True

    def call(this, parameters=None):
        """ Call an endpoint given the parameters

        :param parameters: Dictionary of parameters
        :type parameters: dict
        :rtype: text
        """
        if parameters is None:
            parameters = {}
        # Drop unset parameters and stringify the remaining values
        parameters = {
            key: str(value)
            for key, value in parameters.items()
            if value is not None
        }
        if this.inventory is not None and "inv" not in parameters:
            parameters["inv"] = this.inventory
        query = "&".join(
            "{}={}".format(key, value) for key, value in parameters.items()
        )
        request = self.app.get("/cts?{}".format(query))
        self.parent.called.append(parameters)
        return request.data.decode()

    self.parent.called = []
    self.parent.call = lambda x: call(self.parent, x)
def test_dispatching_output(self):
    """ Export dispatched inventories to CTS XML, parse them back and
    check the descendant counts of each
    """
    tic = CtsTextInventoryCollection()
    # Inventories are created and labelled in declaration order
    inventories = (
        ("urn:perseus:latinLit", (("Classical Latin", "eng"),)),
        ("urn:perseus:farsiLit", (("Farsi", "eng"),)),
        ("urn:perseus:greekLit", (("Ancient Greek", "eng"), ("Grec Ancien", "fre"))),
    )
    for identifier, labels in inventories:
        inventory = CtsTextInventoryMetadata(identifier, parent=tic)
        for label, lang in labels:
            inventory.set_label(label, lang)

    dispatcher = CollectionDispatcher(tic)

    @dispatcher.inventory("urn:perseus:latinLit")
    def dispatchLatinLit(collection, path=None, **kwargs):
        return collection.id.startswith("urn:cts:latinLit:")

    @dispatcher.inventory("urn:perseus:farsiLit")
    def dispatchfFarsiLit(collection, path=None, **kwargs):
        return collection.id.startswith("urn:cts:farsiLit:")

    @dispatcher.inventory("urn:perseus:greekLit")
    def dispatchGreekLit(collection, path=None, **kwargs):
        return collection.id.startswith("urn:cts:greekLit:")

    resolver = NautilusCTSResolver(
        ["./tests/testing_data/latinLit2"],
        dispatcher=dispatcher
    )
    resolver.logger.disabled = True
    resolver.REMOVE_EMPTY = False
    resolver.parse()

    # Every export happens before the graph is wiped below
    cts_xml = Mimetypes.XML.CTS
    master_xml = resolver.getMetadata().export(cts_xml)
    latin_xml = resolver.getMetadata("urn:perseus:latinLit").export(cts_xml)
    greek_xml = resolver.getMetadata("urn:perseus:greekLit").export(cts_xml)
    farsi_xml = resolver.getMetadata("urn:perseus:farsiLit").export(cts_xml)

    get_graph().remove((None, None, None))
    latin_stuff = XmlCtsTextInventoryMetadata.parse(latin_xml)
    greek_stuff = XmlCtsTextInventoryMetadata.parse(greek_xml)
    farsi_stuff = XmlCtsTextInventoryMetadata.parse(farsi_xml)

    self.assertEqual(len(latin_stuff.readableDescendants), 19,
                     "There should be 19 readable descendants in Latin")
    self.assertIsInstance(latin_stuff, CtsTextInventoryMetadata,
                          "should be textinventory")
    self.assertEqual(
        len(greek_stuff.readableDescendants), 6,
        "There should be 6 readable descendants in Greek [6 only in __cts__.xml]"
    )
    self.assertEqual(len(farsi_stuff.descendants), 0,
                     "There should be nothing in FarsiLit")
    self.assertEqual(
        greek_stuff.get_label("fre"), None,  # Text inventory have no label in CTS
        "Label should be correct"
    )

    get_graph().remove((None, None, None))
    master = XmlCtsTextInventoryMetadata.parse(master_xml)
    self.assertEqual(
        len(master.readableDescendants), 25,
        "There should be all 25 readable descendants in the master collection"
    )
import os import flask from flask_nemo import Nemo from capitains_nautilus.cts.resolver import NautilusCTSResolver from capitains_nautilus.flask_ext import FlaskNautilus app = flask.Flask("CTS webserver demo (nemo)") LOCAL_CONFIG = 'config.py' if os.path.exists(LOCAL_CONFIG): app.config.from_pyfile(LOCAL_CONFIG) corpora = [entry.path for entry in os.scandir('corpora') if entry.is_dir()] print('app: resolver given', corpora) resolver = NautilusCTSResolver(corpora) resolver.parse() nautilus = FlaskNautilus(prefix='/api', app=app, resolver=resolver) nemo = Nemo(name='Nemo', app=app, resolver=resolver, base_url='/nemo') @app.route('/') def home(): '''Placeholder home page to help visitors. We're mainly here to serve the nemo browser and api endoints, but provide a simple landing page in case we're serving the whole domain.''' return flask.render_template('index.html',
# -*- coding: utf-8 -*- from flask import Flask from capitains_nautilus.cts.resolver import NautilusCTSResolver from capitains_nautilus.flask_ext import FlaskNautilus from fullnemo import FullNemo #from flask_nemo.fullnemo import FullNemo from dispatcher import dispatcher flask_app = Flask("Flask Application for Nemo") resolver = NautilusCTSResolver(["/usr/share/dh-data/theses"], dispatcher=dispatcher) #resolver.parse() nautilus_api = FlaskNautilus(prefix="/nemo/api", app=flask_app, resolver=resolver) nemo = FullNemo( name="Positions de thèse", app=flask_app, resolver=resolver, base_url="/nemo", css=["assets/css/html.css", "assets/css/postprod.css"], js=["assets/js/Tree.js", "assets/js/postprod.js"], statics=["assets/images/logo.png"], transform={"default": "assets/xsl/tei2html.xsl", "common" : "assets/xsl/common.xsl"}, templates={"main": "templates/main"} ) if __name__ == "__main__": flask_app.run(debug=True)
def test_missing_text_resource(self):
    """ Asking for the missing_text version must raise UnknownCollection """
    resolver = NautilusCTSResolver(["./tests/test_data/missing_text"])
    missing_urn = "urn:cts:farsiLit:hafez.divan.missing_text"
    with self.assertRaises(UnknownCollection):
        _text, _metadata = resolver.__getText__(missing_urn)
fro = PrototypeTextInventory("urn:geste", parent=tic) #Rien à voir avec les identifiants cts, c'est un identifiant de projet fro.set_label("Corpus de chansons de geste", "fro") dispatcher = CollectionDispatcher(tic) @dispatcher.inventory("urn:geste") def dispatchGeste(collection, path=None, **kwargs): if collection.id.startswith("urn:cts:froLit"): #et cette fois, c'est bien du cts et on file le début des chemins de citation. return True return False cache = Cache() NautilusDummy = NautilusCTSResolver( [ "." ], dispatcher=dispatcher ) NautilusDummy.logger.setLevel(logging.ERROR) def scheme_grouper(text, getreffs): level = len(text.citation) groupby = 100 types = [citation.name for citation in text.citation] if 'word' in types: types = types[:types.index("word")] if str(text.id) == "urn:cts:latinLit:stoa0040.stoa062.opp-lat1": level, groupby = 1, 2 elif types == ["vers", "mot"]: level, groupby = 1, 100
def setUp(self):
    """ Remove every triple matching the all-wildcard pattern from the
    graph, then create a fresh resolver on latinLit2
    """
    graph = get_graph()
    graph.remove((None, None, None))
    corpus_directories = ["./tests/testing_data/latinLit2"]
    self.resolver = NautilusCTSResolver(corpus_directories)
latin.set_label("Latin Classique", "fre") dispatcher = CollectionDispatcher(tic) @dispatcher.inventory("urn:perseus:latinLit") def dispatchLatinLit(collection, path=None, **kwargs): if collection.id.startswith("urn:cts:latinLit:"): return True return False return dispatcher nautilus_cache = FileSystemCache(subprocess_cache_dir, default_timeout=0) resolver = NautilusCTSResolver(subprocess_repository, dispatcher=make_dispatcher(), cache=nautilus_cache) app = Flask("Nautilus") http_cache = Cache(app, config={ 'CACHE_TYPE': "filesystem", "CACHE_DIR": http_cache_dir, "CACHE_DEFAULT_TIMEOUT": 0 }) nautilus = FlaskNautilus(app=app, prefix="/api", name="nautilus", resolver=resolver, flask_caching=http_cache)
'urn:cts:greekLit:tlg0020.tlg002.alpheios-text-grc1', 'urn:cts:greekLit:tlg0020.tlg001.alpheios-text-grc1', 'urn:cts:greekLit:tlg0011.tlg003.alpheios-text-grc1', ] excluded_editions = [ 'urn:cts:greekLit:tlg0011.tlg003.perseus-grc2', ] resolver = NautilusCTSResolver( [ os.path.join(d, o) for o in os.listdir(d) if os.path.isdir(os.path.join(d, o)) ], dispatcher=dispatcher, filter=lambda t: (t.__subtype__ == 'edition' and t.urn.upTo(URN.VERSION ) not in excluded_editions and (str(t.urn.namespace) in unfiltered_collections or str( t.urn.textgroup) in allowed_textgroups or t.urn.upTo(URN.WORK) in allowed_works or t.urn.upTo(URN.VERSION) in allowed_editions)), cache=None) app = Flask("Nautilus") app.secret_key = os.environ.get('ALPHEIOS_NEMO_APPKEY', 'appsecret') client_id = os.environ.get('ALPHEIOS_NEMO_AUTH0_CLIENTID', 'clientidhere') client_secret = os.environ.get('ALPHEIOS_NEMO_AUTH0_CLIENTSECRET', 'clientsecrethere') proxy_base = os.environ.get('ALPHEIOS_NEMO_PROXYBASE', 'http://dev.alpheios.net:5000') auth_max_age_override = os.environ.get('ALPHEIOS_NEMO_AUTH0_MAX_AGE_SECONDS')
# Inventory for Latin texts, attached to the general collection and
# labelled in English and French
latin_texts = CtsTextInventoryMetadata("mnemosyne:latin", parent=general_collection)
latin_texts.set_label("Latin Texts", "eng")
latin_texts.set_label("Textes Latins", "fre")

# Inventory for everything else; it is also the dispatcher's default below
misc = CtsTextInventoryMetadata("mnemosyne:misc", parent=general_collection)
misc.set_label("Miscellaneous", "eng")
misc.set_label("Textes Divers", "fre")

organizer = CollectionDispatcher(general_collection, default_inventory_name="mnemosyne:misc")


# NOTE(review): the "mnemosyne:grec" inventory is not created in this
# part of the file - presumably it is declared earlier; confirm.
@organizer.inventory("mnemosyne:grec")
def organize_my_grec(collection, path=None, **kwargs):
    """ Return True when the collection id starts with "urn:cts:greekLit:" """
    if collection.id.startswith("urn:cts:greekLit:"):
        return True
    return False


@organizer.inventory("mnemosyne:latin")
def organize_my_latin(collection, path=None, **kwargs):
    """ Return True when the collection id starts with "urn:cts:latinLit:" """
    if collection.id.startswith("urn:cts:latinLit:"):
        return True
    return False


# Parsing the data
resolver = NautilusCTSResolver(["corpora/hchn"], dispatcher=organizer)
resolver.parse()
parent=general_collection) greek_texts.set_label("Greek Texts", "eng") organizer = CollectionDispatcher(general_collection, default_inventory_name="id:misc") @organizer.inventory("greek_texts") def organize_my_meadow(collection, path=None, **kwargs): if collection.id.startswith("urn:cts:greekLit"): return True return False flask_app = Flask("Flask Application for Nemo") resolver = NautilusCTSResolver(["corpora/meadow"], dispatcher=organizer) resolver.parse() nautilus_api = FlaskNautilus(prefix="/api", app=flask_app, resolver=resolver) nemo = Nemo( name="InstanceNemo", app=flask_app, resolver=resolver, base_url="", css=["assets/css/theme.css"], js=["assets/js/alpheios.js"], statics=["assets/images/logo.jpg"], transform={"default": "components/main.xsl"}, templates={"main": "templates/main"}, chunker={"urn:cts:greekLit:tlg2856.tlg001.1st1K-grc1": meadow_chunker})
from werkzeug.contrib.cache import FileSystemCache from capitains_nautilus.cts.resolver import NautilusCTSResolver from capitains_nautilus.flask_ext import FlaskNautilus from . import create_app from .nemo import NemoFormulae from .dispatcher_builder import organizer flask_app = create_app() resolver = NautilusCTSResolver( flask_app.config['CORPUS_FOLDERS'], dispatcher=organizer, # cache=FileSystemCache(flask_app.config['CACHE_DIRECTORY']) ) # nautilus_api = FlaskNautilus(prefix="/api", resolver=resolver, app=flask_app) nemo = NemoFormulae(name="InstanceNemo", app=flask_app, resolver=resolver, base_url="", css=["assets/css/theme.css"], js=["assets/js/empty.js"], static_folder="./assets/", transform={ "default": "components/epidoc.xsl", "notes": "components/extract_notes.xsl" }, templates={ "main": "templates/main", "errors": "templates/errors", "auth": "templates/auth", "search": "templates/search"
import os
from capitains_nautilus.cts.resolver import NautilusCTSResolver
from werkzeug.contrib.cache import FileSystemCache

# Fixed layout: corpora are read from <root>/data, the cache is written
# to <root>/cache
root_path = "/var/lib/nautilus"
data_path = os.path.join(root_path, "data")
cache_path = os.path.join(root_path, "cache")

# makedirs also creates root_path when it is missing, and exist_ok avoids
# the check-then-create race of os.path.exists() followed by os.mkdir()
os.makedirs(data_path, exist_ok=True)
os.makedirs(cache_path, exist_ok=True)

cache = FileSystemCache(cache_path)

# Every directory directly under data_path is handed to the resolver
resolver = NautilusCTSResolver(
    [
        os.path.join(data_path, entry)
        for entry in os.listdir(data_path)
        if os.path.isdir(os.path.join(data_path, entry))
    ],
    cache=cache
)


def preload():
    """ Request the resolver's root metadata (``objectId=None``) ahead of time """
    resolver.getMetadata(objectId=None)
def build_resolver(configuration_file):
    """ Build the dispatcher, resolver and cache described by an XML
    configuration file.

    The file is queried for ``//corpora/corpus`` (corpus directories,
    resolved relative to the configuration file), ``//collections/collection``
    (inventories, their names and dispatching filters) and ``//cache-folder``
    (filesystem cache directory; a SimpleCache is used when absent).

    :param configuration_file: Path to the XML configuration file
    :return: Organizer, Resolver and Cache handler
    """
    with open(configuration_file) as f:
        xml = etree.parse(f)

    directories = [
        # Compute path relative to the configuration files
        relative_folder(configuration_file, directory)
        for directory in xml.xpath("//corpora/corpus/text()")
    ]

    default_collection = None
    general_collection = CtsTextInventoryCollection()
    filters_to_register = []

    for collection in xml.xpath("//collections/collection"):
        identifier = collection.xpath("./identifier/text()")[0]
        if collection.get("default") == "true":
            default_collection = identifier

        current_collection = CtsTextInventoryMetadata(
            identifier, parent=general_collection)
        for name in collection.xpath("./name"):
            current_collection.set_label(name.text, name.get("lang"))

        # We look at dispatching filters in the collection.
        # Each filter is built by a factory so that it keeps its own value:
        # a lambda written directly in these loops would look the loop
        # variable up when called, i.e. after the loops have finished, and
        # every filter would then test against the last value seen.
        for filters in collection.xpath("./filters"):
            # We register prefix filters
            prefix_filters = [
                _make_prefix_filter(prefix)
                for prefix in filters.xpath("./id-starts-with/text()")
            ]
            # We register citation filters
            citation_filters = [
                _make_citation_filter(citation_name)
                for citation_name in filters.xpath("./citation-contains/text()")
            ]
            # We register path based filters
            directory_filters = [
                _make_directory_filter(configuration_file, target_directory)
                for target_directory in filters.xpath("./folder/text()")
            ]

            filters_to_register.append(
                (
                    identifier,
                    collection_dispatcher_builder(
                        collection, prefix_filters,
                        citation_filters, directory_filters
                    )
                )
            )

    # Create the dispatcher
    organizer = CollectionDispatcher(
        general_collection, default_inventory_name=default_collection)
    for destination_collection, anonymous_dispatching_function in filters_to_register:
        organizer.add(anonymous_dispatching_function, destination_collection)

    # Set-up the cache folder
    # ToDO : Add a system for redis ?
    cache = None
    # When several cache folders are declared, the last one wins
    for cache_folder in xml.xpath("//cache-folder/text()"):
        cache = FileSystemCache(cache_folder)
    if cache is None:
        cache = SimpleCache()

    resolver = NautilusCTSResolver(
        resource=directories, dispatcher=organizer, cache=cache)
    return organizer, resolver, cache


def _make_prefix_filter(prefix):
    """ Return a filter that is true when the collection id starts with ``prefix`` """
    return lambda collection: str(collection.id).startswith(prefix)


def _make_citation_filter(citation_name):
    """ Return a filter applying citation_contain_filter with ``citation_name`` """
    return lambda collection: citation_contain_filter(collection, citation_name)


def _make_directory_filter(configuration_file, target_directory):
    """ Return a filter that is true when ``path`` starts with the target
    directory, resolved relative to the configuration file
    """
    return lambda collection, path=None: path.startswith(
        relative_folder(configuration_file, target_directory))
# Script that parses the test repository with a filesystem cache attached
# to the resolver. Repository and cache locations come from tests.cts.config.
from capitains_nautilus.cts.resolver import NautilusCTSResolver
from werkzeug.contrib.cache import FileSystemCache
from tests.cts.config import subprocess_repository, subprocess_cache_dir

cache = FileSystemCache(subprocess_cache_dir)
resolver = NautilusCTSResolver(resource=subprocess_repository, cache=cache)
# Parsing happens as a side effect of running/importing this module
resolver.parse()