def main(argv=None):
    """Serve the SRU and OAI-PMH WSGI applications over HTTP.

    Parses command line options (from ``argv`` when given, otherwise via
    the argument parser's default), builds the Cheshire3 session and
    server, creates the SRU and OAI-PMH WSGI applications, mounts them
    together with the packaged HTML docs in a ``URLMap`` and hands that
    map to ``paste.httpserver.serve``.

    The session, server and both applications are stored in module
    globals.
    """
    global argparser, c3_session, c3_server
    global sru_app, oaipmh_app  # WSGI Apps
    args = (argparser.parse_args() if argv is None
            else argparser.parse_args(argv))
    c3_session = Session()
    c3_server = SimpleServer(c3_session, args.serverconfig)
    # SRU application
    sru_app = SRUWsgiHandler(
        c3_session,
        get_configsFromServer(c3_session, c3_server)
    )
    # OAI-PMH application
    dbs, oaipmh_configs = get_databasesAndConfigs(c3_session, c3_server)
    oaipmh_app = OAIPMHWsgiApplication(c3_session, oaipmh_configs, dbs)
    # Mount points: static documentation first, then the two APIs
    urlmap = URLMap()
    docs_app = make_pkg_resources(None, 'cheshire3', 'docs/build/html')
    for mount_point, app in (('/docs', docs_app),
                             ('/api/sru', sru_app),
                             ('/api/oaipmh/2.0', oaipmh_app)):
        urlmap[mount_point] = app
    url = "http://{0}:{1}/".format(args.hostname, args.port)
    if args.browser:
        webbrowser.open(url)
        print("Hopefully a new browser window/tab should have opened "
              "displaying the application.")
    paste.httpserver.serve(urlmap, host=args.hostname, port=args.port)
def __init__(self):
    """Open a Cheshire3 session on the ``db_dickens`` database.

    Stores the session, server, database and default query factory on
    the instance as ``session``, ``serv``, ``db`` and ``qf``.
    """
    config_path = os.path.join(cheshire3Root, 'configs', 'serverConfig.xml')
    session = Session()
    session.database = 'db_dickens'
    self.session = session
    self.serv = SimpleServer(session, config_path)
    self.db = self.serv.get_object(session, session.database)
    self.qf = self.db.get_object(session, 'defaultQueryFactory')
def setUp(self):
    """Create a fresh session and server for the test.

    The default logger's ``minLevel`` is set to 60 so that stdout
    logging is disabled while the test runs.
    """
    config_path = os.path.join(cheshire3Root, 'configs', 'serverConfig.xml')
    session = self.session = baseObjects.Session()
    self.server = SimpleServer(session, config_path)
    # Disable stdout logging
    self.server.get_path(session, 'defaultLogger').minLevel = 60
def main(argv=None):
    """Remove a Database registration from the Cheshire3 Server.

    Parses options from ``argv`` (or via the parser's default when
    ``argv`` is None), builds the session and server as module globals,
    and asks the server to unregister ``args.identifier``.

    Returns 0.
    """
    global argparser, session, server
    args = (argparser.parse_args() if argv is None
            else argparser.parse_args(argv))
    session = Session()
    server = SimpleServer(session, args.serverconfig)
    # The server does the actual work of dropping the registration
    server.unregister_databaseConfig(session, args.identifier)
    return 0
def main(argv=None):
    """Load data into a Cheshire3 database based on parameters in argv.

    When no ``--database`` is given the database identifier is
    discovered from the current working directory via
    ``identify_database``. Each data argument is loaded into the
    database's ``defaultDocumentFactory`` and then run through its
    ``buildIndexWorkflow``.

    Returns:
        1 if the database identifier could not be discovered, 2 if the
        database does not exist, otherwise None once every data
        argument has been processed.

    Raises:
        MissingDependencyException: re-raised in the name of this script
            when the document factory reports missing dependencies.
    """
    global argparser, session, server, db
    if argv is None:
        args = argparser.parse_args()
    else:
        args = argparser.parse_args(argv)
    session = Session()
    server = SimpleServer(session, args.serverconfig)
    if args.database is None:
        try:
            dbid = identify_database(session, os.getcwd())
        except EnvironmentError as e:
            # str(e), not e.message: the latter is deprecated and is empty
            # for errno-style errors, which would log a blank message.
            server.log_critical(session, str(e))
            return 1
        server.log_debug(
            session,
            "database identifier not specified, discovered: {0}".format(dbid)
        )
    else:
        dbid = args.database

    try:
        db = server.get_object(session, dbid)
    except ObjectDoesNotExistException:
        msg = """Cheshire3 database {0} does not exist.
Please provide a different database identifier using the --database option.
""".format(dbid)
        server.log_critical(session, msg)
        return 2
    else:
        # Allow for multiple data arguments
        docFac = db.get_object(session, 'defaultDocumentFactory')
        for dataArg in args.data:
            try:
                docFac.load(session,
                            dataArg,
                            args.cache,
                            args.format,
                            args.tagname,
                            args.codec
                            )
            except MissingDependencyException as e:
                server.log_critical(session, e.reason)
                missingDependencies = e.dependencies
                raise MissingDependencyException('cheshire3-load script',
                                                 missingDependencies
                                                 )
            # Index what was just loaded before moving to the next argument
            wf = db.get_object(session, 'buildIndexWorkflow')
            wf.process(session, docFac)
def setUp(self):
    """Build the session, server and the object under test.

    Dependency configurations supplied by ``_get_dependencyConfigs`` are
    registered in the server's ``subConfigs`` under their ``id`` before
    the test object is made from ``_get_config``.
    """
    config_path = os.path.join(cheshire3Root, 'configs', 'serverConfig.xml')
    session = self.session = Session()
    server = self.server = SimpleServer(session, config_path)
    for dependency in self._get_dependencyConfigs():
        server.subConfigs[dependency.get('id')] = dependency
    # Disable stdout logging
    server.get_path(session, 'defaultLogger').minLevel = 60
    # Create object that will be tested
    self.testObj = makeObjectFromDom(session, self._get_config(), server)
def __init__(self):
    '''
    Connect to Cheshire3 and keep handles on the ``db_dickens`` database.

    Besides the session, server and database, the default query factory,
    the result set store and the index store are looked up once and kept
    on the instance.
    '''
    config_path = os.path.join(cheshire3Root, 'configs', 'serverConfig.xml')
    session = Session()
    session.database = 'db_dickens'
    self.session = session
    self.serv = SimpleServer(session, config_path)
    db = self.serv.get_object(session, session.database)
    self.db = db
    self.qf = db.get_object(session, 'defaultQueryFactory')
    self.resultSetStore = db.get_object(session, 'resultSetStore')
    self.idxStore = db.get_object(session, 'indexStore')
def main(argv=None):
    """Register a Database configuration file with the Cheshire3 Server.

    The config file path from the command line is expanded (``~``) and
    made absolute, stored back on ``args.configfile`` and passed to the
    server for registration. The session and server are module globals.

    Returns 0.
    """
    global argparser, session, server
    args = (argparser.parse_args() if argv is None
            else argparser.parse_args(argv))
    session = Session()
    server = SimpleServer(session, args.serverconfig)
    # Expand the user's home directory, then make the path absolute
    expanded = os.path.expanduser(args.configfile)
    args.configfile = os.path.abspath(expanded)
    # Hand the absolute path to the server for registration
    server.register_databaseConfigFile(session, args.configfile)
    return 0
def __init__(self):
    '''
    Set up a cheshire3 session/connection to the database.

    Only the connection objects are prepared here; the search term
    itself is not handled by this initialisation
    (cf. build_and_run_query).
    '''
    session = Session()
    session.database = 'db_dickens'
    self.session = session
    server_config = os.path.join(cheshire3Root,
                                 'configs',
                                 'serverConfig.xml')
    self.serv = SimpleServer(session, server_config)
    self.db = self.serv.get_object(session, session.database)
    # Look up the database-level helpers once
    for attr, identifier in (('qf', 'defaultQueryFactory'),
                             ('resultSetStore', 'resultSetStore'),
                             ('idxStore', 'indexStore')):
        setattr(self, attr, self.db.get_object(session, identifier))
def main(argv=None):
    """Start up a simple app server to serve the SRU application.

    Builds the session and server as module globals, creates the SRU
    WSGI handler and serves it with ``make_server`` on the host and port
    given on the command line.

    Returns:
        1 if the listening socket could not be created. Otherwise the
        function blocks in ``serve_forever``.
    """
    global argparser, session, server
    import sys
    if argv is None:
        args = argparser.parse_args()
    else:
        args = argparser.parse_args(argv)
    session = Session()
    server = SimpleServer(session, args.serverconfig)
    application = SRUWsgiHandler()
    try:
        httpd = make_server(args.hostname, args.port, application)
    except socket.error as e:
        # Previously an empty string was printed, hiding the reason the
        # server failed to start (e.g. port already in use).
        sys.stderr.write(
            "Unable to start server on {0}:{1}: {2}\n".format(args.hostname,
                                                              args.port,
                                                              e)
        )
        return 1
    else:
        print("""You will be able to access the application at:
    http://{0}:{1}""".format(args.hostname, args.port))
        httpd.serve_forever()
def main(argv=None):
    """Search a Cheshire3 database based on query in argv.

    When no ``--database`` is given the database identifier is
    discovered from the current working directory via
    ``identify_database``. The query string is parsed by the database's
    ``defaultQueryFactory`` in the requested format and the result set
    is passed to ``_format_resultSet``.

    Returns:
        1 if the database identifier could not be discovered, 2 if the
        database does not exist, otherwise whatever ``_format_resultSet``
        returns.
    """
    global argparser, session, server, db
    if argv is None:
        args = argparser.parse_args()
    else:
        args = argparser.parse_args(argv)
    session = Session()
    server = SimpleServer(session, args.serverconfig)
    if args.database is None:
        try:
            dbid = identify_database(session, os.getcwd())
        except EnvironmentError as e:
            # str(e), not e.message: the latter is deprecated and is empty
            # for errno-style errors, which would log a blank message.
            server.log_critical(session, str(e))
            return 1
        server.log_debug(
            session,
            "database identifier not specified, discovered: {0}".format(dbid)
        )
    else:
        dbid = args.database

    try:
        db = server.get_object(session, dbid)
    except ObjectDoesNotExistException:
        msg = """Cheshire3 database {0} does not exist.
Please provide a different database identifier using the --database option.
""".format(dbid)
        server.log_critical(session, msg)
        return 2
    else:
        qFac = db.get_object(session, 'defaultQueryFactory')
        query = qFac.get_query(session, args.query, format=args.format)
        resultSet = db.search(session, query)
        return _format_resultSet(resultSet,
                                 maximumRecords=args.maxRecs,
                                 startRecord=args.startRec)
# Apache Config:
#<Directory /usr/local/apache2/htdocs/srw>
#  SetHandler mod_python
#  PythonDebug On
#  PythonPath "['/home/cheshire/c3/code', '/usr/local/lib/python2.3/lib-dynload']+sys.path"
#  PythonHandler srwApacheHandler
#</Directory>
# NB. SetHandler, not AddHandler.

# Root of the Cheshire3 installation; overridable through $C3HOME.
cheshirePath = os.environ.get('C3HOME', '/home/cheshire')

session = Session()
session.environment = "apache"
serv = SimpleServer(
    session,
    os.path.join(cheshirePath, 'cheshire3', 'configs', 'serverConfig.xml'))

# configs maps a database URL to its SRW and SRW-update protocol maps.
configs = {}
serv._cacheDatabases(session)
for db in serv.databases.values():
    if db.get_setting(session, 'SRW') or db.get_setting(session, 'srw'):
        db._cacheProtocolMaps(session)
        map = db.protocolMaps.get('http://www.loc.gov/zing/srw/', None)
        map2 = db.protocolMaps.get('http://www.loc.gov/zing/srw/update/',
                                   None)
        if map is None:
            # The database is flagged for SRW but has no SRW protocol map.
            # Skip it rather than fail the whole module import on
            # ``map.databaseUrl``.
            continue
        configs[map.databaseUrl] = {
            'http://www.loc.gov/zing/srw/': map,
            'http://www.loc.gov/zing/srw/update/': map2
        }
# Module-level random number generator instance.
rand = random.Random()

from PyZ3950 import CQLParser

# Register the SQL and CQL query OIDs with the asn1 module.
asn1.register_oid(Z3950_QUERY_SQL, SQLQuery)
asn1.register_oid(Z3950_QUERY_CQL, asn1.GeneralString)

from cheshire3.baseObjects import Session, Database, Transformer, Workflow
from cheshire3.server import SimpleServer
from cheshire3 import internal
from cheshire3 import cqlParser

# One shared session and server, built from the installation's
# serverConfig.xml.
session = Session()
session.environment = "apache"
server = SimpleServer(
    session,
    os.path.join(internal.cheshire3Root, 'configs', 'serverConfig.xml'))

# configs: protocol map databaseName -> protocol map
# dbmap:   Cheshire3 database id -> protocol map databaseName
configs = {}
dbmap = {}
server._cacheDatabases(session)
for db in server.databases.values():
    # Only databases with a truthy "z3950" setting are considered
    if db.get_setting(session, "z3950"):
        db._cacheProtocolMaps(session)
        map1 = db.protocolMaps.get('http://www.loc.gov/z3950/', None)
        # Databases without a Z39.50 protocol map are left out
        if map1:
            configs[map1.databaseName] = map1
            dbmap[db.id] = map1.databaseName

# Attach the shared stores, logger and configs to the session
session.resultSetStore = server.get_path(session, 'resultSetStore')
session.logger = server.get_path(session, 'z3950Logger')
session.configs = configs
def main(argv=None):
    """Initialize a Cheshire 3 database based on parameters in argv.

    Creates a ``.cheshire3`` directory (with ``stores``, ``indexes`` and
    ``logs`` sub-directories) inside ``args.directory``, generates the
    default ZeeRex, database, selector, index and workflow configuration
    files there, and registers the database config with the server.

    When no ``--database`` is given the identifier defaults to ``db_``
    plus the base name of ``args.directory``.

    Returns:
        0 on success.

    Raises:
        ValueError: if a database with the chosen identifier already
            exists.
        OSError: if one of the directories cannot be created for a
            reason other than already existing.
    """
    global argparser, session, server, db
    if argv is None:
        args = argparser.parse_args()
    else:
        args = argparser.parse_args(argv)
    session = Session()
    server = SimpleServer(session, args.serverconfig)
    if args.database is None:
        if args.directory.endswith(os.path.sep):
            args.directory = args.directory[:-1]
        # Find local database name to use as basis of database id
        dbid = "db_{0}".format(os.path.basename(args.directory))
        server.log_debug(session,
                         ("database identifier not specified, defaulting to: "
                          "{0}".format(dbid)))
    else:
        dbid = args.database

    try:
        db = server.get_object(session, dbid)
    except ObjectDoesNotExistException:
        # Doesn't exists, so OK to init it
        pass
    else:
        # TODO: check for --force ?
        msg = """database with id '{0}' has already been init'd. \
Please specify a different id using the --database option.""".format(dbid)
        server.log_critical(session, msg)
        raise ValueError(msg)

    # Create a .cheshire3 directory and populate it
    c3_dir = os.path.join(os.path.abspath(args.directory), '.cheshire3')
    for dir_path in [
        c3_dir,
        os.path.join(c3_dir, 'stores'),
        os.path.join(c3_dir, 'indexes'),
        os.path.join(c3_dir, 'logs')
    ]:
        try:
            os.makedirs(dir_path)
        except OSError:
            # Only an existing directory is harmless. Anything else (e.g.
            # permission denied) must not be reported as "already exists".
            if not os.path.isdir(dir_path):
                raise
            server.log_warning(session,
                               "directory already exists {0}".format(dir_path))

    # Generate config file(s)
    xmlFilesToWrite = {}

    # Generate Protocol Map(s) (ZeeRex)
    zrx = create_defaultZeerex(dbid, args)
    zrxPath = os.path.join(c3_dir, 'zeerex_sru.xml')
    args.zeerexPath = zrxPath
    xmlFilesToWrite[zrxPath] = zrx

    # Generate generic database config
    dbConfig = create_defaultConfig(dbid, args)
    dbConfigPath = os.path.join(c3_dir, 'config.xml')

    # Generate config for generic selectors
    selectorConfig = create_defaultConfigSelectors()
    path = os.path.join(c3_dir, 'configSelectors.xml')
    dbConfig = include_configByPath(dbConfig, path)
    xmlFilesToWrite[path] = selectorConfig

    # Generate config for generic indexes
    indexConfig = create_defaultConfigIndexes()
    path = os.path.join(c3_dir, 'configIndexes.xml')
    dbConfig = include_configByPath(dbConfig, path)
    xmlFilesToWrite[path] = indexConfig

    # Generate config for default Workflows
    workflowConfig = create_defaultConfigWorkflows()
    path = os.path.join(c3_dir, 'configWorkflows.xml')
    dbConfig = include_configByPath(dbConfig, path)
    xmlFilesToWrite[path] = workflowConfig

    # Register the database config only after all includes were applied, so
    # the written file has them even if include_configByPath returns a new
    # node instead of modifying its argument.
    xmlFilesToWrite[dbConfigPath] = dbConfig

    # Write configs to files
    for path, node in xmlFilesToWrite.items():
        # tostring() with an explicit encoding yields encoded bytes, hence
        # binary mode.
        with open(path, 'wb') as conffh:
            conffh.write(
                etree.tostring(node, pretty_print=True, encoding="utf-8"))

    # Tell the server to register the config file
    server.register_databaseConfigFile(session, dbConfigPath)
    return 0
def main(argv=None):
    """Load data into a Cheshire3 database based on parameters in argv.

    Each data argument is turned into an ``irods://`` URL. Arguments
    that are not already such URLs are resolved against the host, port,
    user name and working collection of the current iRODS environment.
    The data is then loaded into the database's
    ``defaultDocumentFactory`` and run through its
    ``buildIndexWorkflow``.

    Returns:
        1 if the database identifier could not be discovered, 2 if the
        database does not exist, otherwise None.

    Raises:
        MissingDependencyException: if the ``irods`` module is not
            available, or when the document factory reports missing
            dependencies.
    """
    global argparser, session, server, db
    if argv is None:
        args = argparser.parse_args()
    else:
        args = argparser.parse_args(argv)
    if irods is None:
        raise MissingDependencyException('icheshire3-load script',
                                         'irods (PyRods)'
                                         )
    session = Session()
    server = SimpleServer(session, args.serverconfig)
    if args.database is None:
        try:
            dbid = identify_database(session, os.getcwd())
        except EnvironmentError as e:
            # str(e), not e.message: the latter is deprecated and is empty
            # for errno-style errors, which would log a blank message.
            server.log_critical(session, str(e))
            return 1
        server.log_debug(
            session,
            "database identifier not specified, discovered: {0}".format(dbid))
    else:
        dbid = args.database

    try:
        db = server.get_object(session, dbid)
    except ObjectDoesNotExistException:
        msg = """Cheshire3 database {0} does not exist.
Please provide a different database identifier using the --database option.
""".format(dbid)
        server.log_critical(session, msg)
        return 2
    else:
        # Allow for multiple data arguments
        docFac = db.get_object(session, 'defaultDocumentFactory')
        for dataArg in args.data:
            if dataArg.startswith('irods://'):
                parsed = urlsplit(dataArg)
            else:
                # Examine current environment. Each value is read through
                # its getter, falling back to the plain attribute when the
                # getter is missing.
                status, myEnv = irods.getRodsEnv()
                try:
                    host = myEnv.getRodsHost()
                except AttributeError:
                    host = myEnv.rodsHost
                # Port
                try:
                    # The result was previously discarded, leaving ``port``
                    # unbound whenever the getter existed.
                    port = myEnv.getRodsPort()
                except AttributeError:
                    port = myEnv.rodsPort
                # User
                try:
                    username = myEnv.getRodsUserName()
                except AttributeError:
                    username = myEnv.rodsUserName
                netloc = '{0}@{1}:{2}'.format(username, host, port)
                try:
                    # Previously assigned to a misspelt name, leaving
                    # ``cwd`` unbound whenever the getter existed.
                    cwd = myEnv.getRodsCwd()
                except AttributeError:
                    cwd = myEnv.rodsCwd
                path = '/'.join([cwd, dataArg])
                parsed = SplitResult('irods', netloc, path, None, None)
            dataArg = urlunsplit(parsed)
            server.log_debug(session, dataArg)
            # Force an irods-aware format unless one was explicitly given
            if args.format is None or not args.format.startswith('i'):
                fmt = 'irods'
            else:
                fmt = args.format
            server.log_debug(session, fmt)
            try:
                docFac.load(session,
                            dataArg,
                            args.cache,
                            fmt,
                            args.tagname,
                            args.codec)
            except MissingDependencyException as e:
                server.log_critical(session, e.reason)
                missingDependencies = e.dependencies
                # Name this script, consistent with the irods check above
                raise MissingDependencyException('icheshire3-load script',
                                                 missingDependencies)
            # Index what was just loaded before moving to the next argument
            wf = db.get_object(session, 'buildIndexWorkflow')
            wf.process(session, docFac)
class Cheshire3Engine(BaseEngine):
    """Search engine backend that indexes and searches through Cheshire3.

    Documents are indexed by shelling out to the ``cheshire3-init`` and
    ``cheshire3-load`` commands. Per-document reader attributes (path,
    href, title, cfiBase, spinePos) are kept in a JSON file named after
    the database inside the ``cheshire_metadata_dir`` directory.

    The session and server are class attributes created when the class
    is defined. ``database_name`` and ``database_path`` are read from
    the instance but are not set anywhere in this class.
    """
    #schema = Schema(title=TEXT(stored=True), path=TEXT(stored=True), href=ID(stored=True), cfiBase=TEXT(stored=True), spinePos=TEXT(stored=True), content=TEXT)
    #database = 'db_tdo_simple_sru'
    cheshire_metadata_dir = '/cheshire3-metadata'
    session = Session()
    serverConfig = os.path.join(cheshire3Root, 'configs', 'serverConfig.xml')
    server = SimpleServer(session, serverConfig)
    # Looked up lazily on the first call to query()
    queryFactory = None
    db = None
    titleSel = None
    anywhereSel = None
    proxExtractor = None

    def __initializeTitleSelector(self):
        """Look up the title selector, trying both known identifiers."""
        try:
            self.titleSel = self.db.get_object(self.session,
                                               'titleXPathSelector')
        except ObjectDoesNotExistException:
            try:
                self.titleSel = self.db.get_object(self.session,
                                                   'titleSelector')
            except ObjectDoesNotExistException as e:
                logging.error(e)

    def __initializeAnywhereSelector(self):
        """Look up the 'anywhereXPathSelector' object of the database."""
        try:
            self.anywhereSel = self.db.get_object(self.session,
                                                  'anywhereXPathSelector')
        except ObjectDoesNotExistException as e:
            logging.error(e)

    def __initializeProximityExtractor(self):
        """Look up the 'ProxExtractor' object of the database."""
        try:
            self.proxExtractor = self.db.get_object(self.session,
                                                    'ProxExtractor')
        except ObjectDoesNotExistException as e:
            logging.error(e)

    def __highlight(self, text, term, n):
        """Return one highlighted snippet per occurrence of term in text.

        Each snippet consists of up to ``n`` characters before the
        occurrence, the term wrapped in ``<b class="match term0">``, and
        up to ``n`` characters after it. The term is matched literally.
        An empty term yields no snippets.
        """
        term_concordance = list()
        if not term:
            return term_concordance
        term_len = len(term)
        # re.escape: the term is user text, not a regular expression
        for match in re.finditer(re.escape(term), text):
            idx = match.start()
            # Clamp at 0: a negative start would slice from the end of text
            start = max(0, idx - n)
            after = idx + term_len
            end = min(len(text), after + n)
            # The trailing context starts after the term, so the term is
            # not repeated behind the highlighted copy.
            term_concordance.append(text[start:idx] +
                                    '<b class="match term0">' + term +
                                    '</b>' + text[after:end])
        return term_concordance

    def open(self):
        """ The Cheshire get_object line should throw an exception if it
        can't open passed db. Failures are logged, not raised.
        """
        try:
            self.db = self.server.get_object(self.session, self.database_name)
            self.session.database = self.database_name
        except Exception as e:
            logging.error(e)
            logging.error("openning database {} failed".format(
                self.database_name))

    def create(self):
        """Create the database directory, metadata file and Cheshire3 db."""
        if not os.path.exists(self.database_path):
            os.makedirs(self.database_path)

        # create cheshire metadata directory if needed, then initialize
        # the metadata file with an empty JSON object
        metadata_path = self.database_path + self.cheshire_metadata_dir
        if not os.path.exists(metadata_path):
            os.makedirs(metadata_path)
        with open(metadata_path + '/' + self.database_name, 'w') as f:
            json.dump({}, f)

        try:
            logging.info("openning database {} to create".format(
                self.database_path))
            # NOTE(review): the command line is built by concatenation;
            # paths containing spaces or shell metacharacters will break.
            os.system("cheshire3-init " + self.database_path +
                      " --database=" + self.database_name)
        except Exception as e:
            logging.error(e)

    def add(self, path='', href='', title='', cfiBase='', spinePos=''):
        """Index the document at path and record its reader attributes.

        The attributes are stored in the metadata JSON file under the
        key ``href``.
        """
        # first, index the document in cheshire3 using unix commands
        # NOTE(review): the command line is built by concatenation; paths
        # containing spaces or shell metacharacters will break.
        os.system("cheshire3-load --database=" + self.database_name + ' ' +
                  path)

        doc_md = dict()
        doc_md[href] = {
            'path': path,
            'href': href,
            'title': title,
            'cfiBase': cfiBase,
            'spinePos': spinePos
        }

        # title is not populated, so pulling filename from path prefix
        #filename = path[:path.find('/')] + '.json'
        metadata_path = self.database_path + self.cheshire_metadata_dir
        with open(metadata_path + '/' + self.database_name) as f_in:
            md_dict = json.load(f_in)

        md_dict.update(doc_md)

        with open(metadata_path + '/' + self.database_name, 'w') as f_out:
            json.dump(md_dict, f_out)
        #print "Current Path for directory writing: " + os.getcwd()

    def finished(self):
        """ In Cheshire, there are no cleanup commands that are needed.
        The add command will index specified documents fully and end, so
        a finished command is not required.
        """
        pass

    def query(self, q, limit=None):
        """ In Cheshire3, you have to specify an index and query, else it
        defaults the all index which utilizes simple extraction.

        Runs ``q`` against the opened database and returns a list with
        one metadata dictionary per hit. Each dictionary carries an extra
        ``highlight`` entry holding the concordance of ``q`` in the
        record text. When ``limit`` is not None, at most ``limit``
        results are returned.
        """
        if self.queryFactory is None:
            self.queryFactory = self.db.get_object(self.session,
                                                   'defaultQueryFactory')
        if self.titleSel is None:
            self.__initializeTitleSelector()
        if self.anywhereSel is None:
            self.__initializeAnywhereSelector()
        if self.proxExtractor is None:
            self.__initializeProximityExtractor()

        c3Query = self.queryFactory.get_query(self.session, q)
        rs = self.db.search(self.session, c3Query)

        # open up the json file with reader specific attributes
        metadata_path = self.database_path + self.cheshire_metadata_dir
        with open(metadata_path + '/' + self.database_name) as f:
            db_md_dict = json.load(f)

        # loop through recordset, create new results list with dictionary
        # of found values
        results = list()
        for rsi in rs:
            if limit is not None and len(results) >= limit:
                break
            rec = rsi.fetch_record(self.session)
            # check the record titles
            titleData = self.titleSel.process_record(self.session, rec)
            # checking out the proximity attributes
            elems = self.anywhereSel.process_record(self.session, rec)
            extracted = self.proxExtractor.process_xpathResult(self.session,
                                                               elems)
            # list(...) so the first value can be taken whether values()
            # returns a list or a view
            doc_dict = list(extracted.values())[0]
            concordance = self.__highlight(doc_dict['text'], q, 20)
            # extracts document name key
            fn_key = os.path.basename(titleData[3][0])
            # append highlighted concordance to the dictionary
            db_md_dict[fn_key][u'highlight'] = " ".join(concordance)
            results.append(db_md_dict[fn_key])
        return results
class Cheshire3Engine(BaseEngine):
    """Search engine backend that indexes and searches through Cheshire3.

    The session and server are class attributes created when the class
    is defined. ``databaseName`` and ``databasePath`` are read from the
    instance but are not set anywhere in this class.
    """
    #schema = Schema(title=TEXT(stored=True), path=TEXT(stored=True), href=ID(stored=True), cfiBase=TEXT(stored=True), spinePos=TEXT(stored=True), content=TEXT)
    #database = 'db_tdo_simple_sru'
    cheshire_metadata_dir = '/cheshire3-metadata'
    session = Session()
    serverConfig = os.path.join(cheshire3Root, 'configs', 'serverConfig.xml')
    server = SimpleServer(session, serverConfig)
    queryFactory = None
    db = None
    titleSel = None
    anywhereSel = None
    proxExtractor = None

    def __initializeTitleSelector(self):
        """Look up the title selector, trying both known identifiers."""
        try:
            self.titleSel = self.db.get_object(self.session,
                                               'titleXPathSelector')
        except ObjectDoesNotExistException:
            try:
                self.titleSel = self.db.get_object(self.session,
                                                   'titleSelector')
            except ObjectDoesNotExistException as e:
                print(e)

    def __initializeAnywhereSelector(self):
        """Look up the 'anywhereXPathSelector' object of the database."""
        try:
            self.anywhereSel = self.db.get_object(self.session,
                                                  'anywhereXPathSelector')
        except ObjectDoesNotExistException as e:
            print(e)

    def __initializeProximityExtractor(self):
        """Look up the 'ProxExtractor' object of the database."""
        try:
            self.proxExtractor = self.db.get_object(self.session,
                                                    'ProxExtractor')
        except ObjectDoesNotExistException as e:
            print(e)

    def __highlight(self, text, term, n):
        """Return one highlighted snippet per occurrence of term in text.

        Each snippet consists of up to ``n`` characters before the
        occurrence, the term wrapped in ``<b class="match term0">``, and
        up to ``n`` characters after it. The term is matched literally.
        An empty term yields no snippets.
        """
        term_concordance = list()
        if not term:
            return term_concordance
        term_len = len(term)
        # re.escape: the term is user text, not a regular expression
        for match in re.finditer(re.escape(term), text):
            idx = match.start()
            # Clamp at 0: a negative start would slice from the end of text
            start = max(0, idx - n)
            after = idx + term_len
            end = min(len(text), after + n)
            # The trailing context starts after the term, so the term is
            # not repeated behind the highlighted copy.
            term_concordance.append(text[start:idx] +
                                    '<b class="match term0">' + term +
                                    '</b>' + text[after:end])
        return term_concordance

    def open(self):
        """ The Cheshire get_object line should throw an exception if it
        can't open passed db. Failures are printed, not raised.
        """
        try:
            self.db = self.server.get_object(self.session, self.databaseName)
            self.session.database = self.databaseName
        except Exception as e:
            print(e)
            print("openning database {} failed".format(self.databaseName))

    def create(self):
        """Create the database directory, metadata file and Cheshire3 db."""
        if not os.path.exists(self.databasePath):
            os.makedirs(self.databasePath)

        # create cheshire metadata directory if needed, then initialize
        # the metadata file with an empty JSON object
        metadata_path = self.databasePath + self.cheshire_metadata_dir
        if not os.path.exists(metadata_path):
            os.makedirs(metadata_path)
        with open(metadata_path + '/' + self.databaseName, 'w') as f:
            json.dump({}, f)

        try:
            print("openning database {} to create".format(self.databasePath))
            # NOTE(review): the command line is built by concatenation;
            # paths containing spaces or shell metacharacters will break.
            os.system("cheshire3-init " + self.databasePath +
                      " --database=" + self.databaseName)
        except Exception as e:
            print(e)
#!/bin/env python import os, sys import cheshire3 from cheshire3.baseObjects import Session from cheshire3.server import SimpleServer from cheshire3.internal import cheshire3Root from cheshire3.document import StringDocument from lxml import etree # Launch a Cheshire session session = Session() serverConfig = os.path.join(cheshire3Root, 'configs', 'serverConfig.xml') serv = SimpleServer(session, serverConfig) # Grab our objects db = serv.get_object(session, 'db_dickens') xmlp = db.get_object(session, 'LxmlParser') excl = db.get_object(session, 'quoteExcludeSpanSelector') if '--exclude' in sys.argv: data = """<div> <p type="speech" id="BH.c6.p114"> <s id="BH.c6.s340"> <qs/> "My dear Miss Summerson," <qe/> said Richard in a whisper,