def main(argv=None):
    """Load data into a Cheshire3 database based on parameters in argv.

    Returns 1 when no database identifier was given and none could be
    discovered from the current working directory, and 2 when the
    identified database does not exist.  Otherwise the default document
    factory loads ``args.data`` and the 'buildIndexWorkflow' processes it.
    """
    global argparser, session, server, db
    args = (argparser.parse_args() if argv is None
            else argparser.parse_args(argv))
    session = Session()
    server = SimpleServer(session, args.serverconfig)

    dbid = args.database
    if dbid is None:
        # No identifier on the command line: discover one from the cwd.
        try:
            dbid = identify_database(session, os.getcwd())
        except EnvironmentError as e:
            server.log_critical(session, e.message)
            return 1
        discovered = "database identifier not specified, discovered: {0}"
        server.log_debug(session, discovered.format(dbid))

    try:
        db = server.get_object(session, dbid)
    except ObjectDoesNotExistException:
        template = ("Cheshire3 database {0} does not exist. "
                    "Please provide a different database identifier using "
                    "the --database option. ")
        server.log_critical(session, template.format(dbid))
        return 2

    docFac = db.get_object(session, 'defaultDocumentFactory')
    docFac.load(session,
                args.data,
                args.cache,
                args.format,
                args.tagname,
                args.codec)
    workflow = db.get_object(session, 'buildIndexWorkflow')
    workflow.process(session, docFac)
def main(argv=None):
    """Parse argv, locate the Cheshire3 database and run the chosen command.

    Returns 1 when no database identifier was given and none could be
    discovered from the current working directory, 2 when the identified
    database does not exist, and otherwise the result of ``args.func(args)``.
    """
    global argparser, lockfilepath
    global session, server, db, lgr
    args = (argparser.parse_args() if argv is None
            else argparser.parse_args(argv))
    session = Session()
    server = SimpleServer(session, args.serverconfig)

    dbid = args.database
    if dbid is None:
        # No identifier on the command line: discover one from the cwd.
        try:
            dbid = identify_database(session, os.getcwd())
        except EnvironmentError as e:
            server.log_critical(session, e.message)
            return 1
        discovered = "database identifier not specified, discovered: {0}"
        server.log_debug(session, discovered.format(dbid))

    try:
        db = server.get_object(session, dbid)
    except ObjectDoesNotExistException:
        template = ("Cheshire3 database {0} does not exist. "
                    "Please provide a different database identifier using "
                    "the --database option. ")
        server.log_critical(session, template.format(dbid))
        return 2

    # Module-level logger used by the sub-command implementations.
    lgr = db.get_path(session, 'defaultLogger')
    return args.func(args)
def main(argv=None):
    """Search a Cheshire3 database based on query in argv.

    Returns 1 when no database identifier was given and none could be
    discovered from the current working directory, 2 when the identified
    database does not exist, and otherwise whatever ``_format_resultSet``
    returns for the search results.
    """
    global argparser, session, server, db
    args = (argparser.parse_args() if argv is None
            else argparser.parse_args(argv))
    session = Session()
    server = SimpleServer(session, args.serverconfig)

    dbid = args.database
    if dbid is None:
        # No identifier on the command line: discover one from the cwd.
        try:
            dbid = identify_database(session, os.getcwd())
        except EnvironmentError as e:
            server.log_critical(session, e.message)
            return 1
        discovered = "database identifier not specified, discovered: {0}"
        server.log_debug(session, discovered.format(dbid))

    try:
        db = server.get_object(session, dbid)
    except ObjectDoesNotExistException:
        template = ("Cheshire3 database {0} does not exist. "
                    "Please provide a different database identifier using "
                    "the --database option. ")
        server.log_critical(session, template.format(dbid))
        return 2

    query_factory = db.get_object(session, "defaultQueryFactory")
    query = query_factory.get_query(session, args.query, format=args.format)
    hits = db.search(session, query)
    return _format_resultSet(hits,
                             maximumRecords=args.maxRecs,
                             startRecord=args.startRec)
def build_architecture(data=None):
    """(Re)create the module-level Cheshire3 objects for 'db_ead'.

    ``data`` is unused; it exists so the function can also be run as a
    clean-up callback, in which case it is always None.
    """
    global session, serv, db, dbPath, docParser
    global fullTxr, fullSplitTxr
    global ppFlow
    global rebuild

    # Re-establish the session, keeping the current user when there is one.
    previous_user = session.user if session else None
    session = Session()
    session.database = 'db_ead'
    session.environment = 'apache'
    session.user = previous_user

    config_path = os.path.join(cheshirePath, 'cheshire3', 'configs',
                               'serverConfig.xml')
    serv = SimpleServer(session, config_path)
    db = serv.get_object(session, 'db_ead')
    dbPath = db.get_path(session, 'defaultPath')
    docParser = db.get_object(session, 'LxmlParser')

    # Transformers
    fullTxr = db.get_object(session, 'htmlFullTxr')
    fullSplitTxr = db.get_object(session, 'htmlFullSplitTxr')

    # Workflows
    ppFlow = db.get_object(session, 'preParserWorkflow')
    ppFlow.load_cache(session, db)

    rebuild = False
def build_architecture(data=None):
    """(Re)create the module-level Cheshire3 objects for 'db_hubedit'.

    Builds a fresh apache-environment session, the server, the database and
    the stores and parser the rest of the module reads as globals, then
    clears the ``rebuild`` flag.  ``data`` is not used.
    """
    global rebuild, session, serv, db, dbPath
    global editStore, authStore, instStore, userStore, xmlp
    global docStoreConfigStore

    session = Session()
    session.database = 'db_hubedit'
    session.environment = 'apache'

    config_path = os.path.join(cheshire3Root, 'configs', 'serverConfig.xml')
    serv = SimpleServer(session, config_path)
    db = serv.get_object(session, 'db_hubedit')
    dbPath = db.get_path(session, 'defaultPath')

    def fetch(identifier):
        # Every remaining object comes from the same database and session.
        return db.get_object(session, identifier)

    editStore = fetch('editingStore')
    userStore = fetch('hubAuthStore')
    instStore = fetch('institutionStore')
    docStoreConfigStore = fetch('documentStoreConfigStore')
    authStore = fetch('adminAuthStore')
    xmlp = fetch('LxmlParser')

    rebuild = False
def __init__(self, session, logger):
    """Point ``session`` at 'db_dickens' and fetch the stores used later.

    session -- Cheshire3 session; its ``database`` attribute is overwritten
    logger -- stored as ``self.logger``
    """
    self.logger = logger
    self.session = session
    session.database = 'db_dickens'

    config_path = os.path.join(cheshire3Root, 'configs', 'serverConfig.xml')
    server = SimpleServer(session, config_path)
    database = server.get_object(session, session.database)

    self.db = database
    self.concStore = database.get_object(session, 'concordanceStore')
    self.collStore = database.get_object(session, 'collocateStore')
    self.idxStore = database.get_object(session, 'indexStore')
def main(argv=None):
    """Load data into a Cheshire3 database based on parameters in argv.

    Every entry of ``args.data`` is loaded through the default document
    factory and then run through the 'buildIndexWorkflow'.

    Returns 1 when no database identifier was given and none could be
    discovered from the current working directory, and 2 when the
    identified database does not exist.  A MissingDependencyException
    raised while loading is logged and re-raised on behalf of the
    'cheshire3-load script'.
    """
    global argparser, session, server, db
    args = (argparser.parse_args() if argv is None
            else argparser.parse_args(argv))
    session = Session()
    server = SimpleServer(session, args.serverconfig)

    dbid = args.database
    if dbid is None:
        # No identifier on the command line: discover one from the cwd.
        try:
            dbid = identify_database(session, os.getcwd())
        except EnvironmentError as e:
            server.log_critical(session, e.message)
            return 1
        discovered = "database identifier not specified, discovered: {0}"
        server.log_debug(session, discovered.format(dbid))

    try:
        db = server.get_object(session, dbid)
    except ObjectDoesNotExistException:
        template = ("Cheshire3 database {0} does not exist. "
                    "Please provide a different database identifier using "
                    "the --database option. ")
        server.log_critical(session, template.format(dbid))
        return 2

    # Allow for multiple data arguments
    docFac = db.get_object(session, 'defaultDocumentFactory')
    for source in args.data:
        try:
            docFac.load(session,
                        source,
                        args.cache,
                        args.format,
                        args.tagname,
                        args.codec)
        except MissingDependencyException as e:
            server.log_critical(session, e.reason)
            raise MissingDependencyException('cheshire3-load script',
                                             e.dependencies)
        workflow = db.get_object(session, 'buildIndexWorkflow')
        workflow.process(session, docFac)
def build_architecture(data=None):
    """(Re)create the module-level Cheshire3 objects for 'db_' + databaseName.

    Builds a fresh anonymous apache-environment session, the server and the
    database, then fetches the query factory, parser, record store and the
    two transformers the module reads as globals.  ``data`` is not used.
    """
    global session, serv, db, qf, xmlp
    global recordStore, sentenceStore, paragraphStore, resultSetStore
    global articleTransformer, kwicTransformer

    session = Session()
    session.environment = 'apache'
    session.user = None

    config_path = os.path.join(cheshire3Root, 'configs', 'serverConfig.xml')
    serv = SimpleServer(session, config_path)

    # The database is selected only after the server has been created.
    session.database = 'db_' + databaseName
    db = serv.get_object(session, session.database)

    def fetch(identifier):
        return db.get_object(session, identifier)

    qf = fetch('defaultQueryFactory')
    xmlp = fetch('LxmlParser')
    recordStore = fetch('recordStore')
    articleTransformer = fetch('article-Txr')
    kwicTransformer = fetch('kwic-Txr')
def build_architecture(data=None):
    """(Re)create the module-level Cheshire3 objects for 'db_' + databaseName.

    Builds a fresh anonymous apache-environment session, the server and the
    database, then fetches the query factory, parser, stores, extractors and
    transformers the module reads as globals.  ``data`` is not used.
    """
    global session, serv, db, qf, xmlp
    global recordStore, resultSetStore, idxStore
    global articleTransformer, kwicTransformer
    global proxExtractor, simpleExtractor
    global adf, fimi2, rule, arm, vecTxr, vectorStore, armTableTxr

    session = Session()
    session.environment = 'apache'
    session.user = None

    config_path = os.path.join(cheshire3Root, 'configs', 'serverConfig.xml')
    serv = SimpleServer(session, config_path)

    # The database is selected only after the server has been created.
    session.database = 'db_' + databaseName
    db = serv.get_object(session, session.database)

    def fetch(identifier):
        return db.get_object(session, identifier)

    qf = fetch('defaultQueryFactory')
    xmlp = fetch('LxmlParser')
    recordStore = fetch('recordStore')
    resultSetStore = fetch('resultSetStore')
    simpleExtractor = fetch('SimpleExtractor')
    proxExtractor = fetch('ProxExtractor')
    articleTransformer = fetch('article-Txr')
    kwicTransformer = fetch('kwic-Txr')
    idxStore = fetch('indexStore')
def main(argv=None):
    """Search a Cheshire3 database based on query in argv.

    Returns 1 when no database identifier was given and none could be
    discovered from the current working directory, 2 when the identified
    database does not exist, and otherwise whatever ``_format_resultSet``
    returns for the search results.
    """
    global argparser, session, server, db
    args = (argparser.parse_args() if argv is None
            else argparser.parse_args(argv))
    session = Session()
    server = SimpleServer(session, args.serverconfig)

    dbid = args.database
    if dbid is None:
        # No identifier on the command line: discover one from the cwd.
        try:
            dbid = identify_database(session, os.getcwd())
        except EnvironmentError as e:
            server.log_critical(session, e.message)
            return 1
        discovered = "database identifier not specified, discovered: {0}"
        server.log_debug(session, discovered.format(dbid))

    try:
        db = server.get_object(session, dbid)
    except ObjectDoesNotExistException:
        template = ("Cheshire3 database {0} does not exist. "
                    "Please provide a different database identifier using "
                    "the --database option. ")
        server.log_critical(session, template.format(dbid))
        return 2

    query_factory = db.get_object(session, 'defaultQueryFactory')
    query = query_factory.get_query(session, args.query, format=args.format)
    hits = db.search(session, query)
    return _format_resultSet(hits,
                             maximumRecords=args.maxRecs,
                             startRecord=args.startRec)
def getCheshire3Env(args):
    """Init and return Cheshire3 Session, Server and Database.

    Initialize Cheshire3 Session, Server and Database objects based on
    ``args`` (``args.serverconfig`` and ``args.database`` are read).  When
    no database identifier is given, one is discovered from the current
    working directory.  The database's default Logger is attached to the
    Session before returning.

    Failures to identify or to fetch the database are logged as critical
    and the original exception is re-raised.
    """
    session = Session()
    # Get the Server based on given serverConfig file
    server = SimpleServer(session, args.serverconfig)

    dbid = args.database
    if dbid is None:
        try:
            dbid = identify_database(session, os.getcwd())
        except EnvironmentError as e:
            server.log_critical(session, e.message)
            raise
        discovered = "database identifier not specified, discovered: {0}"
        server.log_debug(session, discovered.format(dbid))

    try:
        db = server.get_object(session, dbid)
    except ObjectDoesNotExistException:
        template = ("Cheshire3 database {0} does not exist. "
                    "Please provide a different database identifier using "
                    "the --database option. ")
        server.log_critical(session, template.format(dbid))
        raise

    # Attach a default Logger to the Session
    session.logger = db.get_path(session, 'defaultLogger')
    return session, server, db
import os import sys import cheshire3 from cheshire3.baseObjects import Session from cheshire3.server import SimpleServer from cheshire3.internal import cheshire3Root # Launch a Cheshire session session = Session() serverConfig = os.path.join(cheshire3Root, 'configs', 'serverConfig.xml') serv = SimpleServer(session, serverConfig) # Grab our objects db = serv.get_object(session, 'db_test_pgsql') recStore = db.get_object(session, 'recordStore') rssStore = db.get_object(session, 'resultSetStore') qfac = db.get_object(session, 'defaultQueryFactory') # Prove that we have some records in postgres rec = recStore.fetch_record(session, 0) if not rec: print "Could not retrieve records, have you run cheshire3-load?" sys.exit() # Make a query, and store results in resultSetStore try: qidx = sys.argv.index('--query') except:
def groupDist(dist):
    """Print a 1 / 2 / 3 / 4+ breakdown of an occurrence distribution.

    dist -- dict mapping an integer occurrence count to the number of
            articles that have that many occurrences

    One tab-separated row (bucket, articles, percentage of all articles) is
    printed per bucket, followed by a summary line.  Buckets missing from
    ``dist`` count as 0, and the rows are skipped altogether when ``dist``
    holds no articles so no division by zero can happen.
    """
    hits = sum(dist.values())
    occs = sum(int(k) * int(v) for k, v in dist.items())
    if hits:
        rows = [(i, dist.get(i, 0)) for i in (1, 2, 3)]
        # Every key from 4 upwards belongs to the last bucket, including
        # the largest one (range(4, max(...)) used to leave it out).
        rows.append(("4+", sum(v for k, v in dist.items() if k >= 4)))
        for label, articles in rows:
            share = float(articles) / float(hits) * 100.0
            print("%s\t%s\t%0.2f" % (label, articles, share))
    print("\n%i occurrences in %i articles" % (occs, hits))


session = Session()
serv = SimpleServer(session, "../../configs/serverConfig.xml")
db = serv.get_object(session, 'db_news')
session.database = 'db_news'
idxStore = db.get_object(session, 'indexStore')
recStore = db.get_object(session, 'recordStore')
class Chapter_view(object):
    """Render the chapters of a 'db_dickens' book as stand-alone HTML pages."""

    def __init__(self):
        """Open a Cheshire3 session on 'db_dickens' and fetch its helpers."""
        self.session = Session()
        self.session.database = 'db_dickens'
        self.serv = SimpleServer(
            self.session,
            os.path.join(cheshire3Root, 'configs', 'serverConfig.xml'))
        self.db = self.serv.get_object(self.session, self.session.database)
        self.qf = self.db.get_object(self.session, 'defaultQueryFactory')
        self.resultSetStore = self.db.get_object(self.session,
                                                 'resultSetStore')
        self.idxStore = self.db.get_object(self.session, 'indexStore')

    def search_book(self, book):
        """Return the result set of the query ``c3.book-idx = "<book>"``."""
        book_query = self.qf.get_query(self.session,
                                       'c3.book-idx = "%s"' % book)
        return self.db.search(self.session, book_query)

    @staticmethod
    def _preceding_text(w):
        """Return the text of the <n> node before ``w`` if it is to be kept."""
        try:
            text = w.xpath('./preceding-sibling::n[1]')[0].text
            # NOTE(review): this pattern appears not to compile (the `^--`
            # inside the set is a reversed range).  If so, re.error lands in
            # the handler below and the preceding text is always dropped --
            # confirm the intended rule before changing the pattern.
            if not re.match(r'[^\s$]|[\W|^--$]', text):
                return text
        except Exception:
            # Best effort: no preceding <n>, no text, or an unusable pattern.
            pass
        return ''

    @staticmethod
    def _following_text(w):
        """Return the text of the <n> node after ``w`` unless it is a space.

        A single space is dropped because the spans are joined with a space.
        """
        try:
            text = w.xpath('./following-sibling::n[1]')[0].text
        except Exception:
            # Best effort: there is no following <n> node.
            return ''
        if text is None or text == ' ':
            return ''
        return text

    def create_chapterXhtml(self, book_results):
        """Build one HTML page per chapter of a book.

        book_results -- passed to ``search_book`` as the book identifier

        Every <w> in a chapter becomes ``<span id="N">``, N being the
        running word number within that chapter.  Returns a dict mapping
        the HTML page to the chapter's ``num`` attribute, and prints the
        book and chapter number of every chapter processed.
        """
        session = self.session
        chapter_dict = {}
        for ch in self.search_book(book_results):
            rec = ch.fetch_record(session)
            tree = rec.get_dom(session).getroottree()
            title = tree.xpath('//div//title')[0].text  # for html page
            ch_number = tree.xpath('//div')[0].get('num')  # for filename
            countwords = 0
            paralist = []
            for para in tree.xpath('//div//p'):
                spanlist = []
                for w in para.xpath('./descendant::w'):
                    countwords += 1
                    word = (self._preceding_text(w) + (w.text or '') +
                            self._following_text(w))
                    spanlist.append('<span id="%s">%s</span>'
                                    % (countwords, word))
                # Spaces are added between the spans, not inside them.
                spans = re.sub('--', ' --', ' '.join(spanlist))
                paralist.extend(['<p>', spans, '</p>'])
            paras = ''.join(paralist)
            # The <html> and <meta> tags used to be written without "<".
            chapter = ('<!DOCTYPE html>\n'
                       '<html lang="en">\n'
                       '<head>\n'
                       '<meta charset="utf-8">\n'
                       '<title>' + title + '</title>\n'
                       '</head>\n\n'
                       '<body>\n\n' + paras + '\n\n'
                       '</body>\n\n'
                       '</html>')
            chapter_dict[chapter] = ch_number
            print('%s %s' % (tree.xpath('//div')[0].get('book'), ch_number))
        return chapter_dict
class Cheshire3WordList(object):
    '''
    Main class used to build Cheshire3 word lists. These can be of
    individual tokens or of clusters (also called n-grams or phrases).
    '''

    def __init__(self):
        '''
        Sets up the connection with Cheshire3.
        '''
        self.session = Session()
        self.session.database = 'db_dickens'
        self.serv = SimpleServer(
            self.session,
            os.path.join(cheshire3Root, 'configs', 'serverConfig.xml'))
        self.db = self.serv.get_object(self.session, self.session.database)
        self.qf = self.db.get_object(self.session, 'defaultQueryFactory')
        self.resultSetStore = self.db.get_object(self.session,
                                                 'resultSetStore')
        self.idxStore = self.db.get_object(self.session, 'indexStore')

    def build_subcorpus_clauses(self, subcorpora):
        '''
        Takes a list of subcorpora and turns it into a list of CQL clauses
        that Cheshire3 can process.

        'dickens' and 'ntc' are matched against subCorpus-idx, anything
        else against book-idx. Raises IOError when `subcorpora` is not a
        list.
        '''
        if not isinstance(subcorpora, list):
            # Call form of raise: valid on Python 2 and Python 3 alike.
            raise IOError('subcorpora should be a list')
        clauses = []
        for subcorpus in subcorpora:
            if subcorpus in ('dickens', 'ntc'):
                idx = 'subCorpus-idx'
            else:
                idx = 'book-idx'
            clauses.append('c3.{0} = "{1}"'.format(idx, subcorpus))
        return clauses

    def get_facets(self, index_name, subcorpora):
        '''
        Get the actual word counts ('facets') using the index and the list
        of subcorpora. The subcorpus clauses are combined with `or`.
        '''
        clauses = self.build_subcorpus_clauses(subcorpora)
        query = self.qf.get_query(self.session, ' or '.join(clauses))
        results = self.db.search(self.session, query)
        idx = self.db.get_object(self.session, index_name)
        return idx.facets(self.session, results)

    def facets_to_df(self, facets):
        '''
        Converts the facets into a dataframe that can be manipulated more
        easily.

        Facets come in the following format:
        [(u'a', (38, 879, 84372)), (u'all', (1067, 879, 15104)),
        The third value of each tuple (84372 and 15104 above) becomes the
        Count column. The sum of all counts is stored in `self.total`. The
        frame is returned sorted by Count, highest first, with an index
        that numbers the facets from 1 in their original order.
        '''
        dataframe = pd.DataFrame(facets, columns=['Type', 'Raw facet'])
        dataframe.index += 1
        dataframe['Count'] = dataframe['Raw facet'].apply(
            lambda raw: raw[2])
        self.total = dataframe.Count.sum()
        dataframe['Percentage'] = dataframe.Count / self.total * 100
        dataframe['Percentage'] = dataframe['Percentage'].round(decimals=2)
        dataframe.sort_values(by='Count', ascending=False, inplace=True)
        dataframe['Empty'] = ''
        return dataframe

    def wordlist_to_json(self):
        '''
        Returns a json string that is adapted to the CLiC API: one
        [Empty, Type, Count, Percentage] row per word.
        '''
        # Selecting columns builds a new frame, so self.wordlist is left
        # untouched without deep-copying all of it first.
        columns = ['Empty', 'Type', 'Count', 'Percentage']
        return self.wordlist[columns].to_json(orient='values')

    def build_wordlist(self, index_name, subcorpora):
        '''
        The core method that needs to be called in order to actually
        generate the keyword list. Once this method is called the .wordlist
        attribute will return the wordlist.
        '''
        facets = self.get_facets(index_name, subcorpora)
        self.wordlist = self.facets_to_df(facets)
for db in serv.databases.itervalues(): if db.get_setting(session, 'SRW') or db.get_setting(session, 'srw'): db._cacheProtocolMaps(session) map = db.protocolMaps.get('http://www.loc.gov/zing/srw/', None) # check that there's a path and that it can actually be requested from this handler if (map is not None) and \ (map.databaseUrl.startswith((handlerUrl + '/', handlerUrl[1:] + '/'))): map2 = db.protocolMaps.get('http://www.loc.gov/zing/srw/update/', None) configs[map.databaseUrl] = {'http://www.loc.gov/zing/srw/' : map, 'http://www.loc.gov/zing/srw/update/' : map2} elif (map is not None): apache.log_error("Database URL ({0}) does not match handler URL ({1}); will not handle database {{2}}".format(map.databaseUrl, handlerUrl, db.id), apache.APLOG_WARNING) else: # too many dbs to cache in memory for dbid, conf in serv.databaseConfigs.iteritems(): db = serv.get_object(session, dbid) session.database = dbid if db.get_setting(session, 'SRW') or db.get_setting(session, 'srw'): db._cacheProtocolMaps(session) pmap = db.protocolMaps.get('http://www.loc.gov/zing/srw/', None) if (pmap is not None) and (pmap.databaseUrl.startswith((handlerUrl + '/', handlerUrl[1:] + '/'))): configs[pmap.databaseUrl] = (dbid, {'http://www.loc.gov/zing/srw/': pmap.id}) pmap2 = db.protocolMaps.get('http://www.loc.gov/zing/srw/update/', None) if pmap2 is not None: configs[pmap.databaseUrl][1].update({'http://www.loc.gov/zing/srw/update/': pmap2.id}) elif (pmap is not None): apache.log_error("Database URL ({0}) does not match handler URL ({1}); will not handle database {{2}}".format(pmap.databaseUrl, handlerUrl, dbid), apache.APLOG_WARNING) # remove cached db object try: del serv.objects[dbid] except KeyError:
import os
import sys

import cheshire3

from cheshire3.baseObjects import Session
from cheshire3.server import SimpleServer
from cheshire3.internal import cheshire3Root

# Launch a Cheshire session
session = Session()
serverConfig = os.path.join(cheshire3Root, "configs", "serverConfig.xml")
serv = SimpleServer(session, serverConfig)

# Grab our objects
db = serv.get_object(session, "db_test_query_highlight")
recStore = db.get_object(session, "recordStore")
qfac = db.get_object(session, "defaultQueryFactory")

# Run a phrase query against the database
q = qfac.get_query(session, 'cql.anywhere = "e f g"')
rs = db.search(session, q)
if len(rs) == 0:
    # Without a hit there is no record to highlight; stop with a message
    # rather than an IndexError on rs[0].
    print("Query matched no records, have you run cheshire3-load?")
    sys.exit(1)

# rs[0].proxInfo = [[[1, 4, 8, 7], [1, 5, 10, 8], [1, 6, 12, 9]]]
# being element 1,1,1 / wordoffset 4,5,6 / byteoffset 8,10,12 / termid 7,8,9
rec = rs[0].fetch_record(session)

# Transform the first hit with the LOQTHTransformer and show the result
loqth = db.get_object(session, "LOQTHTransformer")
doc = loqth.process_record(session, rec)
print(doc.get_raw(session))
(map.databaseUrl.startswith((handlerUrl + '/', handlerUrl[1:] + '/'))): map2 = db.protocolMaps.get( 'http://www.loc.gov/zing/srw/update/', None) configs[map.databaseUrl] = { 'http://www.loc.gov/zing/srw/': map, 'http://www.loc.gov/zing/srw/update/': map2 } elif (map is not None): apache.log_error( "Database URL ({0}) does not match handler URL ({1}); will not handle database {{2}}" .format(map.databaseUrl, handlerUrl, db.id), apache.APLOG_WARNING) else: # too many dbs to cache in memory for dbid, conf in serv.databaseConfigs.iteritems(): db = serv.get_object(session, dbid) session.database = dbid if db.get_setting(session, 'SRW') or db.get_setting(session, 'srw'): db._cacheProtocolMaps(session) pmap = db.protocolMaps.get('http://www.loc.gov/zing/srw/', None) if (pmap is not None) and (pmap.databaseUrl.startswith( (handlerUrl + '/', handlerUrl[1:] + '/'))): configs[pmap.databaseUrl] = (dbid, { 'http://www.loc.gov/zing/srw/': pmap.id }) pmap2 = db.protocolMaps.get( 'http://www.loc.gov/zing/srw/update/', None) if pmap2 is not None: configs[pmap.databaseUrl][1].update( {'http://www.loc.gov/zing/srw/update/': pmap2.id})
from clic.dickens.web.dickensWebConfig import * from clic.dickens.web.dickensSearchHandler import SearchHandler from clic.dickens.web.dickensBrowseHandler import BrowseHandler cheshirePath = os.environ.get('HOME', '/home/cheshire') logPath = os.path.join(cheshirePath, 'clic', 'www', databaseName, 'logs', 'searchHandler.log') htmlPath = os.path.join(cheshirePath, 'clic', 'www', databaseName, 'html') session = Session() session.environment = 'apache' session.user = None serv = SimpleServer(session, os.path.join(cheshire3Root, 'configs', 'serverConfig.xml')) session.database = 'db_dickens' db = serv.get_object(session, session.database) authStore = db.get_object(session, 'authStore') # Discover objects... def handler(req): global db, htmlPath, logPath, cheshirePath, xmlp, recordStore form = FieldStorage(req) try: dir = req.uri[1:].rsplit('/')[1] except IndexError: return apache.HTTP_NOT_FOUND remote_host = req.get_remote_host(apache.REMOTE_NOLOOKUP) lgr = FileLogger(logPath, remote_host) # lgr.log(req.uri) # lgr.log('directory is %s' % dir)
def main(argv=None):
    """Initialize a Cheshire 3 database based on parameters in argv.

    Creates a ``.cheshire3`` directory (with ``stores``, ``indexes`` and
    ``logs`` sub-directories) inside ``args.directory``, writes the default
    ZeeRex, database, selector, index and workflow configuration files into
    it and registers the database config file with the server.

    Returns 0 on success.  Raises ValueError when a database with the
    chosen identifier already exists, and lets an OSError propagate when one
    of the directories cannot be created.
    """
    global argparser, session, server, db
    if argv is None:
        args = argparser.parse_args()
    else:
        args = argparser.parse_args(argv)
    session = Session()
    server = SimpleServer(session, args.serverconfig)

    if args.database is None:
        if args.directory.endswith(os.path.sep):
            args.directory = args.directory[:-1]
        # Find local database name to use as basis of database id
        dbid = "db_{0}".format(os.path.basename(args.directory))
        server.log_debug(
            session,
            "database identifier not specified, defaulting to: "
            "{0}".format(dbid))
    else:
        dbid = args.database

    try:
        db = server.get_object(session, dbid)
    except ObjectDoesNotExistException:
        # Doesn't exist, so OK to init it
        pass
    else:
        # TODO: check for --force ?
        msg = ("database with id '{0}' has already been init'd. "
               "Please specify a different id using the --database "
               "option.").format(dbid)
        server.log_critical(session, msg)
        raise ValueError(msg)

    # Create a .cheshire3 directory and populate it
    c3_dir = os.path.join(os.path.abspath(args.directory), '.cheshire3')
    for dir_path in [c3_dir,
                     os.path.join(c3_dir, 'stores'),
                     os.path.join(c3_dir, 'indexes'),
                     os.path.join(c3_dir, 'logs')]:
        try:
            os.makedirs(dir_path)
        except OSError:
            # Only an already existing directory is harmless; anything else
            # (permissions, a file in the way, ...) must not be reported as
            # "already exists".
            if not os.path.isdir(dir_path):
                raise
            server.log_warning(
                session,
                "directory already exists {0}".format(dir_path))

    # Generate config file(s)
    xmlFilesToWrite = {}

    # Generate Protocol Map(s) (ZeeRex)
    zrx = create_defaultZeerex(dbid, args)
    zrxPath = os.path.join(c3_dir, 'zeerex_sru.xml')
    # create_defaultConfig receives args, so the path is set beforehand.
    args.zeerexPath = zrxPath
    xmlFilesToWrite[zrxPath] = zrx

    # Generate generic database config
    dbConfig = create_defaultConfig(dbid, args)
    dbConfigPath = os.path.join(c3_dir, 'config.xml')

    # Generate the configs for generic selectors, generic indexes and
    # default Workflows, and include each of them in the database config.
    for fileName, create in [
            ('configSelectors.xml', create_defaultConfigSelectors),
            ('configIndexes.xml', create_defaultConfigIndexes),
            ('configWorkflows.xml', create_defaultConfigWorkflows)]:
        path = os.path.join(c3_dir, fileName)
        xmlFilesToWrite[path] = create()
        dbConfig = include_configByPath(dbConfig, path)

    # Stored only now so the version written to disk carries every include
    # even if include_configByPath hands back a new node.
    xmlFilesToWrite[dbConfigPath] = dbConfig

    # Write configs to files; the serialisation is UTF-8 encoded bytes,
    # hence binary mode.
    for path, node in xmlFilesToWrite.items():
        with open(path, 'wb') as conffh:
            conffh.write(etree.tostring(node,
                                        pretty_print=True,
                                        encoding="utf-8"))

    # Tell the server to register the config file
    server.register_databaseConfigFile(session, dbConfigPath)
    return 0
# License: GPL import sys import os from cheshire3.baseObjects import Session from cheshire3.internal import cheshire3Root from cheshire3.server import SimpleServer if __name__ == '__main__': session = Session() # 8 servConfig = os.path.join(cheshire3Root, 'configs', 'serverConfig.xml') # 9 serv = SimpleServer(session, servConfig) # 10 db = serv.get_object(session, 'db_tei') # 11 docFac = db.get_object(session, 'defaultDocumentFactory') # 12 docParser = db.get_object(session, 'TeiParser') # 13 recStore = db.get_object(session, 'TeiRecordStore') # 14 docFac.load( session, "/home/luoxing/data/test.xml", cache=2, tagName='page') db.begin_indexing(session) recStore.begin_storing(session) for doc in docFac: try: rec = docParser.process_document(session, doc) except: print(doc.get_raw(session)) sys.exit() id = recStore.create_record(session, rec)
class Concordance(object):
    '''
    Builds concordance lines from a Cheshire3 search.

    Takes terms, an index name, a book selection and a search type, and
    produces for every hit the search term with up to ten words of context
    on either side plus location information. The result is a plain list
    structure, so it can be serialised for an ajax api.
    '''

    # Maximum number of hits that are turned into concordance lines.
    MAX_HITS = 1000
    # Number of context words kept on each side of the node.
    WORD_WINDOW = 10

    def __init__(self):
        '''
        Set up a cheshire3 session/connection to the database. This
        initialisation does not handle the actual search term
        (cf. build_and_run_query).
        '''
        self.session = Session()
        self.session.database = 'db_dickens'
        self.serv = SimpleServer(
            self.session,
            os.path.join(cheshire3Root, 'configs', 'serverConfig.xml'))
        self.db = self.serv.get_object(self.session, self.session.database)
        self.qf = self.db.get_object(self.session, 'defaultQueryFactory')
        self.resultSetStore = self.db.get_object(self.session,
                                                 'resultSetStore')
        self.idxStore = self.db.get_object(self.session, 'indexStore')

    def build_and_run_query(self, terms, idxName, Materials, selectWords):
        '''
        Builds a cheshire query and runs it.

        terms -- string with the search term(s)
        idxName -- name of the index the terms are searched in
        Materials -- list of subcorpus ('dickens', 'ntc') or book ids
        selectWords -- "whole" searches the terms as one phrase, any other
                       value searches every word on its own

        Its output is a tuple of which the first element is a resultset and
        the second element is the number of search terms in the query.
        '''
        subcorpus = []
        for corpus in Materials:
            # ntc is 19th century?
            if corpus in ['dickens', 'ntc']:
                material_index = 'subCorpus-idx'
            else:
                material_index = 'book-idx'
            subcorpus.append('c3.{0} = "{1}"'.format(material_index, corpus))

        # search whole phrase or individual words?
        if selectWords == "whole":
            # for historic purposes: number_of_search_terms was originally
            # nodeLength
            number_of_search_terms = len(terms.split())
            terms = [terms]
        else:
            # FIXME is this correct in case of an AND search?
            number_of_search_terms = 1
            terms = terms.split()

        term_clauses = ['c3.{0} = "{1}"'.format(idxName, term)
                        for term in terms]

        # note: /proxInfo needed to search individual books
        # NOTE(review): the clauses are joined without parentheses; if CQL
        # booleans are evaluated left to right, only the first term is
        # restricted to the selection -- confirm before adding brackets.
        query = self.qf.get_query(
            self.session,
            ' or '.join(subcorpus) + ' and/proxInfo ' +
            ' or '.join(term_clauses))
        result_set = self.db.search(self.session, query)
        return result_set, number_of_search_terms

    def _first_text(self, rec, path):
        '''Text of the first node at `path`, or '' when there is none.'''
        try:
            text = rec.process_xpath(self.session, path)[0].text
        except Exception:
            # Best effort: a word without an adjacent <n> node is normal.
            return ''
        return text or ''

    def _token_text(self, rec, position):
        '''Word number `position` of the chapter with its adjacent <n> text.'''
        word_path = '/div/descendant::w[%d]' % position
        before = self._first_text(rec, word_path + '/preceding-sibling::n[1]')
        word = rec.process_xpath(self.session, word_path)[0].text
        after = self._first_text(rec, word_path + '/following-sibling::n[1]')
        return before + word + after

    def _locate_word(self, rec, match, idxName):
        '''Number of words in the chapter that precede the matched word.'''
        if idxName == 'chapter-idx':
            return match[0][1]
        if idxName not in ['quote-idx', 'non-quote-idx',
                           'longsus-idx', 'shortsus-idx']:
            raise ValueError('unsupported index: %r' % (idxName,))

        # The offset is relative to the element with this eid, so it has
        # to be converted into a chapter-wide word count.
        eid, offset = match[0][0], match[0][1]
        word_path = '//*[@eid="%d"]/following::w[%d+1]' % (eid, offset)
        # locate search term in xml
        search_term = rec.process_xpath(self.session, word_path)
        # get xml of sentence and chapter
        sentence_tree = rec.process_xpath(
            self.session, word_path + '/ancestor-or-self::s')
        chapter_tree = rec.process_xpath(
            self.session, word_path + '/ancestor-or-self::div')
        sid = sentence_tree[0].get('sid')
        # count words preceding the sentence
        preceding_words = len(chapter_tree[0].xpath(
            '/div/p/s[@sid="%s"]/preceding::s/descendant::w' % sid))
        # count words within the sentence, up to the search term
        target = search_term[0].get('o')
        words_in_sentence = 0
        for word in chapter_tree[0].xpath(
                '/div/p/s[@sid="%s"]/descendant::w' % sid):
            if word.get('o') == target:
                break
            words_in_sentence += 1
        return preceding_words + words_in_sentence

    def _book_stats(self, book, chapter):
        '''
        Look `book` up in the module-level `booklist` and return a dict
        with its title, its total word count, the word count of `chapter`
        and the paragraphs / sentences / words in the chapters before it.

        Raises IndexError when the book, or the chapter within the book, is
        not listed; the old code failed with an unbound or stale value for
        an unlisted chapter.
        '''
        chapter_number = int(chapter)
        for b in booklist:
            if b[0][0] != book:
                continue
            paras = sents = words = 0
            chapter_words = None
            for number, counts in enumerate(b[2], 1):
                if number < chapter_number:
                    paras += int(counts[0])
                    sents += int(counts[1])
                    words += int(counts[2])
                elif number == chapter_number:
                    chapter_words = counts[2]
                    break
            if chapter_words is None:
                raise IndexError('chapter %r of book %r not found in booklist'
                                 % (chapter, book))
            return {'title': b[0][1],
                    'total_words': b[1][0][2],
                    'chapter_words': chapter_words,
                    'paras_before': paras,
                    'sents_before': sents,
                    'words_before': words}
        raise IndexError('book %r not found in booklist' % (book,))

    def _chapter_info(self, rec):
        '''Values that are the same for every match in one record.'''
        div = rec.process_xpath(self.session, '/div')[0]
        info = self._book_stats(div.get('book'), div.get('num'))
        info['book'] = div.get('book')
        info['chapter'] = div.get('num')
        info['ch_words'] = len(
            rec.process_xpath(self.session, '/div/descendant::w'))
        return info

    def _build_line(self, rec, match, idxName, number_of_search_terms, info):
        '''Build the concordance line for one match in one record.'''
        word_id = self._locate_word(rec, match, idxName)

        # we operate with word position, not list position
        # (word 1 = 0 position in list)
        left_onset = max(1, word_id - self.WORD_WINDOW + 1)
        node_onset = word_id + 1
        right_onset = word_id + number_of_search_terms + 1
        # never read past the last word of the chapter
        right_offset = min(right_onset + self.WORD_WINDOW,
                           info['ch_words'] + 1)

        left_text = [self._token_text(rec, position)
                     for position in range(left_onset, node_onset)]
        node_text = [self._token_text(rec, position)
                     for position in range(node_onset, right_onset)]
        right_text = [self._token_text(rec, position)
                      for position in range(right_onset, right_offset)]

        node_path = '/div/descendant::w[%d+1]' % word_id
        para_chap = rec.process_xpath(
            self.session, node_path + '/ancestor-or-self::p')[0].get('pid')
        sent_chap = rec.process_xpath(
            self.session, node_path + '/ancestor-or-self::s')[0].get('sid')
        word_chap = word_id

        # position of paragraph, sentence and word in the whole book
        para_book = info['paras_before'] + int(para_chap)
        sent_book = info['sents_before'] + int(sent_chap)
        word_book = info['words_before'] + int(word_chap)

        return [left_text, node_text, right_text,
                [info['book'], info['title'], info['chapter'],
                 para_chap, sent_chap,
                 str(word_chap), str(info['chapter_words'])],
                [str(para_book), str(sent_book),
                 str(word_book), str(info['total_words'])]]

    def create_concordance(self, terms, idxName, Materials, selectWords):
        """
        main concordance method

        Returns a list whose first item is the total number of hits,
        followed by one entry per hit (at most MAX_HITS entries):

        [
        [left context - word 1, word 2, etc.],
        [node - word 1, word 2, etc],
        [right context - word 1, etc],
        [chapter metadata],
        [book metadata]
        ],
        etc.

        Raises ValueError when idxName is not one of the supported indexes
        and IndexError when a hit's book or chapter is missing from
        booklist; both can only happen when there is at least one hit.
        """
        # TODO change the variable names of the function itself
        # (Materials -> selection, etc.)
        result_set, number_of_search_terms = self.build_and_run_query(
            terms, idxName, Materials, selectWords)

        # total number of hits (not yet used in interface)
        total_count = sum(len(result.proxInfo) for result in result_set)

        # Each time a search term is found in a document (each match) is
        # described in terms of a proxInfo.
        #
        # It is insufficiently clear what proxInfo is. It takes the form
        # of three nested lists:
        #
        # [[[0, 169, 1033, 15292]],
        #  [[0, 171, 1045, 15292]], etc. ]
        #
        # We currently assume the following values:
        #
        # * the second item in the deepest list (169, 171) is the id of
        #   the <w> (word) node
        # * the first item is the id of the root element from which to
        #   start counting to find the word node; for instance, 0 for a
        #   chapter view (because the chapter is the root element), but
        #   151 for a search in quotes text.
        # * the third element is the exact character (spaces and
        #   punctuation are stored in <n> (non-word) nodes) at which the
        #   search term starts
        # * the fourth element is the total amount of characters in the
        #   document?
        conc_lines = []
        count = 0
        for result in result_set:
            if count >= self.MAX_HITS:
                # Limit reached: no need to fetch any further record.
                break
            rec = result.fetch_record(self.session)
            info = None
            for match in result.proxInfo:
                if count >= self.MAX_HITS:
                    break
                count += 1
                if info is None:
                    # Computed once per record, at its first match.
                    info = self._chapter_info(rec)
                conc_lines.append(self._build_line(
                    rec, match, idxName, number_of_search_terms, info))

        conc_lines.insert(0, total_count)
        return conc_lines
class ChapterRepository(object):
    '''
    Responsible for providing access to chapter resources within Cheshire.

    Each instance opens its own Cheshire3 session against the
    'db_dickens' database and keeps the server, database and default
    query factory around for later lookups.
    '''

    def __init__(self):
        self.session = Session()
        self.session.database = 'db_dickens'
        self.serv = SimpleServer(
            self.session,
            os.path.join(cheshire3Root, 'configs', 'serverConfig.xml'))
        self.db = self.serv.get_object(self.session, self.session.database)
        self.qf = self.db.get_object(self.session, 'defaultQueryFactory')

    def get_book_title(self, book):
        '''
        Gets the title of a book from the module-level booklist.

        book -- string - the book id/accronym e.g. BH

        Returns the title, or None when no entry of booklist carries
        that id (previously this case raised UnboundLocalError).
        '''
        book_title = None
        for b in booklist:
            # b[0] is read as an (id, title) pair.
            # No early exit: if an id were listed twice the last entry wins,
            # exactly as before.
            if b[0][0] == book:
                book_title = b[0][1]
        return book_title

    def _fetch_chapter_record(self, chapter_number, book):
        '''
        Searches the book index and fetches the record of one chapter.

        chapter_number -- integer - 1-based position in the result set
        book -- string - the book id/accronym e.g. BH
        '''
        query = self.qf.get_query(self.session, 'c3.book-idx = "%s"' % book)
        result_set = self.db.search(self.session, query)
        chapter_ptr = result_set[chapter_number - 1]
        return chapter_ptr.fetch_record(self.session)

    def get_chapter(self, chapter_number, book):
        '''
        Returns transformed XML for given chapter & book

        chapter_number -- integer
        book -- string - the book id/accronym e.g. BH

        Returns a (formatted_chapter, book_title) tuple.
        '''
        chapter = self._fetch_chapter_record(chapter_number, book)
        transformer = self.db.get_object(self.session, 'chapterView-Txr')
        formatted_chapter = transformer.process_record(
            self.session, chapter).get_raw(self.session)

        book_title = self.get_book_title(book)

        return formatted_chapter, book_title

    def get_raw_chapter(self, chapter_number, book):
        '''
        Returns raw chapter XML for given chapter & book

        chapter_number -- integer
        book -- string - the book id/accronym e.g. BH
        '''
        chapter = self._fetch_chapter_record(chapter_number, book)
        return chapter.get_dom(self.session)

    def get_chapter_with_highlighted_search_term(self, chapter_number, book,
                                                 wid, search_term):
        '''
        Returns transformed XML for given chapter & book with the search
        highlighted.

        We create the transformer directly so that we can pass extra
        parameters to it at runtime. In this case the search term.

        chapter_number -- integer
        book -- string - the book id/accronym e.g. BH
        wid -- integer - word index
        search_term -- string - term to highlight

        Returns a (serialised_html, book_title) tuple.
        '''
        raw_chapter = self.get_raw_chapter(chapter_number, book)

        # load our chapter xslt directly as a transformer
        path_to_xsl = CLIC_DIR + "/dbs/dickens/xsl/chapterView.xsl"
        xslt_doc = etree.parse(path_to_xsl)
        transformer = etree.XSLT(xslt_doc)

        terms = search_term.split(' ')

        # pass the search term into our transformer
        # (wid is wrapped in quotes so XSLT receives it as a string literal;
        # only the number of terms is passed on, not the terms themselves)
        transformed_chapter = transformer(
            raw_chapter,
            wid="'%s'" % wid,
            numberOfSearchTerms="%s" % len(terms))
        book_title = self.get_book_title(book)

        # return transformed html
        return etree.tostring(transformed_chapter), book_title
def main(argv=None):
    """Initialize a Cheshire 3 database based on parameters in argv.

    Creates a ``.cheshire3`` directory (with ``stores``, ``indexes`` and
    ``logs`` sub-directories) under ``args.directory``, writes the default
    ZeeRex, database, selector, index and workflow configs into it and
    registers the database config file with the server.

    argv -- list of command line arguments; when None the argument
            parser reads the process arguments itself.

    Returns 0 on success.
    Raises ValueError if a database with the chosen id already exists,
    and OSError if one of the directories cannot be created.
    """
    global argparser, session, server, db
    if argv is None:
        args = argparser.parse_args()
    else:
        args = argparser.parse_args(argv)
    session = Session()
    server = SimpleServer(session, args.serverconfig)
    if args.database is None:
        if args.directory.endswith(os.path.sep):
            args.directory = args.directory[:-1]
        # Find local database name to use as basis of database id
        dbid = "db_{0}".format(os.path.basename(args.directory))
        server.log_debug(session,
                         ("database identifier not specified, defaulting to: "
                          "{0}".format(dbid)))
    else:
        dbid = args.database

    try:
        db = server.get_object(session, dbid)
    except ObjectDoesNotExistException:
        # Doesn't exists, so OK to init it
        pass
    else:
        # TODO: check for --force ?
        msg = """database with id '{0}' has already been init'd. \
Please specify a different id using the --database option.""".format(dbid)
        server.log_critical(session, msg)
        raise ValueError(msg)

    # Create a .cheshire3 directory and populate it
    c3_dir = os.path.join(os.path.abspath(args.directory), '.cheshire3')
    for dir_path in [c3_dir,
                     os.path.join(c3_dir, 'stores'),
                     os.path.join(c3_dir, 'indexes'),
                     os.path.join(c3_dir, 'logs')]:
        try:
            os.makedirs(dir_path)
        except OSError:
            # Only an already-existing directory is harmless; anything else
            # (permissions, a plain file in the way, ...) would make the
            # config writes below fail, so surface it here instead of
            # mislabelling it.
            if not os.path.isdir(dir_path):
                raise
            server.log_warning(
                session,
                "directory already exists {0}".format(dir_path)
            )

    # Generate config file(s)
    xmlFilesToWrite = {}

    # Generate Protocol Map(s) (ZeeRex)
    zrx = create_defaultZeerex(dbid, args)
    zrxPath = os.path.join(c3_dir, 'zeerex_sru.xml')
    args.zeerexPath = zrxPath
    xmlFilesToWrite[zrxPath] = zrx

    # Generate generic database config
    dbConfig = create_defaultConfig(dbid, args)
    dbConfigPath = os.path.join(c3_dir, 'config.xml')
    xmlFilesToWrite[dbConfigPath] = dbConfig

    # Generate config for generic selectors
    selectorConfig = create_defaultConfigSelectors()
    path = os.path.join(c3_dir, 'configSelectors.xml')
    dbConfig = include_configByPath(dbConfig, path)
    xmlFilesToWrite[path] = selectorConfig

    # Generate config for generic indexes
    indexConfig = create_defaultConfigIndexes()
    path = os.path.join(c3_dir, 'configIndexes.xml')
    dbConfig = include_configByPath(dbConfig, path)
    xmlFilesToWrite[path] = indexConfig

    # Generate config for default Workflows
    workflowConfig = create_defaultConfigWorkflows()
    path = os.path.join(c3_dir, 'configWorkflows.xml')
    dbConfig = include_configByPath(dbConfig, path)
    xmlFilesToWrite[path] = workflowConfig

    # Write configs to files
    # (items() behaves the same as iteritems() here and is not Python 2 only)
    for path, node in xmlFilesToWrite.items():
        with open(path, 'w') as conffh:
            conffh.write(etree.tostring(node,
                                        pretty_print=True,
                                        encoding="utf-8"
                                        )
                         )

    # Tell the server to register the config file
    server.register_databaseConfigFile(session, dbConfigPath)
    return 0
# Create the parser for the "remove" command parser_remove = subparsers.add_parser('remove', help='Remove an existing user') parser_remove.add_argument('username', type=str, nargs='*', help='Username of the user(s) to remove') parser_remove.set_defaults(func=remove_user) # Build environment... session = Session() serv = SimpleServer( session, os.path.join(cheshire3Root, 'configs', 'serverConfig.xml' ) ) session.database = 'db_ead' db = serv.get_object(session, 'db_ead') xmlp = db.get_object(session, 'LxmlParser') authStore = db.get_object(session, 'hubAuthStore') # Editors superAuthStore = db.get_object(session, 'adminAuthStore') # Hub Staff instStore = db.get_object(session, 'institutionStore') # Institutions if __name__ == '__main__': sys.exit(main())
import cheshire3 from cheshire3.baseObjects import Session from cheshire3.server import SimpleServer from cheshire3.internal import cheshire3Root from cheshire3.document import StringDocument from lxml import etree # Launch a Cheshire session session = Session() serverConfig = os.path.join(cheshire3Root, 'configs', 'serverConfig.xml') serv = SimpleServer(session, serverConfig) # Grab our objects db = serv.get_object(session, 'db_dickens') xmlp = db.get_object(session, 'LxmlParser') excl = db.get_object(session, 'quoteExcludeSpanSelector') if '--exclude' in sys.argv: data = """<div> <p type="speech" id="BH.c6.p114"> <s id="BH.c6.s340"> <qs/> "My dear Miss Summerson," <qe/> said Richard in a whisper, <qs/> "I have ten pounds that I received from Mr. Kenge. </s>
import time, os
import cStringIO as StringIO

from cheshire3.server import SimpleServer
from cheshire3.utils import elementType
from cheshire3.baseObjects import Session
from cheshire3 import document
from cheshire3.workflow import SimpleWorkflow, CachingWorkflow
from cheshire3 import dynamic
from cheshire3.exceptions import *
from cheshire3.internal import cheshire3Root

# Module-level Cheshire3 environment, built once at import time
session = Session()
session.environment = "apache"
serv = SimpleServer(session, os.path.join(cheshire3Root, 'configs', 'serverConfig.xml'))
mdp = serv.get_object(session, 'defaultParser')

# configs maps a database URL to {protocol identifier: protocol map} for
# every database that has the 'remoteWorkflow' setting
configs = {}
serv._cacheDatabases(session)
for db in serv.databases.values():
    #if db.get_setting(session, 'C3WEP'):
    if db.get_setting(session, 'remoteWorkflow'):
        db._cacheProtocolMaps(session)
        #map = db.protocolMaps.get('c3WorflowExecutionProtocol', None)
        #configs[map.databaseUrl] = {'c3WorflowExecutionProtocol' : map}
        map = db.protocolMaps.get('http://www.cheshire3.org/protocols/workflow/1.0/', None)
        # A database may carry the setting without a matching protocol map;
        # skip it rather than fail the whole import on None.databaseUrl
        if map is not None:
            configs[map.databaseUrl] = {'http://www.cheshire3.org/protocols/workflow/1.0/' : map}


class reqHandler:
    log = None
# Root of the Cheshire3 installation; falls back to /home/cheshire when
# the C3HOME environment variable is not set
cheshirePath = os.environ.get('C3HOME', '/home/cheshire')

session = Session()
# Only mark the session as running under apache when mod_python can be
# imported; otherwise the session environment is left untouched
try:
    from mod_python import apache
    from mod_python.util import FieldStorage
except ImportError:
    pass
else:
    session.environment = "apache"

serv = SimpleServer(
    session,
    os.path.join(cheshirePath, 'cheshire3', 'configs', 'serverConfig.xml'))
lxmlParser = serv.get_object(session, 'LxmlParser')

# Both dictionaries are keyed by the protocol map's databaseName:
# configs holds the OAI-PMH protocol map, dbs the database object
configs = {}
dbs = {}
serv._cacheDatabases(session)
for db in serv.databases.values():
    if db.get_setting(session, 'oai-pmh'):
        db._cacheProtocolMaps(session)
        map = db.protocolMaps.get(
            'http://www.openarchives.org/OAI/2.0/OAI-PMH', None)
        # check that there's a path and that it can actually be requested from this handler
        if (map is not None):
            configs[map.databaseName] = map
            dbs[map.databaseName] = db
import os import re from lxml import etree import json from cheshire3.document import StringDocument from cheshire3.internal import cheshire3Root from cheshire3.server import SimpleServer from cheshire3.baseObjects import Session session = Session() session.database = 'db_dickens' serv = SimpleServer(session, os.path.join(cheshire3Root, 'configs', 'serverConfig.xml')) db = serv.get_object(session, session.database) qf = db.get_object(session, 'defaultQueryFactory') resultSetStore = db.get_object(session, 'resultSetStore') idxStore = db.get_object(session, 'indexStore') list_books = [ 'BH', 'BR', 'DC', 'DS', 'ED', 'GE', 'HT', 'LD', 'MC', 'NN', 'OCS', 'OMF', 'OT', 'PP', 'TTC', 'AgnesG', 'Antoni', 'arma', 'cran', 'Deronda', 'dracula', 'emma', 'frank', 'jane', 'Jude', 'LadyAud', 'mary', 'NorthS', 'persuasion', 'pride', 'sybil', 'Tess', 'basker', 'Pomp', 'mill', 'dorian', 'Prof', 'native', 'alli', 'Jekyll', 'wwhite', 'vanity', 'VivianG', 'wh' ] titles = { 'BH': 'Bleak House', 'BR': 'Barnaby Rudge',
from cheshire3.baseObjects import Session
from cheshire3.server import SimpleServer
from cheshire3.document import StringDocument
from cheshire3 import exceptions as c3errors
from cheshire3.web.www_utils import read_file

# import customisable variables
#from localConfig import *

# Build environment...
# Module-level objects shared by the functions in this file; all of them
# are created at import time.
session = Session()
serv = SimpleServer(session, os.path.join(cheshirePath, 'cheshire3', 'configs', 'serverConfig.xml'))
session.database = 'db_ead'
db = serv.get_object(session, 'db_ead')
lgr = db.get_path(session, 'defaultLogger')
recordStore = db.get_object(session, 'recordStore')
authStore = db.get_object(session, 'eadAuthStore')
compStore = db.get_object(session, 'componentStore')
clusDocFac = db.get_object(session, 'clusterDocumentFactory')
# The cluster record store lives in a separate database, 'db_ead_cluster'
clusDb = serv.get_object(session, 'db_ead_cluster')
clusRecordStore = clusDb.get_object(session, 'eadClusterStore')
xmlp = db.get_object(session, 'LxmlParser')
# Path of the indexing lock file, inside the database's default path
lockfilepath = db.get_path(session, 'defaultPath') + '/indexing.lock'


# Pass msg to the module-level logger's log_error for the shared session
def inputError(msg):
    lgr.log_error(session, msg)
def _rods_env_value(env, getter_name, attr_name):
    """Return one iRODS environment value.

    Calls the getter method named getter_name on env and falls back to
    the plain attribute attr_name when that raises AttributeError.
    """
    try:
        return getattr(env, getter_name)()
    except AttributeError:
        return getattr(env, attr_name)


def main(argv=None):
    """Load data into a Cheshire3 database based on parameters in argv.

    argv -- list of command line arguments; when None the argument
            parser reads the process arguments itself.

    Every data argument that is not already an ``irods://`` URL is turned
    into one from the current iRODS environment (user, host, port and
    working collection) before it is handed to the document factory.

    Returns 1 if no database could be identified from the working
    directory, 2 if the database does not exist, None otherwise.
    Raises MissingDependencyException if irods is unavailable or the
    document factory reports missing dependencies.
    """
    global argparser, session, server, db
    if argv is None:
        args = argparser.parse_args()
    else:
        args = argparser.parse_args(argv)
    if irods is None:
        raise MissingDependencyException('icheshire3-load script',
                                         'irods (PyRods)'
                                         )
    session = Session()
    server = SimpleServer(session, args.serverconfig)
    if args.database is None:
        try:
            dbid = identify_database(session, os.getcwd())
        except EnvironmentError as e:
            server.log_critical(session, e.message)
            return 1
        server.log_debug(
            session,
            "database identifier not specified, discovered: {0}".format(dbid))
    else:
        dbid = args.database

    try:
        db = server.get_object(session, dbid)
    except ObjectDoesNotExistException:
        msg = """Cheshire3 database {0} does not exist.
Please provide a different database identifier using the --database option.
""".format(dbid)
        server.log_critical(session, msg)
        return 2
    else:
        # Allow for multiple data arguments
        docFac = db.get_object(session, 'defaultDocumentFactory')
        for dataArg in args.data:
            if dataArg.startswith('irods://'):
                parsed = urlsplit(dataArg)
            else:
                # Examine current environment
                status, myEnv = irods.getRodsEnv()
                host = _rods_env_value(myEnv, 'getRodsHost', 'rodsHost')
                # Port (the getter's result used to be thrown away, leaving
                # 'port' unbound whenever the getter existed)
                port = _rods_env_value(myEnv, 'getRodsPort', 'rodsPort')
                # User
                username = _rods_env_value(myEnv,
                                           'getRodsUserName',
                                           'rodsUserName')
                netloc = '{0}@{1}:{2}'.format(username, host, port)
                # Working collection (was assigned to a misspelt name, so
                # 'cwd' was unbound whenever the getter existed)
                cwd = _rods_env_value(myEnv, 'getRodsCwd', 'rodsCwd')
                path = '/'.join([cwd, dataArg])
                parsed = SplitResult('irods', netloc, path, None, None)
                dataArg = urlunsplit(parsed)
            server.log_debug(session, dataArg)
            # Any format not starting with 'i' is replaced by plain 'irods'
            if args.format is None or not args.format.startswith('i'):
                fmt = 'irods'
            else:
                fmt = args.format
            server.log_debug(session, fmt)
            try:
                docFac.load(session, dataArg,
                            args.cache, fmt, args.tagname, args.codec)
            except MissingDependencyException as e:
                server.log_critical(session, e.reason)
                missingDependencies = e.dependencies
                raise MissingDependencyException('cheshire3-load script',
                                                 missingDependencies)
            # NOTE(review): indexing is done per data argument, right after
            # its load; confirm this matches the intended nesting.
            wf = db.get_object(session, 'buildIndexWorkflow')
            wf.process(session, docFac)
from cheshire3.server import SimpleServer
from cheshire3.utils import elementType
from cheshire3.baseObjects import Session
from cheshire3 import document
from cheshire3.workflow import SimpleWorkflow, CachingWorkflow
from cheshire3 import dynamic
from cheshire3.exceptions import *

# Root of the Cheshire3 installation; falls back to /home/cheshire when
# the C3HOME environment variable is not set
cheshirePath = os.environ.get('C3HOME', '/home/cheshire')

# Module-level Cheshire3 environment, built once at import time
session = Session()
session.environment = "apache"
serv = SimpleServer(
    session,
    os.path.join(cheshirePath, 'cheshire3', 'configs', 'serverConfig.xml'))
mdp = serv.get_object(session, 'defaultParser')

# configs maps a database URL to {protocol identifier: protocol map} for
# every database that has the 'remoteWorkflow' setting
configs = {}
serv._cacheDatabases(session)
for db in serv.databases.values():
    #if db.get_setting(session, 'C3WEP'):
    if db.get_setting(session, 'remoteWorkflow'):
        db._cacheProtocolMaps(session)
        #map = db.protocolMaps.get('c3WorflowExecutionProtocol', None)
        #configs[map.databaseUrl] = {'c3WorflowExecutionProtocol' : map}
        map = db.protocolMaps.get(
            'http://www.cheshire3.org/protocols/workflow/1.0/', None)
        # A database may carry the setting without a matching protocol map;
        # skip it rather than fail the whole import on None.databaseUrl
        if map is not None:
            configs[map.databaseUrl] = {
                'http://www.cheshire3.org/protocols/workflow/1.0/': map
            }
def get_databasesAndConfigs(session, serv):
    """Collect the OAI-PMH enabled databases known to a Server.

    Returns a ``(dbs, configs)`` pair of dictionaries, both keyed by the
    databaseName of the database's OAI-PMH protocol map: ``dbs`` holds the
    database objects, ``configs`` the protocol maps. Databases without the
    'oai-pmh' setting, or without such a protocol map, are left out.
    """
    databases_by_name = {}
    maps_by_name = {}
    serv._cacheDatabases(session)
    for database in serv.databases.values():
        if not database.get_setting(session, 'oai-pmh'):
            continue
        database._cacheProtocolMaps(session)
        protocol_map = database.protocolMaps.get(
            'http://www.openarchives.org/OAI/2.0/OAI-PMH',
            None
        )
        # Nothing to serve from this handler without a protocol map
        if protocol_map is None:
            continue
        maps_by_name[protocol_map.databaseName] = protocol_map
        databases_by_name[protocol_map.databaseName] = database
    return databases_by_name, maps_by_name


# Cheshire3 architecture
session = Session()
serv = SimpleServer(session,
                    os.path.join(cheshire3Root, 'configs', 'serverConfig.xml')
                    )
lxmlParser = serv.get_object(session, 'LxmlParser')
dbs, configs = get_databasesAndConfigs(session, serv)
c3OaiServers = {}
#!/usr/bin/python import sys import os from cheshire3.baseObjects import Session from cheshire3.server import SimpleServer from cheshire3.internal import cheshire3Root # Build environment... session = Session() # a Session - used to store print cheshire3Root serv = SimpleServer(session, os.path.join(cheshire3Root, 'configs', 'serverConfig.xml')) session.logger = serv.get_path(session, 'defaultLogger') # a logger db = serv.get_object(session, 'db_tdo_index') # the Database session.database = db.id #qf = db.get_object(session, 'defaultQueryFactory') def testVec(): recordStore = db.get_object(session, 'recordStore') rec = recordStore.fetch_record(session, 1) idx= db.get_object(session, 'idx-topic') vec = idx.fetch_vector(session, rec)
sys.stderr.write("for help use --help\n") sys.stderr.flush() return 2 except Error as e: lgr.log_lvl(session, 40, str(e)) if debug: raise return 1 # Build environment... session = Session() serv = SimpleServer( session, os.path.join(cheshire3Root, 'configs', 'serverConfig.xml' ) ) session.database = 'db_hubedit' db = serv.get_object(session, 'db_hubedit') lgr = db.get_path(session, 'defaultLogger') authStore = db.get_object(session, 'hubAuthStore') superAuthStore = db.get_object(session, 'hubSuperAuthStore') xmlp = db.get_object(session, 'LxmlParser') if __name__ == "__main__": sys.exit(main())
from lxml import etree from crypt import crypt import cheshire3 from cheshire3.baseObjects import Session from cheshire3.server import SimpleServer from cheshire3.internal import cheshire3Root from cheshire3.document import StringDocument session = Session() serverConfig = os.path.join(cheshire3Root, 'configs', 'serverConfig.xml') serv = SimpleServer(session, serverConfig) db = serv.get_object(session, 'db_dickens') session.database = 'db_dickens' qf = db.get_object(session, 'defaultQueryFactory') df = db.get_object(session, 'SimpleDocumentFactory') concStore = db.get_object(session, 'concordanceStore') authStore = db.get_object(session, 'authStore') recStore = db.get_object(session, 'recordStore') ampPreP = db.get_object(session, 'AmpPreParser') xmlp = db.get_object(session, 'LxmlParser') if ('-austen' in sys.argv):