Example #1
0
 def test_sessionDatabaseAssign(self):
     """Assign session.database twice and check it reads back each time."""
     session = Session()
     expectations = (
         ("db_test1", "session.database assignment failed"),
         ("db_test2", "session.database re-assignment failed"),
     )
     for identifier, failure_message in expectations:
         session.database = identifier
         self.assertEqual(session.database, identifier, failure_message)
def build_architecture(data=None):
    """(Re)create the module-level Cheshire3 objects for 'db_hubedit'.

    Builds a new apache-environment Session and SimpleServer, then looks up
    the database, its default path, its stores and its parser, publishing
    each one as a module global. Finishes by clearing the ``rebuild`` flag.

    ``data`` is accepted but never read.
    """
    global rebuild, session, serv, db, dbPath
    global editStore, authStore, instStore, userStore, xmlp
    global docStoreConfigStore

    # New session bound to the hub editing database
    session = Session()
    session.database = 'db_hubedit'
    session.environment = 'apache'

    server_config = os.path.join(cheshire3Root, 'configs', 'serverConfig.xml')
    serv = SimpleServer(session, server_config)

    db = serv.get_object(session, 'db_hubedit')
    dbPath = db.get_path(session, 'defaultPath')

    # Stores and parser, all resolved through the database object
    editStore = db.get_object(session, 'editingStore')
    userStore = db.get_object(session, 'hubAuthStore')
    instStore = db.get_object(session, 'institutionStore')
    docStoreConfigStore = db.get_object(session, 'documentStoreConfigStore')
    authStore = db.get_object(session, 'adminAuthStore')
    xmlp = db.get_object(session, 'LxmlParser')

    rebuild = False
Example #3
0
def process_update(self, req):
    """Route an SRW record-update request to the handler for its operation.

    Sets ``self.version`` to "1.1" and ``self.operationStatus`` to "fail",
    builds a new apache-environment Session bound to the database that owns
    ``req.config``, stores that database on ``req._db``, and calls the
    ``handle_*`` method matching ``req.operation``.

    Raises a ``Diagnostic7`` when ``req.version`` is missing or empty. An
    unrecognised operation is not raised: it is recorded as a single
    ``SRWDiagnostics.Diagnostic1`` in ``self.diagnostics``.
    """
    self.version = "1.1"
    self.operationStatus = "fail"

    if not req.version:
        missing = Diagnostic7()
        missing.message = "Mandatory 'version' parameter not supplied"
        missing.details = 'version'
        raise missing

    # The owning database is the parent of the request's config
    db = req.config.parent
    req._db = db
    session = Session()
    session.environment = "apache"
    session.database = db.id

    # Operation URI -> name of the method that carries it out
    handler_names = {
        "info:srw/operation/1/create": "handle_create",
        "info:srw/operation/1/replace": "handle_replace",
        "info:srw/operation/1/delete": "handle_delete",
        "info:srw/operation/1/metadata": "handle_metadata",
    }
    operation = req.operation
    handler_name = handler_names.get(operation)
    if handler_name is None:
        # Barf
        unknown = SRWDiagnostics.Diagnostic1()
        unknown.details = "Unknown operation: %s" % operation
        self.diagnostics = [unknown]
        return
    getattr(self, handler_name)(session, req)
def build_architecture(data=None):
    """(Re)create the module-level Cheshire3 objects for 'db_ead'.

    The ``data`` argument is provided for when the function is run as a
    clean-up - it is always None and is never read here.

    The user held by the existing module-level session (if that session is
    truthy) is carried over onto the replacement session.
    """
    global session, serv, db, dbPath, docParser
    global fullTxr, fullSplitTxr
    global ppFlow
    global rebuild

    # Re-establish the session; maintain the user if possible
    previous_user = session.user if session else None
    session = Session()
    session.database = 'db_ead'
    session.environment = 'apache'
    session.user = previous_user

    server_config = os.path.join(cheshirePath, 'cheshire3', 'configs',
                                 'serverConfig.xml')
    serv = SimpleServer(session, server_config)
    db = serv.get_object(session, 'db_ead')
    dbPath = db.get_path(session, 'defaultPath')
    docParser = db.get_object(session, 'LxmlParser')

    # Transformers
    fullTxr = db.get_object(session, 'htmlFullTxr')
    fullSplitTxr = db.get_object(session, 'htmlFullSplitTxr')

    # Workflows: fetch first, then load its cache
    ppFlow = db.get_object(session, 'preParserWorkflow')
    ppFlow.load_cache(session, db)

    rebuild = False
    
Example #5
0
def build_architecture(data=None):
    """(Re)create the module-level Cheshire3 objects for 'db_' + databaseName.

    Builds a new apache-environment Session with no user, a SimpleServer,
    and then resolves the database, query factory, parser, record store and
    the two transformers as module globals.

    ``data`` is accepted but never read.
    """
    global session, serv, db, qf, xmlp
    global recordStore, sentenceStore, paragraphStore, resultSetStore
    global articleTransformer, kwicTransformer
    # NOTE(review): sentenceStore, paragraphStore and resultSetStore are
    # declared global here but are not assigned anywhere in this body.

    session = Session()
    session.environment = 'apache'
    session.user = None

    server_config = os.path.join(cheshire3Root, 'configs', 'serverConfig.xml')
    serv = SimpleServer(session, server_config)

    # The database identifier is set only after the server has been built
    session.database = 'db_' + databaseName
    db = serv.get_object(session, session.database)

    qf = db.get_object(session, 'defaultQueryFactory')
    xmlp = db.get_object(session, 'LxmlParser')
    recordStore = db.get_object(session, 'recordStore')
    articleTransformer = db.get_object(session, 'article-Txr')
    kwicTransformer = db.get_object(session, 'kwic-Txr')
Example #6
0
def build_architecture(data=None):
    """(Re)create the module-level Cheshire3 objects for 'db_' + databaseName.

    Builds a new apache-environment Session with no user, a SimpleServer,
    and then resolves the database, query factory, parser, stores,
    extractors and transformers as module globals.

    ``data`` is accepted but never read.
    """
    global session, serv, db, qf, xmlp
    global recordStore, resultSetStore, idxStore
    global articleTransformer, kwicTransformer
    global proxExtractor, simpleExtractor
    global adf, fimi2, rule, arm, vecTxr, vectorStore, armTableTxr
    # NOTE(review): adf, fimi2, rule, arm, vecTxr, vectorStore and
    # armTableTxr are declared global here but are not assigned anywhere in
    # this body.

    session = Session()
    session.environment = 'apache'
    session.user = None

    server_config = os.path.join(cheshire3Root, 'configs', 'serverConfig.xml')
    serv = SimpleServer(session, server_config)

    # The database identifier is set only after the server has been built
    session.database = 'db_' + databaseName
    db = serv.get_object(session, session.database)

    qf = db.get_object(session, 'defaultQueryFactory')
    xmlp = db.get_object(session, 'LxmlParser')
    recordStore = db.get_object(session, 'recordStore')
    resultSetStore = db.get_object(session, 'resultSetStore')

    simpleExtractor = db.get_object(session, 'SimpleExtractor')
    proxExtractor = db.get_object(session, 'ProxExtractor')
    articleTransformer = db.get_object(session, 'article-Txr')
    kwicTransformer = db.get_object(session, 'kwic-Txr')
    idxStore = db.get_object(session, 'indexStore')
Example #7
0
        sys.stderr.write("for help use --help\n")
        sys.stderr.flush()
        return 2
    except Error as e:
        lgr.log_lvl(session, 40, str(e))
        if debug:
            raise
        return 1


# Build environment...
# Everything below runs at import time and leaves its results as module
# globals (session, serv, db, lgr, authStore, superAuthStore, xmlp).
session = Session()
# The server is configured from <cheshire3Root>/configs/serverConfig.xml
serv = SimpleServer(
    session,
    os.path.join(cheshire3Root,
                 'configs',
                 'serverConfig.xml'
                 )
)
# The session is pointed at the database only after the server exists
session.database = 'db_hubedit'

db = serv.get_object(session, 'db_hubedit')
# Note: the logger is looked up with get_path(), not get_object()
lgr = db.get_path(session, 'defaultLogger')
authStore = db.get_object(session, 'hubAuthStore')
superAuthStore = db.get_object(session, 'hubSuperAuthStore')

xmlp = db.get_object(session, 'LxmlParser')

# Run main() only when executed as a script; its return value becomes the
# process exit status.
if __name__ == "__main__":
    sys.exit(main())
        # Only databases with the 'SRW' (or 'srw') setting switched on are
        # registered in ``configs``.
        if db.get_setting(session, 'SRW') or db.get_setting(session, 'srw'):
            db._cacheProtocolMaps(session)
            map = db.protocolMaps.get('http://www.loc.gov/zing/srw/', None)
            # check that there's a path and that it can actually be requested from this handler
            if (map is not None) and \
               (map.databaseUrl.startswith((handlerUrl + '/', handlerUrl[1:] + '/'))):
                # The update map is optional; None is stored when it is absent
                map2 = db.protocolMaps.get('http://www.loc.gov/zing/srw/update/', None)
                configs[map.databaseUrl] = {'http://www.loc.gov/zing/srw/' : map,
                                            'http://www.loc.gov/zing/srw/update/' : map2}
            elif (map is not None):
                # '{2}' must be a live placeholder: written as '{{2}}' it is an
                # escaped brace pair and the log shows the literal text "{2}"
                # instead of the database id.
                apache.log_error(
                    ("Database URL ({0}) does not match handler URL ({1}); "
                     "will not handle database {2}").format(
                         map.databaseUrl, handlerUrl, db.id),
                    apache.APLOG_WARNING)
else:
    # too many dbs to cache in memory
    # Register each SRW-enabled database by identifier only: ``configs`` maps
    # its URL to (dbid, {protocol namespace: protocol map id}), and the
    # database object itself is dropped from the server cache afterwards.
    for dbid, conf in serv.databaseConfigs.iteritems():
        db = serv.get_object(session, dbid)
        session.database = dbid
        if db.get_setting(session, 'SRW') or db.get_setting(session, 'srw'):
            db._cacheProtocolMaps(session)
            pmap = db.protocolMaps.get('http://www.loc.gov/zing/srw/', None)
            if (pmap is not None) and (pmap.databaseUrl.startswith((handlerUrl + '/', handlerUrl[1:] + '/'))):
                configs[pmap.databaseUrl] = (dbid, {'http://www.loc.gov/zing/srw/': pmap.id})
                # The update map is optional; add its id only when present
                pmap2 = db.protocolMaps.get('http://www.loc.gov/zing/srw/update/', None)
                if pmap2 is not None:
                    configs[pmap.databaseUrl][1].update({'http://www.loc.gov/zing/srw/update/': pmap2.id})
            elif (pmap is not None):
                # '{2}' must be a live placeholder: written as '{{2}}' it is an
                # escaped brace pair and the log shows the literal text "{2}"
                # instead of the database id.
                apache.log_error(
                    ("Database URL ({0}) does not match handler URL ({1}); "
                     "will not handle database {2}").format(
                         pmap.databaseUrl, handlerUrl, dbid),
                    apache.APLOG_WARNING)
        # remove cached db object
        try:
            del serv.objects[dbid]
        except KeyError:
            pass
Example #9
0
## count words in books, and list titles
## used to create booklist

import os
import re
from lxml import etree
import json

from cheshire3.document import StringDocument
from cheshire3.internal import cheshire3Root
from cheshire3.server import SimpleServer
from cheshire3.baseObjects import Session

# Build the Cheshire3 environment at import time. Here the database id is
# set on the session *before* the server is created, and the same id is then
# used to fetch the database object.
session = Session()
session.database = 'db_dickens'
serv = SimpleServer(session,
                    os.path.join(cheshire3Root, 'configs', 'serverConfig.xml'))
db = serv.get_object(session, session.database)
qf = db.get_object(session, 'defaultQueryFactory')
resultSetStore = db.get_object(session, 'resultSetStore')
idxStore = db.get_object(session, 'indexStore')

# Short string codes, one per entry.
# NOTE(review): presumably the identifiers of the books to be counted and
# listed - confirm against the code that consumes this list.
list_books = [
    'BH', 'BR', 'DC', 'DS', 'ED', 'GE', 'HT', 'LD', 'MC', 'NN', 'OCS', 'OMF',
    'OT', 'PP', 'TTC', 'AgnesG', 'Antoni', 'arma', 'cran', 'Deronda',
    'dracula', 'emma', 'frank', 'jane', 'Jude', 'LadyAud', 'mary', 'NorthS',
    'persuasion', 'pride', 'sybil', 'Tess', 'basker', 'Pomp', 'mill', 'dorian',
    'Prof', 'native', 'alli', 'Jekyll', 'wwhite', 'vanity', 'VivianG', 'wh'
]

titles = {
Example #10
0
# Create the parser for the "remove" command
parser_remove = subparsers.add_parser('remove',
                                      help='Remove an existing user')

# nargs='*' collects zero or more usernames into a list, so the command line
# is still accepted when no username is given.
parser_remove.add_argument('username',
                           type=str,
                           nargs='*',
                           help='Username of the user(s) to remove')
# Parsed arguments for this sub-command carry remove_user as their ``func``
parser_remove.set_defaults(func=remove_user)


# Build environment...
# Everything below runs at import time and leaves its results as module
# globals (session, serv, db, xmlp, authStore, superAuthStore, instStore).
session = Session()
# The server is configured from <cheshire3Root>/configs/serverConfig.xml
serv = SimpleServer(
    session,
    os.path.join(cheshire3Root,
                 'configs',
                 'serverConfig.xml'
                 )
)
# The session is pointed at the database only after the server exists
session.database = 'db_ead'
db = serv.get_object(session, 'db_ead')
xmlp = db.get_object(session, 'LxmlParser')
authStore = db.get_object(session, 'hubAuthStore')          # Editors
superAuthStore = db.get_object(session, 'adminAuthStore')   # Hub Staff
instStore = db.get_object(session, 'institutionStore')      # Institutions


# Run main() only when executed as a script; its return value becomes the
# process exit status.
if __name__ == '__main__':
    sys.exit(main())
Example #11
0
 def test_sessionDatabaseAssign(self):
     """session.database must hold the most recently assigned identifier."""
     session = Session()
     first, second = "db_test1", "db_test2"

     session.database = first
     self.assertEqual(
         session.database, first,
         "session.database assignment failed")

     session.database = second
     self.assertEqual(
         session.database, second,
         "session.database re-assignment failed")
#!/usr/bin/python
import sys
import os
from cheshire3.baseObjects import Session
from cheshire3.server import SimpleServer
from cheshire3.internal import cheshire3Root

# Build environment...
# Runs at import time; session, serv and db are left as module globals.
session = Session() # a Session - holds the logger and database id set below
# Echo the Cheshire3 root directory to stdout (Python 2 print statement)
print cheshire3Root

serv = SimpleServer(session, os.path.join(cheshire3Root, 'configs', 'serverConfig.xml'))
session.logger = serv.get_path(session, 'defaultLogger') # a logger
db = serv.get_object(session, 'db_tdo_index') # the Database
# The session's database id is taken from the fetched database object
session.database = db.id

#qf = db.get_object(session, 'defaultQueryFactory')

def testVec():
    """Fetch record 1 from 'recordStore' and look up its 'idx-topic' vector.

    Uses the module-level ``db`` and ``session``.
    NOTE(review): ``vec`` is neither returned nor used in the lines visible
    here - confirm whether the function is meant to continue.
    """
    recordStore = db.get_object(session, 'recordStore')
    rec = recordStore.fetch_record(session, 1)
    idx= db.get_object(session, 'idx-topic')
    vec = idx.fetch_vector(session, rec)
                map2 = db.protocolMaps.get(
                    'http://www.loc.gov/zing/srw/update/', None)
                configs[map.databaseUrl] = {
                    'http://www.loc.gov/zing/srw/': map,
                    'http://www.loc.gov/zing/srw/update/': map2
                }
            elif (map is not None):
                # '{2}' must be a live placeholder: written as '{{2}}' it is an
                # escaped brace pair and the log shows the literal text "{2}"
                # instead of the database id.
                apache.log_error(
                    ("Database URL ({0}) does not match handler URL ({1}); "
                     "will not handle database {2}").format(
                         map.databaseUrl, handlerUrl, db.id),
                    apache.APLOG_WARNING)
else:
    # too many dbs to cache in memory
    for dbid, conf in serv.databaseConfigs.iteritems():
        db = serv.get_object(session, dbid)
        session.database = dbid
        if db.get_setting(session, 'SRW') or db.get_setting(session, 'srw'):
            db._cacheProtocolMaps(session)
            pmap = db.protocolMaps.get('http://www.loc.gov/zing/srw/', None)
            if (pmap is not None) and (pmap.databaseUrl.startswith(
                (handlerUrl + '/', handlerUrl[1:] + '/'))):
                configs[pmap.databaseUrl] = (dbid, {
                    'http://www.loc.gov/zing/srw/':
                    pmap.id
                })
                pmap2 = db.protocolMaps.get(
                    'http://www.loc.gov/zing/srw/update/', None)
                if pmap2 is not None:
                    configs[pmap.databaseUrl][1].update(
                        {'http://www.loc.gov/zing/srw/update/': pmap2.id})
            elif (pmap is not None):
Example #14
0
sys.path.insert(1, os.path.join(cheshirePath, 'cheshire3', 'code'))

from cheshire3.baseObjects import Session
from cheshire3.server import SimpleServer
from cheshire3.document import StringDocument
from cheshire3 import exceptions as c3errors

from cheshire3.web.www_utils import read_file

# import customisable variables
#from localConfig import *

# Build environment...
# Everything below runs at import time and leaves its results as module
# globals.
session = Session()
serv = SimpleServer(session, os.path.join(cheshirePath, 'cheshire3', 'configs', 'serverConfig.xml'))
# The session is pointed at the database only after the server exists
session.database = 'db_ead'

db = serv.get_object(session, 'db_ead')
# Note: the logger is looked up with get_path(), not get_object()
lgr = db.get_path(session, 'defaultLogger')
recordStore = db.get_object(session, 'recordStore')
authStore = db.get_object(session, 'eadAuthStore')
compStore = db.get_object(session, 'componentStore')
clusDocFac = db.get_object(session, 'clusterDocumentFactory')

# A second database, fetched from the same server with the same session;
# session.database itself still names 'db_ead'.
clusDb = serv.get_object(session, 'db_ead_cluster')
clusRecordStore = clusDb.get_object(session, 'eadClusterStore')

xmlp = db.get_object(session, 'LxmlParser')

# Path of 'indexing.lock' directly under the database's default path
lockfilepath = db.get_path(session, 'defaultPath') + '/indexing.lock'
Example #15
0
def groupDist(dist):
    """Print a grouped summary of an occurrences-per-article distribution.

    ``dist`` maps a number of occurrences to the number of articles having
    exactly that many. One tab-separated row (bucket, article count,
    percentage of all articles) is printed for each of 1, 2 and 3
    occurrences, then a combined "4+" row, then a line with the overall
    totals.

    Buckets missing from ``dist`` count as zero, and an empty ``dist``
    prints all-zero rows instead of raising.
    """
    hits = sum(dist.values())
    occs = sum(int(k) * int(v) for k, v in dist.items())

    def percent(count):
        # No articles at all: report 0% rather than divide by zero
        if not hits:
            return 0.0
        return float(count) / float(hits) * 100.0

    for i in [1, 2, 3]:
        count = dist.get(i, 0)
        print("%s\t%s\t%0.2f" % (i, count, percent(count)))

    # Everything from 4 upwards, *including* the largest bucket; a
    # range(4, max(...)) walk stops one short and drops that bucket.
    fourPlus = sum(v for k, v in dist.items() if int(k) >= 4)
    print("4+\t%s\t%0.2f" % (fourPlus, percent(fourPlus)))

    print("\n%i occurrences in %i articles" % (occs, hits))

# Build the Cheshire3 environment at import time.
session = Session()
# Relative config path: it resolves against the current working directory,
# so the script has to be started from the directory this path assumes.
serv = SimpleServer(session, "../../configs/serverConfig.xml")
db = serv.get_object(session, 'db_news')
# The session's database id is set after the database object is fetched
session.database = 'db_news'

idxStore = db.get_object(session, 'indexStore')
recStore = db.get_object(session, 'recordStore')



Example #16
0
# separate file containing display configs + some HTML for table rows etc.
from clic.dickens.web.dickensWebConfig import *
from clic.dickens.web.dickensSearchHandler import SearchHandler
from clic.dickens.web.dickensBrowseHandler import BrowseHandler

# Base directory: the HOME environment variable, or '/home/cheshire' when
# HOME is not set.
cheshirePath = os.environ.get('HOME', '/home/cheshire')

# Per-database log file and HTML directory under <cheshirePath>/clic/www/.
# NOTE(review): databaseName is not defined in the lines visible here -
# presumably it comes from the dickensWebConfig star import; confirm.
logPath = os.path.join(cheshirePath, 'clic', 'www', databaseName, 'logs', 'searchHandler.log')
htmlPath = os.path.join(cheshirePath, 'clic', 'www', databaseName, 'html')

# Build the Cheshire3 environment at import time: an apache-environment
# session with no user, then the server.
session = Session()
session.environment = 'apache'
session.user = None
serv = SimpleServer(session, os.path.join(cheshire3Root, 'configs', 'serverConfig.xml'))

# The database id is set on the session and then used to fetch the database
session.database = 'db_dickens'
db = serv.get_object(session, session.database)
authStore = db.get_object(session, 'authStore')


# Discover objects...
def handler(req):
    global db, htmlPath, logPath, cheshirePath, xmlp, recordStore
    form = FieldStorage(req)
    try:
        dir = req.uri[1:].rsplit('/')[1]
    except IndexError:
        return apache.HTTP_NOT_FOUND
    remote_host = req.get_remote_host(apache.REMOTE_NOLOOKUP)
    lgr = FileLogger(logPath, remote_host) 
#    lgr.log(req.uri)