コード例 #1
0
def process_update(self, req):
    """Route an SRW update request to the handler for its operation.

    The response ``version`` is set to "1.1" and ``operationStatus`` to
    "fail" before anything else happens.  A fresh ``Session`` (environment
    "apache", database id taken from ``req.config.parent``) is handed to
    the selected handler together with ``req``.  An unrecognised operation
    is not raised; it is recorded as a single ``Diagnostic1`` in
    ``self.diagnostics``.

    Raises:
        Diagnostic7: when ``req.version`` is falsy.
    """
    self.version = "1.1"
    self.operationStatus = "fail"

    if not req.version:
        missing_version = Diagnostic7()
        missing_version.message = "Mandatory 'version' parameter not supplied"
        missing_version.details = 'version'
        raise missing_version

    database = req.config.parent
    req._db = database
    session = Session()
    session.environment = "apache"
    session.database = database.id

    # Operation URI -> name of the method implementing it.
    handler_names = {
        "info:srw/operation/1/create": "handle_create",
        "info:srw/operation/1/replace": "handle_replace",
        "info:srw/operation/1/delete": "handle_delete",
        "info:srw/operation/1/metadata": "handle_metadata",
    }
    handler_name = handler_names.get(req.operation)
    if handler_name is None:
        # Unknown operation: report it through the diagnostics list.
        unknown = SRWDiagnostics.Diagnostic1()
        unknown.details = "Unknown operation: %s" % req.operation
        self.diagnostics = [unknown]
        return
    getattr(self, handler_name)(session, req)
コード例 #2
0
def build_architecture(data=None):
    """(Re)create the module-level Cheshire3 objects for 'db_ead'.

    Rebinds the globals ``session``, ``serv``, ``db``, ``dbPath``,
    ``docParser``, ``fullTxr``, ``fullSplitTxr`` and ``ppFlow`` and resets
    ``rebuild`` to False.  The user of the existing global session (if
    that session is truthy) is carried over to the new one.

    ``data`` exists only so the function can be run as a clean-up
    callback; it is not used.
    """
    global session, serv, db, dbPath, docParser
    global fullTxr, fullSplitTxr
    global ppFlow
    global rebuild

    # Remember who was logged in before the session is replaced.
    previous_user = session.user if session else None

    session = Session()
    session.database = 'db_ead'
    session.environment = 'apache'
    session.user = previous_user

    config_path = os.path.join(cheshirePath, 'cheshire3', 'configs',
                               'serverConfig.xml')
    serv = SimpleServer(session, config_path)
    db = serv.get_object(session, 'db_ead')
    dbPath = db.get_path(session, 'defaultPath')
    docParser = db.get_object(session, 'LxmlParser')

    # Transformers
    fullTxr = db.get_object(session, 'htmlFullTxr')
    fullSplitTxr = db.get_object(session, 'htmlFullSplitTxr')

    # Workflows
    ppFlow = db.get_object(session, 'preParserWorkflow')
    ppFlow.load_cache(session, db)

    rebuild = False
    
コード例 #3
0
def build_architecture(data=None):
    """Create the module-level Cheshire3 objects for 'db_hubedit'.

    Rebinds the globals ``session``, ``serv``, ``db``, ``dbPath``,
    ``editStore``, ``userStore``, ``instStore``, ``docStoreConfigStore``,
    ``authStore`` and ``xmlp``, then resets ``rebuild`` to False.

    ``data`` is accepted but not used by this function.
    """
    global rebuild, session, serv, db, dbPath
    global editStore, authStore, instStore, userStore, xmlp
    global docStoreConfigStore

    session = Session()
    session.database = 'db_hubedit'
    session.environment = 'apache'

    config_path = os.path.join(cheshire3Root, 'configs', 'serverConfig.xml')
    serv = SimpleServer(session, config_path)
    db = serv.get_object(session, 'db_hubedit')
    dbPath = db.get_path(session, 'defaultPath')

    # Look the remaining objects up through one bound method.
    fetch = db.get_object
    editStore = fetch(session, 'editingStore')
    userStore = fetch(session, 'hubAuthStore')
    instStore = fetch(session, 'institutionStore')
    docStoreConfigStore = fetch(session, 'documentStoreConfigStore')
    authStore = fetch(session, 'adminAuthStore')
    xmlp = fetch(session, 'LxmlParser')

    rebuild = False
コード例 #4
0
ファイル: testSession.py プロジェクト: tanmoydeb07/cheshire3
 def test_sessionDatabaseAssign(self):
     """Session.database can be assigned and then re-assigned."""
     sess = Session()
     steps = (
         ("db_test1", "session.database assignment failed"),
         ("db_test2", "session.database re-assignment failed"),
     )
     for identifier, failure_msg in steps:
         sess.database = identifier
         self.assertEqual(sess.database, identifier, failure_msg)
コード例 #5
0
ファイル: testSession.py プロジェクト: tanmoydeb07/cheshire3
 def test_sessionEnvironmentAssign(self):
     """Session.environment can be assigned and then re-assigned."""
     sess = Session()
     steps = (
         ("apache", "session.environment assignment failed"),
         ("terminal", "session.environment re-assignment failed"),
     )
     for environment, failure_msg in steps:
         sess.environment = environment
         self.assertEqual(sess.environment, environment, failure_msg)
コード例 #6
0
def build_architecture(data=None):
    """Create the module-level Cheshire3 objects for ``'db_' + databaseName``.

    Rebinds the globals ``session``, ``serv``, ``db``, ``qf``, ``xmlp``,
    ``recordStore``, ``articleTransformer`` and ``kwicTransformer``.
    ``sentenceStore``, ``paragraphStore`` and ``resultSetStore`` are
    declared global but are not assigned here.

    ``data`` is accepted but not used by this function.
    """
    global session, serv, db, qf, xmlp
    global recordStore, sentenceStore, paragraphStore, resultSetStore
    global articleTransformer, kwicTransformer

    session = Session()
    session.environment = 'apache'
    session.user = None

    config_path = os.path.join(cheshire3Root, 'configs', 'serverConfig.xml')
    serv = SimpleServer(session, config_path)

    session.database = 'db_' + databaseName
    db = serv.get_object(session, session.database)

    # Look the remaining objects up through one bound method.
    fetch = db.get_object
    qf = fetch(session, 'defaultQueryFactory')
    xmlp = fetch(session, 'LxmlParser')
    recordStore = fetch(session, 'recordStore')
    articleTransformer = fetch(session, 'article-Txr')
    kwicTransformer = fetch(session, 'kwic-Txr')
コード例 #7
0
 def __init__(self, store):
     """Open a cursor on ``store`` and pre-load the first entry.

     A private ``Session`` is created, the store's 'byteCount' connection
     is opened, and ``nextData`` is primed with ``(key, data)`` for the
     first key the cursor returns.
     """
     self.store = store
     self.session = session = Session()
     self.cxn = connection = store._open(session, 'byteCount')
     self.cursor = cursor = connection.cursor()
     # Only the key of the first cursor entry is needed here.
     first_key, _ = cursor.first()
     self.nextData = (first_key, store.fetch_data(session, first_key))
コード例 #8
0
ファイル: chapter_repository.py プロジェクト: jdejoode/clic
 def __init__(self):
     """Connect to the 'db_dickens' database and fetch its query factory."""
     self.session = session = Session()
     session.database = 'db_dickens'
     config_path = os.path.join(cheshire3Root, 'configs',
                                'serverConfig.xml')
     self.serv = SimpleServer(session, config_path)
     self.db = self.serv.get_object(session, session.database)
     self.qf = self.db.get_object(session, 'defaultQueryFactory')
コード例 #9
0
def build_architecture(data=None):
    """Create the module-level Cheshire3 objects for ``'db_' + databaseName``.

    Rebinds the globals ``session``, ``serv``, ``db``, ``qf``, ``xmlp``,
    ``recordStore``, ``resultSetStore``, ``simpleExtractor``,
    ``proxExtractor``, ``articleTransformer``, ``kwicTransformer`` and
    ``idxStore``.  The other names in the ``global`` statements are
    declared but not assigned here.

    ``data`` is accepted but not used by this function.
    """
    global session, serv, db, qf, xmlp
    global recordStore, resultSetStore, idxStore
    global articleTransformer, kwicTransformer
    global proxExtractor, simpleExtractor
    global adf, fimi2, rule, arm, vecTxr, vectorStore, armTableTxr

    session = Session()
    session.environment = 'apache'
    session.user = None

    config_path = os.path.join(cheshire3Root, 'configs', 'serverConfig.xml')
    serv = SimpleServer(session, config_path)

    session.database = 'db_' + databaseName
    db = serv.get_object(session, session.database)

    # Look the remaining objects up through one bound method.
    fetch = db.get_object
    qf = fetch(session, 'defaultQueryFactory')
    xmlp = fetch(session, 'LxmlParser')
    recordStore = fetch(session, 'recordStore')
    resultSetStore = fetch(session, 'resultSetStore')

    simpleExtractor = fetch(session, 'SimpleExtractor')
    proxExtractor = fetch(session, 'ProxExtractor')
    articleTransformer = fetch(session, 'article-Txr')
    kwicTransformer = fetch(session, 'kwic-Txr')
    idxStore = fetch(session, 'indexStore')
コード例 #10
0
ファイル: testRecord.py プロジェクト: tanmoydeb07/cheshire3
 def setUp(self):
     """Build one record per test datum and keep it next to its source XML.

     ``self.records`` becomes a list of ``{'xml': ..., 'record': ...}``
     dictionaries, in the order returned by ``self._get_data()``.
     """
     self.session = Session()
     self.records = []
     record_class = self._get_class()
     for xml in self._get_data():
         record = record_class(self._parse_data(xml),
                               xml=xml,
                               byteCount=len(xml))
         self.records.append({'xml': xml, 'record': record})
コード例 #11
0
 def __init__(self):
     """Connect to 'db_dickens' and fetch the objects used for searching.

     Stores the session, server, database, default query factory,
     result-set store and index store on the instance.
     """
     self.session = session = Session()
     session.database = 'db_dickens'
     config_path = os.path.join(cheshire3Root, 'configs',
                                'serverConfig.xml')
     self.serv = SimpleServer(session, config_path)
     self.db = database = self.serv.get_object(session, session.database)
     self.qf = database.get_object(session, 'defaultQueryFactory')
     self.resultSetStore = database.get_object(session, 'resultSetStore')
     self.idxStore = database.get_object(session, 'indexStore')
コード例 #12
0
 def setUp(self):
     """Create a server, register dependency configs and build the test object.

     Each config from ``self._get_dependencyConfigs()`` is stored in the
     server's ``subConfigs`` under its 'id'.  The default logger's
     ``minLevel`` is raised to 60 before the object under test is built
     from ``self._get_config()``.
     """
     self.session = session = Session()
     config_path = os.path.join(cheshire3Root, 'configs',
                                'serverConfig.xml')
     self.server = server = SimpleServer(session, config_path)
     for dependency in self._get_dependencyConfigs():
         server.subConfigs[dependency.get('id')] = dependency
     # Disable stdout logging
     logger = server.get_path(session, 'defaultLogger')
     logger.minLevel = 60
     # Create object that will be tested
     object_config = self._get_config()
     self.testObj = makeObjectFromDom(session, object_config, server)
コード例 #13
0
ファイル: testDocument.py プロジェクト: tanmoydeb07/cheshire3
 def setUp(self):
     """Create one StringDocument per (mimeType, data, history) fixture.

     ``self.testDocs`` holds the documents in the same order as
     ``self.testPairs``.
     """
     self.session = Session()
     # NOTE(review): the XML fixture below is not well-formed (<bar> is
     # closed by </baz>); kept as-is -- confirm whether that is intended.
     self.testPairs = [
         ('application/xml', '<doc><foo/><bar><baz/></baz></doc>', []),
         ('text/plain', 'This is my document!', ['aProcessingObject'])
     ]
     creator_id = id(self)
     self.testDocs = [
         StringDocument(text,
                        mimeType=mime_type,
                        creator=creator_id,
                        history=history,
                        byteCount=len(text),
                        wordCount=len(text.split(' ')))
         for mime_type, text, history in self.testPairs
     ]
コード例 #14
0
ファイル: utils.py プロジェクト: bloomonkey/archiveshub
def getCheshire3Env(args):
    """Init and return Cheshire3 Session, Server and Database.

    Initialize Cheshire3 Session, Server and Database objects based on
    ``args``.

    Args:
        args: object with a ``serverconfig`` attribute (passed to
            ``SimpleServer``) and a ``database`` attribute (database
            identifier, or None to discover it from the current working
            directory via ``identify_database``).

    Returns:
        A ``(session, server, db)`` tuple.  The database's 'defaultLogger'
        is attached to the session as ``session.logger``.

    Raises:
        EnvironmentError: re-raised (after logging) when the database
            cannot be identified from the working directory.
        ObjectDoesNotExistException: re-raised (after logging) when the
            server has no object with the chosen identifier.
    """
    # Create a Session
    session = Session()
    # Get the Server based on given serverConfig file
    server = SimpleServer(session, args.serverconfig)
    # Try to get the Database
    if args.database is None:
        try:
            dbid = identify_database(session, os.getcwd())
        except EnvironmentError as e:
            # str(e) rather than e.message: ``message`` is deprecated, is
            # empty for (errno, strerror) style errors, and does not exist
            # on Python 3.
            server.log_critical(session, str(e))
            raise
        server.log_debug(
            session,
            "database identifier not specified, discovered: {0}".format(dbid)
        )
    else:
        dbid = args.database
    try:
        db = server.get_object(session, dbid)
    except ObjectDoesNotExistException:
        msg = """Cheshire3 database {0} does not exist.
Please provide a different database identifier using the --database option.
""".format(dbid)
        server.log_critical(session, msg)
        raise
    else:
        # Attach a default Logger to the Session
        session.logger = db.get_path(session, 'defaultLogger')

    return session, server, db
コード例 #15
0
    def __init__(self):
        """Open a Cheshire3 session on the 'db_dickens' database.

        Only the connection objects are prepared here (session, server,
        database, query factory, result-set store and index store); no
        search term is handled in this initialisation.
        """
        self.session = session = Session()
        session.database = 'db_dickens'
        config_path = os.path.join(cheshire3Root, 'configs',
                                   'serverConfig.xml')
        self.serv = SimpleServer(session, config_path)
        self.db = database = self.serv.get_object(session, session.database)
        self.qf = database.get_object(session, 'defaultQueryFactory')
        self.resultSetStore = database.get_object(session, 'resultSetStore')
        self.idxStore = database.get_object(session, 'indexStore')
コード例 #16
0
    def setUp(self):
        """Create four ResultSetItems and wrap them in two ResultSets.

        ``self.a`` holds rsi1 and rsi3, ``self.b`` holds rsi2 and rsi4.
        N.B. the fixtures are meant so that a == b, while other pairs
        should not evaluate as equal.
        """
        self.session = session = Session()

        def make_item(identifier, store_id, occurrences):
            # Everything except id, recStore and occs is shared by all items.
            return SimpleResultSetItem(session,
                                       id=identifier,
                                       recStore=store_id,
                                       occs=occurrences,
                                       database="",
                                       diagnostic=None,
                                       weight=0.5,
                                       resultSet=None,
                                       numeric=None)

        # Same 4 ResultSetItems as for SimpleResultSetItemTestCase
        self.rsi1 = make_item(0, "recordStore", 5)
        self.rsi2 = make_item(0, "recordStore", 3)
        self.rsi3 = make_item(1, "recordStore", 1)
        self.rsi4 = make_item(0, "recordStore2", 2)
        # rsi1 and rsi2 (same id and store) go into separate ResultSets
        self.a = SimpleResultSet(session, [self.rsi1, self.rsi3], id="a")
        self.b = SimpleResultSet(session, [self.rsi2, self.rsi4], id="b")
コード例 #17
0
 def __init__(self, session, name=None, manager=None, debug=0):
     """Initialise the process with its own copy of the caller's session.

     Args:
         session: existing Session whose user, logger, database,
             environment and server are reused.
         name: optional process name, forwarded to ``mp.Process``.
         manager: stored on the instance as ``self.manager``.
         debug: stored on the instance as ``self.debug``.
     """
     # This sets self.name
     mp.Process.__init__(self, name=name)
     self.inPipe = None
     self.debug = debug
     self.manager = manager
     # Reconstruct our own session, so as to not overwrite task
     self.session = Session(user=session.user,
                            logger=session.logger,
                            task=self.name,
                            database=session.database,
                            environment=session.environment)
     # The server is shared with the original session, not re-created.
     self.session.server = session.server
     self.server = session.server
     self.database = self.server.get_object(self.session, session.database)
     # NOTE(review): this binds a *local* variable called ``name`` and so
     # has no visible effect on the instance or class; it looks like a
     # compatibility shim that was meant to live at class level -- confirm.
     try:
         name = property(mp.Process.get_name, mp.Process.set_name)
     except AttributeError:
         pass
コード例 #18
0
    def setUp(self):
        """Create four ResultSetItems (a, b, c, d) to compare.

        a and b are built from identical arguments; c differs in ``id``
        and d differs in ``recStore``.
        N.B. a == b, other pairs should not evaluate as equal.
        """
        self.session = session = Session()

        def make_item(identifier, store_id):
            # Everything except id and recStore is shared by all items.
            return SimpleResultSetItem(session,
                                       id=identifier,
                                       recStore=store_id,
                                       occs=0,
                                       database="",
                                       diagnostic=None,
                                       weight=0.5,
                                       resultSet=None,
                                       numeric=None)

        self.a = make_item(0, "recordStore")
        self.b = make_item(0, "recordStore")
        self.c = make_item(1, "recordStore")
        self.d = make_item(0, "recordStore2")
コード例 #19
0
ファイル: recordStore.py プロジェクト: tanmoydeb07/cheshire3
def directoryRecordStoreIter(store):
    """Yield the processed form of every entry in a directory-backed store.

    Each ``(id, data)`` pair produced by ``directoryStoreIter(store)`` is
    passed through ``store._process_data`` using one Session created when
    iteration starts.
    """
    iteration_session = Session()
    for identifier, payload in directoryStoreIter(store):
        yield store._process_data(iteration_session, identifier, payload)
コード例 #20
0
from mod_python.util import FieldStorage
import os, re, time
from xml.sax.saxutils import escape
from lxml import etree
from lxml.builder import ElementMaker

from cheshire3.server import SimpleServer
from cheshire3.baseObjects import Session
from cheshire3.utils import flattenTexts
from cheshire3 import cqlParser
from cheshire3 import internal
from cheshire3 import exceptions as c3errors

# Installation root: $C3HOME if set, otherwise /home/cheshire.
cheshirePath = os.environ.get('C3HOME', '/home/cheshire')

# Module-level session and server, created once at import time.
session = Session()
session.environment = "apache"
serv = SimpleServer(
    session,
    os.path.join(cheshirePath, 'cheshire3', 'configs', 'serverConfig.xml'))

configs = {}

# Determine the root URL of this handler: the handler's filesystem root
# with the Apache DocumentRoot prefix removed.

# NOTE(review): docRoot is only bound if a "DocumentRoot" entry is found in
# the config tree; otherwise the replace() below raises NameError -- confirm
# that an entry is always present.
for configitem in apache.config_tree():
    if configitem[0] == "DocumentRoot":
        docRoot = configitem[1].strip("\"'")
handlerUrl = apache.get_handler_root().replace(docRoot, "")

if len(serv.databaseConfigs) < 25:
コード例 #21
0
ファイル: testSession.py プロジェクト: tanmoydeb07/cheshire3
 def test_sessionEnvironmentDefault(self):
     """A Session built without arguments reports the "terminal" environment."""
     self.assertEqual(Session().environment, "terminal")
コード例 #22
0
ファイル: testSession.py プロジェクト: tanmoydeb07/cheshire3
 def test_sessionEnvironmentInit(self):
     """The ``environment`` constructor argument is exposed as an attribute."""
     expected = "apache"
     self.assertEqual(Session(environment=expected).environment, expected)
コード例 #23
0
from mod_python.util import FieldStorage
import os, re, time
from xml.sax.saxutils import escape
from lxml import etree
from lxml.builder import ElementMaker

from cheshire3.server import SimpleServer
from cheshire3.baseObjects import Session
from cheshire3.utils import flattenTexts
from cheshire3 import cqlParser
from cheshire3 import internal
from cheshire3 import exceptions as c3errors

# Installation root: $C3HOME if set, otherwise /home/cheshire.
cheshirePath = os.environ.get('C3HOME', '/home/cheshire')

# Module-level session and server, created once at import time.
session = Session()
session.environment = "apache"
serv = SimpleServer(session, os.path.join(cheshirePath, 'cheshire3', 'configs', 'serverConfig.xml'))

configs = {}

# Determine the root URL of this handler: the handler's filesystem root
# with the Apache DocumentRoot prefix removed.

# NOTE(review): docRoot is only bound if a "DocumentRoot" entry is found in
# the config tree; otherwise the replace() below raises NameError -- confirm
# that an entry is always present.
for configitem in apache.config_tree():
    if configitem[0] == "DocumentRoot":
        docRoot = configitem[1].strip("\"'")
handlerUrl = apache.get_handler_root().replace(docRoot, "")


if len(serv.databaseConfigs) < 25:
    # relatively few dbs - we can safely cache them
コード例 #24
0
ファイル: testSession.py プロジェクト: tanmoydeb07/cheshire3
 def test_sessionDatabaseInit(self):
     """The ``database`` constructor argument is exposed as an attribute."""
     expected = "db_test1"
     self.assertEqual(Session(database=expected).database, expected)
コード例 #25
0
ファイル: dickensGetDist.py プロジェクト: cheshire3/clic
def groupDist(dist):
    """Print a grouped summary of an occurrence distribution.

    ``dist`` maps an occurrence count to the number of articles with that
    count (as implied by the summary line printed at the end).  One
    tab-separated line is printed for each of the counts 1, 2 and 3, one
    for the combined "4+" bucket, and a final total line.  Each line shows
    the number of articles and its percentage of all articles.

    Counts missing from ``dist`` are reported as 0, and an empty
    distribution prints 0.00 percentages instead of dividing by zero.
    """
    hits = sum(dist.values())
    occs = sum(int(count) * int(articles) for count, articles in dist.items())

    def percent(part):
        # Guard against an empty distribution (hits == 0).
        return float(part) / float(hits) * 100.0 if hits else 0.0

    for i in (1, 2, 3):
        articles = dist.get(i, 0)
        print("%s\t%s\t%0.2f" % (i, articles, percent(articles)))

    # Sum every bucket from 4 upwards, *including* the largest key, which
    # the former ``range(4, max(dist.keys()))`` loop left out.
    fourPlus = sum(articles for count, articles in dist.items()
                   if int(count) >= 4)
    print("4+\t%s\t%0.2f" % (fourPlus, percent(fourPlus)))

    print("\n%i occurrences in %i articles" % (occs, hits))

# Script-level Cheshire3 environment for the 'db_news' database.
session = Session()
# Relative path: resolved against the current working directory.
serv = SimpleServer(session, "../../configs/serverConfig.xml")
db = serv.get_object(session, 'db_news')
session.database = 'db_news'

# Stores fetched from the database for use by the rest of the script.
idxStore = db.get_object(session, 'indexStore')
recStore = db.get_object(session, 'recordStore')



コード例 #26
0
ファイル: testSession.py プロジェクト: tanmoydeb07/cheshire3
 def test_sessionInstance(self):
     """Calling Session() yields an instance of Session."""
     self.assertIsInstance(Session(), Session)
コード例 #27
0
# from cheshire3.utils import reader
from cheshire3.baseObjects import Session

# Apache Config:
#<Directory /usr/local/apache2/htdocs/srw>
#  SetHandler mod_python
#  PythonDebug On
#  PythonPath "['/home/cheshire/c3/code', '/usr/local/lib/python2.3/lib-dynload']+sys.path"
#  PythonHandler srwApacheHandler
#</Directory>

# NB. SetHandler, not AddHandler.

# Installation root: $C3HOME if set, otherwise /home/cheshire.
cheshirePath = os.environ.get('C3HOME', '/home/cheshire')

# Module-level session and server, created once at import time.
session = Session()
session.environment = "apache"
serv = SimpleServer(session, os.path.join(cheshirePath, 'cheshire3', 'configs', 'serverConfig.xml'))

# databaseUrl -> {protocol namespace: protocol map} for every database that
# has the 'SRW' / 'srw' setting and an SRW protocol map.
configs = {}
serv._cacheDatabases(session)
for db in serv.databases.values():
    if db.get_setting(session, 'SRW') or db.get_setting(session, 'srw'):
        db._cacheProtocolMaps(session)
        map = db.protocolMaps.get('http://www.loc.gov/zing/srw/', None)
        map2 = db.protocolMaps.get('http://www.loc.gov/zing/srw/update/', None)
        if map is None:
            # No SRW protocol map means no databaseUrl to register under;
            # skip instead of failing on ``None.databaseUrl`` at import.
            continue
        configs[map.databaseUrl] = {'http://www.loc.gov/zing/srw/' : map,
                                    'http://www.loc.gov/zing/srw/update/' : map2}


class reqHandler:
コード例 #28
0
ファイル: run.py プロジェクト: cheshire3/cheshire3-archives
    
# Installation root: $C3HOME if set, otherwise /home/cheshire/.
cheshirePath = os.environ.get('C3HOME', '/home/cheshire/')
# Put <cheshirePath>/cheshire3/code on the import path before the
# cheshire3 imports that follow.
sys.path.insert(1, os.path.join(cheshirePath, 'cheshire3', 'code'))

from cheshire3.baseObjects import Session
from cheshire3.server import SimpleServer
from cheshire3.document import StringDocument
from cheshire3 import exceptions as c3errors

from cheshire3.web.www_utils import read_file

# import customisable variables
#from localConfig import *

# Build environment: module-level session, server and the objects fetched
# from the 'db_ead' and 'db_ead_cluster' databases.
session = Session()
serv = SimpleServer(session, os.path.join(cheshirePath, 'cheshire3', 'configs', 'serverConfig.xml'))
session.database = 'db_ead'

# Objects belonging to the main 'db_ead' database.
db = serv.get_object(session, 'db_ead')
lgr = db.get_path(session, 'defaultLogger')
recordStore = db.get_object(session, 'recordStore')
authStore = db.get_object(session, 'eadAuthStore')
compStore = db.get_object(session, 'componentStore')
clusDocFac = db.get_object(session, 'clusterDocumentFactory')

# Separate cluster database and its record store.
clusDb = serv.get_object(session, 'db_ead_cluster')
clusRecordStore = clusDb.get_object(session, 'eadClusterStore')

xmlp = db.get_object(session, 'LxmlParser')
コード例 #29
0
ファイル: users.py プロジェクト: bloomonkey/archiveshub
                        )
parser_list.set_defaults(func=list_users)

# Create the parser for the "remove" command; it dispatches to remove_user.
parser_remove = subparsers.add_parser('remove',
                                      help='Remove an existing user')

# NOTE(review): nargs='*' also accepts zero usernames -- confirm that
# remove_user copes with an empty list.
parser_remove.add_argument('username',
                           type=str,
                           nargs='*',
                           help='Username of the user(s) to remove')
parser_remove.set_defaults(func=remove_user)


# Build environment: module-level session, server and the objects fetched
# from the 'db_ead' database.
session = Session()
serv = SimpleServer(
    session,
    os.path.join(cheshire3Root,
                 'configs',
                 'serverConfig.xml'
                 )
)
session.database = 'db_ead'
db = serv.get_object(session, 'db_ead')
xmlp = db.get_object(session, 'LxmlParser')
authStore = db.get_object(session, 'hubAuthStore')          # Editors
superAuthStore = db.get_object(session, 'adminAuthStore')   # Hub Staff
instStore = db.get_object(session, 'institutionStore')      # Institutions

コード例 #30
0
ファイル: zApacheHandler.py プロジェクト: ReinSi/cheshire3
from PyZ3950.zdefs import *
from PyZ3950 import oids

import random
rand = random.Random()

from PyZ3950 import CQLParser
asn1.register_oid(Z3950_QUERY_SQL, SQLQuery)
asn1.register_oid(Z3950_QUERY_CQL, asn1.GeneralString)

from cheshire3.baseObjects import Session, Database, Transformer, Workflow
from cheshire3.server import SimpleServer
from cheshire3 import internal
from cheshire3 import cqlParser

# Module-level session and server, created once at import time.
session = Session()
session.environment = "apache"
server = SimpleServer(session, os.path.join(internal.cheshire3Root, 'configs', 'serverConfig.xml'))
# configs: Z39.50 database name -> protocol map
# dbmap:   Cheshire3 database id -> Z39.50 database name
configs = {}
dbmap = {}
server._cacheDatabases(session)
for db in server.databases.values():
    if db.get_setting(session, "z3950"):
        db._cacheProtocolMaps(session)
        # Indented with spaces throughout: the tab-indented lines that used
        # to be here are a TabError on Python 3.
        map1 = db.protocolMaps.get('http://www.loc.gov/z3950/', None)
        if map1:
            configs[map1.databaseName] = map1
            dbmap[db.id] = map1.databaseName

session.resultSetStore = server.get_path(session, 'resultSetStore')
session.logger = server.get_path(session, 'z3950Logger')
コード例 #31
0
ファイル: dickensHandler.py プロジェクト: cheshire3/clic
from cheshire3.server import SimpleServer

# C3 web search utils
from cheshire3.web.www_utils import *

# separate file containing display configs + some HMTL for table rows etc.
from clic.dickens.web.dickensWebConfig import *
from clic.dickens.web.dickensSearchHandler import SearchHandler
from clic.dickens.web.dickensBrowseHandler import BrowseHandler

# NOTE(review): this reads $HOME, not $C3HOME -- confirm that is intended.
cheshirePath = os.environ.get('HOME', '/home/cheshire')

# Log file and HTML directory under <cheshirePath>/clic/www/<databaseName>.
logPath = os.path.join(cheshirePath, 'clic', 'www', databaseName, 'logs', 'searchHandler.log')
htmlPath = os.path.join(cheshirePath, 'clic', 'www', databaseName, 'html')

# Module-level session and server, created once at import time.
session = Session()
session.environment = 'apache'
session.user = None
serv = SimpleServer(session, os.path.join(cheshire3Root, 'configs', 'serverConfig.xml'))

# The database id is fixed to 'db_dickens' here, independent of databaseName.
session.database = 'db_dickens'
db = serv.get_object(session, session.database)
authStore = db.get_object(session, 'authStore')


# Discover objects...
def handler(req):
    global db, htmlPath, logPath, cheshirePath, xmlp, recordStore
    form = FieldStorage(req)
    try:
        dir = req.uri[1:].rsplit('/')[1]
コード例 #32
0
        raise NoSetHierarchyError()

    # End Cheshire3OaiServer ------------------------------------------------


def get_databasesAndConfigs(session, serv):
    """Collect the OAI-PMH enabled databases known to ``serv``.

    Only databases whose 'oai-pmh' setting is truthy and which have a
    protocol map for the OAI-PMH namespace are included.

    Returns:
        A ``(dbs, configs)`` pair of dictionaries, both keyed by the
        protocol map's ``databaseName``: ``dbs`` holds the database
        objects and ``configs`` the protocol maps.
    """
    oai_namespace = 'http://www.openarchives.org/OAI/2.0/OAI-PMH'
    databases_by_name = {}
    maps_by_name = {}
    serv._cacheDatabases(session)
    for database in serv.databases.values():
        if not database.get_setting(session, 'oai-pmh'):
            continue
        database._cacheProtocolMaps(session)
        protocol_map = database.protocolMaps.get(oai_namespace, None)
        # Without a protocol map there is nothing to serve the database under.
        if protocol_map is None:
            continue
        maps_by_name[protocol_map.databaseName] = protocol_map
        databases_by_name[protocol_map.databaseName] = database
    return databases_by_name, maps_by_name


# Cheshire3 architecture: module-level session, server, parser and the
# OAI-PMH database/config mappings, all built once at import time.
session = Session()
serv = SimpleServer(session,
                    os.path.join(cheshire3Root, 'configs', 'serverConfig.xml'))
lxmlParser = serv.get_object(session, 'LxmlParser')
dbs, configs = get_databasesAndConfigs(session, serv)
# Starts empty; nothing in this part of the file fills it.
c3OaiServers = {}
コード例 #33
0
## count words in books, and list titles
## used to create booklist

import os
import re
from lxml import etree
import json

from cheshire3.document import StringDocument
from cheshire3.internal import cheshire3Root
from cheshire3.server import SimpleServer
from cheshire3.baseObjects import Session

# Script-level Cheshire3 environment for the 'db_dickens' database.
session = Session()
session.database = 'db_dickens'
serv = SimpleServer(session,
                    os.path.join(cheshire3Root, 'configs', 'serverConfig.xml'))
db = serv.get_object(session, session.database)
qf = db.get_object(session, 'defaultQueryFactory')
resultSetStore = db.get_object(session, 'resultSetStore')
idxStore = db.get_object(session, 'indexStore')

# Short identifiers of the books to process.
# presumably these match ids used in the index -- verify against the data
list_books = [
    'BH', 'BR', 'DC', 'DS', 'ED', 'GE', 'HT', 'LD', 'MC', 'NN', 'OCS', 'OMF',
    'OT', 'PP', 'TTC', 'AgnesG', 'Antoni', 'arma', 'cran', 'Deronda',
    'dracula', 'emma', 'frank', 'jane', 'Jude', 'LadyAud', 'mary', 'NorthS',
    'persuasion', 'pride', 'sybil', 'Tess', 'basker', 'Pomp', 'mill', 'dorian',
    'Prof', 'native', 'alli', 'Jekyll', 'wwhite', 'vanity', 'VivianG', 'wh'
]

titles = {
コード例 #34
0
ファイル: testSession.py プロジェクト: bloomonkey/cheshire3
 def test_sessionEnvironmentAssign(self):
     """Session.environment can be assigned and then re-assigned."""
     sess = Session()
     for environment, failure_msg in (
             ("apache", "session.environment assignment failed"),
             ("terminal", "session.environment re-assignment failed")):
         sess.environment = environment
         self.assertEqual(sess.environment, environment, failure_msg)
コード例 #35
0
import random
rand = random.Random()

from PyZ3950 import CQLParser
asn1.register_oid(Z3950_QUERY_SQL, SQLQuery)
asn1.register_oid(Z3950_QUERY_CQL, asn1.GeneralString)

from cheshire3.baseObjects import Session, Database, Transformer, Workflow
from cheshire3.server import SimpleServer
from cheshire3 import internal
from cheshire3 import cqlParser

# Installation root: $C3HOME if set, otherwise /home/cheshire.
cheshirePath = os.environ.get('C3HOME', '/home/cheshire')

# Module-level session and server, created once at import time.
session = Session()
session.environment = "apache"
server = SimpleServer(
    session,
    os.path.join(cheshirePath, 'cheshire3', 'configs', 'serverConfig.xml'))
# configs: Z39.50 database name -> protocol map
# dbmap:   Cheshire3 database id -> Z39.50 database name
configs = {}
dbmap = {}
server._cacheDatabases(session)
for db in server.databases.values():
    # Only databases with the "z3950" setting are considered.
    if db.get_setting(session, "z3950"):
        db._cacheProtocolMaps(session)
        map1 = db.protocolMaps.get('http://www.loc.gov/z3950/', None)
        # Databases without a Z39.50 protocol map are skipped.
        if map1:
            configs[map1.databaseName] = map1
            dbmap[db.id] = map1.databaseName
コード例 #36
0
ファイル: testSession.py プロジェクト: bloomonkey/cheshire3
 def test_sessionDatabaseAssign(self):
     """Session.database can be assigned and then re-assigned."""
     sess = Session()
     for identifier, failure_msg in (
             ("db_test1", "session.database assignment failed"),
             ("db_test2", "session.database re-assignment failed")):
         sess.database = identifier
         self.assertEqual(sess.database, identifier, failure_msg)
コード例 #37
0
# from cheshire3.utils import reader
from cheshire3.baseObjects import Session

# Apache Config:
#<Directory /usr/local/apache2/htdocs/srw>
#  SetHandler mod_python
#  PythonDebug On
#  PythonPath "['/home/cheshire/c3/code', '/usr/local/lib/python2.3/lib-dynload']+sys.path"
#  PythonHandler srwApacheHandler
#</Directory>

# NB. SetHandler, not AddHandler.

cheshirePath = os.environ.get('C3HOME', '/home/cheshire')

session = Session()
session.environment = "apache"
serv = SimpleServer(
    session,
    os.path.join(cheshirePath, 'cheshire3', 'configs', 'serverConfig.xml'))

configs = {}
serv._cacheDatabases(session)
for db in serv.databases.values():
    if db.get_setting(session, 'SRW') or db.get_setting(session, 'srw'):
        db._cacheProtocolMaps(session)
        map = db.protocolMaps.get('http://www.loc.gov/zing/srw/', None)
        map2 = db.protocolMaps.get('http://www.loc.gov/zing/srw/update/', None)
        configs[map.databaseUrl] = {
            'http://www.loc.gov/zing/srw/': map,
            'http://www.loc.gov/zing/srw/update/': map2
コード例 #38
0
class Cheshire3Engine(BaseEngine):
    """Search engine backend that stores and queries data through Cheshire3.

    ``session`` and ``server`` are class attributes: they are created once,
    when the class body is executed, and shared by every instance.  The
    methods also read ``self.databaseName`` and ``self.databasePath``,
    which are not defined in this class body (presumably provided by
    ``BaseEngine`` or by instance set-up -- verify against the base class).
    """
    #schema = Schema(title=TEXT(stored=True), path=TEXT(stored=True), href=ID(stored=True), cfiBase=TEXT(stored=True), spinePos=TEXT(stored=True), content=TEXT)
    #database = 'db_tdo_simple_sru'
    # Appended verbatim to databasePath, hence the leading slash.
    cheshire_metadata_dir = '/cheshire3-metadata'
    session = Session()
    serverConfig = os.path.join(cheshire3Root, 'configs', 'serverConfig.xml')
    server = SimpleServer(session, serverConfig)
    queryFactory = None
    db = None
    titleSel = None
    anywhereSel = None
    proxExtractor = None

    def __initializeTitleSelector(self):
        """Load the title selector, falling back to the non-XPath one.

        Tries 'titleXPathSelector' first and 'titleSelector' second; if
        neither exists the error is printed and ``titleSel`` is left as is.
        """
        try:
            self.titleSel = self.db.get_object(self.session,
                                               'titleXPathSelector')
        except ObjectDoesNotExistException:
            try:
                self.titleSel = self.db.get_object(self.session,
                                                   'titleSelector')
            except ObjectDoesNotExistException as e:
                print(e)

    def __initializeAnywhereSelector(self):
        """Load 'anywhereXPathSelector'; print the error if it is missing."""
        try:
            self.anywhereSel = self.db.get_object(self.session,
                                                  'anywhereXPathSelector')
        except ObjectDoesNotExistException as e:
            print(e)

    def __initializeProximityExtractor(self):
        """Load 'ProxExtractor'; print the error if it is missing."""
        try:
            self.proxExtractor = self.db.get_object(self.session,
                                                    'ProxExtractor')
        except ObjectDoesNotExistException as e:
            print(e)

    def __highlight(self, text, term, n):
        """Return a highlighted context snippet for each match of ``term``.

        Every snippet consists of up to ``n`` characters before the match,
        the term wrapped in ``<b class="match term0">...</b>``, and up to
        ``n`` characters after it.  Context is clipped at both ends of
        ``text``.

        NOTE: ``term`` is handed to ``re.finditer`` unescaped, so it is
        interpreted as a regular expression.

        Returns:
            list of snippet strings, one per match, in order of appearance.
        """
        term_concordance = list()
        text_len = len(text)
        term_len = len(term)
        for match in re.finditer(term, text):
            idx = match.start()
            # Clamp at 0: a negative start would be taken as an offset from
            # the end of the text and drop the left-hand context.
            start = max(idx - n, 0)
            term_end = idx + term_len
            end = min(term_end + n, text_len)
            # Right-hand context starts *after* the term so that the term
            # is not repeated behind its highlighted copy.
            term_concordance.append(text[start:idx] +
                                    '<b class="match term0">' + term + '</b>' +
                                    text[term_end:end])

        return term_concordance

    def open(self):
        """Open the database named ``self.databaseName``.

        On success ``self.db`` and ``self.session.database`` are set.  Any
        exception raised while fetching the database is printed, not
        propagated.
        """
        try:
            self.db = self.server.get_object(self.session, self.databaseName)
            self.session.database = self.databaseName
        except Exception as e:
            print(e)
            print("openning database {} failed".format(self.databaseName))

    def create(self):
        """Create the on-disk layout for the database and run cheshire3-init.

        Creates ``self.databasePath`` and the metadata directory below it
        if they are missing, writes an empty JSON object to the metadata
        file named after the database, then invokes the ``cheshire3-init``
        command line tool.
        """
        if not os.path.exists(self.databasePath):
            os.makedirs(self.databasePath)

        # create cheshire metadata directory if needed, then initialize the
        # metadata file with an empty JSON object
        metadata_path = self.databasePath + self.cheshire_metadata_dir
        if not os.path.exists(metadata_path):
            os.makedirs(metadata_path)
        with open(metadata_path + '/' + self.databaseName, 'w') as f:
            json.dump({}, f)

        try:
            print("openning database {} to create".format(self.databasePath))
            # NOTE(review): the command is built by string concatenation and
            # run through the shell; paths containing spaces or shell
            # metacharacters will not work as intended.
            os.system("cheshire3-init " + self.databasePath + " --database=" +
                      self.databaseName)
        except Exception as e:
            print(e)
コード例 #39
0
#!/usr/bin/python
import sys
import os
from cheshire3.baseObjects import Session
from cheshire3.server import SimpleServer
from cheshire3.internal import cheshire3Root

# Build environment...
# Everything below runs at import time and the order matters: the session is
# needed to build the server, and the server is needed to look up the logger
# and the database.
session = Session() # the Session object passed as first argument to every call below
# Show the Cheshire3 root path that the server configuration is loaded from.
print cheshire3Root

# Server built from <cheshire3Root>/configs/serverConfig.xml
serv = SimpleServer(session, os.path.join(cheshire3Root, 'configs', 'serverConfig.xml'))
session.logger = serv.get_path(session, 'defaultLogger') # a logger
db = serv.get_object(session, 'db_tdo_index') # the Database
# Record the database's id on the session.
session.database = db.id

#qf = db.get_object(session, 'defaultQueryFactory')

def testVec():
    """Fetch record 1 from 'recordStore' and request its 'idx-topic' vector.

    Works on the module-level ``db`` and ``session``.  The vector that comes
    back is neither returned nor printed; the function returns None, so it
    only shows whether the lookups complete without raising.
    """
    store = db.get_object(session, 'recordStore')
    first_record = store.fetch_record(session, 1)
    topic_index = db.get_object(session, 'idx-topic')
    topic_index.fetch_vector(session, first_record)
コード例 #40
0
class Cheshire3Engine(BaseEngine):
    """Search-engine adapter that stores and queries documents in Cheshire3.

    Documents are indexed with the ``cheshire3-init`` / ``cheshire3-load``
    command line tools.  Per-document reader attributes (path, href, title,
    cfiBase, spinePos) are kept in a JSON file, keyed by href, inside the
    metadata directory of the database.

    NOTE(review): ``self.database_name`` and ``self.database_path`` are read
    throughout but never assigned in this class - presumably they are set by
    ``BaseEngine``; confirm against the base class.
    """
    #schema = Schema(title=TEXT(stored=True), path=TEXT(stored=True), href=ID(stored=True), cfiBase=TEXT(stored=True), spinePos=TEXT(stored=True), content=TEXT)
    #database = 'db_tdo_simple_sru'

    # Appended to database_path to locate the metadata directory; it starts
    # with '/', so paths are built by concatenation rather than os.path.join.
    cheshire_metadata_dir = '/cheshire3-metadata'
    # Created once, when the class body is executed, and therefore shared by
    # every instance of the engine.
    session = Session()
    serverConfig = os.path.join(cheshire3Root, 'configs', 'serverConfig.xml')
    server = SimpleServer(session, serverConfig)
    # Looked up lazily: db in open(), the rest on the first call to query().
    queryFactory = None
    db = None
    titleSel = None
    anywhereSel = None
    proxExtractor = None

    def __initializeTitleSelector(self):
        """Load the title selector, preferring 'titleXPathSelector'.

        Falls back to 'titleSelector'; if neither object exists the error is
        logged and ``self.titleSel`` is left unchanged.
        """
        try:
            self.titleSel = self.db.get_object(self.session,
                                               'titleXPathSelector')
        except ObjectDoesNotExistException:
            try:
                self.titleSel = self.db.get_object(self.session,
                                                   'titleSelector')
            except ObjectDoesNotExistException as e:
                logging.error(e)

    def __initializeAnywhereSelector(self):
        """Load 'anywhereXPathSelector'; log and carry on if it is missing."""
        try:
            self.anywhereSel = self.db.get_object(self.session,
                                                  'anywhereXPathSelector')
        except ObjectDoesNotExistException as e:
            logging.error(e)

    def __initializeProximityExtractor(self):
        """Load 'ProxExtractor'; log and carry on if it is missing."""
        try:
            self.proxExtractor = self.db.get_object(self.session,
                                                    'ProxExtractor')
        except ObjectDoesNotExistException as e:
            logging.error(e)

    def __metadataFile(self):
        """Return the path of the JSON metadata file of this database."""
        return (self.database_path + self.cheshire_metadata_dir + '/' +
                self.database_name)

    def __highlight(self, text, term, n):
        """Return keyword-in-context snippets for each occurrence of term.

        For every literal occurrence of ``term`` in ``text`` one snippet is
        built from up to ``n`` characters before the match, the term wrapped
        in ``<b class="match term0">...</b>``, and up to ``n`` characters
        after the match.  Note that ``n`` counts characters, not words.

        :param text: the string to search.
        :param term: the literal string to look for.
        :param n: number of context characters kept on each side.
        :returns: list of snippet strings, one per occurrence, in order of
            appearance; an empty list when ``text`` or ``term`` is empty or
            the term does not occur.
        """
        # An empty pattern would match at every position of the text.
        if not text or not term:
            return []

        text_len = len(text)
        term_len = len(term)
        term_concordance = []
        # re.escape: the term is user input to be matched literally, not a
        # regular expression.
        for match in re.finditer(re.escape(term), text):
            idx = match.start()
            # Clamp at 0: a negative start would be read by the slice as
            # "count from the end" and lose the left-hand context.
            start = max(0, idx - n)
            # Right-hand context begins after the match, otherwise the term
            # would appear twice (once highlighted, once plain).
            after = idx + term_len
            end = min(text_len, after + n)
            term_concordance.append(text[start:idx] +
                                    '<b class="match term0">' + term + '</b>' +
                                    text[after:end])

        return term_concordance

    def open(self):
        """Fetch the configured database from the server and select it.

        On success ``self.db`` holds the database object and
        ``self.session.database`` is set to ``self.database_name``.  Any
        exception is logged and swallowed (best-effort); in that case
        ``self.db`` keeps its previous value.
        """
        try:
            self.db = self.server.get_object(self.session, self.database_name)
            self.session.database = self.database_name
        except Exception as e:
            logging.error(e)
            logging.error("openning database {} failed".format(
                self.database_name))

    def create(self):
        """Create the on-disk layout for a new database and initialise it.

        Creates ``self.database_path`` and the metadata directory when they
        are missing, (re)writes the metadata file as an empty JSON object and
        then runs ``cheshire3-init``.  Problems running the tool are logged,
        not raised.
        """
        if not os.path.exists(self.database_path):
            os.makedirs(self.database_path)

        # create cheshire metadata directory if needed, then initialize with
        # an empty JSON object
        metadata_path = self.database_path + self.cheshire_metadata_dir
        if not os.path.exists(metadata_path):
            os.makedirs(metadata_path)
        with open(self.__metadataFile(), 'w') as f:
            json.dump({}, f)

        try:
            logging.info("openning database {} to create".format(
                self.database_path))
            status = os.system("cheshire3-init " + self.database_path +
                               " --database=" + self.database_name)
        except Exception as e:
            logging.error(e)
        else:
            # os.system reports a failing command through its return value,
            # not through an exception, so the status has to be checked.
            if status != 0:
                logging.error(
                    "cheshire3-init exited with status {}".format(status))

    def add(self, path='', href='', title='', cfiBase='', spinePos=''):
        """Index one document and record its reader attributes.

        Runs ``cheshire3-load`` on ``path`` and then stores the given
        attributes in the JSON metadata file under the key ``href``,
        replacing any earlier entry with the same href.  The metadata is
        written even when the load command reports a failure (which is
        logged).

        :param path: file system path of the document to index.
        :param href: key under which the metadata entry is stored.
        :param title: stored as given.
        :param cfiBase: stored as given.
        :param spinePos: stored as given.
        """
        # first, index the document in cheshire3 using unix commands
        status = os.system("cheshire3-load --database=" + self.database_name +
                           ' ' + path)
        if status != 0:
            logging.error(
                "cheshire3-load exited with status {}".format(status))

        doc_md = dict()
        doc_md[href] = {
            'path': path,
            'href': href,
            'title': title,
            'cfiBase': cfiBase,
            'spinePos': spinePos
        }
        # title is not populated, so pulling filename from path prefix
        #filename = path[:path.find('/')] + '.json'
        metadata_file = self.__metadataFile()
        with open(metadata_file) as f_in:
            md_dict = json.load(f_in)

        md_dict.update(doc_md)

        with open(metadata_file, 'w') as f_out:
            json.dump(md_dict, f_out)
        #print "Current Path for directory writing: " + os.getcwd()

    def finished(self):
        """ In Cheshire, there are no cleanup commands that are needed.  The add command
            will index specified documents fully and end, so a finished command is not required.
        """
        pass

    def query(self, q, limit=None):
        """Search the database for q and return the matching documents.

        In Cheshire3, you have to specify an index and query, else it
        defaults to the 'all' index which utilizes simple extraction.

        The query factory, selectors and proximity extractor are fetched
        from the database on first use.  Each hit is looked up in the JSON
        metadata file and returned with an extra ``'highlight'`` entry
        holding the concordance snippets for ``q`` joined by two spaces.

        :param q: query string; also used verbatim as the term to highlight.
        :param limit: NOTE(review): currently ignored - every hit is
            returned.  Confirm the intended contract with BaseEngine before
            implementing it.
        :returns: list of metadata dictionaries, one per hit.
        :raises KeyError: if a hit has no entry in the metadata file.
        """

        if self.queryFactory is None:
            self.queryFactory = self.db.get_object(self.session,
                                                   'defaultQueryFactory')

        if self.titleSel is None:
            self.__initializeTitleSelector()

        if self.anywhereSel is None:
            self.__initializeAnywhereSelector()

        if self.proxExtractor is None:
            self.__initializeProximityExtractor()

        c3Query = self.queryFactory.get_query(self.session, q)
        rs = self.db.search(self.session, c3Query)

        # open up the json file with reader specific attributes
        with open(self.__metadataFile()) as f:
            db_md_dict = json.load(f)

        # loop through recordset, create new results list with dictionary of found values
        results = list()
        for rsi in rs:
            rec = rsi.fetch_record(self.session)
            # check the record titles
            titleData = self.titleSel.process_record(self.session, rec)
            # checking out the proximity attributes
            elems = self.anywhereSel.process_record(self.session, rec)
            extracted = self.proxExtractor.process_xpathResult(
                self.session, elems)
            # list(...) so that taking the first value also works where
            # dict.values() returns a view instead of a list.
            doc_dict = list(extracted.values())[0]
            concordance = self.__highlight(doc_dict['text'], q, 20)
            # extracts document name key
            # NOTE(review): titleData[3][0] is presumably the document's
            # path/href as produced by the title selector - confirm.
            fn_key = os.path.basename(titleData[3][0])
            # append highlighted concordance to the dictionary
            db_md_dict[fn_key][u'highlight'] = "  ".join(concordance)
            results.append(db_md_dict[fn_key])
        return results
コード例 #41
0
ファイル: run.py プロジェクト: bloomonkey/archiveshub
            elif (o == '--addsuperuser'):
                return addSuperUser()
    except UsageError as err:
        sys.stderr.write(str(err) + '\n')
        sys.stderr.write("for help use --help\n")
        sys.stderr.flush()
        return 2
    except Error as e:
        lgr.log_lvl(session, 40, str(e))
        if debug:
            raise
        return 1


# Build environment...
# Module-level set-up, executed at import time.  The order matters: the
# session is needed to build the server, and the server is needed to fetch
# the database that all other objects are looked up from.
session = Session()
# Server built from <cheshire3Root>/configs/serverConfig.xml
serv = SimpleServer(
    session,
    os.path.join(cheshire3Root,
                 'configs',
                 'serverConfig.xml'
                 )
)
# Select the database by its identifier for all later calls on this session.
session.database = 'db_hubedit'

db = serv.get_object(session, 'db_hubedit')
# Logger used by the error handling above (lgr.log_lvl).
lgr = db.get_path(session, 'defaultLogger')
# Two separate stores are looked up: 'hubAuthStore' and 'hubSuperAuthStore'.
authStore = db.get_object(session, 'hubAuthStore')
superAuthStore = db.get_object(session, 'hubSuperAuthStore')

# Parser object registered in the database as 'LxmlParser'.
xmlp = db.get_object(session, 'LxmlParser')
コード例 #42
0
ファイル: run.py プロジェクト: cheshire3/clic
import getpass
import os
import sys
import traceback

from lxml import etree
from crypt import crypt

import cheshire3

from cheshire3.baseObjects import Session
from cheshire3.server import SimpleServer
from cheshire3.internal import cheshire3Root
from cheshire3.document import StringDocument

# Module-level set-up, executed at import time.  The order matters: the
# session is needed to build the server, and the server is needed to fetch
# the database that all other objects are looked up from.
session = Session()

# Server built from <cheshire3Root>/configs/serverConfig.xml
serverConfig = os.path.join(cheshire3Root, 'configs', 'serverConfig.xml')
serv = SimpleServer(session, serverConfig)

db = serv.get_object(session, 'db_dickens')
# Select the database by its identifier for all later calls on this session.
session.database = 'db_dickens'

# Objects below are fetched from the database by their configured names.
qf = db.get_object(session, 'defaultQueryFactory')
df = db.get_object(session, 'SimpleDocumentFactory')

concStore = db.get_object(session, 'concordanceStore')

authStore = db.get_object(session, 'authStore')

recStore = db.get_object(session, 'recordStore')