Beispiel #1
0
 def __init__(self, store):
     """Open a cursor on ``store`` and pre-load its first entry.

     A new Session is created, the store's 'byteCount' connection is
     opened through ``store._open`` and a cursor is positioned on the
     first entry.  ``self.nextData`` is set to a ``(key, data)`` tuple
     where ``data`` comes from ``store.fetch_data`` for that first key.
     """
     self.store = store
     self.session = Session()
     self.cxn = store._open(self.session, 'byteCount')
     self.cursor = self.cxn.cursor()
     # Only the key of the first cursor entry is used; the value held by
     # the cursor is discarded in favour of what fetch_data returns.
     first_key, _cursor_value = self.cursor.first()
     first_data = self.store.fetch_data(self.session, first_key)
     self.nextData = (first_key, first_data)
Beispiel #2
0
def process_update(self, req):
    """Dispatch an update request to the handler for its operation.

    Sets ``self.version`` to "1.1" and ``self.operationStatus`` to "fail"
    before doing anything else.  A request without a truthy ``version``
    causes a ``Diagnostic7`` to be raised.  Otherwise a new Session is
    prepared for the request's database and the request is passed to
    ``handle_create`` / ``handle_replace`` / ``handle_delete`` /
    ``handle_metadata`` according to ``req.operation``.  An unrecognised
    operation is not raised; it is recorded in ``self.diagnostics``.
    """
    self.version = "1.1"
    self.operationStatus = "fail"

    if not req.version:
        missing_version = Diagnostic7()
        missing_version.message = "Mandatory 'version' parameter not supplied"
        missing_version.details = 'version'
        raise missing_version

    # The database object is the parent of the request's config.
    database = req.config.parent
    req._db = database
    session = Session()
    session.environment = "apache"
    session.database = database.id

    # Operation URI -> name of the method that handles it.  Names are
    # resolved lazily so only the matching handler is ever looked up.
    handlers = (
        ("info:srw/operation/1/create", "handle_create"),
        ("info:srw/operation/1/replace", "handle_replace"),
        ("info:srw/operation/1/delete", "handle_delete"),
        ("info:srw/operation/1/metadata", "handle_metadata"),
    )
    operation = req.operation
    for uri, method_name in handlers:
        if operation == uri:
            getattr(self, method_name)(session, req)
            break
    else:
        # No handler matched: report the problem as a diagnostic.
        unknown_op = SRWDiagnostics.Diagnostic1()
        unknown_op.details = "Unknown operation: %s" % operation
        self.diagnostics = [unknown_op]
Beispiel #3
0
 def __init__(self):
     """Create a Session on 'db_dickens' and load the objects used later.

     Builds a SimpleServer from the serverConfig.xml found under
     ``cheshire3Root``/configs, then fetches the database object and its
     'defaultQueryFactory'.
     """
     self.session = sess = Session()
     sess.database = 'db_dickens'
     config_path = os.path.join(cheshire3Root, 'configs', 'serverConfig.xml')
     self.serv = server = SimpleServer(sess, config_path)
     self.db = database = server.get_object(sess, sess.database)
     self.qf = database.get_object(sess, 'defaultQueryFactory')
Beispiel #4
0
 def test_sessionDatabaseAssign(self):
     # Assign session.database twice and check each value reads back.
     sess = Session()
     steps = (
         ("db_test1", "session.database assignment failed"),
         ("db_test2", "session.database re-assignment failed"),
     )
     for db_name, failure_msg in steps:
         sess.database = db_name
         self.assertEqual(sess.database, db_name, failure_msg)
Beispiel #5
0
 def test_sessionEnvironmentAssign(self):
     # Assign session.environment twice and check each value reads back.
     sess = Session()
     steps = (
         ("apache", "session.environment assignment failed"),
         ("terminal", "session.environment re-assignment failed"),
     )
     for env_name, failure_msg in steps:
         sess.environment = env_name
         self.assertEqual(sess.environment, env_name, failure_msg)
Beispiel #6
0
 def setUp(self):
     """Create a Session and one record per item of test data.

     ``self.records`` ends up as a list of dicts, each holding the raw
     data under 'xml' and, under 'record', an instance of the class
     returned by ``_get_class`` built from the parsed data.
     """
     self.session = Session()
     self.records = []
     record_cls = self._get_class()
     for raw in self._get_data():
         parsed = self._parse_data(raw)
         record = record_cls(parsed, xml=raw, byteCount=len(raw))
         self.records.append({'xml': raw, 'record': record})
Beispiel #7
0
 def __init__(self):
     """Connect to Cheshire3 and cache the objects needed for searching.

     Uses the 'db_dickens' database and the serverConfig.xml under
     ``cheshire3Root``/configs.  The database's query factory, result
     set store and index store are fetched and kept on the instance.
     """
     self.session = Session()
     self.session.database = 'db_dickens'
     server_config = os.path.join(cheshire3Root,
                                  'configs',
                                  'serverConfig.xml')
     self.serv = SimpleServer(self.session, server_config)
     self.db = self.serv.get_object(self.session, self.session.database)

     def from_db(identifier):
         # Fetch a configured object from the database by identifier.
         return self.db.get_object(self.session, identifier)

     self.qf = from_db('defaultQueryFactory')
     self.resultSetStore = from_db('resultSetStore')
     self.idxStore = from_db('indexStore')
 def setUp(self):
     """Build a server, register dependency configs and the test object.

     Each config returned by ``_get_dependencyConfigs`` is stored in the
     server's ``subConfigs`` under its 'id'.  The object under test is
     then built from ``_get_config`` with ``makeObjectFromDom``.
     """
     self.session = Session()
     config_path = os.path.join(cheshire3Root, 'configs', 'serverConfig.xml')
     self.server = SimpleServer(self.session, config_path)
     for dep_config in self._get_dependencyConfigs():
         dep_id = dep_config.get('id')
         self.server.subConfigs[dep_id] = dep_config
     # Disable stdout logging
     default_logger = self.server.get_path(self.session, 'defaultLogger')
     default_logger.minLevel = 60
     # Create object that will be tested
     object_config = self._get_config()
     self.testObj = makeObjectFromDom(self.session,
                                      object_config,
                                      self.server)
Beispiel #9
0
 def setUp(self):
     """Create a Session and one StringDocument per test pair.

     Each entry of ``self.testPairs`` is (mime type, data, processing
     history); ``self.testDocs`` holds the matching documents in the
     same order.
     """
     self.session = Session()
     self.testPairs = [
         ('application/xml', '<doc><foo/><bar><baz/></baz></doc>', []),
         ('text/plain', 'This is my document!', ['aProcessingObject'])
     ]
     self.testDocs = [
         StringDocument(text,
                        mimeType=mime_type,
                        creator=id(self),
                        history=history,
                        byteCount=len(text),
                        wordCount=len(text.split(' ')))
         for mime_type, text, history in self.testPairs
     ]
Beispiel #10
0
    def __init__(self):
        """Open a cheshire3 session on the 'db_dickens' database.

        No search term is involved at this stage (the original note
        points to build_and_run_query for that).  The server, database,
        query factory, result set store and index store are kept on the
        instance.
        """
        session = Session()
        self.session = session
        session.database = 'db_dickens'
        server_config = os.path.join(cheshire3Root, 'configs', 'serverConfig.xml')
        self.serv = SimpleServer(session, server_config)
        self.db = self.serv.get_object(session, session.database)
        # Each attribute is filled from the database object of the same role.
        for attr_name, identifier in (('qf', 'defaultQueryFactory'),
                                      ('resultSetStore', 'resultSetStore'),
                                      ('idxStore', 'indexStore')):
            setattr(self, attr_name, self.db.get_object(session, identifier))
Beispiel #11
0
    def setUp(self):
        """Create four ResultSetItems and wrap them in two ResultSets.

        Per the original note: a == b, other pairs should not evaluate
        as equal.
        """
        self.session = session = Session()
        # Keyword arguments that are the same for every item
        # (same 4 ResultSetItems as for SimpleResultSetItemTestCase).
        shared = dict(database="",
                      diagnostic=None,
                      weight=0.5,
                      resultSet=None,
                      numeric=None)
        self.rsi1 = SimpleResultSetItem(
            session, id=0, recStore="recordStore", occs=5, **shared)
        self.rsi2 = SimpleResultSetItem(
            session, id=0, recStore="recordStore", occs=3, **shared)
        self.rsi3 = SimpleResultSetItem(
            session, id=1, recStore="recordStore", occs=1, **shared)
        self.rsi4 = SimpleResultSetItem(
            session, id=0, recStore="recordStore2", occs=2, **shared)
        # rsi1 and rsi2 share id and recStore; each goes into its own
        # ResultSet.
        items_a = [self.rsi1, self.rsi3]
        items_b = [self.rsi2, self.rsi4]
        self.a = SimpleResultSet(session, items_a, id="a")
        self.b = SimpleResultSet(session, items_b, id="b")
Beispiel #12
0
 def __init__(self, session, name=None, manager=None, debug=0):
     """Initialise the process with its own copy of the caller's session.

     Args:
         session: Session whose user, logger, database, environment and
             server are copied into this process's own Session.
         name: optional process name, passed to ``mp.Process.__init__``.
         manager: stored on the instance as ``self.manager``.
         debug: stored on the instance as ``self.debug``.
     """
     # This sets self.name
     mp.Process.__init__(self, name=name)
     self.inPipe = None
     self.debug = debug
     self.manager = manager
     # Reconstruct our own session, so as to not overwrite task
     self.session = Session(user=session.user,
                            logger=session.logger,
                            task=self.name,
                            database=session.database,
                            environment=session.environment)
     self.session.server = session.server
     self.server = session.server
     self.database = self.server.get_object(self.session, session.database)
     # NOTE: the original ended with
     #   try: name = property(mp.Process.get_name, mp.Process.set_name)
     #   except AttributeError: pass
     # That only rebound the local variable ``name`` inside __init__ and
     # so had no effect on the instance or the class; it was removed.
Beispiel #13
0
    def setUp(self):
        """Create four ResultSetItems (a, b, c, d) for comparison tests.

        Per the original note: a == b, other pairs should not evaluate
        as equal.
        """
        self.session = session = Session()
        # (attribute name, id, recStore); every other argument is shared.
        variants = (
            ('a', 0, "recordStore"),
            ('b', 0, "recordStore"),
            ('c', 1, "recordStore"),
            ('d', 0, "recordStore2"),
        )
        for attr_name, item_id, store_id in variants:
            item = SimpleResultSetItem(session,
                                       id=item_id,
                                       recStore=store_id,
                                       occs=0,
                                       database="",
                                       diagnostic=None,
                                       weight=0.5,
                                       resultSet=None,
                                       numeric=None)
            setattr(self, attr_name, item)
Beispiel #14
0
 def test_sessionInstance(self):
     # A Session built with no arguments must be a Session instance.
     self.assertIsInstance(Session(), Session)
# from cheshire3.utils import reader
from cheshire3.baseObjects import Session

# Apache Config:
#<Directory /usr/local/apache2/htdocs/srw>
#  SetHandler mod_python
#  PythonDebug On
#  PythonPath "['/home/cheshire/c3/code', '/usr/local/lib/python2.3/lib-dynload']+sys.path"
#  PythonHandler srwApacheHandler
#</Directory>

# NB. SetHandler, not AddHandler.

# Root directory of the Cheshire3 installation: taken from the C3HOME
# environment variable, falling back to '/home/cheshire' when it is unset.
cheshirePath = os.environ.get('C3HOME', '/home/cheshire')

# Module-level session and server, created once when this module is imported.
session = Session()
session.environment = "apache"
serv = SimpleServer(
    session,
    os.path.join(cheshirePath, 'cheshire3', 'configs', 'serverConfig.xml'))

# Filled by the loop that follows, keyed by each protocol map's databaseUrl.
configs = {}
# Called before serv.databases is iterated below.
serv._cacheDatabases(session)
for db in serv.databases.values():
    if db.get_setting(session, 'SRW') or db.get_setting(session, 'srw'):
        db._cacheProtocolMaps(session)
        map = db.protocolMaps.get('http://www.loc.gov/zing/srw/', None)
        map2 = db.protocolMaps.get('http://www.loc.gov/zing/srw/update/', None)
        configs[map.databaseUrl] = {
            'http://www.loc.gov/zing/srw/': map,
            'http://www.loc.gov/zing/srw/update/': map2
Beispiel #16
0
 def test_sessionEnvironmentInit(self):
     # The environment passed to the constructor must read back unchanged.
     expected = "apache"
     sess = Session(environment=expected)
     self.assertEqual(sess.environment, expected)
Beispiel #17
0
 def test_sessionDatabaseInit(self):
     # The database passed to the constructor must read back unchanged.
     expected = "db_test1"
     sess = Session(database=expected)
     self.assertEqual(sess.database, expected)
Beispiel #18
0
def directoryRecordStoreIter(store):
    """Yield the processed form of every item in ``store``.

    Walks ``directoryStoreIter(store)`` and passes each (id, data) pair
    through ``store._process_data`` using one Session for the whole
    iteration.  This is a generator: the Session is only created once
    iteration starts.
    """
    shared_session = Session()
    for identifier, raw in directoryStoreIter(store):
        processed = store._process_data(shared_session, identifier, raw)
        yield processed
class Cheshire3Engine(BaseEngine):
    """Engine that stores and opens its data through Cheshire3.

    The Session and SimpleServer are created once, when the class body
    runs, and are shared by all instances.  ``databaseName`` and
    ``databasePath`` are read from the instance but not set in this
    class (presumably supplied by BaseEngine -- confirm).
    """
    #schema = Schema(title=TEXT(stored=True), path=TEXT(stored=True), href=ID(stored=True), cfiBase=TEXT(stored=True), spinePos=TEXT(stored=True), content=TEXT)
    #database = 'db_tdo_simple_sru'
    cheshire_metadata_dir = '/cheshire3-metadata'
    session = Session()
    serverConfig = os.path.join(cheshire3Root, 'configs', 'serverConfig.xml')
    server = SimpleServer(session, serverConfig)
    queryFactory = None
    db = None
    titleSel = None
    anywhereSel = None
    proxExtractor = None

    def __initializeTitleSelector(self):
        """Load the title selector, trying 'titleXPathSelector' first.

        Falls back to 'titleSelector'; if neither exists the error is
        printed and ``self.titleSel`` is left unchanged.
        """
        try:
            self.titleSel = self.db.get_object(self.session,
                                               'titleXPathSelector')
        except ObjectDoesNotExistException:
            try:
                self.titleSel = self.db.get_object(self.session,
                                                   'titleSelector')
            except ObjectDoesNotExistException as e:
                print(e)

    def __initializeAnywhereSelector(self):
        """Load 'anywhereXPathSelector'; print the error if it is missing."""
        try:
            self.anywhereSel = self.db.get_object(self.session,
                                                  'anywhereXPathSelector')
        except ObjectDoesNotExistException as e:
            print(e)

    def __initializeProximityExtractor(self):
        """Load 'ProxExtractor'; print the error if it is missing."""
        try:
            self.proxExtractor = self.db.get_object(self.session,
                                                    'ProxExtractor')
        except ObjectDoesNotExistException as e:
            print(e)

    def __highlight(self, text, term, n):
        """Return one highlighted snippet per occurrence of term in text.

        Each snippet contains up to ``n`` characters of context before
        and after the occurrence, with the term itself wrapped in
        ``<b class="match term0">``.  The term is matched literally.
        An empty term yields an empty list.
        """
        if not term:
            return []
        term_concordance = list()
        text_len = len(text)
        term_len = len(term)
        # Escape the term so regex metacharacters in it are matched as
        # plain text and every match is exactly term_len characters long.
        for match in re.finditer(re.escape(term), text):
            idx = match.start()
            # Clamp at 0: a negative start would be read by the slice as
            # an offset from the end of the text.
            start = max(0, idx - n)
            after = idx + term_len
            end = min(text_len, after + n)
            # The right-hand context starts after the term so the term
            # is not repeated behind the closing tag.
            term_concordance.append(text[start:idx] +
                                    '<b class="match term0">' + term + '</b>' +
                                    text[after:end])

        return term_concordance

    def open(self):
        """Open ``self.databaseName`` on the server and select it.

        Best effort: any exception raised while opening is printed
        rather than propagated, leaving ``self.db`` unchanged.
        """
        try:
            self.db = self.server.get_object(self.session, self.databaseName)
            self.session.database = self.databaseName
        except Exception as e:
            print(e)
            print("openning database {} failed".format(self.databaseName))

    def create(self):
        """Create the database directory, metadata file and Cheshire3 db.

        The metadata file is initialised with an empty JSON object and
        ``cheshire3-init`` is then run for the database.
        """
        # Imported here so the method does not depend on a module-level
        # import being present.
        import subprocess

        if not os.path.exists(self.databasePath):
            os.makedirs(self.databasePath)

        # create cheshire metadata directory if needed, then initialize
        # the metadata file with an empty JSON object
        metadata_path = self.databasePath + self.cheshire_metadata_dir
        if not os.path.exists(metadata_path):
            os.makedirs(metadata_path)
        with open(metadata_path + '/' + self.databaseName, 'w') as f:
            json.dump({}, f)

        try:
            print("openning database {} to create".format(self.databasePath))
            # An argument list (no shell) keeps paths containing spaces
            # or shell metacharacters intact.
            status = subprocess.call(["cheshire3-init",
                                      self.databasePath,
                                      "--database=" + self.databaseName])
            if status != 0:
                print("cheshire3-init exited with status {}".format(status))
        except OSError as e:
            # Raised when the cheshire3-init executable cannot be run.
            print(e)
class Cheshire3Engine(BaseEngine):
    """Engine that indexes and searches documents through Cheshire3.

    The Session and SimpleServer are created once, when the class body
    runs, and are shared by all instances.  ``database_name`` and
    ``database_path`` are read from the instance but not set in this
    class (presumably supplied by BaseEngine -- confirm).  Per-document
    metadata is kept in a JSON file under
    ``database_path + cheshire_metadata_dir``.
    """
    #schema = Schema(title=TEXT(stored=True), path=TEXT(stored=True), href=ID(stored=True), cfiBase=TEXT(stored=True), spinePos=TEXT(stored=True), content=TEXT)
    #database = 'db_tdo_simple_sru'
    cheshire_metadata_dir = '/cheshire3-metadata'
    session = Session()
    serverConfig = os.path.join(cheshire3Root, 'configs', 'serverConfig.xml')
    server = SimpleServer(session, serverConfig)
    queryFactory = None
    db = None
    titleSel = None
    anywhereSel = None
    proxExtractor = None

    def __initializeTitleSelector(self):
        """Load the title selector, trying 'titleXPathSelector' first.

        Falls back to 'titleSelector'; if neither exists the error is
        logged and ``self.titleSel`` is left unchanged.
        """
        try:
            self.titleSel = self.db.get_object(self.session,
                                               'titleXPathSelector')
        except ObjectDoesNotExistException:
            try:
                self.titleSel = self.db.get_object(self.session,
                                                   'titleSelector')
            except ObjectDoesNotExistException as e:
                logging.error(e)

    def __initializeAnywhereSelector(self):
        """Load 'anywhereXPathSelector'; log the error if it is missing."""
        try:
            self.anywhereSel = self.db.get_object(self.session,
                                                  'anywhereXPathSelector')
        except ObjectDoesNotExistException as e:
            logging.error(e)

    def __initializeProximityExtractor(self):
        """Load 'ProxExtractor'; log the error if it is missing."""
        try:
            self.proxExtractor = self.db.get_object(self.session,
                                                    'ProxExtractor')
        except ObjectDoesNotExistException as e:
            logging.error(e)

    def __highlight(self, text, term, n):
        """Return one highlighted snippet per occurrence of term in text.

        Each snippet contains up to ``n`` characters of context before
        and after the occurrence, with the term itself wrapped in
        ``<b class="match term0">``.  The term is matched literally.
        An empty term yields an empty list.
        """
        if not term:
            return []
        term_concordance = list()
        text_len = len(text)
        term_len = len(term)
        # Escape the term so regex metacharacters in it are matched as
        # plain text and every match is exactly term_len characters long.
        for match in re.finditer(re.escape(term), text):
            idx = match.start()
            # Clamp at 0: a negative start would be read by the slice as
            # an offset from the end of the text.
            start = max(0, idx - n)
            after = idx + term_len
            end = min(text_len, after + n)
            # The right-hand context starts after the term so the term
            # is not repeated behind the closing tag.
            term_concordance.append(text[start:idx] +
                                    '<b class="match term0">' + term + '</b>' +
                                    text[after:end])

        return term_concordance

    def open(self):
        """Open ``self.database_name`` on the server and select it.

        Best effort: any exception raised while opening is logged
        rather than propagated, leaving ``self.db`` unchanged.
        """
        try:
            self.db = self.server.get_object(self.session, self.database_name)
            self.session.database = self.database_name
        except Exception as e:
            logging.error(e)
            logging.error("openning database {} failed".format(
                self.database_name))

    def create(self):
        """Create the database directory, metadata file and Cheshire3 db.

        The metadata file is initialised with an empty JSON object and
        ``cheshire3-init`` is then run for the database.
        """
        # Imported here so the method does not depend on a module-level
        # import being present.
        import subprocess

        if not os.path.exists(self.database_path):
            os.makedirs(self.database_path)

        # create cheshire metadata directory if needed, then initialize
        # the metadata file with an empty JSON object
        metadata_path = self.database_path + self.cheshire_metadata_dir
        if not os.path.exists(metadata_path):
            os.makedirs(metadata_path)
        with open(metadata_path + '/' + self.database_name, 'w') as f:
            json.dump({}, f)

        try:
            logging.info("openning database {} to create".format(
                self.database_path))
            # An argument list (no shell) keeps paths containing spaces
            # or shell metacharacters intact.
            status = subprocess.call(["cheshire3-init",
                                      self.database_path,
                                      "--database=" + self.database_name])
            if status != 0:
                logging.error(
                    "cheshire3-init exited with status {}".format(status))
        except OSError as e:
            # Raised when the cheshire3-init executable cannot be run.
            logging.error(e)

    def add(self, path='', href='', title='', cfiBase='', spinePos=''):
        """Index the document at ``path`` and record its metadata.

        Runs ``cheshire3-load`` for the document, then merges an entry
        keyed by ``href`` into the database's JSON metadata file.
        """
        # Imported here so the method does not depend on a module-level
        # import being present.
        import subprocess

        # first, index the document in cheshire3 using unix commands
        command = ["cheshire3-load", "--database=" + self.database_name]
        if path:
            # An empty path is left out, as it contributed no argument
            # to the original shell command line either.
            command.append(path)
        try:
            status = subprocess.call(command)
            if status != 0:
                logging.error(
                    "cheshire3-load exited with status {}".format(status))
        except OSError as e:
            # Keep going, as before: a failed load never stopped the
            # metadata update.
            logging.error(e)

        doc_md = dict()
        doc_md[href] = {
            'path': path,
            'href': href,
            'title': title,
            'cfiBase': cfiBase,
            'spinePos': spinePos
        }
        # Read-modify-write of the whole metadata file.
        metadata_path = self.database_path + self.cheshire_metadata_dir
        with open(metadata_path + '/' + self.database_name) as f_in:
            md_dict = json.load(f_in)

        md_dict.update(doc_md)

        with open(metadata_path + '/' + self.database_name, 'w') as f_out:
            json.dump(md_dict, f_out)

    def finished(self):
        """ In Cheshire, there are no cleanup commands that are needed.  The add command
            will index specified documents fully and end, so a finished command is not required.
        """
        pass

    def query(self, q, limit=None):
        """Run query ``q`` and return the metadata of matching documents.

        Original note: in Cheshire3 an index and query have to be
        specified, otherwise the search defaults to the "all" index,
        which uses simple extraction.

        Returns a list of metadata dicts (as stored by ``add``), each
        extended with a 'highlight' entry built from the record's text.

        NOTE(review): ``limit`` is accepted but not applied to the
        results; confirm the intended semantics before implementing it.
        """

        if self.queryFactory is None:
            self.queryFactory = self.db.get_object(self.session,
                                                   'defaultQueryFactory')

        if self.titleSel is None:
            self.__initializeTitleSelector()

        if self.anywhereSel is None:
            self.__initializeAnywhereSelector()

        if self.proxExtractor is None:
            self.__initializeProximityExtractor()

        c3Query = self.queryFactory.get_query(self.session, q)
        rs = self.db.search(self.session, c3Query)

        # open up the json file with reader specific attributes
        metadata_path = self.database_path + self.cheshire_metadata_dir
        with open(metadata_path + '/' + self.database_name) as f:
            db_md_dict = json.load(f)

        # loop through recordset, create new results list with dictionary of found values
        results = list()
        for rsi in rs:
            rec = rsi.fetch_record(self.session)
            # check the record titles
            titleData = self.titleSel.process_record(self.session, rec)
            # checking out the proximity attributes
            elems = self.anywhereSel.process_record(self.session, rec)
            extracted = self.proxExtractor.process_xpathResult(
                self.session, elems)
            # list(...) so taking the first value works whether values()
            # returns a list or a view.
            doc_dict = list(extracted.values())[0]
            concordance = self.__highlight(doc_dict['text'], q, 20)
            # extracts document name key
            fn_key = os.path.basename(titleData[3][0])
            # append highlighted concordance to the dictionary
            db_md_dict[fn_key][u'highlight'] = "  ".join(concordance)
            results.append(db_md_dict[fn_key])
        return results
Beispiel #21
0
 def test_sessionEnvironmentDefault(self):
     # A Session built with no arguments must report "terminal".
     default_env = Session().environment
     self.assertEqual(default_env, "terminal")