Example #1
0
 def __init__(self, name, create=False, analyzer=None):
     """Prepare a writable index stored under ``name``.

     Nothing is opened here; this only records the open mode, the
     analyzer and the lock file that later operations rely on.

     name     -- path handed to ReadOnlyIndex and used as the parent
                 of the lock file.
     create   -- when true the database is opened with
                 xapian.DB_CREATE_OR_OPEN, otherwise with
                 xapian.DB_OPEN.
     analyzer -- analyzer to use; a StandardAnalyzer is built when
                 none (or a falsy value) is supplied.
     """
     # XXX FIXME: we should really try opening the db here, so that
     # any errors are caught immediately rather than waiting for the
     # first time we try to do something...
     ReadOnlyIndex.__init__(self, name)
     self.name = name
     self.flags = xapian.DB_CREATE_OR_OPEN if create else xapian.DB_OPEN
     if not analyzer:
         analyzer = StandardAnalyzer()
     self.analyzer = analyzer
     # the lock file lives inside the index location itself
     lockPath = os.path.join(self.name, XAPWRAP_LOCK_FILENAME)
     self.lockFile = FilesystemLock(lockPath)
Example #2
0
class Index(ReadOnlyIndex):
    """Writable variant of ReadOnlyIndex guarded by a filesystem lock.

    The lock file (XAPWRAP_LOCK_FILENAME inside the index directory) is
    taken in _setupDB() and released in close(). The methods below the
    "methods that modify db state" marker change the database contents.
    """

    def __init__(self, name, create=False, analyzer=None):
        """Record how the index at ``name`` should be opened.

        name     -- path of the index directory.
        create   -- when true the database is opened with
                    xapian.DB_CREATE_OR_OPEN, otherwise xapian.DB_OPEN.
        analyzer -- analyzer to use; defaults to a StandardAnalyzer.
        """
        # XXX FIXME: we should really try opening the db here, so that
        # any errors are caught immediately rather than waiting for the
        # first time we try to do something...
        ReadOnlyIndex.__init__(self, name)
        self.name = name
        if create:
            self.flags = xapian.DB_CREATE_OR_OPEN
        else:
            self.flags = xapian.DB_OPEN
        self.analyzer = analyzer or StandardAnalyzer()
        self.lockFile = FilesystemLock(os.path.join(self.name, XAPWRAP_LOCK_FILENAME))

    def _setupDB(self):
        """Really get a xapian database object.

        Creates the index directory if needed, acquires the lock file,
        removes a leftover xapian lock and opens the database into
        self.db. A corrupted index is wiped and recreated.

        Raises DatabaseLockError when the lock file is held by someone
        else. If the database cannot be opened the lock is released
        again before the exception propagates.
        """

        # xapian expects directories! self.name should refer to a
        # directory. if it doesn't exist, we'll make one.
        if not os.path.exists(self.name):
            os.mkdir(self.name)

        # try to acquire a lock file
        if not self.lockFile.lock():
            try:
                owningPid = os.readlink(self.lockFile.name)
            except OSError:
                # the owner may have dropped the lock between our failed
                # attempt and this read; still report a lock error
                # rather than leaking an unrelated OSError
                owningPid = "unknown"
            errorMsg = (
                "cannot acquire lock file for xapian index %s "
                "because it is owned by process %s" % (self.name, owningPid)
            )
            log(errorMsg)
            raise DatabaseLockError(errorMsg)
        xapLockFilePath = os.path.join(self.name, XAPIAN_LOCK_FILENAME)
        if os.path.exists(xapLockFilePath):
            log("Stale database lock found in %s. Deleting it now." % xapLockFilePath)
            os.remove(xapLockFilePath)

        # actually try to open a xapian DB
        try:
            try:
                self.db = ExceptionTranslater.openIndex(False, self.name, self.flags)
            except DatabaseCorruptionError:
                # the index is trashed, so there's no harm in blowing it
                # away and starting from scratch
                log("Xapian index at %s is corrupted and will be destroyed" % self.name)
                for idxFname in glob.glob(os.path.join(self.name, "*")):
                    # keep our own lock file: we must stay the owner of
                    # the index while it is wiped and reopened, and
                    # close() expects the lock to still be on disk
                    if os.path.basename(idxFname) == XAPWRAP_LOCK_FILENAME:
                        continue
                    os.remove(idxFname)
                self.db = ExceptionTranslater.openIndex(False, self.name, self.flags)
        finally:
            # opening failed: do not keep the index locked
            if self.db is None and self.lockFile.locked:
                self.lockFile.unlock()

    def __del__(self):
        """Close the index when the object is reclaimed."""
        self.close()

    def close(self):
        """Close the database and release the lock file if it is held.

        Does nothing when no database is open.
        """
        # this is important! the only way to get xapian to release the
        # db lock is to call the db object's destructor. that won't
        # happen until nobody is holding a reference to the db
        # object. unfortunately, the query parser holds a reference to
        # it, so the query parser must also go away. do not hold
        # references to these objects anywhere but here.

        # enquire objects and mset objects hold a reference to the db,
        # so if any of them are left alive, the db will not be reclaimed

        if self.db is not None:
            ReadOnlyIndex.close(self)
            # the islink test is needed in case the index directory has
            # been deleted before close was called.
            if self.lockFile.locked and os.path.islink(self.lockFile.name):
                self.lockFile.unlock()
            # there is no point in checking if the lock file is still
            # around right here: it will only be deleted when xapian's
            # destructor runs, but python defers running destructors
            # until after exception handling is complete. since this
            # code will often get called from an exception handler, we
            # have to assume that the lock file's removal will be
            # delayed at least until after this method exits

    def get_document(self, uid):
        """Return the document stored under ``uid`` (via _get_document)."""
        return self._get_document(uid)

    # methods that modify db state

    def index(self, doc):
        """Add ``doc`` to the index, or replace it if it carries a uid.

        A "uid" SortKey and Keyword are appended to ``doc`` as a side
        effect. The index is flushed once more than
        MAX_DATA_INDEXED_BETWEEN_FLUSHES has been indexed since the
        counter was last reset.

        Returns the uid the document is actually stored under.
        """
        self.setupDB()
        if hasattr(doc, "uid") and doc.uid:
            uid = int(doc.uid)
            doc.sortFields.append(SortKey("uid", uid))
            doc.keywords.append(Keyword("uid", str(uid)))
            xapDoc = doc.toXapianDocument(self.indexValueMap, self.prefixMap)
            self.replace_document(uid, xapDoc)
        else:
            # We need to know the uid of the doc we're going to add
            # before we add it so we can setup appropriate uid sorting
            # values. But, another thread could potentially insert a
            # document at that uid after we determine the last uid, but
            # before we manage the insertion. Yay race conditions! So we
            # try to add the document and then check that it ended up at
            # the right uid. If it did not, we update it with the
            # correct uid sort values.
            uid = self.get_lastdocid() + 1
            guessedSortKey = SortKey("uid", uid)
            guessedKeyword = Keyword("uid", str(uid))
            doc.sortFields.append(guessedSortKey)
            doc.keywords.append(guessedKeyword)
            xapDoc = doc.toXapianDocument(self.indexValueMap, self.prefixMap)
            newUID = self.add_document(xapDoc)
            if newUID != uid:
                # we lost the race: drop the entries built from the
                # wrong guess so the document is not indexed under two
                # different uids, then store the corrected version
                doc.sortFields.remove(guessedSortKey)
                doc.keywords.remove(guessedKeyword)
                doc.sortFields.append(SortKey("uid", newUID))
                doc.keywords.append(Keyword("uid", str(newUID)))
                xapDoc = doc.toXapianDocument(self.indexValueMap, self.prefixMap)
                self.replace_document(newUID, xapDoc)
                # report the uid the document really lives at
                uid = newUID

            # a simpler alternative would be to add an empty document
            # and then replace it. the problem with that strategy is
            # that it kills performance since xapian performs an
            # implicit flush when you replace a document that was added
            # but not yet committed to disk.

        self.amountIndexedSinceLastFlush += len(doc)
        if self.amountIndexedSinceLastFlush > MAX_DATA_INDEXED_BETWEEN_FLUSHES:
            self.flush()
        return uid

    def add_document(self, doc):
        """Add a xapian document and return the result of db.add_document."""
        return self.db.add_document(doc)

    # wrapped by makeProtectedDBMethod, which is defined elsewhere
    add_document = makeProtectedDBMethod(add_document)

    def replace_document(self, uid, doc):
        """Replace the document stored under ``uid`` with ``doc``."""
        return self.db.replace_document(uid, doc)

    replace_document = makeProtectedDBMethod(replace_document)

    def delete_document(self, docID):
        """Delete the document stored under ``docID``."""
        return self.db.delete_document(docID)

    delete_document = makeProtectedDBMethod(delete_document)