def __init__(self, name, create=False, analyzer=None):
    """
    Remember where the index lives and how it should be opened.

    name is the path of the index directory. create selects the xapian
    open flags: DB_CREATE_OR_OPEN when true, DB_OPEN otherwise. analyzer
    is kept on the instance; a StandardAnalyzer is built when a falsy
    value is passed. No database is opened here.
    """
    # XXX FIXME: we should really try opening the db here, so that
    # any errors are caught immediately rather than waiting for the
    # first time we try to do something...
    ReadOnlyIndex.__init__(self, name)
    self.name = name

    if not create:
        self.flags = xapian.DB_OPEN
    else:
        self.flags = xapian.DB_CREATE_OR_OPEN

    if not analyzer:
        analyzer = StandardAnalyzer()
    self.analyzer = analyzer

    # the lock file lives inside the index directory itself
    lockPath = os.path.join(self.name, XAPWRAP_LOCK_FILENAME)
    self.lockFile = FilesystemLock(lockPath)
class Index(ReadOnlyIndex):
    """
    Writable xapian index protected by a lock file kept inside the index
    directory.

    The database is not opened by the constructor; _setupDB() does the
    real work of creating the directory, taking the lock and opening the
    database.
    """

    def __init__(self, name, create=False, analyzer=None):
        """
        Remember where the index lives and how it should be opened.

        name is the path of the index directory. create selects the
        xapian open flags: DB_CREATE_OR_OPEN when true, DB_OPEN
        otherwise. analyzer is kept on the instance; a StandardAnalyzer
        is built when a falsy value is passed.
        """
        # XXX FIXME: we should really try opening the db here, so that
        # any errors are caught immediately rather than waiting for the
        # first time we try to do something...
        ReadOnlyIndex.__init__(self, name)
        self.name = name
        if create:
            self.flags = xapian.DB_CREATE_OR_OPEN
        else:
            self.flags = xapian.DB_OPEN
        self.analyzer = analyzer or StandardAnalyzer()
        self.lockFile = FilesystemLock(
            os.path.join(self.name, XAPWRAP_LOCK_FILENAME))

    def _setupDB(self):
        """
        really get a xapian database object

        Creates the index directory if it is missing, acquires the
        wrapper's lock file, removes any leftover xapian lock file and
        opens the database into self.db. If the index is reported as
        corrupted, every file in the directory is deleted and the
        database is opened again from scratch.

        Raises DatabaseLockError when the lock file cannot be acquired.
        """
        # xapian expects directories! self.name should refer to a
        # directory. if it doesn't exist, we'll make one.
        if not os.path.exists(self.name):
            os.mkdir(self.name)

        # try to acquire a lock file
        if not self.lockFile.lock():
            try:
                owningPid = os.readlink(self.lockFile.name)
            except OSError:
                # the lock link can vanish between the failed lock() and
                # this read; still report the failure as a lock error
                owningPid = "unknown"
            errorMsg = ("cannot acquire lock file for xapian index %s "
                        "because it is owned by process %s"
                        % (self.name, owningPid))
            log(errorMsg)
            raise DatabaseLockError(errorMsg)

        xapLockFilePath = os.path.join(self.name, XAPIAN_LOCK_FILENAME)
        if os.path.exists(xapLockFilePath):
            log("Stale database lock found in %s. Deleting it now."
                % xapLockFilePath)
            os.remove(xapLockFilePath)

        # actually try to open a xapian DB
        try:
            try:
                self.db = ExceptionTranslater.openIndex(
                    False, self.name, self.flags)
            except DatabaseCorruptionError:
                # the index is trashed, so there's no harm in blowing it
                # away and starting from scratch
                log("Xapian index at %s is corrupted and will be destroyed"
                    % self.name)
                # NOTE(review): the lock is released here and is not
                # re-acquired before the index is reopened below --
                # confirm that running unlocked afterwards is intended.
                if self.lockFile.locked:
                    self.lockFile.unlock()
                for idxFname in glob.glob(os.path.join(self.name, "*")):
                    os.remove(idxFname)
                self.db = ExceptionTranslater.openIndex(
                    False, self.name, self.flags)
        finally:
            # never keep the lock when no database could be opened
            if self.db is None and self.lockFile.locked:
                self.lockFile.unlock()

    def __del__(self):
        """Close the index when the object is reclaimed."""
        self.close()

    def close(self):
        """
        Close the database (if one is open) and release the lock file.
        """
        # this is important! the only way to get xapian to release the
        # db lock is to call the db object's destructor. that won't
        # happen until nobody is holding a reference to the db
        # object. unfortunately, the query parser holds a reference to
        # it, so the query parser must also go away. do not hold
        # references to these objects anywhere but here.

        # enquire objects and mset objects hold a reference to the db,
        # so if any of them are left alive, the db will not be reclaimed
        if self.db is not None:
            ReadOnlyIndex.close(self)

        # the islink test is needed in case the index directory has
        # been deleted before close was called.
        if self.lockFile.locked and os.path.islink(self.lockFile.name):
            self.lockFile.unlock()

        # there is no point in checking if the lock file is still
        # around right here: it will only be deleted when xapian's
        # destructor runs, but python defers running destructors
        # until after exception handling is complete. since this
        # code will often get called from an exception handler, we
        # have to assume that the lock file's removal will be
        # delayed at least until after this method exits

    def get_document(self, uid):
        """Return the result of self._get_document(uid)."""
        return self._get_document(uid)

    # methods that modify db state

    def index(self, doc):
        """
        Add doc to the index, or replace the stored document when doc
        already carries a truthy uid.

        A "uid" sort key and keyword are appended to doc before it is
        converted to a xapian document. Triggers a flush once more than
        MAX_DATA_INDEXED_BETWEEN_FLUSHES has been indexed since the last
        one.

        Returns the uid under which the document was actually stored.
        """
        # setupDB is not defined in this class; presumably inherited
        # from ReadOnlyIndex.
        self.setupDB()
        if hasattr(doc, "uid") and doc.uid:
            uid = int(doc.uid)
            doc.sortFields.append(SortKey("uid", uid))
            doc.keywords.append(Keyword("uid", str(uid)))
            xapDoc = doc.toXapianDocument(self.indexValueMap, self.prefixMap)
            self.replace_document(uid, xapDoc)
        else:
            # We need to know the uid of the doc we're going to add
            # before we add it so we can setup appropriate uid sorting
            # values. But, another thread could potentially insert a
            # document at that uid after we determine the last uid, but
            # before we manage the insertion. Yay race conditions! So we
            # try to add the document and then check that it ended up at
            # the right uid. If it did not, we update it with the
            # correct uid sort values.
            uid = self.get_lastdocid() + 1
            guessedSortKey = SortKey("uid", uid)
            guessedKeyword = Keyword("uid", str(uid))
            doc.sortFields.append(guessedSortKey)
            doc.keywords.append(guessedKeyword)
            xapDoc = doc.toXapianDocument(self.indexValueMap, self.prefixMap)
            newUID = self.add_document(xapDoc)
            if newUID != uid:
                # drop the entries built for the wrong uid so that the
                # document is not also indexed under somebody else's uid
                doc.sortFields[:] = [field for field in doc.sortFields
                                     if field is not guessedSortKey]
                doc.keywords[:] = [word for word in doc.keywords
                                   if word is not guessedKeyword]
                doc.sortFields.append(SortKey("uid", newUID))
                doc.keywords.append(Keyword("uid", str(newUID)))
                xapDoc = doc.toXapianDocument(
                    self.indexValueMap, self.prefixMap)
                self.replace_document(newUID, xapDoc)
                # report the uid the document really lives at
                uid = newUID

            # a simpler alternative would be to add an empty document
            # and then replace it. the problem with that strategy is
            # that it kills performance since xapian performs an
            # implicit flush when you replace a document that was added
            # but not yet committed to disk.

        self.amountIndexedSinceLastFlush += len(doc)
        if self.amountIndexedSinceLastFlush > MAX_DATA_INDEXED_BETWEEN_FLUSHES:
            self.flush()
        return uid

    # The three methods below forward directly to self.db and are then
    # wrapped by makeProtectedDBMethod (defined elsewhere).

    def add_document(self, doc):
        """Forward to self.db.add_document(doc) and return its result."""
        return self.db.add_document(doc)
    add_document = makeProtectedDBMethod(add_document)

    def replace_document(self, uid, doc):
        """Forward to self.db.replace_document(uid, doc)."""
        return self.db.replace_document(uid, doc)
    replace_document = makeProtectedDBMethod(replace_document)

    def delete_document(self, docID):
        """Forward to self.db.delete_document(docID)."""
        return self.db.delete_document(docID)
    delete_document = makeProtectedDBMethod(delete_document)