def checkIndex(self, maxID):
    """Return every UID from 0 through maxID (inclusive) missing from the db.

    Each candidate UID is converted to a "UID" term with
    makePairForWrite and checked with self.db.term_exists.  The UIDs
    whose term does not exist are returned as a list in ascending order.
    """
    # Note carried over from the original author: a huge result list for
    # an empty index was expected to be a substantial performance hit,
    # but testing with a 120,000 msg index showed that performance is
    # fine and the space overhead quite reasonable.  Should that stop
    # being true, one could compute the maximum document ID in the index
    # and scan only up to min(maxID, that maximum) -- assuming we're
    # using the same document IDs in the index as in atop.
    return [
        candidate
        for candidate in xrange(maxID + 1)
        if not self.db.term_exists(makePairForWrite("UID", str(candidate)))
    ]
def get_documents(self, uid):
    """Return the list of remapped UIDs that correspond to the actual UID.

    A "UID" term is built for ``uid`` with makePairForWrite, wrapped in
    a RawQuery and passed to self.search.  The "uid" entry of each
    result is converted to int and collected in result order.
    """
    query = RawQuery(makePairForWrite("UID", str(uid)))
    remapped = []
    for hit in self.search(query):
        remapped.append(int(hit["uid"]))
    return remapped