def removeWordIdsForDocId(self, docid, wids):
    """Detach the word ids ``wids`` from document ``docid``.

    For every word id the forward index entry is updated: an ``int``
    entry is deleted outright, any other entry has ``docid`` removed and
    is deleted once it becomes empty.  Afterwards the reverse index entry
    and the frequencies of ``docid`` are rebuilt from the word ids that
    remain.

    ``wids`` may be any iterable of hashable word ids.  A ``KeyError``
    from the forward index (unknown word id, or ``docid`` missing from
    the entry) is ignored; a missing reverse index entry for ``docid`` is
    not handled here and propagates to the caller.
    """
    # Materialise once: gives O(1) membership tests for the filter below
    # and keeps a one-shot iterable from being exhausted by the loop
    # before the filter gets to see it.
    removed = set(wids)
    forward = self._forward_idx

    for wid in removed:
        try:
            entry = forward[wid]
            if isinstance(entry, int):
                # NOTE(review): presumably an int entry is a single inlined
                # docid; it is dropped without comparing it to ``docid`` --
                # confirm against the code that inserts into _forward_idx.
                del forward[wid]
            else:
                entry.remove(docid)
                if len(entry) == 0:
                    del forward[wid]
        except KeyError:
            # Best effort: nothing to clean up for this word id.
            pass

    remaining = [w for w in decode(self._reverse_idx[docid])
                 if w not in removed]
    self._reverse_idx[docid] = encode(remaining)
    self._frequencies[docid] = self._get_frequencies(remaining)
def removeDocument(self, docid):
    """Drop ``docid`` and every reference to it from the storage.

    Does nothing when looking up / decoding the reverse index entry of
    ``docid`` raises ``KeyError``.  Otherwise the reverse index entry and
    the frequencies of ``docid`` are deleted, ``docid`` is taken out of
    the forward index entry of each of its word ids, and the length
    counter is changed by -1.
    """
    try:
        doc_wids = decode(self._reverse_idx[docid])
    except KeyError:
        return

    del self._reverse_idx[docid]
    del self._frequencies[docid]

    forward = self._forward_idx
    for wid in doc_wids:
        try:
            entry = forward[wid]
            if isinstance(entry, int):
                # Entry is gone after this, so the emptiness check below
                # would only hit a KeyError -- skip straight to the next id.
                del forward[wid]
                continue
            entry.remove(docid)
        except KeyError:
            pass

        # Runs even when remove() failed, so an already-empty entry is
        # still cleaned up.
        try:
            if not len(forward[wid]):
                del forward[wid]
        except KeyError:
            pass

    self._length.change(-1)
def getWordIdsForDocId(self, docid):
    """Return the decoded reverse index entry stored for ``docid``.

    A missing entry is not handled here; the lookup error propagates.
    """
    encoded = self._reverse_idx[docid]
    return decode(encoded)