def removeWordIdsForDocId(self, docid, wids):
    """Remove the given word ids from the index entries of one document.

    For every word id in ``wids`` the posting for ``docid`` is dropped
    from ``self._forward_idx``; afterwards the document's word-id list in
    ``self._reverse_idx`` and its entry in ``self._frequencies`` are
    rebuilt without the removed word ids.

    docid -- id of the document whose postings are removed.
    wids  -- iterable of word ids to remove for that document.

    Word ids that are not in the forward index, or whose posting set does
    not contain ``docid``, are skipped silently.  The lookup
    ``self._reverse_idx[docid]`` is not guarded, so an unknown ``docid``
    propagates that mapping's lookup error (presumably KeyError).
    """
    forward = self._forward_idx
    # Built once: gives O(1) membership tests when filtering the reverse
    # index below and makes a repeated wid in the input a no-op.
    removed = set(wids)

    for wid in removed:
        try:
            postings = forward[wid]
        except KeyError:
            # word id is not indexed at all
            continue

        if isinstance(postings, int):
            # A plain int entry is a single docid.  Only drop it when it
            # really is this document; otherwise we would wipe out the
            # posting of an unrelated document.
            if postings == docid:
                del forward[wid]
            continue

        try:
            postings.remove(docid)
        except KeyError:
            # docid was not part of this posting set
            continue
        if len(postings) == 0:
            del forward[wid]

    old_wids = decode(self._reverse_idx[docid])
    new_wids = [w for w in old_wids if w not in removed]
    self._reverse_idx[docid] = encode(new_wids)
    self._frequencies[docid] = self._get_frequencies(new_wids)
def insert(self, wids, docid):
    """Insert entries into the index.

    wids  -- either an integer or a sequence of integers (word ids).
    docid -- an integer (document id).

    Each word id gets ``docid`` added to its entry in
    ``self._forward_idx``.  The first docid for a word id is stored as a
    plain int; when a second one arrives the entry is replaced by an
    ``IITreeSet`` holding both.  The document's reverse-index entry and
    frequency entry are then (re)written from ``wids``, and
    ``self._length`` is incremented when ``docid`` was not in the reverse
    index before.
    """
    if isinstance(wids, int):
        wids = [wids]

    idx = self._forward_idx
    for wid in wids:
        try:
            idx[wid].insert(docid)
        except KeyError:
            # first document for this word id: store the bare docid
            idx[wid] = docid
        except AttributeError:
            # Existing entry is a bare int (no .insert method): promote it
            # to a set.  Catching only AttributeError keeps unrelated
            # failures (KeyboardInterrupt, storage errors, a bad docid
            # type) from being swallowed and the posting overwritten.
            olddocid = idx[wid]
            idx[wid] = IITreeSet([olddocid, docid])

    if docid not in self._reverse_idx:
        self._length.change(1)
    self._reverse_idx[docid] = encode(wids)
    self._frequencies[docid] = self._get_frequencies(wids)