Esempio n. 1
0
    def update_document(self, **fields):
        """Replace documents sharing a unique-field value, then add ``fields``.

        For every supplied field whose schema entry has ``unique`` set, the
        document number currently associated with the field's text value is
        looked up and passed to ``delete_document``. Afterwards the supplied
        fields are handed to ``add_document``.

        :param fields: keyword arguments mapping field names to values. At
            least one of them must name a unique field in ``self.schema``.
        :raises IndexingError: if none of the supplied fields is unique (and
            whatever ``_check_state`` raises).
        """
        self._check_state()
        unique_cache = self._unique_cache

        # Check which of the supplied fields are unique
        unique_fields = [
            name for name, field in self.schema.items()
            if name in fields and field.unique
        ]
        if not unique_fields:
            raise IndexingError("None of the fields in %r"
                                " are unique" % fields.keys())

        # Collect the numbers of the documents matching the unique terms
        delset = set()
        for name in unique_fields:
            text = self.schema[name].to_text(fields[name])

            # If we've seen an update_document with this unique field before...
            if name in unique_cache:
                term2docnum = unique_cache[name]

                # A None entry means the field was seen once before but was
                # not cached at that time. Build the cache now.
                if term2docnum is None:
                    # Map every term in this field to the document number
                    # reported for it by first_ids().
                    searcher = self.searcher()
                    try:
                        term2docnum = dict(searcher.first_ids(name))
                    finally:
                        # Release the searcher even if reading the terms
                        # fails part-way through.
                        searcher.close()
                    unique_cache[name] = term2docnum

                # Look up the cached document number for this term
                if text in term2docnum:
                    delset.add(term2docnum[text])
            else:
                # First update_document seen for this field: do a one-off
                # lookup instead of building the whole cache, so that a
                # single call never pays for caching the field. The None
                # marker (which triggers caching on a later call) is only
                # written when the term was actually found.
                reader = self.searcher().reader()
                try:
                    docnum = reader.postings(name, text).id()
                except TermNotFound:
                    pass
                else:
                    delset.add(docnum)
                    unique_cache[name] = None
                finally:
                    reader.close()

        # Delete the old docs
        for docnum in delset:
            self.delete_document(docnum)

        # Add the given fields
        self.add_document(**fields)
Esempio n. 2
0
 def delete_document(self, docnum, delete=True):
     """Forward a delete request to the segment holding ``docnum``.

     ``docnum`` is an index-wide document number; it is translated by
     ``_segment_and_docnum`` into a segment and a segment-local number.

     :param docnum: index-wide document number.
     :param delete: passed through unchanged to the segment's
         ``delete_document`` method.
     :raises IndexingError: if ``docnum`` is not below the combined
         ``doccount`` of all segments.
     """
     self._check_state()

     # Total number of documents across every segment of the index.
     doc_total = 0
     for seg in self.segments:
         doc_total += seg.doccount

     if docnum >= doc_total:
         raise IndexingError("No document ID %r in this index" % docnum)

     owner, local_docnum = self._segment_and_docnum(docnum)
     owner.delete_document(local_docnum, delete=delete)
Esempio n. 3
0
 def _check_state(self):
     if self.is_closed:
         raise IndexingError("This writer is closed")