def update(self, index, iterable, commit=True):
        if not self.setup_complete:
            self.setup()

        self.index = self.index.refresh()
        writer = AsyncWriter(self.index)

        for obj in iterable:
            try:
                doc = index.full_prepare(obj)
            except SkipDocument:
                self.log.debug("Indexing for object `%s` skipped", obj)
            else:
                # Really make sure it's unicode, because Whoosh won't have it any
                # other way.
                for key in doc:
                    doc[key] = self._from_python(doc[key])

                # Document boosts aren't supported in Whoosh 2.5.0+.
                if "boost" in doc:
                    del doc["boost"]

                try:
                    writer.update_document(**doc)
                except Exception as e:
                    if not self.silently_fail:
                        raise

                    # We'll log the object identifier but won't include the actual object
                    # to avoid the possibility of that generating encoding errors while
                    # processing the log message:
                    self.log.error(
                        "%s while preparing object for update" %
                        e.__class__.__name__,
                        exc_info=True,
                        extra={
                            "data": {
                                "index": index,
                                "object": get_identifier(obj)
                            }
                        },
                    )

        if len(iterable) > 0:
            # For now, commit no matter what, as we run into locking issues otherwise.
            writer.commit()
            if writer.ident is not None:
                writer.join()
Exemple #2
0
def index_update(index, items):
    """
    :param:index: index name
    :param:items: list of (operation, full class name, primary key, data) tuples.
    """
    index_name = index
    index = service.app_state.indexes[index_name]
    adapted = service.adapted

    session = safe_session()
    updated = set()
    writer = AsyncWriter(index)
    try:
        for op, cls_name, pk, data in items:
            if pk is None:
                continue

            # always delete. Whoosh manual says that 'update' is actually delete + add
            # operation
            object_key = "{}:{}".format(cls_name, pk)
            writer.delete_by_term("object_key", object_key)

            adapter = adapted.get(cls_name)
            if not adapter:
                # FIXME: log to sentry?
                continue

            if object_key in updated:
                # don't add twice the same document in same transaction. The writer will
                # not delete previous records, ending in duplicate records for same
                # document.
                continue

            if op in ("new", "changed"):
                with session.begin(nested=True):
                    obj = adapter.retrieve(pk, _session=session, **data)

                if obj is None:
                    # deleted after task queued, but before task run
                    continue

                document = service.get_document(obj, adapter)
                try:
                    writer.add_document(**document)
                except ValueError:
                    # logger is here to give us more infos in order to catch a weird bug
                    # that happens regularly on CI but is not reliably
                    # reproductible.
                    logger.error("writer.add_document(%r)", document, exc_info=True)
                    raise
                updated.add(object_key)
    except BaseException:
        writer.cancel()
        raise

    session.close()
    writer.commit()
    try:
        # async thread: wait for its termination
        writer.join()
    except RuntimeError:
        # happens when actual writer was already available: asyncwriter didn't need
        # to start a thread
        pass
Exemple #3
0
def index_update(index, items):
    """
    :param:index: index name
    :param:items: list of (operation, full class name, primary key, data) tuples.
    """
    index_name = index
    index = service.app_state.indexes[index_name]
    adapted = service.adapted

    session = safe_session()
    updated = set()
    writer = AsyncWriter(index)
    try:
        for op, cls_name, pk, data in items:
            if pk is None:
                continue

            # always delete. Whoosh manual says that 'update' is actually delete + add
            # operation
            object_key = f"{cls_name}:{pk}"
            writer.delete_by_term("object_key", object_key)

            adapter = adapted.get(cls_name)
            if not adapter:
                # FIXME: log to sentry?
                continue

            if object_key in updated:
                # don't add twice the same document in same transaction. The writer will
                # not delete previous records, ending in duplicate records for same
                # document.
                continue

            if op in ("new", "changed"):
                with session.begin(nested=True):
                    obj = adapter.retrieve(pk, _session=session, **data)

                if obj is None:
                    # deleted after task queued, but before task run
                    continue

                document = service.get_document(obj, adapter)
                try:
                    writer.add_document(**document)
                except ValueError:
                    # logger is here to give us more infos in order to catch a weird bug
                    # that happens regularly on CI but is not reliably
                    # reproductible.
                    logger.error("writer.add_document(%r)", document, exc_info=True)
                    raise
                updated.add(object_key)
    except Exception:
        writer.cancel()
        raise

    session.close()
    writer.commit()
    try:
        # async thread: wait for its termination
        writer.join()
    except RuntimeError:
        # happens when actual writer was already available: asyncwriter didn't need
        # to start a thread
        pass