Example #1
0
    def test_bulk_delete(self):
        # Index ``self.num_entries`` entries, delete every entry found on the
        # second result page while driving the commiter hooks by hand, then
        # check that the number of hits shrank by exactly one page.
        author = Person.objects.create(name="Alex")
        for number in range(self.num_entries):
            Entry.objects.create(
                author=author,
                title="Entry with number %s" % number,
                text="foobar " * number,
            )
        Entry.indexer.update()

        hits = Entry.indexer.search("title:number")

        database = Entry.indexer._db.open(write=True)

        # NOTE(review): Commiter.create(False) presumably yields the variant
        # whose page-level hooks call the transaction callbacks - confirm
        # against the Commiter implementation.
        commiter_class = Commiter.create(False)
        commiter = commiter_class(
            database.begin_transaction,
            database.commit_transaction,
            database.cancel_transaction,
        )

        hits_before = hits.count()
        paginator = Paginator(hits, self.per_page)

        self.assertEqual(paginator.num_pages, self.num_pages)

        second_page = paginator.page(2)

        commiter.begin_page()
        try:
            for entry in second_page.object_list:
                commiter.begin_object()
                try:
                    Entry.indexer.delete(entry.pk, database=database)
                    commiter.commit_object()
                except Exception:
                    # Undo the current object before propagating the error.
                    commiter.cancel_object()
                    raise

            commiter.commit_page()
        except Exception:
            # Undo the whole page before propagating the error.
            commiter.cancel_page()
            raise

        hits_after = Entry.indexer.search("title:number").count()
        self.assertEqual(hits_after, hits_before - self.per_page)
Example #2
0
    def test_concrete_commiter_object(self):
        # For Commiter.create(True) the page-level hooks are expected to
        # return None, while the object-level hooks are expected to return
        # whatever the matching callback returns.
        commiter = Commiter.create(True)(
            lambda: 'begin',
            lambda: 'commit',
            lambda: 'rollback',
        )

        # (hook name, expected return value), checked in this exact order.
        expectations = (
            ('begin_page', None),
            ('commit_page', None),
            ('cancel_page', None),
            ('begin_object', 'begin'),
            ('commit_object', 'commit'),
            ('cancel_object', 'rollback'),
        )

        for hook_name, expected in expectations:
            hook = getattr(commiter, hook_name)
            self.assertEqual(hook(), expected)
Example #3
0
def update_changes(verbose, timeout, once, per_page, commit_each,
                   app_models=None):
    """
    Apply the queued ``Change`` records to the indexes.

    Records with action 'add'/'edit' are re-indexed page by page through
    ``indexer.update``; records with action 'delete' are removed through
    ``indexer.delete``. Every processed ``Change`` row is deleted.

    Arguments:
     * verbose: when true, print the pending count and one '.' per indexed
       object.
     * timeout: value passed to ``time.sleep`` between two passes.
     * once: when true, return after the first pass.
     * per_page: page size for ``paginate`` and ``indexer.update``.
     * commit_each: forwarded to ``Commiter.create`` and ``indexer.update``.
     * app_models: forwarded unchanged to ``get_content_types``.
    """
    # One-element list so the nested callbacks can mutate the tally.
    indexed = [0]

    def reset_counter():
        indexed[0] = 0

    def after_index(obj):
        indexed[0] += 1

        if verbose:
            sys.stdout.write('.')
            sys.stdout.flush()

    commiter_class = Commiter.create(commit_each)
    commiter = commiter_class(
        lambda: None,
        transaction.commit,
        transaction.rollback,
    )

    while True:
        pending = Change.objects.count()
        if verbose and pending > 0:
            print('There are %d objects to update' % pending)

        for ct in get_content_types(app_models, 'add', 'edit'):
            indexers = get_indexers(ct)

            # NOTE(review): the original comment said "The objects must be
            # sorted by date", yet the ordering applied is by object_id.
            changes = Change.objects.filter(
                content_type=ct,
                action__in=('add', 'edit'),
            )
            changes = changes.select_related('content_type')
            changes = changes.order_by('object_id')

            for page in paginate(changes, per_page):
                commiter.begin_page()

                try:
                    for indexer in indexers:
                        manager = ct.model_class()._default_manager
                        object_ids = [c.object_id for c in page.object_list]
                        documents = manager.filter(pk__in=object_ids)
                        indexer.update(
                            documents.order_by('pk'),
                            after_index,
                            per_page,
                            commit_each,
                        )

                    for change in page.object_list:
                        change.delete()

                    commiter.commit_page()
                except Exception:
                    if not commit_each:
                        commiter.cancel_page()
                    else:
                        # Drop as many change records as after_index counted,
                        # then commit that partial progress.
                        for change in page.object_list[:indexed[0]]:
                            change.delete()
                        commiter.commit_object()
                    raise

                reset_counter()

        for ct in get_content_types(app_models, 'delete'):
            indexers = get_indexers(ct)
            removals = Change.objects.filter(content_type=ct, action='delete')

            for change in removals:
                for indexer in indexers:
                    indexer.delete(change.object_id)
                change.delete()

        # When transactions are in use and Djapian runs as a daemon, each
        # pass has to end with a commit, otherwise Djapian will not catch
        # later changes. The commit_manually decorator is needed as well.
        #
        # Background:
        #  - PEP 249: autocommit is off by default, and "Database modules
        #    that do not support transactions should implement this method
        #    with void functionality".
        #  - Consistent Nonlocking Reads (InnoDB):
        #    http://dev.mysql.com/doc/refman/5.0/en/innodb-consistent-read-example.html
        transaction.commit()

        if once:
            break

        time.sleep(timeout)
Example #4
0
    def update(self, documents=None, after_index=None, per_page=10000, commit_each=False):
        """
        Update the database with the documents.
        There are some default value and terms in a document:
         * Values:
           1. Used to store the ID of the document
           2. Store the model of the object (in the string format, like
              "project.app.model")
           3. Store the indexer descriptor (module path)
           4..10. Free

         * Terms
           UID: Used to store the ID of the document, so we can replace
                the document by the ID

        Arguments:
         * documents: objects to index; when None, every object returned by
           ``self._model.objects.all()`` is indexed.
         * after_index: optional callable invoked with each object right
           after its document has been stored.
         * per_page: page size handed to ``paginate``.
         * commit_each: forwarded to ``Commiter.create`` to select the
           commit strategy (presumably per object when true, per page
           otherwise - confirm against ``Commiter``).

        Any exception cancels the current object and page through the
        commiter and is then re-raised.
        """
        # Open Xapian Database
        database = self._db.open(write=True)

        # Without an explicit document set, reindex the whole model
        if documents is None:
            update_queue = self._model.objects.all()
        else:
            update_queue = documents

        commiter = Commiter.create(commit_each)(
            lambda: database.begin_transaction(flush=True),
            database.commit_transaction,
            database.cancel_transaction
        )

        # Get each document received
        for page in paginate(update_queue, per_page):
            try:
                commiter.begin_page()

                for obj in page.object_list:
                    commiter.begin_object()

                    try:
                        if not self.trigger(obj):
                            self.delete(obj.pk, database)
                            # Finish the unit opened by begin_object() before
                            # skipping; a bare ``continue`` would leave it
                            # open when the next begin_object() runs.
                            commiter.commit_object()
                            continue

                        doc = xapian.Document()

                        # Add default terms and values
                        uid = self._create_uid(obj)
                        doc.add_term(uid)
                        self._insert_meta_values(doc, obj)

                        generator = xapian.TermGenerator()
                        generator.set_database(database)
                        generator.set_document(doc)
                        generator.set_flags(xapian.TermGenerator.FLAG_SPELLING)

                        stem_lang = self._get_stem_language(obj)
                        if stem_lang:
                            generator.set_stemmer(xapian.Stem(stem_lang))

                        for field in self.fields + self.tags:
                            # Trying to resolve field value or skip it
                            try:
                                value = field.resolve(obj)
                            except AttributeError:
                                continue

                            if field.prefix:
                                index_value = field.convert(value, self._model)
                                if index_value is not None:
                                    doc.add_value(field.number, smart_unicode(index_value))

                            prefix = smart_unicode(field.get_tag())
                            text = smart_unicode(value)
                            generator.index_text(text, field.weight, prefix)
                            if prefix:  # if prefixed then also index without prefix
                                generator.index_text(text, field.weight)

                        # Replaces the document carrying this UID term
                        database.replace_document(uid, doc)
                        if after_index:
                            after_index(obj)

                        commiter.commit_object()
                    except Exception:
                        commiter.cancel_object()
                        raise

                commiter.commit_page()
            except Exception:
                commiter.cancel_page()
                raise

        database.flush()
Example #5
0
def update_changes(verbose, timeout, once, per_page, commit_each, app_models=None):
    """
    Apply the queued ``Change`` records to the indexes.

    Records with action "add"/"edit" are re-indexed page by page through
    ``indexer.update``; records with action "delete" are removed through
    ``indexer.delete``. Every processed ``Change`` row is deleted.

    Arguments:
     * verbose: when true, print the pending count and one "." per indexed
       object.
     * timeout: value passed to ``time.sleep`` between two passes.
     * once: when true, return after the first pass.
     * per_page: page size for ``paginate`` and ``indexer.update``.
     * commit_each: forwarded to ``Commiter.create`` and ``indexer.update``.
     * app_models: forwarded unchanged to ``get_content_types``.
    """
    # One-element list so the nested callbacks can mutate the tally.
    counter = [0]

    def reset_counter():
        counter[0] = 0

    def after_index(obj):
        counter[0] += 1

        if verbose:
            sys.stdout.write(".")
            sys.stdout.flush()

    commiter = Commiter.create(commit_each)(lambda: None, transaction.commit, transaction.rollback)

    while True:
        count = Change.objects.count()
        if count > 0 and verbose:
            # Parenthesised single-argument form: prints the same text under
            # Python 2 and is valid syntax under Python 3.
            print("There are %d objects to update" % count)

        for ct in get_content_types(app_models, "add", "edit"):
            indexers = get_indexers(ct)

            # NOTE(review): the original comment said "The objects must be
            # sorted by date", yet the ordering applied is by object_id.
            for page in paginate(
                Change.objects.filter(content_type=ct, action__in=("add", "edit"))
                .select_related("content_type")
                .order_by("object_id"),
                per_page,
            ):
                commiter.begin_page()

                try:
                    for indexer in indexers:
                        indexer.update(
                            ct.model_class()
                            ._default_manager.filter(pk__in=[c.object_id for c in page.object_list])
                            .order_by("pk"),
                            after_index,
                            per_page,
                            commit_each,
                        )

                    for change in page.object_list:
                        change.delete()

                    commiter.commit_page()
                except Exception:
                    if commit_each:
                        # Drop as many change records as after_index counted,
                        # then commit that partial progress.
                        for change in page.object_list[: counter[0]]:
                            change.delete()
                        commiter.commit_object()
                    else:
                        commiter.cancel_page()
                    raise

                reset_counter()

        for ct in get_content_types(app_models, "delete"):
            indexers = get_indexers(ct)

            for change in Change.objects.filter(content_type=ct, action="delete"):
                for indexer in indexers:
                    indexer.delete(change.object_id)
                change.delete()

        # If using transactions and running Djapian as a daemon, transactions
        # need to be committed on each iteration, otherwise Djapian will not
        # catch changes. We also need to use the commit_manually decorator.
        #
        # Background information:
        #
        # Autocommit is turned off by default according to PEP 249.
        # PEP 249 states "Database modules that do not support transactions
        #                 should implement this method with void functionality".
        # Consistent Nonlocking Reads (InnoDB):
        # http://dev.mysql.com/doc/refman/5.0/en/innodb-consistent-read-example.html
        transaction.commit()

        if once:
            break

        time.sleep(timeout)
Example #6
0
    def update(self, documents=None, after_index=None, per_page=10000, commit_each=False):
        """
        Update the database with the documents.
        There are some default value and terms in a document:
         * Values:
           1. Used to store the ID of the document
           2. Store the model of the object (in the string format, like
              "project.app.model")
           3. Store the indexer descriptor (module path)
           4..10. Free

         * Terms
           UID: Used to store the ID of the document, so we can replace
                the document by the ID

        Arguments:
         * documents: objects to index; when None, every object returned by
           ``self._model.objects.all()`` is indexed.
         * after_index: optional callable invoked with each object right
           after its document has been stored.
         * per_page: page size handed to ``paginate``.
         * commit_each: forwarded to ``Commiter.create`` to select the
           commit strategy (presumably per object when true, per page
           otherwise - confirm against ``Commiter``).

        Any exception cancels the current object and page through the
        commiter and is then re-raised.
        """
        # Open Xapian Database
        database = self._db.open(write=True)

        # Without an explicit document set, reindex the whole model
        if documents is None:
            update_queue = self._model.objects.all()
        else:
            update_queue = documents

        commiter = Commiter.create(commit_each)(
            lambda: database.begin_transaction(flush=True),
            database.commit_transaction,
            database.cancel_transaction
        )

        # Get each document received
        for page in paginate(update_queue, per_page):
            try:
                commiter.begin_page()

                for obj in page.object_list:
                    commiter.begin_object()

                    try:
                        if not self.trigger(obj):
                            self.delete(obj.pk, database)
                            # Finish the unit opened by begin_object() before
                            # skipping; a bare ``continue`` would leave it
                            # open when the next begin_object() runs.
                            commiter.commit_object()
                            continue

                        doc = xapian.Document()

                        # Add default terms and values
                        uid = self._create_uid(obj)
                        doc.add_term(uid)
                        self._insert_meta_values(doc, obj)

                        generator = xapian.TermGenerator()
                        generator.set_database(database)
                        generator.set_document(doc)
                        generator.set_flags(xapian.TermGenerator.FLAG_SPELLING)

                        stemming_lang = self._get_stem_language(obj)
                        if stemming_lang:
                            stemmer = self.get_stemmer(stemming_lang)
                            generator.set_stemmer(stemmer)

                            stopper = self.get_stopper(stemming_lang)
                            if stopper:
                                generator.set_stopper(stopper)

                        # Get a weight for the object
                        obj_weight = self._get_object_weight(obj)
                        # Index fields
                        self._do_index_fields(doc, generator, obj, obj_weight)

                        # Replaces the document carrying this UID term
                        database.replace_document(uid, doc)
                        if after_index:
                            after_index(obj)

                        commiter.commit_object()
                    except Exception:
                        commiter.cancel_object()
                        raise

                commiter.commit_page()
            except Exception:
                commiter.cancel_page()
                raise
Example #7
0
    def update(self,
               documents=None,
               after_index=None,
               per_page=10000,
               commit_each=False):
        """
        Update the database with the documents.
        There are some default value and terms in a document:
         * Values:
           1. Used to store the ID of the document
           2. Store the model of the object (in the string format, like
              "project.app.model")
           3. Store the indexer descriptor (module path)
           4..10. Free

         * Terms
           UID: Used to store the ID of the document, so we can replace
                the document by the ID

        Arguments:
         * documents: objects to index; when None, every object returned by
           ``self._model.objects.all()`` is indexed.
         * after_index: optional callable invoked with each object right
           after its document has been stored.
         * per_page: page size handed to ``paginate``.
         * commit_each: forwarded to ``Commiter.create`` to select the
           commit strategy (presumably per object when true, per page
           otherwise - confirm against ``Commiter``).

        Any exception cancels the current object and page through the
        commiter and is then re-raised.
        """
        # Open Xapian Database
        database = self._db.open(write=True)

        # Without an explicit document set, reindex the whole model
        if documents is None:
            update_queue = self._model.objects.all()
        else:
            update_queue = documents

        commiter = Commiter.create(commit_each)(
            database.begin_transaction,
            database.commit_transaction,
            database.cancel_transaction)

        # Get each document received
        for page in paginate(update_queue, per_page):
            try:
                commiter.begin_page()

                for obj in page.object_list:
                    commiter.begin_object()

                    try:
                        if not self.trigger(obj):
                            self.delete(obj.pk, database)
                            # Finish the unit opened by begin_object() before
                            # skipping; a bare ``continue`` would leave it
                            # open when the next begin_object() runs.
                            commiter.commit_object()
                            continue

                        doc = xapian.Document()

                        # Add default terms and values
                        uid = self._create_uid(obj)
                        doc.add_term(uid)
                        self._insert_meta_values(doc, obj)

                        generator = xapian.TermGenerator()
                        generator.set_database(database)
                        generator.set_document(doc)
                        generator.set_flags(xapian.TermGenerator.FLAG_SPELLING)

                        stemming_lang = self._get_stem_language(obj)
                        if stemming_lang:
                            stemmer = self.get_stemmer(stemming_lang)
                            generator.set_stemmer(stemmer)
                            stopper = self.get_stopper(stemming_lang)
                            if stopper:
                                generator.set_stopper(stopper)

                        # Get a weight for the object
                        obj_weight = self._get_object_weight(obj)
                        # Index fields
                        self._do_index_fields(doc, generator, obj, obj_weight)

                        # Replaces the document carrying this UID term
                        database.replace_document(uid, doc)
                        if after_index:
                            after_index(obj)

                        commiter.commit_object()
                    except Exception:
                        commiter.cancel_object()
                        raise

                commiter.commit_page()
            except Exception:
                commiter.cancel_page()
                raise

        database.flush()
Example #8
0
def update_changes(verbose, timeout, once, per_page, commit_each):
    """
    Apply the queued ``Change`` records to the indexes.

    Records with action 'add'/'update' are re-indexed page by page through
    ``indexer.update``; records with action 'delete' are removed through
    ``indexer.delete``. Every processed ``Change`` row is deleted.

    Arguments:
     * verbose: when true, print the pending count and one '.' per indexed
       object.
     * timeout: value passed to ``time.sleep`` between two passes.
     * once: when true, return after the first pass.
     * per_page: page size for ``paginate`` and ``indexer.update``.
     * commit_each: forwarded to ``Commiter.create`` and ``indexer.update``.
    """
    # One-element list so the nested callbacks can mutate the tally.
    counter = [0]

    def reset_counter():
        # Must stay an int: after_index() does ``counter[0] += 1`` and the
        # value is used as a slice bound in the error handler below.
        counter[0] = 0

    def after_index(obj):
        counter[0] += 1

        if verbose:
            sys.stdout.write('.')
            sys.stdout.flush()

    commiter = Commiter.create(commit_each)(
        lambda: None,
        transaction.commit,
        transaction.rollback
    )

    while True:
        count = Change.objects.count()
        if count > 0 and verbose:
            # Parenthesised single-argument form: prints the same text under
            # Python 2 and is valid syntax under Python 3.
            print('There are %d objects to update' % count)

        for ct in get_content_types('add', 'update'):
            indexers = get_indexers(ct)

            # NOTE(review): the original comment said "The objects must be
            # sorted by date", yet the ordering applied is by object_id.
            for page in paginate(
                            Change.objects.filter(content_type=ct, action__in=('add', 'update'))\
                                .select_related('content_type')\
                                .order_by('object_id'),
                            per_page
                        ):
                commiter.begin_page()

                try:
                    for indexer in indexers:
                        indexer.update(
                            ct.model_class()._default_manager.filter(
                                pk__in=[c.object_id for c in page.object_list]
                            ).order_by('pk'),
                            after_index,
                            per_page,
                            commit_each
                        )

                    for change in page.object_list:
                        change.delete()

                    commiter.commit_page()
                except Exception:
                    if commit_each:
                        # Drop as many change records as after_index counted,
                        # then commit that partial progress.
                        for change in page.object_list[:counter[0]]:
                            change.delete()
                        commiter.commit_object()
                    else:
                        commiter.cancel_page()
                    raise

                reset_counter()

        for ct in get_content_types('delete'):
            indexers = get_indexers(ct)

            for change in Change.objects.filter(content_type=ct, action='delete'):
                for indexer in indexers:
                    indexer.delete(change.object_id)
                # Delete the record exactly once, after every indexer has
                # handled it (also when there are no indexers at all).
                change.delete()

        if once:
            break

        time.sleep(timeout)
Example #9
0
    def update(self,
               documents=None,
               after_index=None,
               per_page=10000,
               commit_each=False):
        """
        Update the database with the documents.
        There are some default value and terms in a document:
         * Values:
           1. Used to store the ID of the document
           2. Store the model of the object (in the string format, like
              "project.app.model")
           3. Store the indexer descriptor (module path)
           4..10. Free

         * Terms
           UID: Used to store the ID of the document, so we can replace
                the document by the ID

        Arguments:
         * documents: objects to index; when None, every object returned by
           ``self._model.objects.all()`` is indexed.
         * after_index: optional callable invoked with each object right
           after its document has been stored.
         * per_page: page size handed to ``paginate``.
         * commit_each: forwarded to ``Commiter.create`` to select the
           commit strategy (presumably per object when true, per page
           otherwise - confirm against ``Commiter``).

        Any exception cancels the current object and page through the
        commiter and is then re-raised.
        """
        # Open Xapian Database
        database = self._db.open(write=True)

        # Without an explicit document set, reindex the whole model
        if documents is None:
            update_queue = self._model.objects.all()
        else:
            update_queue = documents

        commiter = Commiter.create(commit_each)(
            lambda: database.begin_transaction(flush=True),
            database.commit_transaction, database.cancel_transaction)

        # Get each document received
        for page in paginate(update_queue, per_page):
            try:
                commiter.begin_page()

                for obj in page.object_list:
                    commiter.begin_object()

                    try:
                        if not self.trigger(obj):
                            self.delete(obj.pk, database)
                            # Finish the unit opened by begin_object() before
                            # skipping; a bare ``continue`` would leave it
                            # open when the next begin_object() runs.
                            commiter.commit_object()
                            continue

                        doc = xapian.Document()

                        # Add default terms and values
                        uid = self._create_uid(obj)
                        doc.add_term(uid)
                        self._insert_meta_values(doc, obj)

                        generator = xapian.TermGenerator()
                        generator.set_database(database)
                        generator.set_document(doc)
                        generator.set_flags(xapian.TermGenerator.FLAG_SPELLING)

                        stemming_lang = self._get_stem_language(obj)
                        if stemming_lang:
                            stemmer = self.get_stemmer(stemming_lang)
                            generator.set_stemmer(stemmer)

                            stopper = self.get_stopper(stemming_lang)
                            if stopper:
                                generator.set_stopper(stopper)

                        # Get a weight for the object
                        obj_weight = self._get_object_weight(obj)
                        # Index fields
                        self._do_index_fields(doc, generator, obj, obj_weight)

                        # Replaces the document carrying this UID term
                        database.replace_document(uid, doc)
                        if after_index:
                            after_index(obj)

                        commiter.commit_object()
                    except Exception:
                        commiter.cancel_object()
                        raise

                commiter.commit_page()
            except Exception:
                commiter.cancel_page()
                raise