def test_bulk_delete(self):
    """Delete one page of search hits and verify the hit count drops by a page.

    Creates ``self.num_entries`` entries for a single author, indexes them,
    then deletes every object found on the second result page while driving
    the commiter hooks by hand.
    """
    author = Person.objects.create(name="Alex")

    for number in range(self.num_entries):
        Entry.objects.create(
            author=author,
            title="Entry with number %s" % number,
            text="foobar " * number,
        )

    Entry.indexer.update()

    hits = Entry.indexer.search("title:number")
    database = Entry.indexer._db.open(write=True)

    # NOTE(review): Commiter.create(False) presumably selects the page-level
    # commit strategy -- confirm against the Commiter implementation.
    commiter_class = Commiter.create(False)
    commiter = commiter_class(
        database.begin_transaction,
        database.commit_transaction,
        database.cancel_transaction,
    )

    count_before = hits.count()
    paginator = Paginator(hits, self.per_page)
    self.assertEqual(paginator.num_pages, self.num_pages)

    second_page = paginator.page(2)

    commiter.begin_page()
    try:
        for hit in second_page.object_list:
            commiter.begin_object()
            try:
                Entry.indexer.delete(hit.pk, database=database)
                commiter.commit_object()
            except Exception:
                commiter.cancel_object()
                raise
        commiter.commit_page()
    except Exception:
        commiter.cancel_page()
        raise

    # No explicit database commit/flush is issued before searching again.
    count_after = Entry.indexer.search("title:number").count()
    self.assertEqual(count_after, count_before - self.per_page)
def test_concrete_commiter_object(self):
    """Check the hooks of the commiter built by ``Commiter.create(True)``.

    The page-level hooks must return ``None``; the object-level hooks must
    return whatever the wrapped begin/commit/rollback callables return.
    """
    commiter = Commiter.create(True)(
        lambda: 'begin',
        lambda: 'commit',
        lambda: 'rollback',
    )

    # (hook name, expected return value), exercised in this exact order.
    expectations = (
        ('begin_page', None),
        ('commit_page', None),
        ('cancel_page', None),
        ('begin_object', 'begin'),
        ('commit_object', 'commit'),
        ('cancel_object', 'rollback'),
    )

    for hook_name, expected in expectations:
        hook = getattr(commiter, hook_name)
        self.assertEqual(hook(), expected)
def update_changes(verbose, timeout, once, per_page, commit_each, app_models=None):
    """Apply pending ``Change`` rows to the indexes, looping until told to stop.

    Each iteration re-indexes objects recorded as 'add'/'edit', removes
    objects recorded as 'delete' from the indexes, commits the database
    transaction, and then either stops (``once``) or sleeps ``timeout``
    seconds before starting over.
    """
    indexed = [0]  # one-element list so the nested callbacks can mutate it

    def reset_counter():
        indexed[0] = 0

    def after_index(obj):
        indexed[0] += 1
        if not verbose:
            return
        sys.stdout.write('.')
        sys.stdout.flush()

    commiter_class = Commiter.create(commit_each)
    commiter = commiter_class(lambda: None, transaction.commit, transaction.rollback)

    while True:
        pending = Change.objects.count()
        if verbose and pending > 0:
            print('There are %d objects to update' % pending)

        for ct in get_content_types(app_models, 'add', 'edit'):
            indexers = get_indexers(ct)

            # NOTE(review): the original remark said the objects must be
            # sorted by date; the query itself orders by object_id.
            changes = (
                Change.objects.filter(content_type=ct, action__in=('add', 'edit'))
                .select_related('content_type')
                .order_by('object_id')
            )

            for page in paginate(changes, per_page):
                commiter.begin_page()

                try:
                    for indexer in indexers:
                        indexer.update(
                            ct.model_class()._default_manager.filter(
                                pk__in=[c.object_id for c in page.object_list]
                            ).order_by('pk'),
                            after_index,
                            per_page,
                            commit_each,
                        )

                    for processed in page.object_list:
                        processed.delete()

                    commiter.commit_page()
                except Exception:
                    if not commit_each:
                        commiter.cancel_page()
                    else:
                        # Drop the changes counted as indexed before the
                        # failure, committing after each removal.
                        for processed in page.object_list[:indexed[0]]:
                            processed.delete()
                            commiter.commit_object()
                    raise

                reset_counter()

        for ct in get_content_types(app_models, 'delete'):
            indexers = get_indexers(ct)

            for removal in Change.objects.filter(content_type=ct, action='delete'):
                for indexer in indexers:
                    indexer.delete(removal.object_id)
                removal.delete()

        # If using transactions and running Djapian as a daemon, transactions
        # need to be committed on each iteration, otherwise Djapian will not
        # catch changes. We also need to use the commit_manually decorator.
        #
        # Background information:
        #
        # Autocommit is turned off by default according to PEP 249.
        # PEP 249 states "Database modules that do not support transactions
        # should implement this method with void functionality".
        # Consistent Nonlocking Reads (InnoDB):
        # http://dev.mysql.com/doc/refman/5.0/en/innodb-consistent-read-example.html
        transaction.commit()

        if once:
            break

        time.sleep(timeout)
def update(self, documents=None, after_index=None, per_page=10000, commit_each=False):
    """
    Update the database with the documents.

    There are some default value and terms in a document:
     * Values:
       1. Used to store the ID of the document
       2. Store the model of the object (in the string format, like
          "project.app.model")
       3. Store the indexer descriptor (module path)
       4..10. Free

     * Terms
       UID: Used to store the ID of the document, so we can replace
            the document by the ID

    :param documents: iterable/queryset of objects to index; when ``None``
        every object of ``self._model`` is indexed.
    :param after_index: optional callable invoked with each object after
        its document has been written.
    :param per_page: page size handed to ``paginate``.
    :param commit_each: forwarded to ``Commiter.create`` to pick the commit
        strategy.
    :raises Exception: any error raised while indexing is re-raised after
        the matching ``cancel_object``/``cancel_page`` hook has run.
    """
    # Open Xapian Database
    database = self._db.open(write=True)

    # Without an explicit queue, index every object of the model
    if documents is None:
        update_queue = self._model.objects.all()
    else:
        update_queue = documents

    commiter = Commiter.create(commit_each)(
        lambda: database.begin_transaction(flush=True),
        database.commit_transaction,
        database.cancel_transaction
    )

    # Get each document received
    for page in paginate(update_queue, per_page):
        try:
            commiter.begin_page()

            for obj in page.object_list:
                commiter.begin_object()

                try:
                    if not self.trigger(obj):
                        self.delete(obj.pk, database)
                        # Close the per-object step before skipping ahead;
                        # otherwise the next begin_object() would start a
                        # new transaction while this one is still open.
                        commiter.commit_object()
                        continue

                    doc = xapian.Document()

                    # Add default terms and values
                    uid = self._create_uid(obj)
                    doc.add_term(uid)
                    self._insert_meta_values(doc, obj)

                    generator = xapian.TermGenerator()
                    generator.set_database(database)
                    generator.set_document(doc)
                    generator.set_flags(xapian.TermGenerator.FLAG_SPELLING)

                    stem_lang = self._get_stem_language(obj)
                    if stem_lang:
                        generator.set_stemmer(xapian.Stem(stem_lang))

                    for field in self.fields + self.tags:
                        # Trying to resolve field value or skip it
                        try:
                            value = field.resolve(obj)
                        except AttributeError:
                            continue

                        if field.prefix:
                            index_value = field.convert(value, self._model)
                            if index_value is not None:
                                doc.add_value(field.number, smart_unicode(index_value))

                        prefix = smart_unicode(field.get_tag())
                        generator.index_text(smart_unicode(value), field.weight, prefix)
                        if prefix:  # if prefixed then also index without prefix
                            generator.index_text(smart_unicode(value), field.weight)

                    database.replace_document(uid, doc)
                    if after_index:
                        after_index(obj)

                    commiter.commit_object()
                except Exception:
                    commiter.cancel_object()
                    raise

            commiter.commit_page()
        except Exception:
            commiter.cancel_page()
            raise

    database.flush()
def update_changes(verbose, timeout, once, per_page, commit_each, app_models=None):
    """Index pending ``Change`` rows and remove them, repeating until stopped.

    :param verbose: when true, print the pending count and one dot per
        indexed object.
    :param timeout: seconds to sleep between iterations.
    :param once: when true, return after the first iteration.
    :param per_page: page size used for ``paginate`` and forwarded to each
        indexer's ``update``.
    :param commit_each: forwarded to ``Commiter.create`` and to each
        indexer's ``update``.
    :param app_models: forwarded to ``get_content_types``.
    :raises Exception: any indexing error is re-raised after the page has
        been cancelled or partially committed.
    """
    # One-element list so the nested callbacks can mutate the count.
    counter = [0]

    def reset_counter():
        counter[0] = 0

    def after_index(obj):
        counter[0] += 1

        if verbose:
            sys.stdout.write(".")
            sys.stdout.flush()

    commiter = Commiter.create(commit_each)(lambda: None, transaction.commit, transaction.rollback)

    while True:
        count = Change.objects.count()
        if count > 0 and verbose:
            # Call form with a single argument prints the same text on
            # Python 2 and Python 3.
            print("There are %d objects to update" % count)

        for ct in get_content_types(app_models, "add", "edit"):
            indexers = get_indexers(ct)

            for page in paginate(
                Change.objects.filter(content_type=ct, action__in=("add", "edit"))
                .select_related("content_type")
                .order_by("object_id"),
                per_page,
            ):
                commiter.begin_page()

                try:
                    for indexer in indexers:
                        indexer.update(
                            ct.model_class()
                            ._default_manager.filter(pk__in=[c.object_id for c in page.object_list])
                            .order_by("pk"),
                            after_index,
                            per_page,
                            commit_each,
                        )

                    for change in page.object_list:
                        change.delete()

                    commiter.commit_page()
                except Exception:
                    if commit_each:
                        # Drop the changes counted as indexed before the
                        # failure, committing after each removal.
                        for change in page.object_list[: counter[0]]:
                            change.delete()
                            commiter.commit_object()
                    else:
                        commiter.cancel_page()
                    raise

                reset_counter()

        for ct in get_content_types(app_models, "delete"):
            indexers = get_indexers(ct)

            for change in Change.objects.filter(content_type=ct, action="delete"):
                for indexer in indexers:
                    indexer.delete(change.object_id)
                # Remove the change exactly once, after every indexer has
                # handled it (also when there are no indexers at all).
                change.delete()

        # If using transactions and running Djapian as a daemon, transactions
        # need to be committed on each iteration, otherwise Djapian will not
        # catch changes. We also need to use the commit_manually decorator.
        #
        # Background information:
        #
        # Autocommit is turned off by default according to PEP 249.
        # PEP 249 states "Database modules that do not support transactions
        # should implement this method with void functionality".
        # Consistent Nonlocking Reads (InnoDB):
        # http://dev.mysql.com/doc/refman/5.0/en/innodb-consistent-read-example.html
        transaction.commit()

        if once:
            break

        time.sleep(timeout)
def update(self, documents=None, after_index=None, per_page=10000, commit_each=False):
    """
    Update the database with the documents.

    There are some default value and terms in a document:
     * Values:
       1. Used to store the ID of the document
       2. Store the model of the object (in the string format, like
          "project.app.model")
       3. Store the indexer descriptor (module path)
       4..10. Free

     * Terms
       UID: Used to store the ID of the document, so we can replace
            the document by the ID

    :param documents: iterable/queryset of objects to index; when ``None``
        every object of ``self._model`` is indexed.
    :param after_index: optional callable invoked with each object after
        its document has been written.
    :param per_page: page size handed to ``paginate``.
    :param commit_each: forwarded to ``Commiter.create`` to pick the commit
        strategy.
    :raises Exception: any error raised while indexing is re-raised after
        the matching ``cancel_object``/``cancel_page`` hook has run.
    """
    # Open Xapian Database
    database = self._db.open(write=True)

    # Without an explicit queue, index every object of the model
    if documents is None:
        update_queue = self._model.objects.all()
    else:
        update_queue = documents

    commiter = Commiter.create(commit_each)(
        lambda: database.begin_transaction(flush=True),
        database.commit_transaction,
        database.cancel_transaction
    )

    # Get each document received
    for page in paginate(update_queue, per_page):
        try:
            commiter.begin_page()

            for obj in page.object_list:
                commiter.begin_object()

                try:
                    if not self.trigger(obj):
                        self.delete(obj.pk, database)
                        # Close the per-object step before skipping ahead;
                        # otherwise the next begin_object() would start a
                        # new transaction while this one is still open.
                        commiter.commit_object()
                        continue

                    doc = xapian.Document()

                    # Add default terms and values
                    uid = self._create_uid(obj)
                    doc.add_term(uid)
                    self._insert_meta_values(doc, obj)

                    generator = xapian.TermGenerator()
                    generator.set_database(database)
                    generator.set_document(doc)
                    generator.set_flags(xapian.TermGenerator.FLAG_SPELLING)

                    stemming_lang = self._get_stem_language(obj)
                    if stemming_lang:
                        stemmer = self.get_stemmer(stemming_lang)
                        generator.set_stemmer(stemmer)

                        stopper = self.get_stopper(stemming_lang)
                        if stopper:
                            generator.set_stopper(stopper)

                    # Get a weight for the object
                    obj_weight = self._get_object_weight(obj)
                    # Index fields
                    self._do_index_fields(doc, generator, obj, obj_weight)

                    database.replace_document(uid, doc)
                    if after_index:
                        after_index(obj)

                    commiter.commit_object()
                except Exception:
                    commiter.cancel_object()
                    raise

            commiter.commit_page()
        except Exception:
            commiter.cancel_page()
            raise
def update(self, documents=None, after_index=None, per_page=10000, commit_each=False):
    """
    Update the database with the documents.

    There are some default value and terms in a document:
     * Values:
       1. Used to store the ID of the document
       2. Store the model of the object (in the string format, like
          "project.app.model")
       3. Store the indexer descriptor (module path)
       4..10. Free

     * Terms
       UID: Used to store the ID of the document, so we can replace
            the document by the ID

    :param documents: iterable/queryset of objects to index; when ``None``
        every object of ``self._model`` is indexed.
    :param after_index: optional callable invoked with each object after
        its document has been written.
    :param per_page: page size handed to ``paginate``.
    :param commit_each: forwarded to ``Commiter.create`` to pick the commit
        strategy.
    :raises Exception: any error raised while indexing is re-raised after
        the matching ``cancel_object``/``cancel_page`` hook has run.
    """
    # Open Xapian Database
    database = self._db.open(write=True)

    # Without an explicit queue, index every object of the model
    if documents is None:
        update_queue = self._model.objects.all()
    else:
        update_queue = documents

    # begin_transaction is passed as-is, i.e. called with its defaults.
    commiter = Commiter.create(commit_each)(
        database.begin_transaction,
        database.commit_transaction,
        database.cancel_transaction)

    # Get each document received
    for page in paginate(update_queue, per_page):
        try:
            commiter.begin_page()

            for obj in page.object_list:
                commiter.begin_object()

                try:
                    if not self.trigger(obj):
                        self.delete(obj.pk, database)
                        # Close the per-object step before skipping ahead;
                        # otherwise the next begin_object() would start a
                        # new transaction while this one is still open.
                        commiter.commit_object()
                        continue

                    doc = xapian.Document()

                    # Add default terms and values
                    uid = self._create_uid(obj)
                    doc.add_term(uid)
                    self._insert_meta_values(doc, obj)

                    generator = xapian.TermGenerator()
                    generator.set_database(database)
                    generator.set_document(doc)
                    generator.set_flags(xapian.TermGenerator.FLAG_SPELLING)

                    stemming_lang = self._get_stem_language(obj)
                    if stemming_lang:
                        stemmer = self.get_stemmer(stemming_lang)
                        generator.set_stemmer(stemmer)

                        stopper = self.get_stopper(stemming_lang)
                        if stopper:
                            generator.set_stopper(stopper)

                    # Get a weight for the object
                    obj_weight = self._get_object_weight(obj)
                    # Index fields
                    self._do_index_fields(doc, generator, obj, obj_weight)

                    database.replace_document(uid, doc)
                    if after_index:
                        after_index(obj)

                    commiter.commit_object()
                except Exception:
                    commiter.cancel_object()
                    raise

            commiter.commit_page()
        except Exception:
            commiter.cancel_page()
            raise

    database.flush()
def update_changes(verbose, timeout, once, per_page, commit_each):
    """Index pending ``Change`` rows and remove them, repeating until stopped.

    :param verbose: when true, print the pending count and one dot per
        indexed object.
    :param timeout: seconds to sleep between iterations.
    :param once: when true, return after the first iteration.
    :param per_page: page size used for ``paginate`` and forwarded to each
        indexer's ``update``.
    :param commit_each: forwarded to ``Commiter.create`` and to each
        indexer's ``update``.
    :raises Exception: any indexing error is re-raised after the page has
        been cancelled or partially committed.
    """
    # One-element list so the nested callbacks can mutate the count.
    counter = [0]

    def reset_counter():
        # Must store an int: storing a list here would make the following
        # ``counter[0] += 1`` in after_index raise TypeError.
        counter[0] = 0

    def after_index(obj):
        counter[0] += 1

        if verbose:
            sys.stdout.write('.')
            sys.stdout.flush()

    commiter = Commiter.create(commit_each)(
        lambda: None,
        transaction.commit,
        transaction.rollback
    )

    while True:
        count = Change.objects.count()
        if count > 0 and verbose:
            # Call form with a single argument prints the same text on
            # Python 2 and Python 3.
            print('There are %d objects to update' % count)

        for ct in get_content_types('add', 'update'):
            indexers = get_indexers(ct)

            for page in paginate(
                Change.objects.filter(content_type=ct, action__in=('add', 'update'))
                .select_related('content_type')
                .order_by('object_id'),
                per_page
            ):
                commiter.begin_page()

                try:
                    for indexer in indexers:
                        indexer.update(
                            ct.model_class()._default_manager.filter(
                                pk__in=[c.object_id for c in page.object_list]
                            ).order_by('pk'),
                            after_index,
                            per_page,
                            commit_each
                        )

                    for change in page.object_list:
                        change.delete()

                    commiter.commit_page()
                except Exception:
                    if commit_each:
                        # Drop the changes counted as indexed before the
                        # failure, committing after each removal.
                        for change in page.object_list[:counter[0]]:
                            change.delete()
                            commiter.commit_object()
                    else:
                        commiter.cancel_page()
                    raise

                reset_counter()

        for ct in get_content_types('delete'):
            indexers = get_indexers(ct)

            for change in Change.objects.filter(content_type=ct, action='delete'):
                for indexer in indexers:
                    indexer.delete(change.object_id)
                # Remove the change exactly once, after every indexer has
                # handled it (also when there are no indexers at all).
                change.delete()

        if once:
            break

        time.sleep(timeout)
def update(self, documents=None, after_index=None, per_page=10000, commit_each=False):
    """
    Update the database with the documents.

    There are some default value and terms in a document:
     * Values:
       1. Used to store the ID of the document
       2. Store the model of the object (in the string format, like
          "project.app.model")
       3. Store the indexer descriptor (module path)
       4..10. Free

     * Terms
       UID: Used to store the ID of the document, so we can replace
            the document by the ID

    :param documents: iterable/queryset of objects to index; when ``None``
        every object of ``self._model`` is indexed.
    :param after_index: optional callable invoked with each object after
        its document has been written.
    :param per_page: page size handed to ``paginate``.
    :param commit_each: forwarded to ``Commiter.create`` to pick the commit
        strategy.
    :raises Exception: any error raised while indexing is re-raised after
        the matching ``cancel_object``/``cancel_page`` hook has run.
    """
    # Open Xapian Database
    database = self._db.open(write=True)

    # Without an explicit queue, index every object of the model
    if documents is None:
        update_queue = self._model.objects.all()
    else:
        update_queue = documents

    commiter = Commiter.create(commit_each)(
        lambda: database.begin_transaction(flush=True),
        database.commit_transaction,
        database.cancel_transaction)

    # Get each document received
    for page in paginate(update_queue, per_page):
        try:
            commiter.begin_page()

            for obj in page.object_list:
                commiter.begin_object()

                try:
                    if not self.trigger(obj):
                        self.delete(obj.pk, database)
                        # Close the per-object step before skipping ahead;
                        # otherwise the next begin_object() would start a
                        # new transaction while this one is still open.
                        commiter.commit_object()
                        continue

                    doc = xapian.Document()

                    # Add default terms and values
                    uid = self._create_uid(obj)
                    doc.add_term(uid)
                    self._insert_meta_values(doc, obj)

                    generator = xapian.TermGenerator()
                    generator.set_database(database)
                    generator.set_document(doc)
                    generator.set_flags(xapian.TermGenerator.FLAG_SPELLING)

                    stemming_lang = self._get_stem_language(obj)
                    if stemming_lang:
                        stemmer = self.get_stemmer(stemming_lang)
                        generator.set_stemmer(stemmer)

                        stopper = self.get_stopper(stemming_lang)
                        if stopper:
                            generator.set_stopper(stopper)

                    # Get a weight for the object
                    obj_weight = self._get_object_weight(obj)
                    # Index fields
                    self._do_index_fields(doc, generator, obj, obj_weight)

                    database.replace_document(uid, doc)
                    if after_index:
                        after_index(obj)

                    commiter.commit_object()
                except Exception:
                    commiter.cancel_object()
                    raise

            commiter.commit_page()
        except Exception:
            commiter.cancel_page()
            raise