Example #1
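    # Assumed module-level imports, inferred from usage below; exact paths
    # vary by Django/Haystack version:
    #     from django import db
    #     from django.db import transaction
    #     from django.utils.encoding import force_text, smart_bytes
    #     from haystack import connections as haystack_connections
    #     from haystack.utils.app_loading import get_models
    # server_side_cursors, worker, do_update_batch and do_remove are
    # helpers defined elsewhere in the same module.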
    def update_backend(self, label, using):
        from haystack.exceptions import NotHandled

        backend = haystack_connections[using].get_backend()
        unified_index = haystack_connections[using].get_unified_index()

        if self.workers > 0:
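            # Deferred import: multiprocessing is only pulled in when
            # parallel workers were actually requested.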
            import multiprocessing

        for model in get_models(label):
            try:
                index = unified_index.get_index(model)
            except NotHandled:
                if self.verbosity >= 2:
                    print("Skipping '%s' - no index." % model)
                continue

            if self.workers > 0:
                # Worker processes that inherit the parent's open DB
                # connection end up sharing it, leaving stale references and
                # connections that get dropped out from under them. Closing
                # the connection here forces a fresh one the next time this
                # loop touches the ORM. (On Django >= 1.8 the equivalent is
                # db.connections.close_all().)
                db.close_connection()

            qs = index.build_queryset(using=using, start_date=self.start_date,
                                      end_date=self.end_date)

            total = qs.count()

            if self.verbosity >= 1:
                print(u"Indexing %d %s" % (total, force_text(model._meta.verbose_name_plural)))

            batch_size = self.batchsize or backend.batch_size

            if self.workers > 0:
                # multi-worker, many-queries
                ghetto_queue = []
                for start in range(0, total, batch_size):
                    end = min(start + batch_size, total)
                    ghetto_queue.append(('do_update', model, start, end, total, using, self.start_date, self.end_date, self.verbosity))
                pool = multiprocessing.Pool(self.workers)
                pool.map(worker, ghetto_queue)
                pool.terminate()
            else:
                # single-query, still-batched
                start = 0
                with server_side_cursors():
                    with transaction.atomic():
                        items = qs.iterator()  # stream rows lazily instead of caching the whole result set
                        while True:
                            added = do_update_batch(backend, index, items, start, batch_size, total, self.verbosity)
                            if added > 0:
                                start += added
                                continue
                            break

            if self.remove:
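                # Removal pass: drop index documents whose source rows no
                # longer exist in the database.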
                if self.start_date or self.end_date or total <= 0:
                    # They're using a reduced set, which may not incorporate
                    # all pks. Rebuild the list with everything.
                    qs = index.index_queryset().values_list('pk', flat=True)
                    pks_seen = set(smart_bytes(pk) for pk in qs)

                    total = len(pks_seen)
                else:
                    pks_seen = set(smart_bytes(pk) for pk in qs.values_list('pk', flat=True))

                if self.workers > 0:
                    ghetto_queue = []

                for start in range(0, total, batch_size):
                    upper_bound = start + batch_size

                    if self.workers == 0:
                        do_remove(backend, index, model, pks_seen, start, upper_bound)
                    else:
                        ghetto_queue.append(('do_remove', model, pks_seen, start, upper_bound, using, self.verbosity))

                if self.workers > 0:
                    pool = multiprocessing.Pool(self.workers)
                    pool.map(worker, ghetto_queue)
                    pool.terminate()
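
The helpers referenced above (worker, do_update_batch, do_remove) are defined elsewhere in the module. Below is a minimal sketch of plausible implementations, with signatures inferred purely from the call sites; treat it as an illustration, not the module's actual code:

from itertools import islice

from django.db import connections as db_connections
from django.utils.encoding import smart_bytes
from haystack import connections as haystack_connections
from haystack.query import SearchQuerySet


def do_update_batch(backend, index, items, start, batch_size, total, verbosity=1):
    # Consume up to batch_size objects from the shared iterator and send
    # them to the search backend; return how many were consumed so the
    # caller can tell when the iterator is exhausted.
    batch = list(islice(items, batch_size))
    if batch:
        if verbosity >= 2:
            print("  indexed %d - %d of %d." % (start + 1, start + len(batch), total))
        backend.update(index, batch)
    return len(batch)


def do_remove(backend, index, model, pks_seen, start, upper_bound, verbosity=1):
    # Walk a slice of what the search index currently holds and delete
    # any document whose pk no longer appears in the database.
    in_index = SearchQuerySet(using=backend.connection_alias).models(model)[start:upper_bound]
    for result in in_index:
        if smart_bytes(result.pk) not in pks_seen:
            if verbosity >= 2:
                print("  removing %s." % result.pk)
            backend.remove(".".join([result.app_label, result.model_name, str(result.pk)]))


def worker(bits):
    # Pool.map entry point: the first element of each tuple selects the
    # action. Child processes must not reuse the parent's DB connection,
    # so drop it before touching the ORM.
    db_connections.close_all()

    if bits[0] == 'do_update':
        _, model, start, end, total, using, start_date, end_date, verbosity = bits
        backend = haystack_connections[using].get_backend()
        index = haystack_connections[using].get_unified_index().get_index(model)
        qs = index.build_queryset(using=using, start_date=start_date, end_date=end_date)
        do_update_batch(backend, index, iter(qs[start:end]), start, end - start, total, verbosity)
    elif bits[0] == 'do_remove':
        _, model, pks_seen, start, upper_bound, using, verbosity = bits
        backend = haystack_connections[using].get_backend()
        index = haystack_connections[using].get_unified_index().get_index(model)
        do_remove(backend, index, model, pks_seen, start, upper_bound, verbosity)

Note that worker re-establishes database connections inside the child process before touching the ORM, for the same reason update_backend closes the connection before forking.
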
Example #2
    def test_simple_02(self):
        # Iterating all 200 rows through a server-side cursor must still
        # cost exactly one query.
        with self.assertNumQueries(1):
            with server_side_cursors():
                self.assertEqual(len([x for x in TestModel.objects.all()]), 200)
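
server_side_cursors itself is not shown in either example. One plausible implementation for PostgreSQL, assuming psycopg2 and a Django version whose backend exposes the create_cursor(name=None) hook: a context manager that temporarily routes cursor creation through psycopg2 named (server-side) cursors, so results stream from the server in chunks instead of being fetched into memory all at once. This is an illustrative assumption, not the project's actual code:

import uuid
from contextlib import contextmanager

from django.db import connections


@contextmanager
def server_side_cursors(using='default', itersize=2000):
    # Swap the backend's cursor factory for one that creates psycopg2
    # named (server-side) cursors, then restore it on exit.
    wrapper = connections[using]
    wrapper.ensure_connection()

    def create_named_cursor(name=None):
        # Giving psycopg2 a cursor name makes it DECLARE a server-side
        # cursor; each one needs a unique name. (The real backend also
        # sets tzinfo_factory here; omitted for brevity.)
        cursor = wrapper.connection.cursor(name='ssc_%s' % uuid.uuid4().hex)
        cursor.itersize = itersize  # rows fetched per network round trip
        return cursor

    wrapper.create_cursor = create_named_cursor
    try:
        yield
    finally:
        del wrapper.create_cursor  # fall back to the class's create_cursor

PostgreSQL only allows DECLARE CURSOR inside a transaction block (unless declared WITH HOLD), which is why Example #1 pairs server_side_cursors() with transaction.atomic(); named cursors also only serve reads, so writes belong outside the block.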