Python haystack_get_modelsの例、haystack.utils.app_loading.haystack_get_models Pythonの例

コード例 #1

0

ファイルを表示

ファイル: test_app_loading.py プロジェクト: makkkii/django-haystack-2.8.1

 def test_hierarchal_app_specific_model(self):
     """A dotted ``app.Model`` label must resolve to exactly that one model."""
     found = app_loading.haystack_get_models(
         'hierarchal_app_django.HierarchalAppModel')
     self.assertIsInstance(found, (list, GeneratorType))

     # Compare by ``_meta`` label so the model class need not be imported.
     labels = {str(model._meta) for model in found}
     expected = {'hierarchal_app_django.hierarchalappmodel'}
     self.assertSetEqual(labels, expected)

コード例 #2

0

ファイルを表示

ファイル: core_find_stale_elasticsearch_documents.py プロジェクト: mysociety/peoples-assembly

def get_all_indexed_models():
    """Collect every model that has a search index on a configured backend.

    Walks each connection key in ``haystack_connections.connections_info``
    and each app returned by ``haystack_load_apps()``, keeping the models for
    which that connection's unified index has an index registered.

    Returns:
        dict: maps ``"<module>.<ModelName>"`` to a dict with the keys
        ``backend_key``, ``backend``, ``app``, ``model`` and ``index``.
        If the same model is indexed on several backends, the entry for the
        backend visited last overwrites the earlier ones.
    """
    available_models = {}

    for backend_key in haystack_connections.connections_info:
        # Look the connection up once and reuse it for backend and index.
        connection = haystack_connections[backend_key]
        backend = connection.get_backend()
        unified_index = connection.get_unified_index()

        for app in haystack_load_apps():
            for model in haystack_get_models(app):
                try:
                    index = unified_index.get_index(model)
                except NotHandled:
                    # This backend has no index for the model; skip it.
                    continue

                model_name = model.__module__ + '.' + model.__name__
                available_models[model_name] = {
                    'backend_key': backend_key,
                    'backend': backend,
                    'app': app,
                    'model': model,
                    'index': index,
                }

    return available_models

コード例 #3

0

ファイルを表示

ファイル: test_app_loading.py プロジェクト: mahajandiwakar/django-haystack

 def test_hierarchal_app_get_models(self):
     """An app label alone must yield every model of the hierarchal app."""
     found = app_loading.haystack_get_models("hierarchal_app_django")
     self.assertIsInstance(found, (list, GeneratorType))

     labels = {str(model._meta) for model in found}
     expected = {
         "hierarchal_app_django.hierarchalappsecondmodel",
         "hierarchal_app_django.hierarchalappmodel",
     }
     self.assertSetEqual(labels, expected)

コード例 #4

0

ファイルを表示

 def test_hierarchal_app_get_models(self):
     """Both models of ``hierarchal_app_django`` are returned for its label."""
     accepted_types = (list, GeneratorType)
     result = app_loading.haystack_get_models("hierarchal_app_django")
     self.assertIsInstance(result, accepted_types)

     # Order is irrelevant, so compare the ``_meta`` labels as sets.
     self.assertSetEqual(
         {str(entry._meta) for entry in result},
         {
             "hierarchal_app_django.hierarchalappsecondmodel",
             "hierarchal_app_django.hierarchalappmodel",
         },
     )

コード例 #5

0

ファイルを表示

ファイル: update_chembl_index.py プロジェクト: stjordanis/chembl_webservices_2

    def update_backend(self, label, using):
        """Index every handled model of ``label`` on the ``using`` connection.

        For each model resolved from ``label`` that has an index registered in
        the connection's unified index, rows are fetched in pk order in chunks
        of ``batch_size`` (keyset pagination on ``pk``) and handed to
        ``do_update``.  A progress bar tracks the number of rows processed.

        Args:
            label: app or ``app.Model`` label passed to ``haystack_get_models``.
            using: key of the haystack connection to update.
        """
        backend = haystack_connections[using].get_backend()
        unified_index = haystack_connections[using].get_unified_index()

        for model in haystack_get_models(label):
            try:
                index = unified_index.get_index(model)
            except NotHandled:
                # No index for this model on this connection.
                continue

            qs = index.build_queryset(using=using,
                                      start_date=self.start_date,
                                      end_date=self.end_date)

            total = qs.count()

            if self.verbosity >= 1:
                self.stdout.write(
                    u"Indexing %d %s" %
                    (total, plural(force_text(model._meta.verbose_name))))

            batch_size = self.batchsize or backend.batch_size

            pbar = ProgressBar(widgets=[
                '{0}: '.format(model._meta.verbose_name),
                Percentage(), ' (',
                Counter(), ') ',
                Bar(marker=RotatingMarker()), ' ',
                ETA()
            ],
                               maxval=total).start()

            last_pk = None
            for start in range(0, total, batch_size):
                pbar.update(start)
                if last_pk is None:
                    # First batch: start AT the first pk (inclusive).  Using
                    # ``pk__gt`` here would silently skip the first row.
                    first_pk = qs.only('pk').values_list('pk')[0][0]
                    pk_filter = {'pk__gte': first_pk}
                else:
                    # Later batches continue strictly after the last row seen.
                    pk_filter = {'pk__gt': last_pk}
                original_data = model.objects.using(using).filter(
                    **pk_filter).prefetch_related(
                        *index.get_prefetch()).order_by('pk')[:batch_size]
                actual_size = len(original_data)
                if not actual_size:
                    # Ran out of rows; indexing with -1 below would fail
                    # because querysets do not support negative indexes.
                    break
                last_pk = original_data[actual_size - 1].pk
                do_update(backend,
                          index,
                          original_data,
                          commit=self.commit,
                          max_retries=self.max_retries)

            pbar.update(total)
            pbar.finish()

コード例 #6

0

ファイルを表示

    def get_ultimas_publicacoes_uma_por_tipo__nao_usada(self):
        """Return one search result per entry of ``model_choices()``.

        For each choice, searches the models resolved from its first element,
        filtered by ``at=0`` and ordered by ``-data, -last_update``.  The first
        of the top five hits is kept; choices without hits contribute nothing.
        """
        latest = []

        for choice in model_choices():
            hits = (
                SearchQuerySet()
                .all()
                .filter(at=0)
                .models(*haystack_get_models(choice[0]))
                .order_by('-data', '-last_update')[:5]
            )
            if len(hits) > 0:
                latest.append(hits[0])

        return latest

コード例 #7

0

ファイルを表示

    def get_ultimas_publicacoes(self):
        """Return up to 20 recent ``documentoadministrativo`` search results.

        Searches entries with ``at=0`` or with ``at`` among the ids returned by
        ``AreaTrabalho.objects.areatrabalho_publica()``, ordered by
        ``-data, -last_update`` and capped at 100 hits.  Only hits whose object
        has an existing ``_certidao`` are kept, stopping at 20.
        """
        base = SearchQuerySet().all()
        public_area_ids = AreaTrabalho.objects.areatrabalho_publica().values_list(
            'id', flat=True)
        visible = Q(at=0) | Q(at__in=public_area_ids)

        hits = base.filter(visible)
        hits = hits.models(
            *haystack_get_models('protocoloadm.documentoadministrativo'))
        hits = hits.order_by('-data', '-last_update')[:100]

        selected = []
        for hit in hits:
            # Skip hits without a loaded object or without a certidao relation.
            if not (hit.object and hit.object._certidao):
                continue
            if not hit.object._certidao.exists():
                continue

            selected.append(hit)
            if len(selected) == 20:
                break

        return selected

コード例 #8

0

ファイルを表示

    def test_get_models_specific(self):
        """A dotted ``app.Model`` label resolves to a list holding that model."""
        from test_haystack.core.models import MockModel

        resolved = app_loading.haystack_get_models("core.MockModel")
        self.assertIsInstance(resolved, (list, GeneratorType))

        expected = [MockModel]
        self.assertListEqual(resolved, expected)

コード例 #9

0

ファイルを表示

 def test_get_models_all(self):
     """Loading by bare app label returns a list or a generator."""
     result = app_loading.haystack_get_models("core")
     accepted_types = (list, GeneratorType)
     self.assertIsInstance(result, accepted_types)

コード例 #10

0

ファイルを表示

ファイル: update_index.py プロジェクト: AlliedSecurityTrust/mc_django-haystack

    def update_backend(self, label, using):
        """Update the search index for ``label`` on the ``using`` connection.

        For each model returned by ``haystack_get_models(label)`` that has an
        index in the connection's unified index:

        1. Build the queryset (bounded by ``self.start_date``/``self.end_date``)
           and index it in ``batch_size`` slices, either inline via
           ``do_update`` (``self.workers == 0``) or by mapping ``update_worker``
           over a ``multiprocessing.Pool``.
        2. If ``self.remove`` is set, walk the search index in batches and
           remove every record whose pk is not in the database pk set.

        Args:
            label: app or ``app.Model`` label passed to ``haystack_get_models``.
            using: key of the haystack connection to update.
        """
        backend = haystack_connections[using].get_backend()
        unified_index = haystack_connections[using].get_unified_index()

        for model in haystack_get_models(label):
            try:
                index = unified_index.get_index(model)
            except NotHandled:
                if self.verbosity >= 2:
                    self.stdout.write("Skipping '%s' - no index." % model)
                continue

            if self.workers > 0:
                # workers resetting connections leads to references to models / connections getting
                # stale and having their connection disconnected from under them. Resetting before
                # the loop continues and it accesses the ORM makes it better.
                close_old_connections()

            qs = index.build_queryset(using=using, start_date=self.start_date,
                                      end_date=self.end_date)

            total = qs.count()

            if self.verbosity >= 1:
                self.stdout.write(u"Indexing %d %s" % (
                    total, force_text(model._meta.verbose_name_plural))
                )

            # An explicit --batch-size style option wins over the backend default.
            batch_size = self.batchsize or backend.batch_size

            if self.workers > 0:
                # Argument tuples for update_worker, one per batch.
                ghetto_queue = []

            for start in range(0, total, batch_size):
                end = min(start + batch_size, total)

                if self.workers == 0:
                    do_update(backend, index, qs, start, end, total, verbosity=self.verbosity,
                              commit=self.commit, max_retries=self.max_retries)
                else:
                    ghetto_queue.append((model, start, end, total, using, self.start_date, self.end_date,
                                         self.verbosity, self.commit, self.max_retries))

            if self.workers > 0:
                pool = multiprocessing.Pool(self.workers)

                successful_tasks = pool.map(update_worker, ghetto_queue)

                # Report a mismatch between queued and returned task counts.
                if len(ghetto_queue) != len(successful_tasks):
                    self.stderr.write('Queued %d tasks but only %d completed' % (len(ghetto_queue),
                                                                                 len(successful_tasks)))
                    for i in ghetto_queue:
                        if i not in successful_tasks:
                            self.stderr.write('Incomplete task: %s' % repr(i))

                pool.close()
                pool.join()

            if self.remove:
                if self.start_date or self.end_date or total <= 0:
                    # They're using a reduced set, which may not incorporate
                    # all pks. Rebuild the list with everything.
                    qs = index.index_queryset().values_list('pk', flat=True)
                    database_pks = set(smart_bytes(pk) for pk in qs)

                    total = len(database_pks)
                else:
                    database_pks = set(smart_bytes(pk) for pk in qs.values_list('pk', flat=True))

                # Since records may still be in the search index but not the local database
                # we'll use that to create batches for processing.
                # See https://github.com/django-haystack/django-haystack/issues/1186
                index_total = SearchQuerySet(using=backend.connection_alias).models(model).count()

                # Retrieve PKs from the index. Note that this cannot be a numeric range query because although
                # pks are normally numeric they can be non-numeric UUIDs or other custom values. To reduce
                # load on the search engine, we only retrieve the pk field, which will be checked against the
                # full list obtained from the database, and the id field, which will be used to delete the
                # record should it be found to be stale.
                index_pks = SearchQuerySet(using=backend.connection_alias).models(model)
                index_pks = index_pks.values_list('pk', 'id')

                # We'll collect all of the record IDs which are no longer present in the database and delete
                # them after walking the entire index. This uses more memory than the incremental approach but
                # avoids needing the pagination logic below to account for both commit modes:
                stale_records = set()

                for start in range(0, index_total, batch_size):
                    upper_bound = start + batch_size

                    # If the database pk is no longer present, queue the index key for removal:
                    for pk, rec_id in index_pks[start:upper_bound]:
                        if smart_bytes(pk) not in database_pks:
                            stale_records.add(rec_id)

                if stale_records:
                    if self.verbosity >= 1:
                        self.stdout.write("  removing %d stale records." % len(stale_records))

                    for rec_id in stale_records:
                        # Since the PK was not in the database list, we'll delete the record from the search
                        # index:
                        if self.verbosity >= 2:
                            self.stdout.write("  removing %s." % rec_id)

                        backend.remove(rec_id, commit=self.commit)

コード例 #11

0

ファイルを表示

ファイル: update_index.py プロジェクト: phingage/django-haystack

    def update_backend(self, label, using):
        """Refresh the search index for ``label`` on the ``using`` connection.

        Each model resolved from ``label`` that has a registered index is
        indexed in ``batch_size`` slices, inline via ``do_update`` when
        ``self.workers`` is 0 or through a ``multiprocessing`` pool otherwise.
        When ``self.remove`` is set, a second pass hands the set of database
        pks to ``do_remove`` batch by batch in the same inline/pooled fashion.
        """
        from haystack.exceptions import NotHandled

        backend = haystack_connections[using].get_backend()
        unified_index = haystack_connections[using].get_unified_index()

        if self.workers > 0:
            import multiprocessing

        for model in haystack_get_models(label):
            try:
                index = unified_index.get_index(model)
            except NotHandled:
                if self.verbosity >= 2:
                    print("Skipping '%s' - no index." % model)
                continue

            if self.workers > 0:
                # Workers reset connections, which can leave this process with
                # stale ones; drop them before the ORM is touched again.
                close_old_connections()

            queryset = index.build_queryset(
                using=using,
                start_date=self.start_date,
                end_date=self.end_date,
            )
            row_count = queryset.count()

            if self.verbosity >= 1:
                plural_name = force_text(model._meta.verbose_name_plural)
                print(u"Indexing %d %s" % (row_count, plural_name))

            chunk = self.batchsize or backend.batch_size

            if self.workers > 0:
                update_jobs = []

            for lower in range(0, row_count, chunk):
                upper = min(lower + chunk, row_count)

                if self.workers != 0:
                    update_jobs.append((
                        'do_update', model, lower, upper, row_count, using,
                        self.start_date, self.end_date, self.verbosity, self.commit,
                    ))
                else:
                    do_update(backend, index, queryset, lower, upper, row_count,
                              verbosity=self.verbosity, commit=self.commit)

            if self.workers > 0:
                pool = multiprocessing.Pool(self.workers)
                pool.map(worker, update_jobs)
                pool.close()
                pool.join()

            if not self.remove:
                continue

            if self.start_date or self.end_date or row_count <= 0:
                # The indexed queryset was a reduced (or empty) set, so gather
                # the pks of everything the index could contain instead.
                all_pks = index.index_queryset().values_list('pk', flat=True)
                pks_in_db = {smart_bytes(pk) for pk in all_pks}
                row_count = len(pks_in_db)
            else:
                pks_in_db = {
                    smart_bytes(pk)
                    for pk in queryset.values_list('pk', flat=True)
                }

            if self.workers > 0:
                remove_jobs = []

            for lower in range(0, row_count, chunk):
                upper = lower + chunk

                if self.workers != 0:
                    remove_jobs.append((
                        'do_remove', model, pks_in_db, lower, upper, using,
                        self.verbosity, self.commit,
                    ))
                else:
                    do_remove(backend, index, model, pks_in_db, lower, upper,
                              verbosity=self.verbosity, commit=self.commit)

            if self.workers > 0:
                pool = multiprocessing.Pool(self.workers)
                pool.map(worker, remove_jobs)
                pool.terminate()

コード例 #12

0

ファイルを表示

ファイル: check_deal_links.py プロジェクト: Caimany/haystatck_ES1.6_v0.1

    def update_backend(self, label, using):
        """Index the models of ``label`` on ``using`` in id-based batches.

        Batches run from ``self.startid`` up to the ``id`` of the first row of
        the reversed queryset, rather than over the row count.  They are
        processed inline via ``do_update`` when ``self.workers`` is 0, and
        otherwise queued and mapped over a ``multiprocessing`` pool.

        Models without a registered index and models whose queryset is empty
        are skipped.

        Args:
            label: app or ``app.Model`` label passed to ``haystack_get_models``.
            using: key of the haystack connection to update.
        """
        from haystack.exceptions import NotHandled

        backend = haystack_connections[using].get_backend()
        unified_index = haystack_connections[using].get_unified_index()

        if self.workers > 0:
            import multiprocessing

        for model in haystack_get_models(label):
            try:
                index = unified_index.get_index(model)
            except NotHandled:
                if self.verbosity >= 2:
                    print("Skipping '%s' - no index." % model)
                continue

            if self.workers > 0:
                # workers resetting connections leads to references to models / connections getting
                # stale and having their connection disconnected from under them. Resetting before
                # the loop continues and it accesses the ORM makes it better.
                close_old_connections()

            qs = index.build_queryset(using=using, start_date=self.start_date,
                                      end_date=self.end_date)

            total = qs.count()
            if total == 0:
                # Nothing to index; ``qs.reverse()[0]`` below would raise
                # IndexError on an empty queryset.
                continue

            # The batch range is bounded by the largest id, not the row count.
            max_id = qs.reverse()[0].id
            print("id 最大为 %s" % (max_id))

            if self.verbosity >= 1:
                print(u"Indexing %d %s" % (total, force_text(model._meta.verbose_name_plural)))

            batch_size = self.batchsize or backend.batch_size

            if self.workers > 0:
                ghetto_queue = []

            # Custom start id: batches begin at ``self.startid`` instead of 0.
            print(self.startid)

            for start in range(self.startid, max_id, batch_size):
                end = min(start + batch_size, max_id)

                if self.workers == 0:
                    do_update(backend, index, qs, start, end, total, verbosity=self.verbosity, commit=self.commit)
                else:
                    ghetto_queue.append(('do_update', model, start, end, total, using, self.start_date, self.end_date,
                                         self.verbosity, self.commit))

            if self.workers > 0:
                pool = multiprocessing.Pool(self.workers)
                pool.map(worker, ghetto_queue)
                pool.close()
                pool.join()

コード例 #13

0

ファイルを表示

ファイル: test_app_loading.py プロジェクト: aptivate/django-haystack

 def test_get_models_all(self):
     """Loading the ``core`` app label returns a list or a generator."""
     accepted_types = (list, GeneratorType)
     loaded = app_loading.haystack_get_models('core')
     self.assertIsInstance(loaded, accepted_types)

コード例 #14

0

ファイルを表示

ファイル: update_index.py プロジェクト: wicol/django-haystack

    def update_backend(self, label, using):
        """Update the search index for ``label`` on the ``using`` connection.

        For each model returned by ``haystack_get_models(label)`` that has an
        index in the connection's unified index, the queryset is indexed in
        ``batch_size`` slices: inline via ``do_update`` when ``self.workers``
        is 0, otherwise as ``'do_update'`` tuples mapped over a
        ``multiprocessing.Pool`` with ``worker``.

        When ``self.remove`` is set, the search index is then walked in
        batches sized from the index's own record count, and ``do_remove``
        (inline or pooled) is given the set of database pks.

        Args:
            label: app or ``app.Model`` label passed to ``haystack_get_models``.
            using: key of the haystack connection to update.
        """
        from haystack.exceptions import NotHandled

        backend = haystack_connections[using].get_backend()
        unified_index = haystack_connections[using].get_unified_index()

        if self.workers > 0:
            import multiprocessing

        for model in haystack_get_models(label):
            try:
                index = unified_index.get_index(model)
            except NotHandled:
                if self.verbosity >= 2:
                    print("Skipping '%s' - no index." % model)
                continue

            if self.workers > 0:
                # workers resetting connections leads to references to models / connections getting
                # stale and having their connection disconnected from under them. Resetting before
                # the loop continues and it accesses the ORM makes it better.
                db.close_connection()

            qs = index.build_queryset(using=using,
                                      start_date=self.start_date,
                                      end_date=self.end_date)

            total = qs.count()

            if self.verbosity >= 1:
                print(u"Indexing %d %s" %
                      (total, force_text(model._meta.verbose_name_plural)))

            # An explicitly configured batch size wins over the backend default.
            batch_size = self.batchsize or backend.batch_size

            if self.workers > 0:
                # Argument tuples for ``worker``, one per update batch.
                ghetto_queue = []

            for start in range(0, total, batch_size):
                end = min(start + batch_size, total)

                if self.workers == 0:
                    do_update(backend,
                              index,
                              qs,
                              start,
                              end,
                              total,
                              verbosity=self.verbosity,
                              commit=self.commit)
                else:
                    ghetto_queue.append(('do_update', model, start, end, total,
                                         using, self.start_date, self.end_date,
                                         self.verbosity, self.commit))

            if self.workers > 0:
                pool = multiprocessing.Pool(self.workers)
                pool.map(worker, ghetto_queue)
                pool.close()
                pool.join()

            if self.remove:
                if self.start_date or self.end_date or total <= 0:
                    # They're using a reduced set, which may not incorporate
                    # all pks. Rebuild the list with everything.
                    qs = index.index_queryset().values_list('pk', flat=True)
                    pks_seen = set(smart_bytes(pk) for pk in qs)

                    total = len(pks_seen)
                else:
                    pks_seen = set(
                        smart_bytes(pk)
                        for pk in qs.values_list('pk', flat=True))

                if self.workers > 0:
                    # Fresh queue: argument tuples for the removal batches.
                    ghetto_queue = []

                # Since records may still be in the search index but not the local database
                # we'll use that to create batches for processing.
                # See https://github.com/django-haystack/django-haystack/issues/1186
                index_total = SearchQuerySet(
                    using=backend.connection_alias).models(model).count()

                for start in range(0, index_total, batch_size):
                    upper_bound = start + batch_size

                    if self.workers == 0:
                        do_remove(backend,
                                  index,
                                  model,
                                  pks_seen,
                                  start,
                                  upper_bound,
                                  verbosity=self.verbosity,
                                  commit=self.commit)
                    else:
                        ghetto_queue.append(
                            ('do_remove', model, pks_seen, start, upper_bound,
                             using, self.verbosity, self.commit))

                if self.workers > 0:
                    pool = multiprocessing.Pool(self.workers)
                    pool.map(worker, ghetto_queue)
                    pool.terminate()

コード例 #15

0

ファイルを表示

ファイル: dev_update_index_reverse.py プロジェクト: Caimany/haystatck_ES1.6_v0.1

    def update_backend(self, label, using):
        """Index the models of ``label`` on ``using``, newest id range first.

        Batches are walked in descending id order between ``self.startid`` and
        the id of the first row of the reversed queryset (capped by
        ``self.endid`` when set).  With ``self.workers == 0`` each batch is
        passed to ``do_update`` inline, optionally with a ``{self.fk: self.fv}``
        filter; otherwise batches are queued and mapped over a
        ``multiprocessing`` pool at the end of the model's iteration.

        When ``self.sqlite`` is set, start/end time, ``UPDATE_TOTAL`` and the
        last id are stored on the ``IndexINFO`` row for the city number.

        Models without a registered index and models whose queryset is empty
        are skipped.

        Args:
            label: app or ``app.Model`` label passed to ``haystack_get_models``.
            using: key of the haystack connection to update.
        """
        from haystack.exceptions import NotHandled

        backend = haystack_connections[using].get_backend()
        unified_index = haystack_connections[using].get_unified_index()

        if self.workers > 0:
            import multiprocessing

        update_init_time = now()

        for model in haystack_get_models(label):

            try:
                index = unified_index.get_index(model)
            except NotHandled:
                if self.verbosity >= 2:
                    print("Skipping '%s' - no index." % model)
                continue

            if self.workers > 0:
                # workers resetting connections leads to references to models / connections getting
                # stale and having their connection disconnected from under them. Resetting before
                # the loop continues and it accesses the ORM makes it better.
                close_old_connections()

            qs = index.build_queryset(using=using, start_date=self.start_date,
                                      end_date=self.end_date)

            batch_size = self.batchsize or backend.batch_size

            if self.workers > 0:
                ghetto_queue = []

            total = qs.count()
            if total == 0:
                # Nothing to index; ``qs.reverse()[0]`` below would raise
                # IndexError on an empty queryset.
                continue

            # Fetch the last id once instead of re-querying it for each use.
            last_id = qs.reverse()[0].id
            if self.endid:
                max_id = min(last_id, self.endid)
                print("endid %s   数据最大id为%s ，默认%s" % (self.endid, last_id, max_id))
            else:
                max_id = last_id
            min_id = self.startid

            # Walk the id range from the top down, one batch at a time.
            for end in reversed(range(min_id, max_id + batch_size, batch_size)):
                start = max(end - batch_size, min_id)
                if end > max_id:
                    end = max_id

                print('starid id : %s ~ %s' % (start, end))

                if self.workers == 0:
                    # Optional extra filter forwarded to do_update.
                    if self.fk and self.fv:
                        filterkv = {self.fk: self.fv}
                    else:
                        filterkv = {}

                    index_pks = SearchQuerySet(using=backend.connection_alias).models(model)
                    do_update(backend, index, qs, start, end, total, verbosity=self.verbosity, commit=self.commit,index_pks=index_pks,
                              **filterkv)
                else:
                    ghetto_queue.append(('do_update', model, start, end, total, using, self.start_date, self.end_date,
                                         self.verbosity, self.commit))

            print("已更新到最新的id为 %s。。。" % (max_id))

            # Record the index run in the IndexINFO store.
            update_stop_time = now()

            # Default city number 100 when no filter value was given; an
            # explicit 0 is kept as-is.
            if not self.fv and self.fv != 0:
                city_num = 100
            else:
                city_num = self.fv
            print(update_init_time, "!!!!!!!!!!")

            if self.sqlite:
                try:
                    indexinfo = IndexINFO.get(city_num=city_num)

                    indexinfo.starttime = update_init_time
                    indexinfo.endtime = update_stop_time
                    indexinfo.last_index_total = UPDATE_TOTAL
                    indexinfo.last_index_id = max_id
                    indexinfo.save()

                except Exception:
                    # Any failure to load/update the row falls back to creating
                    # one.  ``Exception`` (not a bare except) so Ctrl-C and
                    # SystemExit still propagate.
                    c = IndexINFO.create(city_num=city_num, starttime=update_init_time, endtime=update_stop_time,
                                         last_index_total=UPDATE_TOTAL, last_index_id=max_id)
                    c.save()
            else:
                print('')

            # NOTE(review): queued batches only run here, after the run has
            # been recorded above — confirm that ordering is intended.
            if self.workers > 0:
                pool = multiprocessing.Pool(self.workers)
                pool.map(worker, ghetto_queue)
                pool.close()
                pool.join()

コード例 #16

0

ファイルを表示

ファイル: update_index.py プロジェクト: dionysio/django-haystack

    def update_backend(self, label, using):
        """Index the models of ``label`` on ``using`` and clear their flag.

        Each model with a registered index is indexed in ``batch_size`` slices
        (inline via ``do_update`` when ``self.workers`` is 0, pooled
        otherwise), after which the indexed queryset is updated with
        ``indexable=0``.  For the model named ``Video`` with ``self.remove``
        set, the rows from ``index.get_deleted()`` are removed from the
        backend and marked ``removed_from_index=True``.
        """
        from haystack.exceptions import NotHandled

        backend = haystack_connections[using].get_backend()
        unified_index = haystack_connections[using].get_unified_index()

        if self.workers > 0:
            import multiprocessing

        for model in haystack_get_models(label):
            try:
                index = unified_index.get_index(model)
            except NotHandled:
                if self.verbosity >= 2:
                    print("Skipping '%s' - no index." % model)
                continue

            if self.workers > 0:
                # Workers reset connections, which can leave this process with
                # stale ones; drop them before the ORM is touched again.
                close_old_connections()

            queryset = index.build_queryset(
                using=using,
                start_date=self.start_date,
                end_date=self.end_date,
            )
            row_count = queryset.count()

            if self.verbosity >= 1:
                plural_name = force_text(model._meta.verbose_name_plural)
                print(u"Indexing %d %s" % (row_count, plural_name))

            chunk = self.batchsize or backend.batch_size

            if self.workers > 0:
                jobs = []

            for lower in range(0, row_count, chunk):
                upper = min(lower + chunk, row_count)

                if self.workers != 0:
                    jobs.append((
                        'do_update', model, lower, upper, row_count, using,
                        self.start_date, self.end_date, self.verbosity, self.commit,
                    ))
                else:
                    do_update(backend, index, queryset, lower, upper, row_count,
                              verbosity=self.verbosity, commit=self.commit)

            if self.workers > 0:
                pool = multiprocessing.Pool(self.workers)
                pool.map(worker, jobs)
                pool.close()
                pool.join()

            queryset.update(indexable=0)

            # Removal of deleted rows only applies to the Video model.
            if model.__name__ != 'Video' or not self.remove:
                continue

            deleted = index.get_deleted()
            for video_id in deleted.values_list('video_id', flat=True):
                backend.remove('main.video.' + video_id, commit=self.commit)

            deleted.update(removed_from_index=True)

コード例 #17

0

ファイルを表示

ファイル: test_app_loading.py プロジェクト: aptivate/django-haystack

    def test_get_models_specific(self):
        """``core.MockModel`` must resolve to a list containing only MockModel."""
        from test_haystack.core.models import MockModel

        accepted_types = (list, GeneratorType)
        loaded = app_loading.haystack_get_models('core.MockModel')

        self.assertIsInstance(loaded, accepted_types)
        self.assertListEqual(loaded, [MockModel])

コード例 #18

0

ファイルを表示

ファイル: test_app_loading.py プロジェクト: aptivate/django-haystack

 def test_hierarchal_app_specific_model(self):
     """Only ``HierarchalAppModel`` is returned for its dotted label."""
     label = 'hierarchal_app_django.HierarchalAppModel'
     loaded = app_loading.haystack_get_models(label)
     self.assertIsInstance(loaded, (list, GeneratorType))

     meta_labels = {str(entry._meta) for entry in loaded}
     self.assertSetEqual(meta_labels,
                         {'hierarchal_app_django.hierarchalappmodel'})

コード例 #19

0

ファイルを表示

    def update_backend(self, label, using):
        """Update the search index for ``label`` on the ``using`` connection.

        For each model returned by ``haystack_get_models(label)`` that has an
        index in the connection's unified index:

        1. Build the queryset (bounded by ``self.start_date``/``self.end_date``)
           and index it in ``batch_size`` slices, inline via ``do_update`` when
           ``self.workers`` is 0, otherwise as ``'do_update'`` tuples mapped
           over a ``multiprocessing.Pool`` with ``worker``.
        2. If ``self.remove`` is set, walk the search index in batches and
           remove every record whose pk is not in the database pk set.

        Args:
            label: app or ``app.Model`` label passed to ``haystack_get_models``.
            using: key of the haystack connection to update.
        """
        from haystack.exceptions import NotHandled

        backend = haystack_connections[using].get_backend()
        unified_index = haystack_connections[using].get_unified_index()

        if self.workers > 0:
            import multiprocessing

        for model in haystack_get_models(label):
            try:
                index = unified_index.get_index(model)
            except NotHandled:
                if self.verbosity >= 2:
                    print("Skipping '%s' - no index." % model)
                continue

            if self.workers > 0:
                # workers resetting connections leads to references to models / connections getting
                # stale and having their connection disconnected from under them. Resetting before
                # the loop continues and it accesses the ORM makes it better.
                close_old_connections()

            qs = index.build_queryset(using=using,
                                      start_date=self.start_date,
                                      end_date=self.end_date)

            total = qs.count()

            if self.verbosity >= 1:
                print(u"Indexing %d %s" %
                      (total, force_text(model._meta.verbose_name_plural)))

            # An explicitly configured batch size wins over the backend default.
            batch_size = self.batchsize or backend.batch_size

            if self.workers > 0:
                # Argument tuples for ``worker``, one per update batch.
                ghetto_queue = []

            for start in range(0, total, batch_size):
                end = min(start + batch_size, total)

                if self.workers == 0:
                    do_update(backend,
                              index,
                              qs,
                              start,
                              end,
                              total,
                              verbosity=self.verbosity,
                              commit=self.commit)
                else:
                    ghetto_queue.append(('do_update', model, start, end, total,
                                         using, self.start_date, self.end_date,
                                         self.verbosity, self.commit))

            if self.workers > 0:
                pool = multiprocessing.Pool(self.workers)
                pool.map(worker, ghetto_queue)
                pool.close()
                pool.join()

            if self.remove:
                if self.start_date or self.end_date or total <= 0:
                    # They're using a reduced set, which may not incorporate
                    # all pks. Rebuild the list with everything.
                    qs = index.index_queryset().values_list('pk', flat=True)
                    database_pks = set(smart_bytes(pk) for pk in qs)

                    total = len(database_pks)
                else:
                    database_pks = set(
                        smart_bytes(pk)
                        for pk in qs.values_list('pk', flat=True))

                # Since records may still be in the search index but not the local database
                # we'll use that to create batches for processing.
                # See https://github.com/django-haystack/django-haystack/issues/1186
                index_total = SearchQuerySet(
                    using=backend.connection_alias).models(model).count()

                # Retrieve PKs from the index. Note that this cannot be a numeric range query because although
                # pks are normally numeric they can be non-numeric UUIDs or other custom values. To reduce
                # load on the search engine, we only retrieve the pk field, which will be checked against the
                # full list obtained from the database, and the id field, which will be used to delete the
                # record should it be found to be stale.
                index_pks = SearchQuerySet(
                    using=backend.connection_alias).models(model)
                index_pks = index_pks.values_list('pk', 'id')

                # We'll collect all of the record IDs which are no longer present in the database and delete
                # them after walking the entire index. This uses more memory than the incremental approach but
                # avoids needing the pagination logic below to account for both commit modes:
                stale_records = set()

                for start in range(0, index_total, batch_size):
                    upper_bound = start + batch_size

                    # If the database pk is no longer present, queue the index key for removal:
                    for pk, rec_id in index_pks[start:upper_bound]:
                        if smart_bytes(pk) not in database_pks:
                            stale_records.add(rec_id)

                if stale_records:
                    if self.verbosity >= 1:
                        print("  removing %d stale records." %
                              len(stale_records))

                    for rec_id in stale_records:
                        # Since the PK was not in the database list, we'll delete the record from the search index:
                        if self.verbosity >= 2:
                            print("  removing %s." % rec_id)

                        backend.remove(rec_id, commit=self.commit)

コード例 #20

0

ファイルを表示

ファイル: search_indexes.py プロジェクト: zhoumingchun/nhaystack

def get_model(app_and_model):
    """Return the first model that ``haystack_get_models`` yields for a label.

    Args:
        app_and_model: label passed straight to ``haystack_get_models``.

    Returns:
        The first model produced for the label.

    Raises:
        IndexError: if the label resolves to no models.
    """
    # Materialise first: the result may be a generator, which cannot be
    # indexed directly.
    models = list(haystack_get_models(app_and_model))
    return models[0]