def test_hierarchal_app_specific_model(self):
    """A dotted ``app.Model`` label resolves to exactly that one model."""
    label = 'hierarchal_app_django.HierarchalAppModel'
    models = app_loading.haystack_get_models(label)
    self.assertIsInstance(models, (list, GeneratorType))

    # Compare on the ``_meta`` string form so the model class itself does
    # not have to be imported here.
    found = {str(model._meta) for model in models}
    expected = {'hierarchal_app_django.hierarchalappmodel'}
    self.assertSetEqual(found, expected)
def get_all_indexed_models():
    """Collect every model that has a search index, across all backends.

    Returns:
        dict: maps ``"<module>.<ModelName>"`` to a dict with the keys
        ``backend_key``, ``backend``, ``app``, ``model`` and ``index``.
        When the same model is indexed by several connections, the entry
        written last (latest connection iterated) is the one kept.
    """
    available_models = {}

    for backend_key in haystack_connections.connections_info.keys():
        connection = haystack_connections[backend_key]
        backend = connection.get_backend()
        unified_index = connection.get_unified_index()

        for app in haystack_load_apps():
            for model in haystack_get_models(app):
                try:
                    index = unified_index.get_index(model)
                except NotHandled:
                    # No index registered for this model on this connection.
                    continue

                model_name = '%s.%s' % (model.__module__, model.__name__)
                available_models[model_name] = dict(
                    backend_key=backend_key,
                    backend=backend,
                    app=app,
                    model=model,
                    index=index,
                )

    return available_models
def test_hierarchal_app_get_models(self):
    """A bare app label resolves to both models of the hierarchal app."""
    models = app_loading.haystack_get_models("hierarchal_app_django")
    self.assertIsInstance(models, (list, GeneratorType))

    expected = {
        "hierarchal_app_django.hierarchalappsecondmodel",
        "hierarchal_app_django.hierarchalappmodel",
    }
    found = {str(model._meta) for model in models}
    self.assertSetEqual(found, expected)
def test_hierarchal_app_get_models(self):
    """Check the models returned for the ``hierarchal_app_django`` app label."""
    app_label = "hierarchal_app_django"
    models = app_loading.haystack_get_models(app_label)
    self.assertIsInstance(models, (list, GeneratorType))

    # Order is irrelevant, so compare as sets of ``_meta`` string forms.
    actual_labels = set()
    for model in models:
        actual_labels.add(str(model._meta))

    self.assertSetEqual(
        actual_labels,
        {
            "hierarchal_app_django.hierarchalappsecondmodel",
            "hierarchal_app_django.hierarchalappmodel",
        },
    )
def update_backend(self, label, using):
    """Index every handled model of ``label`` on connection ``using``.

    For each model that has a search index, the rows are walked in primary
    key order in batches of ``self.batchsize`` (falling back to the
    backend's ``batch_size``) and each batch is passed to ``do_update``.
    A progress bar tracks the number of rows handed off so far.

    Args:
        label: app label or ``app.Model`` label passed to
            ``haystack_get_models``.
        using: alias of the haystack connection to update.
    """
    backend = haystack_connections[using].get_backend()
    unified_index = haystack_connections[using].get_unified_index()

    for model in haystack_get_models(label):
        try:
            index = unified_index.get_index(model)
        except NotHandled:
            continue

        qs = index.build_queryset(using=using,
                                  start_date=self.start_date,
                                  end_date=self.end_date)
        total = qs.count()

        if self.verbosity >= 1:
            self.stdout.write(
                u"Indexing %d %s" %
                (total, plural(force_text(model._meta.verbose_name))))

        batch_size = self.batchsize or backend.batch_size

        pbar = ProgressBar(widgets=[
            '{0}: '.format(model._meta.verbose_name),
            Percentage(), ' (', Counter(), ') ',
            Bar(marker=RotatingMarker()), ' ', ETA()
        ], maxval=total).start()

        last_pk = None
        for start in range(0, total, batch_size):
            pbar.update(start)

            rows = model.objects.using(using)
            if last_pk is None:
                # First batch: seed the walk from the queryset and include
                # that row itself. Filtering with ``pk__gt`` here would
                # silently skip the first record.
                # NOTE(review): assumes ``qs`` is ordered by pk so this is
                # the smallest pk - confirm against build_queryset().
                first_pk = qs.only('pk').values_list('pk')[start][0]
                rows = rows.filter(pk__gte=first_pk)
            else:
                rows = rows.filter(pk__gt=last_pk)

            original_data = rows.prefetch_related(
                *index.get_prefetch()).order_by('pk')[:batch_size]

            actual_size = len(original_data)
            if not actual_size:
                # Fewer rows than ``total`` predicted (e.g. deleted while
                # running); indexing [-1] on a queryset would raise.
                break
            last_pk = original_data[actual_size - 1].pk

            do_update(backend, index, original_data,
                      commit=self.commit, max_retries=self.max_retries)

        pbar.update(total)
        pbar.finish()
def get_ultimas_publicacoes_uma_por_tipo__nao_usada(self):
    """Return the top search hit for each entry of ``model_choices()``.

    For every choice, the search is restricted to ``at=0`` and to the
    models resolved from the choice's first element, ordered by ``-data``
    then ``-last_update`` and cut to five hits; the first hit, if any, is
    collected.

    Returns:
        list: one search result per choice that produced at least one hit.
    """
    results = []

    for choice in model_choices():
        hits = (
            SearchQuerySet()
            .all()
            .filter(at=0)
            .models(*haystack_get_models(choice[0]))
            .order_by('-data', '-last_update')[:5]
        )
        if len(hits):
            results.append(hits[0])

    return results
def get_ultimas_publicacoes(self):
    """Return up to 20 recent search hits whose object has a ``_certidao``.

    The search covers ``protocoloadm.documentoadministrativo``, limited to
    ``at=0`` or to the ids returned by
    ``AreaTrabalho.objects.areatrabalho_publica()``, ordered by ``-data``
    then ``-last_update``. Only the first 100 hits are inspected.

    Returns:
        list: at most 20 search results, in search order.
    """
    sqs = SearchQuerySet().all()

    public_area_ids = AreaTrabalho.objects.areatrabalho_publica().values_list(
        'id', flat=True)
    sqs = sqs.filter(Q(at=0) | Q(at__in=public_area_ids))

    sqs = sqs.models(
        *haystack_get_models('protocoloadm.documentoadministrativo'))
    sqs = sqs.order_by('-data', '-last_update')[:100]

    selected = []
    for hit in sqs:
        has_certidao = (
            hit.object
            and hit.object._certidao
            and hit.object._certidao.exists()
        )
        if has_certidao:
            selected.append(hit)

        if len(selected) == 20:
            break

    return selected
def test_get_models_specific(self):
    """``core.MockModel`` resolves to a list holding only ``MockModel``."""
    from test_haystack.core.models import MockModel

    expected = [MockModel]
    models = app_loading.haystack_get_models("core.MockModel")

    self.assertIsInstance(models, (list, GeneratorType))
    self.assertListEqual(models, expected)
def test_get_models_all(self):
    """Loading the whole ``core`` app yields a list or a generator."""
    accepted_types = (list, GeneratorType)
    models = app_loading.haystack_get_models("core")
    self.assertIsInstance(models, accepted_types)
def update_backend(self, label, using):
    """Index the models of ``label`` on connection ``using``.

    Each handled model is indexed in ``[start, end)`` windows, either
    inline through ``do_update`` (``self.workers == 0``) or by queueing
    the windows for ``update_worker`` on a multiprocessing pool. When
    ``self.remove`` is set, index records whose pk is no longer in the
    database are removed afterwards.

    Args:
        label: app or ``app.Model`` label passed to ``haystack_get_models``.
        using: alias of the haystack connection to update.
    """
    connection = haystack_connections[using]
    backend = connection.get_backend()
    unified_index = connection.get_unified_index()

    for model in haystack_get_models(label):
        try:
            index = unified_index.get_index(model)
        except NotHandled:
            if self.verbosity >= 2:
                self.stdout.write("Skipping '%s' - no index." % model)
            continue

        if self.workers > 0:
            # workers resetting connections leads to references to models /
            # connections getting stale and having their connection
            # disconnected from under them. Resetting before the loop
            # continues and it accesses the ORM makes it better.
            close_old_connections()

        qs = index.build_queryset(
            using=using,
            start_date=self.start_date,
            end_date=self.end_date,
        )
        total = qs.count()

        if self.verbosity >= 1:
            plural_name = force_text(model._meta.verbose_name_plural)
            self.stdout.write(u"Indexing %d %s" % (total, plural_name))

        batch_size = self.batchsize or backend.batch_size

        if self.workers > 0:
            ghetto_queue = []

        for start in range(0, total, batch_size):
            end = min(start + batch_size, total)

            if self.workers == 0:
                do_update(
                    backend, index, qs, start, end, total,
                    verbosity=self.verbosity,
                    commit=self.commit,
                    max_retries=self.max_retries,
                )
            else:
                task = (
                    model, start, end, total, using,
                    self.start_date, self.end_date,
                    self.verbosity, self.commit, self.max_retries,
                )
                ghetto_queue.append(task)

        if self.workers > 0:
            pool = multiprocessing.Pool(self.workers)
            successful_tasks = pool.map(update_worker, ghetto_queue)

            queued = len(ghetto_queue)
            completed = len(successful_tasks)
            if queued != completed:
                self.stderr.write(
                    'Queued %d tasks but only %d completed' % (queued, completed))
                for task in ghetto_queue:
                    if task not in successful_tasks:
                        self.stderr.write('Incomplete task: %s' % repr(task))

            pool.close()
            pool.join()

        if self.remove:
            if self.start_date or self.end_date or total <= 0:
                # They're using a reduced set, which may not incorporate
                # all pks. Rebuild the list with everything.
                qs = index.index_queryset().values_list('pk', flat=True)
                database_pks = {smart_bytes(pk) for pk in qs}
                total = len(database_pks)
            else:
                database_pks = {
                    smart_bytes(pk) for pk in qs.values_list('pk', flat=True)
                }

            # Since records may still be in the search index but not the
            # local database we'll use that to create batches for processing.
            # See https://github.com/django-haystack/django-haystack/issues/1186
            index_total = SearchQuerySet(
                using=backend.connection_alias).models(model).count()

            # Retrieve PKs from the index. Note that this cannot be a numeric
            # range query because although pks are normally numeric they can
            # be non-numeric UUIDs or other custom values. To reduce load on
            # the search engine, we only retrieve the pk field, which will be
            # checked against the full list obtained from the database, and
            # the id field, which will be used to delete the record should it
            # be found to be stale.
            index_pks = SearchQuerySet(
                using=backend.connection_alias).models(model)
            index_pks = index_pks.values_list('pk', 'id')

            # We'll collect all of the record IDs which are no longer present
            # in the database and delete them after walking the entire index.
            # This uses more memory than the incremental approach but avoids
            # needing the pagination logic to account for both commit modes.
            stale_records = set()
            for start in range(0, index_total, batch_size):
                upper_bound = start + batch_size
                for pk, rec_id in index_pks[start:upper_bound]:
                    # Database pk gone: queue the index key for removal.
                    if smart_bytes(pk) not in database_pks:
                        stale_records.add(rec_id)

            if stale_records:
                if self.verbosity >= 1:
                    self.stdout.write(
                        " removing %d stale records." % len(stale_records))

                for rec_id in stale_records:
                    # The PK was not in the database list, so delete the
                    # record from the search index.
                    if self.verbosity >= 2:
                        self.stdout.write(" removing %s." % rec_id)
                    backend.remove(rec_id, commit=self.commit)
def update_backend(self, label, using):
    """Index the models of ``label`` on ``using`` and optionally prune.

    Each handled model is indexed in ``[start, end)`` windows, either
    inline through ``do_update`` (``self.workers == 0``) or by queueing
    ``('do_update', ...)`` tasks for ``worker`` on a multiprocessing pool.
    When ``self.remove`` is set, ``do_remove`` is run over windows of the
    search index with the set of database pks.

    Args:
        label: app or ``app.Model`` label passed to ``haystack_get_models``.
        using: alias of the haystack connection to update.
    """
    from haystack.exceptions import NotHandled
    from haystack.query import SearchQuerySet

    backend = haystack_connections[using].get_backend()
    unified_index = haystack_connections[using].get_unified_index()

    if self.workers > 0:
        import multiprocessing

    for model in haystack_get_models(label):
        try:
            index = unified_index.get_index(model)
        except NotHandled:
            if self.verbosity >= 2:
                print("Skipping '%s' - no index." % model)
            continue

        if self.workers > 0:
            # workers resetting connections leads to references to models /
            # connections getting stale and having their connection
            # disconnected from under them. Resetting before the loop
            # continues and it accesses the ORM makes it better.
            close_old_connections()

        qs = index.build_queryset(using=using,
                                  start_date=self.start_date,
                                  end_date=self.end_date)

        total = qs.count()

        if self.verbosity >= 1:
            print(u"Indexing %d %s" %
                  (total, force_text(model._meta.verbose_name_plural)))

        batch_size = self.batchsize or backend.batch_size

        if self.workers > 0:
            ghetto_queue = []

        for start in range(0, total, batch_size):
            end = min(start + batch_size, total)

            if self.workers == 0:
                do_update(backend, index, qs, start, end, total,
                          verbosity=self.verbosity, commit=self.commit)
            else:
                ghetto_queue.append(
                    ('do_update', model, start, end, total, using,
                     self.start_date, self.end_date,
                     self.verbosity, self.commit))

        if self.workers > 0:
            pool = multiprocessing.Pool(self.workers)
            pool.map(worker, ghetto_queue)
            pool.close()
            pool.join()

        if self.remove:
            if self.start_date or self.end_date or total <= 0:
                # They're using a reduced set, which may not incorporate
                # all pks. Rebuild the list with everything.
                qs = index.index_queryset().values_list('pk', flat=True)
                pks_seen = set(smart_bytes(pk) for pk in qs)
                total = len(pks_seen)
            else:
                pks_seen = set(
                    smart_bytes(pk) for pk in qs.values_list('pk', flat=True))

            if self.workers > 0:
                ghetto_queue = []

            # Size the removal batches from the search index, not from the
            # database: stale records are by definition those that exist
            # only in the index, so when the index holds more records than
            # the database a database-sized range would never reach them.
            # See https://github.com/django-haystack/django-haystack/issues/1186
            index_total = SearchQuerySet(
                using=backend.connection_alias).models(model).count()

            for start in range(0, index_total, batch_size):
                upper_bound = start + batch_size

                if self.workers == 0:
                    do_remove(backend, index, model, pks_seen,
                              start, upper_bound,
                              verbosity=self.verbosity, commit=self.commit)
                else:
                    ghetto_queue.append(
                        ('do_remove', model, pks_seen, start, upper_bound,
                         using, self.verbosity, self.commit))

            if self.workers > 0:
                pool = multiprocessing.Pool(self.workers)
                pool.map(worker, ghetto_queue)
                pool.terminate()
def update_backend(self, label, using):
    """Index the models of ``label`` on ``using`` in id-based windows.

    Unlike offset-based batching, the windows here run from
    ``self.startid`` up to the ``id`` of the last row of the queryset, in
    steps of the batch size. Each ``(start, end)`` pair is handed to
    ``do_update`` inline (``self.workers == 0``) or queued as a
    ``('do_update', ...)`` task for ``worker`` on a multiprocessing pool.

    Args:
        label: app or ``app.Model`` label passed to ``haystack_get_models``.
        using: alias of the haystack connection to update.
    """
    from haystack.exceptions import NotHandled

    backend = haystack_connections[using].get_backend()
    unified_index = haystack_connections[using].get_unified_index()

    if self.workers > 0:
        import multiprocessing

    for model in haystack_get_models(label):
        try:
            index = unified_index.get_index(model)
        except NotHandled:
            if self.verbosity >= 2:
                print("Skipping '%s' - no index." % model)
            continue

        if self.workers > 0:
            # workers resetting connections leads to references to models /
            # connections getting stale and having their connection
            # disconnected from under them. Resetting before the loop
            # continues and it accesses the ORM makes it better.
            close_old_connections()

        qs = index.build_queryset(using=using,
                                  start_date=self.start_date,
                                  end_date=self.end_date)

        total = qs.count()
        if not total:
            # Nothing to index for this model. ``qs.reverse()[0]`` below
            # would raise IndexError on an empty queryset.
            if self.verbosity >= 1:
                print(u"Indexing %d %s" %
                      (total, force_text(model._meta.verbose_name_plural)))
            continue

        # The upper bound of the walk is the id of the last row.
        # NOTE(review): relies on ``qs`` being ordered by id so that the
        # reversed first row carries the largest id - confirm.
        max_id = qs.reverse()[0].id
        print("id 最大为 %s" % (max_id))

        if self.verbosity >= 1:
            print(u"Indexing %d %s" %
                  (total, force_text(model._meta.verbose_name_plural)))

        batch_size = self.batchsize or backend.batch_size

        if self.workers > 0:
            ghetto_queue = []

        # Windows are expressed as ids starting at the custom start id,
        # not as offsets from 0 to ``total``.
        print(self.startid)
        for start in range(self.startid, max_id, batch_size):
            end = min(start + batch_size, max_id)

            if self.workers == 0:
                do_update(backend, index, qs, start, end, total,
                          verbosity=self.verbosity, commit=self.commit)
            else:
                ghetto_queue.append(
                    ('do_update', model, start, end, total, using,
                     self.start_date, self.end_date,
                     self.verbosity, self.commit))

        if self.workers > 0:
            pool = multiprocessing.Pool(self.workers)
            pool.map(worker, ghetto_queue)
            pool.close()
            pool.join()
def test_get_models_all(self):
    """``haystack_get_models('core')`` returns a list or generator."""
    result = app_loading.haystack_get_models('core')
    self.assertIsInstance(result, (list, GeneratorType))
def update_backend(self, label, using):
    """Index the models of ``label`` on ``using``; prune when requested.

    Indexing runs in ``[start, end)`` windows through ``do_update``, either
    inline (``self.workers == 0``) or as ``('do_update', ...)`` tasks given
    to ``worker`` on a multiprocessing pool. With ``self.remove`` set,
    ``do_remove`` is then run over windows sized from the search index.

    Args:
        label: app or ``app.Model`` label passed to ``haystack_get_models``.
        using: alias of the haystack connection to update.
    """
    from haystack.exceptions import NotHandled

    connection = haystack_connections[using]
    backend = connection.get_backend()
    unified_index = connection.get_unified_index()

    if self.workers > 0:
        import multiprocessing

    for model in haystack_get_models(label):
        try:
            index = unified_index.get_index(model)
        except NotHandled:
            if self.verbosity >= 2:
                print("Skipping '%s' - no index." % model)
            continue

        if self.workers > 0:
            # workers resetting connections leads to references to models /
            # connections getting stale and having their connection
            # disconnected from under them. Resetting before the loop
            # continues and it accesses the ORM makes it better.
            db.close_connection()

        qs = index.build_queryset(
            using=using,
            start_date=self.start_date,
            end_date=self.end_date,
        )
        total = qs.count()

        if self.verbosity >= 1:
            plural_name = force_text(model._meta.verbose_name_plural)
            print(u"Indexing %d %s" % (total, plural_name))

        batch_size = self.batchsize or backend.batch_size

        if self.workers > 0:
            ghetto_queue = []

        for start in range(0, total, batch_size):
            end = min(start + batch_size, total)

            if self.workers == 0:
                do_update(backend, index, qs, start, end, total,
                          verbosity=self.verbosity, commit=self.commit)
            else:
                task = ('do_update', model, start, end, total, using,
                        self.start_date, self.end_date,
                        self.verbosity, self.commit)
                ghetto_queue.append(task)

        if self.workers > 0:
            pool = multiprocessing.Pool(self.workers)
            pool.map(worker, ghetto_queue)
            pool.close()
            pool.join()

        if self.remove:
            if self.start_date or self.end_date or total <= 0:
                # They're using a reduced set, which may not incorporate
                # all pks. Rebuild the list with everything.
                qs = index.index_queryset().values_list('pk', flat=True)
                pks_seen = {smart_bytes(pk) for pk in qs}
                total = len(pks_seen)
            else:
                pks_seen = {
                    smart_bytes(pk) for pk in qs.values_list('pk', flat=True)
                }

            if self.workers > 0:
                ghetto_queue = []

            # Since records may still be in the search index but not the
            # local database we'll use that to create batches for processing.
            # See https://github.com/django-haystack/django-haystack/issues/1186
            index_sqs = SearchQuerySet(using=backend.connection_alias)
            index_total = index_sqs.models(model).count()

            for start in range(0, index_total, batch_size):
                upper_bound = start + batch_size

                if self.workers == 0:
                    do_remove(backend, index, model, pks_seen,
                              start, upper_bound,
                              verbosity=self.verbosity, commit=self.commit)
                else:
                    task = ('do_remove', model, pks_seen, start, upper_bound,
                            using, self.verbosity, self.commit)
                    ghetto_queue.append(task)

            if self.workers > 0:
                pool = multiprocessing.Pool(self.workers)
                pool.map(worker, ghetto_queue)
                pool.terminate()
def update_backend(self, label, using):
    """Index ``label`` on ``using`` in descending id windows and log the run.

    For each handled model the id range ``self.startid`` .. ``max_id`` is
    walked from the top down in steps of the batch size, where ``max_id``
    is the id of the last queryset row, capped by ``self.endid`` when that
    is set. Windows are passed to ``do_update`` inline
    (``self.workers == 0``) or queued for ``worker`` on a multiprocessing
    pool. When ``self.sqlite`` is truthy the run's start/end time, total
    and last id are stored on an ``IndexINFO`` row keyed by ``city_num``.

    Args:
        label: app or ``app.Model`` label passed to ``haystack_get_models``.
        using: alias of the haystack connection to update.
    """
    from haystack.exceptions import NotHandled

    backend = haystack_connections[using].get_backend()
    unified_index = haystack_connections[using].get_unified_index()

    if self.workers > 0:
        import multiprocessing

    update_init_time = now()

    # Optional extra filter forwarded to do_update as keyword arguments;
    # it does not vary between batches, so build it once.
    if self.fk and self.fv:
        filterkv = {self.fk: self.fv}
    else:
        filterkv = {}

    for model in haystack_get_models(label):
        try:
            index = unified_index.get_index(model)
        except NotHandled:
            if self.verbosity >= 2:
                print("Skipping '%s' - no index." % model)
            continue

        if self.workers > 0:
            # workers resetting connections leads to references to models /
            # connections getting stale and having their connection
            # disconnected from under them. Resetting before the loop
            # continues and it accesses the ORM makes it better.
            close_old_connections()

        qs = index.build_queryset(using=using,
                                  start_date=self.start_date,
                                  end_date=self.end_date)

        batch_size = self.batchsize or backend.batch_size

        if self.workers > 0:
            ghetto_queue = []

        total = qs.count()
        if not total:
            # Empty queryset: ``qs.reverse()[0]`` would raise IndexError
            # and there is nothing to index or record for this model.
            continue

        # Fetch the last row's id once instead of re-querying per use.
        # NOTE(review): relies on ``qs`` being ordered by id - confirm.
        latest_id = qs.reverse()[0].id
        if self.endid:
            max_id = min(latest_id, self.endid)
            print("endid %s 数据最大id为%s ,默认%s" %
                  (self.endid, latest_id, max_id))
        else:
            max_id = latest_id

        min_id = self.startid

        # Walk the id range from the highest window down to the lowest.
        for end in reversed(range(min_id, max_id + batch_size, batch_size)):
            start = max(end - batch_size, min_id)
            if end > max_id:
                end = max_id
            print('starid id : %s ~ %s' % (start, end))

            if self.workers == 0:
                index_pks = SearchQuerySet(
                    using=backend.connection_alias).models(model)
                do_update(backend, index, qs, start, end, total,
                          verbosity=self.verbosity, commit=self.commit,
                          index_pks=index_pks, **filterkv)
            else:
                ghetto_queue.append(
                    ('do_update', model, start, end, total, using,
                     self.start_date, self.end_date,
                     self.verbosity, self.commit))

        print("已更新到最新的id为 %s。。。" % (max_id))

        # Read/write the index bookkeeping record in sqlite.
        update_stop_time = now()
        if not self.fv and self.fv != 0:
            # No filter value given: fall back to the default key 100.
            city_num = 100
        else:
            city_num = self.fv
        print(update_init_time, "!!!!!!!!!!")

        if self.sqlite:
            try:
                indexinfo = IndexINFO.get(city_num=city_num)
                indexinfo.starttime = update_init_time
                indexinfo.endtime = update_stop_time
                indexinfo.last_index_total = UPDATE_TOTAL
                indexinfo.last_index_id = max_id
                indexinfo.save()
            except Exception:
                # Best-effort fallback: if the row could not be fetched or
                # updated, create a fresh one. ``Exception`` (rather than a
                # bare ``except``) lets Ctrl-C / SystemExit propagate.
                c = IndexINFO.create(city_num=city_num,
                                     starttime=update_init_time,
                                     endtime=update_stop_time,
                                     last_index_total=UPDATE_TOTAL,
                                     last_index_id=max_id)
                c.save()
        else:
            print('')

        if self.workers > 0:
            pool = multiprocessing.Pool(self.workers)
            pool.map(worker, ghetto_queue)
            pool.close()
            pool.join()
def update_backend(self, label, using):
    """Index the models of ``label`` on ``using`` and clear ``indexable``.

    Each handled model is indexed in ``[start, end)`` windows through
    ``do_update``, inline (``self.workers == 0``) or as tasks for ``worker``
    on a multiprocessing pool. Afterwards the queryset's rows are updated
    with ``indexable=0``. For the model named ``Video`` with
    ``self.remove`` set, every entry of ``index.get_deleted()`` is removed
    from the backend and flagged ``removed_from_index``.

    Args:
        label: app or ``app.Model`` label passed to ``haystack_get_models``.
        using: alias of the haystack connection to update.
    """
    from haystack.exceptions import NotHandled

    connection = haystack_connections[using]
    backend = connection.get_backend()
    unified_index = connection.get_unified_index()

    if self.workers > 0:
        import multiprocessing

    for model in haystack_get_models(label):
        try:
            index = unified_index.get_index(model)
        except NotHandled:
            if self.verbosity >= 2:
                print("Skipping '%s' - no index." % model)
            continue

        if self.workers > 0:
            # workers resetting connections leads to references to models /
            # connections getting stale and having their connection
            # disconnected from under them. Resetting before the loop
            # continues and it accesses the ORM makes it better.
            close_old_connections()

        qs = index.build_queryset(
            using=using,
            start_date=self.start_date,
            end_date=self.end_date,
        )
        total = qs.count()

        if self.verbosity >= 1:
            plural_name = force_text(model._meta.verbose_name_plural)
            print(u"Indexing %d %s" % (total, plural_name))

        batch_size = self.batchsize or backend.batch_size

        if self.workers > 0:
            ghetto_queue = []

        for start in range(0, total, batch_size):
            end = min(start + batch_size, total)

            if self.workers == 0:
                do_update(backend, index, qs, start, end, total,
                          verbosity=self.verbosity, commit=self.commit)
            else:
                task = ('do_update', model, start, end, total, using,
                        self.start_date, self.end_date,
                        self.verbosity, self.commit)
                ghetto_queue.append(task)

        if self.workers > 0:
            pool = multiprocessing.Pool(self.workers)
            pool.map(worker, ghetto_queue)
            pool.close()
            pool.join()

        # NOTE(review): placed at per-model level (runs for every worker
        # mode); confirm this matches the intended nesting.
        qs.update(indexable=0)

        if model.__name__ == 'Video' and self.remove:
            deleted = index.get_deleted()
            for video_id in deleted.values_list('video_id', flat=True):
                backend.remove('main.video.' + video_id, commit=self.commit)
            deleted.update(removed_from_index=True)
def test_get_models_specific(self):
    """Resolving ``core.MockModel`` gives back ``[MockModel]``."""
    from test_haystack.core.models import MockModel

    label = 'core.MockModel'
    models = app_loading.haystack_get_models(label)

    accepted_types = (list, GeneratorType)
    self.assertIsInstance(models, accepted_types)
    self.assertListEqual(models, [MockModel])
def test_hierarchal_app_specific_model(self):
    """Only ``HierarchalAppModel`` is returned for its dotted label."""
    models = app_loading.haystack_get_models(
        'hierarchal_app_django.HierarchalAppModel')
    self.assertIsInstance(models, (list, GeneratorType))

    # Collect the ``_meta`` string form of each returned model.
    meta_labels = set()
    for model in models:
        meta_labels.add(str(model._meta))

    self.assertSetEqual(
        meta_labels, {'hierarchal_app_django.hierarchalappmodel'})
def update_backend(self, label, using):
    """Index the models of ``label`` on ``using``; drop stale records.

    Indexing runs in ``[start, end)`` windows through ``do_update``, inline
    (``self.workers == 0``) or as ``('do_update', ...)`` tasks for
    ``worker`` on a multiprocessing pool. With ``self.remove`` set, the
    search index is then paged through and every record whose pk is absent
    from the database is removed from the backend.

    Args:
        label: app or ``app.Model`` label passed to ``haystack_get_models``.
        using: alias of the haystack connection to update.
    """
    from haystack.exceptions import NotHandled

    connection = haystack_connections[using]
    backend = connection.get_backend()
    unified_index = connection.get_unified_index()

    if self.workers > 0:
        import multiprocessing

    for model in haystack_get_models(label):
        try:
            index = unified_index.get_index(model)
        except NotHandled:
            if self.verbosity >= 2:
                print("Skipping '%s' - no index." % model)
            continue

        if self.workers > 0:
            # workers resetting connections leads to references to models /
            # connections getting stale and having their connection
            # disconnected from under them. Resetting before the loop
            # continues and it accesses the ORM makes it better.
            close_old_connections()

        qs = index.build_queryset(
            using=using,
            start_date=self.start_date,
            end_date=self.end_date,
        )
        total = qs.count()

        if self.verbosity >= 1:
            plural_name = force_text(model._meta.verbose_name_plural)
            print(u"Indexing %d %s" % (total, plural_name))

        batch_size = self.batchsize or backend.batch_size

        if self.workers > 0:
            ghetto_queue = []

        for start in range(0, total, batch_size):
            end = min(start + batch_size, total)

            if self.workers == 0:
                do_update(backend, index, qs, start, end, total,
                          verbosity=self.verbosity, commit=self.commit)
            else:
                task = ('do_update', model, start, end, total, using,
                        self.start_date, self.end_date,
                        self.verbosity, self.commit)
                ghetto_queue.append(task)

        if self.workers > 0:
            pool = multiprocessing.Pool(self.workers)
            pool.map(worker, ghetto_queue)
            pool.close()
            pool.join()

        if self.remove:
            if self.start_date or self.end_date or total <= 0:
                # They're using a reduced set, which may not incorporate
                # all pks. Rebuild the list with everything.
                qs = index.index_queryset().values_list('pk', flat=True)
                database_pks = {smart_bytes(pk) for pk in qs}
                total = len(database_pks)
            else:
                database_pks = {
                    smart_bytes(pk) for pk in qs.values_list('pk', flat=True)
                }

            # Since records may still be in the search index but not the
            # local database we'll use that to create batches for processing.
            # See https://github.com/django-haystack/django-haystack/issues/1186
            index_total = SearchQuerySet(
                using=backend.connection_alias).models(model).count()

            # Retrieve PKs from the index. Note that this cannot be a numeric
            # range query because although pks are normally numeric they can
            # be non-numeric UUIDs or other custom values. To reduce load on
            # the search engine, we only retrieve the pk field, which will be
            # checked against the full list obtained from the database, and
            # the id field, which will be used to delete the record should it
            # be found to be stale.
            index_pks = SearchQuerySet(
                using=backend.connection_alias).models(model)
            index_pks = index_pks.values_list('pk', 'id')

            # We'll collect all of the record IDs which are no longer present
            # in the database and delete them after walking the entire index.
            # This uses more memory than the incremental approach but avoids
            # needing the pagination logic to account for both commit modes.
            stale_records = set()
            for start in range(0, index_total, batch_size):
                upper_bound = start + batch_size
                for pk, rec_id in index_pks[start:upper_bound]:
                    # Database pk gone: queue the index key for removal.
                    if smart_bytes(pk) not in database_pks:
                        stale_records.add(rec_id)

            if stale_records:
                if self.verbosity >= 1:
                    print(" removing %d stale records." % len(stale_records))

                for rec_id in stale_records:
                    # The PK was not in the database list, so delete the
                    # record from the search index.
                    if self.verbosity >= 2:
                        print(" removing %s." % rec_id)
                    backend.remove(rec_id, commit=self.commit)
def get_model(app_and_model):
    """Return the first model resolved by ``haystack_get_models``.

    Args:
        app_and_model: label passed unchanged to ``haystack_get_models``.

    Returns:
        The first item that ``haystack_get_models`` yields for the label.

    Raises:
        IndexError: if nothing was resolved for the label.
    """
    # Materialise first: the result may be a generator rather than a list,
    # and a generator cannot be subscripted.
    models = list(haystack_get_models(app_and_model))
    return models[0]