def _get_models(self, args):
    """
    Get Models from registry that match the --models args.
    """
    if args:
        models = []
        for arg in args:
            arg = arg.lower()
            match_found = False
            for model in registry.get_models():
                if model._meta.app_label == arg:
                    models.append(model)
                    match_found = True
                elif model._meta.model_name.lower() == arg:
                    models.append(model)
                    match_found = True
                elif '{}.{}'.format(
                        model._meta.app_label.lower(),
                        model._meta.model_name.lower()) == arg:
                    models.append(model)
                    match_found = True
            if not match_found:
                raise CommandError("No model or app named {}".format(arg))
    else:
        models = registry.get_models()
    return set(models)
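Each argument can name an app label, a model name, or the dotted "app_label.model_name" form. A minimal standalone sketch of that matching rule, using hypothetical names ("mainapp", "person") that are not taken from the snippet above:

# Hypothetical illustration of the three accepted spellings; "mainapp"
# and "person" are assumed example names.
app_label, model_name = "mainapp", "person"
for arg in ("MainApp", "Person", "mainapp.person"):
    arg = arg.lower()
    assert (
        app_label == arg
        or model_name == arg
        or "{}.{}".format(app_label, model_name) == arg
    )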
def handle(self, *args, **options):
    self.stdout.write("# Running migrations")
    call_command("migrate")

    self.stdout.write("# Site settings")
    site = Site.objects.get_current()
    site.name = settings.SITE_NAME
    site.domain = settings.REAL_HOST
    site.save()
    Site.objects.clear_cache()

    if settings.ELASTICSEARCH_ENABLED:
        self.stdout.write("# Creating elasticsearch indices")
        # The logic comes from django_elasticsearch_dsl.management.commands.search_index:_create
        for index in registry.get_indices(registry.get_models()):
            # noinspection PyProtectedMember
            self.stdout.write(
                f"Creating elasticsearch index '{index._name}' if not exists"
            )
            # https://elasticsearch-py.readthedocs.io/en/master/api.html:
            # "ignore 400 cause by IndexAlreadyExistsException when creating an index"
            # See also https://github.com/elastic/elasticsearch/issues/19862
            index.create(ignore=400)
    else:
        self.stdout.write("# Elasticsearch is disabled; not creating any indices")

    # This is more brittle, so we run it last
    self.stdout.write("# Creating minio buckets")
    setup_minio()

    logger.info("Setup successful")
def update_index():
    models = registry.get_models()
    for doc in registry.get_documents(models):
        qs = doc().get_queryset()
        log.info('indexing {} "{}" objects'.format(
            qs.count(), doc._doc_type.model.__name__))
        doc().update(qs)
def test_index_deletion():
    """Check that deleted persons get deleted from the elasticsearch index"""
    for index in registry.get_indices(registry.get_models()):
        index.delete(ignore=404)
    for index in registry.get_indices(registry.get_models()):
        index.create()

    old_persons = [
        Person(name="Frank Underwood", party="Democrats"),
        Person(name="Claire Underwood", party="Democrats"),
    ]
    new_persons = [Person(name="Claire Underwood", party="Democrats")]
    old = RisData(sample_city, None, old_persons, [], [], [], [], [], [], 2)
    new = RisData(sample_city, None, new_persons, [], [], [], [], [], [], 2)
    body = Body(name=old.meta.name, short_name=old.meta.name, ags=old.meta.ags)
    body.save()

    import_data(body, old)
    assert len(MainappSearch({"query": "Underwood"}).execute().hits) == 2

    import_data(body, new)
    assert len(MainappSearch({"query": "Underwood"}).execute().hits) == 1
def handle(self, *args, **options):
    """This command is based on the 'populate' command of Django ES DSL:
    https://github.com/sabricot/django-elasticsearch-dsl/blob/f6b2e0694e4ed69826c824196ccec5863874c856/django_elasticsearch_dsl/management/commands/search_index.py#L86

    We have updated it to do incremental updates rather than looping over
    the full queryset every time.
    """
    models = set(registry.get_models())
    for doc in registry.get_documents(models):
        start_time = timezone.now() - UPDATE_WINDOW
        qs = doc().get_queryset().filter(last_modified__gt=start_time).order_by("id")
        self.stdout.write(
            "Indexing {} '{}' objects".format(qs.count(), qs.model.__name__)
        )
        doc().update(qs)
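UPDATE_WINDOW is not defined in this snippet. For the incremental approach to be safe, the window has to be at least as long as the interval at which the command is scheduled, so that consecutive runs overlap. A plausible definition, with the concrete numbers being assumptions:

from datetime import timedelta

# Assumed values: if the command runs every 15 minutes, a slightly
# larger window guarantees overlap between consecutive runs, so no
# last_modified timestamp falls through the gap.
UPDATE_WINDOW = timedelta(minutes=16)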
def handle(self, *args, **options):
    self.stdout.write("Running migrations")
    call_command("migrate")

    self.stdout.write("Creating minio buckets")
    setup_minio()

    if settings.ELASTICSEARCH_ENABLED:
        self.stdout.write("Creating elasticsearch indices")
        # The logic comes from django_elasticsearch_dsl.management.commands.search_index:_create
        for index in registry.get_indices(registry.get_models()):
            self.stdout.write(
                "Creating elasticsearch index '{}' if not exists".format(index._name)
            )
            # https://elasticsearch-py.readthedocs.io/en/master/api.html:
            # "ignore 400 cause by IndexAlreadyExistsException when creating an index"
            # See also https://github.com/elastic/elasticsearch/issues/19862
            index.create(ignore=400)
    else:
        self.stdout.write("Elasticsearch is disabled; not creating any indices")
def incremental_import(
    current_model: Type[django.db.models.Model],
    json_objects: Iterable[Dict[str, Any]],
    soft_delete: bool = True,
):
    """Compares the objects in the database with the JSON data for a given model
    and creates, updates and (soft-)deletes the appropriate records."""
    # json_objects is iterated twice below, so materialize it first in case
    # a generator was passed in
    json_objects = list(json_objects)

    json_map = dict()
    for json_dict in json_objects:
        key = tuple(json_dict[j] for j in unique_field_dict[current_model])
        json_map[key] = json_dict

    # Remove manually deleted files
    if current_model == models.File:
        # noinspection PyUnresolvedReferences
        manually_deleted = current_model.objects_with_deleted.filter(
            manually_deleted=True
        ).values_list("oparl_id", flat=True)
        for i in manually_deleted:
            if (i,) in json_map:
                del json_map[(i,)]

    # Handle undeleted objects, e.g. papers that disappeared and reappeared
    if issubclass(current_model, DefaultFields):
        deleted = current_model.objects_with_deleted.filter(
            deleted=True, oparl_id__isnull=False
        ).values_list("oparl_id", flat=True)
        oparl_ids = [i.get("oparl_id") for i in json_objects]
        to_undelete = set(deleted) & set(oparl_ids)
        if to_undelete:
            logger.info(f"{current_model.__name__}: Undeleting {len(to_undelete)}")
            current_model.objects_with_deleted.filter(
                oparl_id__in=to_undelete
            ).update(deleted=False)

    db_ids, db_map = get_from_db(current_model)

    common = set(json_map.keys()) & set(db_map.keys())
    to_be_created = set(json_map.keys()) - common
    to_be_deleted = set(db_map.keys()) - common
    to_be_updated = []
    for existing in common:
        if json_map[existing] != db_map[existing]:
            to_be_updated.append((json_map[existing], db_ids[existing]))

    # We need to delete first and then create to avoid conflicts, e.g. when the
    # start of a meeting with an oparl_id changed
    deletion_ids = [db_ids[i1] for i1 in to_be_deleted]

    logger.info(
        f"{current_model.__name__}: "
        f"Deleting {len(to_be_deleted)}, "
        f"Creating {len(to_be_created)} and "
        f"Updating {len(to_be_updated)}"
    )

    # Since we don't get the bulk created object ids back from django (yet?),
    # we just do this by timestamp - indexing more than necessary isn't wrong anyway
    before_bulk_create = timezone.now()

    if soft_delete:
        deleted_rows = current_model.objects.filter(id__in=deletion_ids).update(
            deleted=True, modified=timezone.now()
        )
    else:
        current_model.objects.filter(id__in=deletion_ids).delete()
        deleted_rows = 0
        # TODO: Delete files

    to_be_created = [current_model(**json_map[i1]) for i1 in to_be_created]
    current_model.objects.bulk_create(to_be_created, batch_size=100)

    # Bulk create doesn't update the search index, so we do this manually
    if settings.ELASTICSEARCH_ENABLED and current_model in registry.get_models():
        # Changed/Created
        qs = current_model.objects.filter(modified__gte=before_bulk_create)
        qs_count = qs.count()
        assert qs_count >= len(to_be_created), (
            f"Only {qs_count} {current_model.__name__} were found for indexing, "
            f"while at least {len(to_be_created)} were expected"
        )
        logger.info(f"Indexing {qs_count} new {current_model.__name__} objects")
        search_bulk_index(current_model, qs)

        # Deleted
        qs = current_model.objects_with_deleted.filter(
            deleted=True, modified__gte=before_bulk_create
        )
        qs_count = qs.count()
        assert qs_count >= deleted_rows, (
            f"Only {qs_count} {current_model.__name__} were found for deletion, "
            f"while at least {deleted_rows} were expected"
        )
        logger.info(f"Deleting {qs_count} {current_model.__name__} from elasticsearch")
        search_bulk_index(current_model, qs, action="delete")

    with transaction.atomic():
        for json_object, pk in tqdm(
            to_be_updated,
            disable=not to_be_updated,
            desc=f"Update or create for {current_model.__name__}",
        ):
            current_model.objects_with_deleted.update_or_create(
                pk=pk, defaults=json_object
            )
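get_from_db is not shown in this snippet. For the set arithmetic above to work, it must return two dicts keyed by the same unique-field tuples used for json_map: db_ids mapping key to primary key, and db_map mapping key to a field dict comparable with the JSON objects. A minimal sketch of that shape, not the source's implementation; the .values() call and the id filtering are assumptions:

# Sketch only: assumes the comparable fields are exactly the model's
# concrete columns minus the primary key, and reuses unique_field_dict
# from the snippet above.
def get_from_db(current_model):
    db_ids, db_map = {}, {}
    for row in current_model.objects.values():
        key = tuple(row[field] for field in unique_field_dict[current_model])
        db_ids[key] = row["id"]
        db_map[key] = {k: v for k, v in row.items() if k != "id"}
    return db_ids, db_map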
def _get_models(self):
    models = registry.get_models()
    return set(models)
def handle_m2m_changed(self, sender, instance, action, **kwargs):
    if settings.ES_SYNC and instance.__class__ in registry.get_models():
        handle_m2m_changed.delay(
            instance.app_name, instance.model_name, instance.id, action
        )
def handle_save(self, sender, instance, **kwargs):
    if settings.ES_SYNC and instance.__class__ in registry.get_models():
        handle_save.delay(instance.app_name, instance.model_name, instance.id)
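Both handlers read like methods of a signal processor that forwards model changes to Celery tasks (the .delay calls). A hedged sketch of how such a processor would typically be connected to Django's model signals; the CelerySignalProcessor class holding the two handlers above is hypothetical, while post_save.connect and m2m_changed.connect are standard Django API:

from django.db.models.signals import m2m_changed, post_save

# Hypothetical wiring; Django passes sender, instance, action etc. as
# keyword arguments, matching the handler signatures above.
processor = CelerySignalProcessor()
post_save.connect(processor.handle_save)
m2m_changed.connect(processor.handle_m2m_changed)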
def setup_method(self):
    models = registry.get_models()
    for index in registry.get_indices(models):
        index.delete(ignore=404)
        index.create()