Example #1
    def _get_models(self, args):
        """
        Get Models from registry that match the --models args.
        """
        if args:
            models = []
            for arg in args:
                arg = arg.lower()
                match_found = False

                for model in registry.get_models():
                    if model._meta.app_label.lower() == arg:
                        models.append(model)
                        match_found = True
                    elif model._meta.model_name.lower() == arg:
                        models.append(model)
                        match_found = True
                    elif '{}.{}'.format(model._meta.app_label.lower(),
                                        model._meta.model_name.lower()) == arg:
                        models.append(model)
                        match_found = True

                if not match_found:
                    raise CommandError("No model or app named {}".format(arg))
        else:
            models = registry.get_models()

        return set(models)
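The method above resolves each --models argument against an app label, a model name, or an app_label.model_name pair. A minimal sketch of how it might be wired into a Django management command, reusing the _get_models method from this example (the command and option names here are hypothetical):

from django.core.management.base import BaseCommand, CommandError
from django_elasticsearch_dsl.registries import registry  # used by _get_models

class Command(BaseCommand):
    def add_arguments(self, parser):
        # Hypothetical option matching the docstring above
        parser.add_argument("--models", nargs="*")

    def handle(self, *args, **options):
        # _get_models is the method shown in this example
        models = self._get_models(options["models"])
        self.stdout.write("Matched {} model(s)".format(len(models)))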
Example #2
    def _get_models(self, args):
        """
        Get Models from registry that match the --models args.
        """
        if args:
            models = []
            for arg in args:
                arg = arg.lower()
                match_found = False

                for model in registry.get_models():
                    if model._meta.app_label.lower() == arg:
                        models.append(model)
                        match_found = True
                    elif model._meta.model_name.lower() == arg:
                        models.append(model)
                        match_found = True
                    elif '{}.{}'.format(model._meta.app_label.lower(), model._meta.model_name.lower()) == arg:
                        models.append(model)
                        match_found = True

                if not match_found:
                    raise CommandError("No model or app named {}".format(arg))
        else:
            models = registry.get_models()

        return set(models)
Example #3
    def handle(self, *args, **options):
        self.stdout.write("# Running migrations")
        call_command("migrate")

        self.stdout.write("# Site settings")
        site = Site.objects.get_current()
        site.name = settings.SITE_NAME
        site.domain = settings.REAL_HOST
        site.save()
        Site.objects.clear_cache()

        if settings.ELASTICSEARCH_ENABLED:
            self.stdout.write("# Creating elasticsearch indices")
            # The logic comes from django_elasticsearch_dsl.management.commands.search_index:_create
            for index in registry.get_indices(registry.get_models()):
                # noinspection PyProtectedMember
                self.stdout.write(
                    f"Creating elasticsearch index '{index._name}' if not exists"
                )
                # https://elasticsearch-py.readthedocs.io/en/master/api.html:
                # "ignore 400 cause by IndexAlreadyExistsException when creating an index"
                # See also https://github.com/elastic/elasticsearch/issues/19862
                index.create(ignore=400)
        else:
            self.stdout.write(
                "# Elasticsearch is disabled; not creating any indices")

        # This is more brittle, so we run it last
        self.stdout.write("# Creating minio buckets")
        setup_minio()
        logger.info("Setup successful")
Example #4
def update_index():
    """Reindex every registered document over its full queryset."""
    models = registry.get_models()
    for doc in registry.get_documents(models):
        qs = doc().get_queryset()
        log.info('indexing {} "{}" objects'.format(
            qs.count(), doc._doc_type.model.__name__))
        doc().update(qs)
Example #5
def update_index():
    """Reindex every registered document over its full queryset."""
    models = registry.get_models()
    for doc in registry.get_documents(models):
        qs = doc().get_queryset()
        log.info('indexing {} "{}" objects'.format(
            qs.count(), doc._doc_type.model.__name__)
        )
        doc().update(qs)
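Both update_index variants iterate over documents that were registered with the registry beforehand. A minimal sketch of such a registration, assuming django-elasticsearch-dsl; the app, model, and field names are hypothetical:

from django_elasticsearch_dsl import Document
from django_elasticsearch_dsl.registries import registry

from myapp.models import Article  # hypothetical app and model

@registry.register_document
class ArticleDocument(Document):
    class Index:
        # Name of the Elasticsearch index the documents are stored in
        name = "articles"

    class Django:
        model = Article  # the model this document mirrors
        fields = ["title", "body"]  # hypothetical model fields

Once registered, registry.get_documents(registry.get_models()) yields ArticleDocument, which is what update_index loops over.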
Example #6
def test_index_deletion():
    """Check that deleted persons get deleted from the elasticsearch index"""
    for index in registry.get_indices(registry.get_models()):
        index.delete(ignore=404)
    for index in registry.get_indices(registry.get_models()):
        index.create()

    old_persons = [
        Person(name="Frank Underwood", party="Democrats"),
        Person(name="Claire Underwood", party="Democrats"),
    ]
    new_persons = [Person(name="Claire Underwood", party="Democrats")]

    old = RisData(sample_city, None, old_persons, [], [], [], [], [], [], 2)
    new = RisData(sample_city, None, new_persons, [], [], [], [], [], [], 2)
    body = Body(name=old.meta.name, short_name=old.meta.name, ags=old.meta.ags)
    body.save()

    import_data(body, old)
    assert len(MainappSearch({"query": "Underwood"}).execute().hits) == 2
    import_data(body, new)
    assert len(MainappSearch({"query": "Underwood"}).execute().hits) == 1
Example #7
    def handle(self, *args, **options):
        """This command is based on the 'populate' command of Django ES DSL:

        https://github.com/sabricot/django-elasticsearch-dsl/blob/f6b2e0694e4ed69826c824196ccec5863874c856/django_elasticsearch_dsl/management/commands/search_index.py#L86

        We have updated it so that it will do incremental updates
        rather than looping over the full queryset every time.
        """
        models = set(registry.get_models())

        for doc in registry.get_documents(models):
            start_time = timezone.now() - UPDATE_WINDOW
            qs = doc().get_queryset().filter(last_modified__gt=start_time).order_by("id")
            self.stdout.write("Indexing {} '{}' objects".format(qs.count(), qs.model.__name__))
            doc().update(qs)
Example #8
    def handle(self, *args, **options):
        self.stdout.write("Running migrations")
        call_command("migrate")
        self.stdout.write("Creating minio buckets")
        setup_minio()
        if settings.ELASTICSEARCH_ENABLED:
            self.stdout.write("Creating elasticsearch indices")
            # The logic comes from django_elasticsearch_dsl.management.commands.search_index:_create
            for index in registry.get_indices(registry.get_models()):
                self.stdout.write(
                    "Creating elasticsearch index '{}' if not exists".format(
                        index._name
                    )
                )
                # https://elasticsearch-py.readthedocs.io/en/master/api.html:
                # "ignore 400 cause by IndexAlreadyExistsException when creating an index"
                # See also https://github.com/elastic/elasticsearch/issues/19862
                index.create(ignore=400)
        else:
            self.stdout.write("Elasticsearch is disabled; not creating any indices")
Example #9
def incremental_import(
    current_model: Type[django.db.models.Model],
    json_objects: Iterable[Dict[str, Any]],
    soft_delete: bool = True,
):
    """Compared the objects in the database with the json data for a given objects and
    creates, updates and (soft-)deletes the appropriate records."""

    # Materialize json_objects; it is iterated more than once below
    json_objects = list(json_objects)

    json_map = dict()
    for json_dict in json_objects:
        key = tuple(json_dict[j] for j in unique_field_dict[current_model])
        json_map[key] = json_dict

    # Remove manually deleted files
    if current_model == models.File:
        # noinspection PyUnresolvedReferences
        manually_deleted = current_model.objects_with_deleted.filter(
            manually_deleted=True).values_list("oparl_id", flat=True)
        for i in manually_deleted:
            if (i, ) in json_map:
                del json_map[(i, )]

    # Handle undeleted objects, e.g. papers that disappeared and reappeared
    if issubclass(current_model, DefaultFields):
        deleted = current_model.objects_with_deleted.filter(
            deleted=True, oparl_id__isnull=False).values_list("oparl_id",
                                                              flat=True)
        oparl_ids = [i.get("oparl_id") for i in json_objects]
        to_undelete = set(deleted) & set(oparl_ids)
        if to_undelete:
            logger.info(
                f"{current_model.__name__}: Undeleting {len(to_undelete)}")
            current_model.objects_with_deleted.filter(
                oparl_id__in=to_undelete).update(deleted=False)

    db_ids, db_map = get_from_db(current_model)

    common = set(json_map.keys()) & set(db_map.keys())
    to_be_created = set(json_map.keys()) - common
    to_be_deleted = set(db_map.keys()) - common
    to_be_updated = []
    for existing in common:
        if json_map[existing] != db_map[existing]:
            to_be_updated.append((json_map[existing], db_ids[existing]))

    # We need to delete first and then create to avoid conflicts e.g. when the start of a meeting with an oparl_id
    # changed
    deletion_ids = [db_ids[i1] for i1 in to_be_deleted]
    logger.info(f"{current_model.__name__}: "
                f"Deleting {len(to_be_deleted)}, "
                f"Creating {len(to_be_created)} and "
                f"Updating {len(to_be_updated)}")
    # Since we don't get the bulk-created object ids back from Django (yet?),
    # we just do this by timestamp - indexing more than necessary isn't wrong anyway
    before_bulk_create = timezone.now()

    if soft_delete:
        deleted_rows = current_model.objects.filter(
            id__in=deletion_ids).update(deleted=True, modified=timezone.now())
    else:
        current_model.objects.filter(id__in=deletion_ids).delete()
        deleted_rows = 0
    # TODO: Delete files

    to_be_created = [current_model(**json_map[i1]) for i1 in to_be_created]
    current_model.objects.bulk_create(to_be_created, batch_size=100)

    # Bulk create doesn't update the search index, so we do this manually
    if settings.ELASTICSEARCH_ENABLED and current_model in registry.get_models():
        # Changed/Created
        qs = current_model.objects.filter(modified__gte=before_bulk_create)
        qs_count = qs.count()
        assert qs_count >= len(
            to_be_created
        ), f"Only {qs_count} {current_model.__name__} were found for indexing, while at least {len(to_be_created)} were expected"
        logger.info(
            f"Indexing {qs_count} new {current_model.__name__} objects")
        search_bulk_index(current_model, qs)
        # Deleted
        qs = current_model.objects_with_deleted.filter(
            deleted=True, modified__gte=before_bulk_create)
        qs_count = qs.count()
        assert (
            qs_count >= deleted_rows
        ), f"Only {qs_count} {current_model.__name__} were found for deletion, while at least {deleted_rows} were expected"
        logger.info(
            f"Deleting {qs_count} {current_model.__name__} from elasticsearch")
        search_bulk_index(current_model, qs, action="delete")

    with transaction.atomic():
        for json_object, pk in tqdm(
                to_be_updated,
                disable=not to_be_updated,
                desc=f"Update or create for {current_model.__name__}",
        ):
            current_model.objects_with_deleted.update_or_create(
                pk=pk, defaults=json_object)
Example #10
    def _get_models(self):
        models = registry.get_models()
        return set(models)
Example #11
    def handle_m2m_changed(self, sender, instance, action, **kwargs):
        if settings.ES_SYNC and instance.__class__ in registry.get_models():
            handle_m2m_changed.delay(instance.app_name, instance.model_name, instance.id, action)
Example #12
    def handle_save(self, sender, instance, **kwargs):
        if settings.ES_SYNC and instance.__class__ in registry.get_models():
            handle_save.delay(instance.app_name, instance.model_name, instance.id)
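Examples #11 and #12 only enqueue the work; the actual index update happens in a Celery task. A minimal sketch of what the receiving task could look like, assuming Celery and django-elasticsearch-dsl (the task body is an assumption; only the signature is taken from the .delay() calls above):

from celery import shared_task
from django.apps import apps
from django_elasticsearch_dsl.registries import registry

@shared_task
def handle_save(app_name, model_name, pk):
    # Re-fetch the instance and let the registry update the matching
    # Elasticsearch document(s)
    model = apps.get_model(app_name, model_name)
    instance = model.objects.get(pk=pk)
    registry.update(instance)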
Example #13
    def setup_method(self):
        models = registry.get_models()
        for index in registry.get_indices(models):
            index.delete(ignore=404)
            index.create()
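The same reset pattern works as a pytest fixture; a minimal sketch, assuming django-elasticsearch-dsl and a configured test Elasticsearch (the fixture name is hypothetical):

import pytest

from django_elasticsearch_dsl.registries import registry

@pytest.fixture
def clean_es_indices():
    # Drop and recreate every registered index so each test starts
    # from an empty search index
    for index in registry.get_indices(registry.get_models()):
        index.delete(ignore=404)
        index.create()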