def haystack_batch_update(app_label, model_name, pks=None, start=None,
                          end=None, date_lookup=None, batch_size=100,
                          remove=True):
    """
    Batches haystack index updates for the given model. If no pks are given, a
    general reindex will be launched.

    The primary keys to process are read from the index's
    ``index_queryset()``, optionally narrowed to ``pks`` and to the
    ``start``/``end`` bounds applied on ``date_lookup``. When ``date_lookup``
    is not given, the index's ``get_updated_field()`` is used; if that is
    ``None`` as well, no date filtering happens.

    One ``haystack_update`` task is queued per slice of at most
    ``batch_size`` primary keys, and ``remove`` is passed through to it.

    """
    model_class = get_model(app_label, model_name)
    using = connection_router.for_write()[0]
    index = connections[using].get_unified_index().get_index(model_class)

    pk_qs = index.index_queryset()
    if pks is not None:
        pk_qs = pk_qs.filter(pk__in=pks)

    if date_lookup is None:
        date_lookup = index.get_updated_field()
    if date_lookup is not None:
        if start is not None:
            pk_qs = pk_qs.filter(**{"%s__gte" % date_lookup: start})
        if end is not None:
            pk_qs = pk_qs.filter(**{"%s__lte" % date_lookup: end})

    pks = list(pk_qs.distinct().values_list('pk', flat=True))

    # ``range`` exists on both Python 2 and 3 (``xrange`` does not), and a
    # dedicated offset name keeps the ``start``/``end`` date arguments intact.
    # Slicing past the end of the list is clamped, so the last batch is
    # simply shorter.
    for offset in range(0, len(pks), batch_size):
        haystack_update.delay(app_label, model_name,
                              pks[offset:offset + batch_size],
                              remove=remove)
def worker(bits):
    """
    Run one ``do_update`` or ``do_remove`` job described by ``bits``.

    ``bits`` is a sequence whose first item names the job and whose remaining
    items are that job's arguments. Any other job name makes the function
    return without doing index work.
    """
    # Drop the database connections first; otherwise the different
    # processes will try to share them, which causes things to blow up.
    from django.db import connections

    for alias, info in connections.databases.items():
        # Tread lightly with SQLite: blindly wiping out its connection
        # destroys in-memory DBs, so those aliases are skipped.
        if 'sqlite3' in info['ENGINE']:
            continue
        try:
            db.close_connection()
            del connections._connections[alias]
        except KeyError:
            pass

    job = bits[0]
    if job == 'do_update':
        (func, model, start, end, total, using,
         start_date, end_date, verbosity) = bits
    elif job == 'do_remove':
        func, model, pks_seen, start, upper_bound, using, verbosity = bits
    else:
        return

    # An explicit ``using`` wins; otherwise ask the router.
    backend_alias = using or connection_router.for_write(models=[model])
    unified_index = haystack_connections[backend_alias].get_unified_index()
    backend = haystack_connections[backend_alias].get_backend()
    index = unified_index.get_index(model)

    if func == 'do_update':
        queryset = index.build_queryset(start_date=start_date,
                                        end_date=end_date)
        do_update(backend, index, queryset, start, end, total,
                  verbosity=verbosity)
    elif job == 'do_remove':
        do_remove(backend, index, model, pks_seen, start, upper_bound,
                  verbosity=verbosity)
Exemple #3
0
def handle_index_update(action=None, callback_params=None):
    """This one does the actual work of re-indexing the instance.

    :param action: ``ADD`` or ``DELETE``. ``None`` or an empty string is
        logged and ignored.
    :param callback_params: mapping with the keys ``"content_type_id"`` and
        ``"pk"`` identifying the instance. Omitting it behaves like passing
        an empty mapping, so the key lookup raises ``KeyError``.

    For ``ADD`` the instance is removed from the index when its ``state`` is
    ``"unpublished"`` and updated otherwise; for ``DELETE`` it is removed.
    After handling each backend, the matching ``IndexedItem`` row is deleted
    if it exists.
    """
    if action is None or action == "":
        logger.warning("Indexing action cannot be None or empty!")
        return

    # Use a fresh dict per call instead of a mutable default shared between
    # calls.
    if callback_params is None:
        callback_params = {}

    ct_id = callback_params["content_type_id"]
    model = ContentType.objects.get(id=ct_id).model_class()
    instance = model.objects.get(pk=callback_params["pk"])

    using_backends = connection_router.for_write(instance=instance)
    for using in using_backends:
        try:
            unified_index = connections[using].get_unified_index()
            index = unified_index.get_index(model)
            if action == ADD:
                if instance.state == "unpublished":
                    index.remove_object(instance, using=using)
                else:
                    index.update_object(instance, using=using)
            elif action == DELETE:
                index.remove_object(instance, using=using)

            # clean up the index item table now.
            try:
                item = IndexedItem.objects.get(content_type_pk=ct_id,
                                               instance_pk=instance.pk)
                item.delete()
            except ObjectDoesNotExist:
                logger.warning("IndexedItem not found... continuing.")
        except NotHandled:
            logger.warning("No indexing backend found for %s", instance)
Exemple #4
0
def update_index(app_label, pk, is_save):
    """
    Update the search index entry for a single model instance.

    NOTE(review): the original note on this function says transactions are
    kept serial by maintaining a lock in redis. No locking is visible in this
    function; presumably it lives in ``do_update_index`` or the caller.
    Confirm.

    :param app_label: dotted ``"<app>.<model>"`` name of the model
    :param pk: primary key of the instance
    :param is_save: whether this is a save. True: save (insert/update),
        False: delete
    :return: None
    """
    app_name, model_name = app_label.split('.')
    model = apps.get_app_config(app_name).get_model(model_name)

    instance = model.objects.filter(pk=pk).first()
    if instance is None:
        # Nothing with that primary key; nothing to do.
        return

    for alias in connection_router.for_write(instance=instance):
        try:
            unified_index = haystack_connections[alias].get_unified_index()
            index = unified_index.get_index(instance._meta.model)
            do_update_index(instance, index, alias, is_save)
        except NotHandled:
            # TODO: Maybe log it or let the exception bubble?
            continue
Exemple #5
0
    def _get_backend(self, using):
        """Return the backend of connection ``using``.

        When ``using`` is ``None`` the write router is consulted, with this
        index and its model passed as hints, and its result is used as the
        connection key.
        """
        if using is not None:
            return connections[using].get_backend()

        routing_hints = dict(index=self, models=[self.get_model()])
        routed = connection_router.for_write(**routing_hints)
        return connections[routed].get_backend()
Exemple #6
0
def indexes_for_object(instance):
    """Yield ``(index, alias)`` for each write alias routed for ``instance``.

    Aliases for which looking up the index of ``type(instance)`` raises
    ``NotHandled`` are skipped.
    """
    aliases = connection_router.for_write(instance=instance)
    model = type(instance)
    for alias in aliases:
        try:
            unified_index = connections[alias].get_unified_index()
            yield unified_index.get_index(model), alias
        except NotHandled:
            pass
Exemple #7
0
    def _get_backend(self, using):
        """Return the backend of connection ``using``.

        When ``using`` is ``None`` the first alias the write router returns
        for this index is used. Returns ``None`` if the router yields no
        alias.
        """
        if using is not None:
            return connections[using].get_backend()

        try:
            alias = connection_router.for_write(index=self)[0]
        except IndexError:
            # There's no backend to handle it. Bomb out.
            return None
        return connections[alias].get_backend()
Exemple #8
0
    def get_backend(self, using=None):
        """Return the backend of connection ``using``.

        Without ``using``, the first alias the write router returns for this
        index is chosen; if the router returns nothing, the result is
        ``None``.
        """
        alias = using
        if alias is None:
            try:
                alias = connection_router.for_write(index=self)[0]
            except IndexError:
                # No backend is able to handle this index.
                return None

        return connections[alias].get_backend()
Exemple #9
0
 def get_indexes(self, model_class, **kwargs):
     """
     Yield ``(index, alias)`` pairs for the model's registered
     ``SearchIndex`` in a standardized way.

     Raises ``ImproperlyConfigured`` when an ``IndexNotFoundException``
     occurs while iterating.
     """
     try:
         for alias in connection_router.for_write(models=[model_class]):
             unified_index = connections[alias].get_unified_index()
             yield unified_index.get_index(model_class), alias
     except IndexNotFoundException:
         raise ImproperlyConfigured(
             "Couldn't find a SearchIndex for %s." % model_class)
Exemple #10
0
 def get_indexes(model_class):
     """
     Yield ``(index, alias)`` pairs for the model's registered
     ``SearchIndex`` in a standardized way.

     An ``IndexNotFoundException`` raised along the way is turned into
     ``ImproperlyConfigured``.
     """
     try:
         for alias in connection_router.for_write(models=[model_class]):
             holder = connections[alias].get_unified_index()
             yield holder.get_index(model_class), alias
     except IndexNotFoundException:
         message = "Couldn't find a SearchIndex for %s." % model_class
         raise ImproperlyConfigured(message)
def haystack_remove(app_label, model_name, pks):
    """
    Removes the haystack records for any instances with the given pks.

    Each record is addressed as ``"<app_label>.<model_name>.<pk>"`` on the
    backend of the first alias returned by the write router. The removal is
    run through ``_haystack_database_retry``.

    """
    alias = connection_router.for_write()[0]
    backend = connections[alias].get_backend()

    def remove_all():
        for pk in pks:
            identifier = ".".join([app_label, model_name, str(pk)])
            backend.remove(identifier)

    _haystack_database_retry(haystack_remove, remove_all)
Exemple #12
0
def update_lr_index_entry(res_obj):
    """
    Updates/creates the search index entry for the given language resource
    object.

    The appropriate search index is automatically chosen: the write router
    is asked for its alias(es), and if it returns a list or tuple the first
    entry is used.
    """
    router_name = haystack_connection_router.for_write()
    # Only unwrap real sequences of aliases. Testing for ``__iter__`` is not
    # enough: on Python 3 a plain string alias has ``__iter__`` too, and
    # indexing it would reduce e.g. "default" to "d".
    if isinstance(router_name, (list, tuple)):
        router_name = router_name[0]
    unified_index = haystack_connections[router_name].get_unified_index()
    unified_index.get_index(resourceInfoType_model).update_object(res_obj)
Exemple #13
0
 def get_index(self, model_class, **kwargs):
     """
     Return the model's registered ``SearchIndex`` in a standardized way.

     With ``legacy`` set the lookup goes through ``site``; otherwise it goes
     through the unified index of the connection chosen by the write router.
     Raises ``ImproperlyConfigured`` on ``IndexNotFoundException``.
     """
     try:
         if not legacy:
             alias = connection_router.for_write(models=[model_class])
             holder = connections[alias].get_unified_index()  # noqa
         else:
             holder = site
         return holder.get_index(model_class)
     except IndexNotFoundException:
         raise ImproperlyConfigured(
             "Couldn't find a SearchIndex for %s." % model_class)
Exemple #14
0
def process_action(action, instance, model):
    """Hand ``action`` for ``instance`` to the update task, at most once.

    Walks the write aliases routed for ``instance`` and stops at the first
    one whose index for ``model`` is a ``CelerySearchIndex`` and, for an
    ``'update'`` action, whose ``should_update`` accepts the instance.
    """
    # Taken from celery_haystack.signals.CelerySignalProcessor.enqueue
    for alias in connection_router.for_write(instance=instance):
        try:
            unified_index = connections[alias].get_unified_index()
            index = unified_index.get_index(model)
        except NotHandled:
            continue

        if not isinstance(index, CelerySearchIndex):
            continue
        if action == 'update' and not index.should_update(instance):
            continue

        identifier = get_identifier(instance)
        get_update_task()()(action, identifier)
        break
Exemple #15
0
 def get_index(self, model_class, **kwargs):
     """
     Return the model's registered ``SearchIndex`` in a standardized way.

     ``site`` is consulted when ``legacy`` is set; otherwise the unified
     index of the connection picked by the write router is used. An
     ``IndexNotFoundException`` is re-raised as ``ImproperlyConfigured``.
     """
     try:
         if legacy:
             source = site
         else:
             routed = connection_router.for_write(models=[model_class])
             source = connections[routed].get_unified_index()  # noqa
         return source.get_index(model_class)
     except IndexNotFoundException:
         message = "Couldn't find a SearchIndex for %s." % model_class
         raise ImproperlyConfigured(message)
Exemple #16
0
def repair_solr(short_id):
    """ Repair SOLR index content for a resource

    Looks up the ``BaseResource`` with the given ``short_id``. If none
    exists, a message is printed and nothing else happens. Otherwise, for
    every routed write backend, the resource is re-indexed when it is public
    or discoverable and removed from the index when it is not.
    """

    logger = logging.getLogger(__name__)
    try:
        res = BaseResource.objects.get(short_id=short_id)
    except BaseResource.DoesNotExist:
        print("{} does not exist".format(short_id))
        # Without a resource there is nothing to repair; carrying on would
        # only fail on the unbound ``res`` below.
        return

    # instance with proper type
    instance = res.get_content_model()
    assert instance, (res, res.content_model)

    print("re-indexing {} in solr".format(short_id))

    # instance of BaseResource matching real instance
    baseinstance = BaseResource.objects.get(pk=instance.pk)
    basesender = BaseResource
    using_backends = connection_router.for_write(instance=baseinstance)
    for using in using_backends:
        # if object is public/discoverable or becoming public/discoverable, index it
        if instance.raccess.public or instance.raccess.discoverable:
            try:
                index = connections[using].get_unified_index().get_index(
                    basesender)
                index.update_object(baseinstance, using=using)
            except NotHandled:
                logger.exception(
                    "Failure: changes to %s with short_id %s not added to Solr Index.",
                    str(type(instance)), baseinstance.short_id)

        # if object is private or becoming private, delete from index
        else:
            try:
                index = connections[using].get_unified_index().get_index(
                    basesender)
                index.remove_object(baseinstance, using=using)
            except NotHandled:
                logger.exception(
                    "Failure: delete of %s with short_id %s failed.",
                    str(type(instance)), baseinstance.short_id)
Exemple #17
0
def update_instance_indexes(sender_type_id, object_type_id, object_id):
    """
    Given an individual model instance, update its entire indexes.

    The sender and the instance are resolved from their ``ContentType`` ids.
    For each routed write alias, the index looked up for the sender is
    updated; aliases that raise ``NotHandled`` are skipped.
    """
    sender = ContentType.objects.get_for_id(sender_type_id)
    target_type = ContentType.objects.get_for_id(object_type_id)
    instance = target_type.get_object_for_this_type(pk=object_id)

    try:
        aliases = connection_router.for_write(instance=instance)
    except IndexError:
        # No valid instance given, stop processing here
        return None

    for alias in aliases:
        try:
            unified_index = connections[alias].get_unified_index()
            unified_index.get_index(sender).update(using=alias)
        except NotHandled:
            # TODO: Maybe log it or let the exception bubble?
            pass
Exemple #18
0
def repair_solr(short_id):
    """ Repair SOLR index content for a resource

    Fetches the ``BaseResource`` identified by ``short_id``; when it does
    not exist a message is printed and the function returns. Otherwise the
    resource is re-indexed on every routed write backend if it is public or
    discoverable, and removed from the index if it is not.
    """

    logger = logging.getLogger(__name__)
    try:
        res = BaseResource.objects.get(short_id=short_id)
    except BaseResource.DoesNotExist:
        print("{} does not exist".format(short_id))
        # Stop here: there is no resource to work on, and continuing would
        # fail on the unbound ``res``.
        return

    # instance with proper type
    instance = res.get_content_model()
    assert instance, (res, res.content_model)

    print("re-indexing {} in solr".format(short_id))

    # instance of BaseResource matching real instance
    baseinstance = BaseResource.objects.get(pk=instance.pk)
    basesender = BaseResource
    using_backends = connection_router.for_write(instance=baseinstance)
    for using in using_backends:
        # if object is public/discoverable or becoming public/discoverable, index it
        if instance.raccess.public or instance.raccess.discoverable:
            try:
                index = connections[using].get_unified_index().get_index(basesender)
                index.update_object(baseinstance, using=using)
            except NotHandled:
                logger.exception(
                    "Failure: changes to %s with short_id %s not added to Solr Index.",
                    str(type(instance)), baseinstance.short_id)

        # if object is private or becoming private, delete from index
        else:
            try:
                index = connections[using].get_unified_index().get_index(basesender)
                index.remove_object(baseinstance, using=using)
            except NotHandled:
                logger.exception("Failure: delete of %s with short_id %s failed.",
                                 str(type(instance)), baseinstance.short_id)
def haystack_update(app_label, model_name, pks, remove=True):
    """
    Updates the haystack records for any valid instances with the given pks.

    Instances are "valid" when they appear in the index's
    ``index_queryset()``. With ``remove`` left at ``True``, any given pk not
    found there is handed to ``haystack_remove`` so it is taken out of the
    index; pass ``remove=False`` to skip that step and save some time when
    it isn't needed.

    """
    model_class = get_model(app_label, model_name)
    alias = connection_router.for_write()[0]
    backend = connections[alias].get_backend()
    index = connections[alias].get_unified_index().get_index(model_class)

    matching = index.index_queryset().filter(pk__in=pks)

    if matching:
        def push_update():
            return backend.update(index, matching)

        _haystack_database_retry(haystack_update, push_update)

    if remove:
        requested_pks = set(pks)
        indexed_pks = set(obj.pk for obj in matching)
        unseen_pks = requested_pks - indexed_pks
        haystack_remove.apply(args=(app_label, model_name, unseen_pks))
Exemple #20
0
def remove_objects_indexes(sender_type_id, object_type_id, object_id):
    """
    Remove the resolved instance, or for a ``Submission`` every public
    submission in its thread, from the index as preparation for the new
    index.

    The sender and the instance are resolved from their ``ContentType`` ids.
    Returns ``None``.
    """
    sender = ContentType.objects.get_for_id(sender_type_id)
    target_type = ContentType.objects.get_for_id(object_type_id)
    instance = target_type.get_object_for_this_type(pk=object_id)

    if not isinstance(instance, Submission):
        # Objects such as Reports, Comments, Commentaries, etc. may get rejected. This
        # does not remove them from the index. Therefore, do a complete rebuild_index
        # action on that specific instance every time the index signal is triggered.
        objects = [instance]
    else:
        # Submission have complex status handling, so a status change should lead to
        # more drastic reindexing.
        ids_list = [
            row['id'] for row in instance.thread.public().values('id')
        ]
        objects = Submission.objects.filter(pk__in=ids_list)

    try:
        aliases = connection_router.for_write(instance=objects[0])
    except IndexError:
        # No submissions given, stop processing here
        return None

    for obj in objects:
        for alias in aliases:
            try:
                unified_index = connections[alias].get_unified_index()
                unified_index.get_index(sender).remove_object(
                    obj, using=alias)
            except NotHandled:
                # TODO: Maybe log it or let the exception bubble?
                pass
Exemple #21
0
 def _get_backend(self, using):
     """Return the backend of connection ``using``.

     If ``using`` is ``None``, whatever the write router returns for this
     index is used as the connection key.
     """
     alias = using
     if alias is None:
         alias = connection_router.for_write(index=self)
     return connections[alias].get_backend()
    def _get_backend(self, using):
        """Return the backend of connection ``using``.

        A ``None`` value is replaced by the write router's result for this
        index, which is then used as the connection key.
        """
        if using is not None:
            return connections[using].get_backend()

        routed = connection_router.for_write(index=self)
        return connections[routed].get_backend()
Exemple #23
0
def get_indexes(model_class):
    """Yield the index registered for ``model_class`` on each write alias.

    Aliases come from the write router, called with ``model_class`` as its
    ``models`` hint.
    """
    for alias in connection_router.for_write(models=[model_class]):
        unified_index = connections[alias].get_unified_index()
        yield unified_index.get_index(model_class)
Exemple #24
0
def get_indexes(model_class):
    """Yield, per routed write alias, the index found for ``model_class``."""
    aliases = connection_router.for_write(models=[model_class])
    for alias in aliases:
        yield connections[alias].get_unified_index().get_index(model_class)