Example #1
def reindex_with_progress(write_index):
    """Rebuild elasticsearch index while updating progress bar for admins.
    """
    # Need to import Record here to prevent circular import
    from search.models import Record

    rec = Record(
        starttime=datetime.datetime.now(),
        text=u'Reindexing into %s' % write_index)
    rec.save()
    try:
        # Init progress bar stuff:
        cache.set(ES_REINDEX_PROGRESS, 0.001)  # An iota so it tests
                                               # true in the template

        # Reindex:
        start = time()
        for ratio in es_reindex_with_progress():
            now = time()
            if now > start + settings.ES_REINDEX_PROGRESS_BAR_INTERVAL:
                # Update memcached only every so often.
                start = now
                # Format the string to avoid exponential notation,
                # which seems to be understood by JS but makes me
                # nervous:
                cache.set(ES_REINDEX_PROGRESS, '%.5f' % ratio)

        rec.endtime = datetime.datetime.now()
        rec.save()
    except Exception:
        # sys.exc_type / sys.exc_value are deprecated and not thread-safe;
        # grab the current exception info explicitly instead.
        exc_type, exc_value = sys.exc_info()[:2]
        rec.text = (u'%s: Errored out %s %s' % (
            rec.text, exc_type, exc_value))
        rec.endtime = datetime.datetime.now()
        rec.save()
        raise
    finally:
        cache.delete(ES_REINDEX_PROGRESS)
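
A minimal sketch of how this function might be kicked off, assuming a Django
management command and a date-stamped write index name; the command class,
the index naming scheme, and the direct (synchronous) call are illustrative
assumptions, not part of the example above.

# Hypothetical kickoff via a Django management command. The command name,
# index naming scheme, and direct call are assumptions for illustration.
import datetime

from django.core.management.base import BaseCommand


class Command(BaseCommand):
    help = 'Rebuild the search index while reporting progress to admins.'

    def handle(self, *args, **options):
        # Date-stamp the write index so the live read index stays untouched
        # until the rebuild completes.
        write_index = 'search_%s' % datetime.date.today().strftime('%Y%m%d')
        reindex_with_progress(write_index)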
Example #2
def index_chunk_task(write_index, batch_id, chunk):
    """Index a chunk of things.

    :arg write_index: the name of the index to index to
    :arg batch_id: the name for the batch this chunk belongs to
    :arg chunk: a (class, id_list) of things to index
    """
    # Need to import Record here to prevent circular import
    from search.models import Record

    cls, id_list = chunk

    task_name = '{0} {1} -> {2}'.format(
        cls.get_mapping_type_name(), id_list[0], id_list[-1])

    rec = Record(
        starttime=datetime.datetime.now(),
        text=(u'Batch: %s Task: %s: Reindexing into %s' % (
                batch_id, task_name, write_index)))
    rec.save()

    try:
        # Pin to master db to avoid replication lag issues and stale
        # data.
        pin_this_thread()

        index_chunk(cls, id_list, reraise=True)

    except Exception:
        # sys.exc_type / sys.exc_value are deprecated and not thread-safe;
        # grab the current exception info explicitly instead.
        exc_type, exc_value = sys.exc_info()[:2]
        rec.text = (u'%s: Errored out %s %s' % (
            rec.text, exc_type, exc_value))
        # Some exceptions aren't pickleable and we need this to throw
        # things that are pickleable.
        raise IndexingTaskError()

    finally:
        unpin_this_thread()
        rec.endtime = datetime.datetime.now()
        rec.save()

        try:
            client = redis_client('default')
            client.decr(OUTSTANDING_INDEX_CHUNKS, 1)
        except RedisError:
            # If Redis isn't running, then we just log that the task
            # was completed.
            log.info('Index task %s completed.', task_name)
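
A minimal sketch of how a caller might build the (class, id_list) chunks this
task expects, assuming a hypothetical chunked() helper, a get_indexable()
method on each mapping type, and Celery-style .delay() dispatch; none of these
appear in the example above.

# Hypothetical driver that splits each mapping type's ids into fixed-size
# chunks and dispatches one index_chunk_task per chunk. chunked(),
# get_indexable(), CHUNK_SIZE and .delay() are assumptions for illustration.
CHUNK_SIZE = 1000


def chunked(iterable, n):
    """Yield successive n-sized lists from iterable."""
    items = list(iterable)
    for i in range(0, len(items), n):
        yield items[i:i + n]


def schedule_reindex(write_index, batch_id, mapping_types):
    for cls in mapping_types:
        ids = cls.get_indexable()  # assumed to return an iterable of ids
        for id_list in chunked(ids, CHUNK_SIZE):
            # Each chunk is the (class, id_list) pair the task unpacks.
            index_chunk_task.delay(write_index, batch_id, (cls, id_list))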
Example #3
def index_chunk_task(write_index, batch_id, chunk):
    """Index a chunk of things.

    :arg write_index: the name of the index to index to
    :arg batch_id: the name for the batch this chunk belongs to
    :arg chunk: a (class, id_list) of things to index
    """
    # Need to import Record here to prevent circular import
    from search.models import Record

    cls, id_list = chunk

    task_name = '{0} {1} -> {2}'.format(
        cls.get_model_name(), id_list[0], id_list[-1])

    rec = Record(
        starttime=datetime.datetime.now(),
        text=(u'Batch: %s Task: %s: Reindexing into %s' % (
                batch_id, task_name, write_index)))
    rec.save()

    try:
        # Pin to master db to avoid replication lag issues and stale
        # data.
        pin_this_thread()

        index_chunk(cls, id_list, reraise=True)

    except Exception:
        # sys.exc_type / sys.exc_value are deprecated and not thread-safe;
        # grab the current exception info explicitly instead.
        exc_type, exc_value = sys.exc_info()[:2]
        rec.text = (u'%s: Errored out %s %s' % (
            rec.text, exc_type, exc_value))
        # Some exceptions aren't pickleable and we need this to throw
        # things that are pickleable.
        raise IndexingTaskError()

    finally:
        unpin_this_thread()
        rec.endtime = datetime.datetime.now()
        rec.save()

        try:
            client = redis_client('default')
            client.decr(OUTSTANDING_INDEX_CHUNKS, 1)
        except RedisError:
            # If Redis isn't running, then we just log that the task
            # was completed.
            log.info('Index task %s completed.', task_name)
Example #4
def index_chunk_task(write_index, batch_id, chunk):
    """Index a chunk of things.

    :arg write_index: the name of the index to index to
    :arg batch_id: the name for the batch this chunk belongs to
    :arg chunk: a (class, id_list) of things to index
    """
    # Need to import Record here to prevent circular import
    from search.models import Record

    cls, id_list = chunk

    task_name = '%s %d -> %d' % (cls.get_model_name(), id_list[0], id_list[-1])

    rec = Record(
        starttime=datetime.datetime.now(),
        text=(u'Batch: %s Task: %s: Reindexing into %s' % (
                batch_id, task_name, write_index)))
    rec.save()

    try:
        index_chunk(cls, id_list, reraise=True)

    except Exception:
        # sys.exc_type / sys.exc_value are deprecated and not thread-safe;
        # grab the current exception info explicitly instead.
        exc_type, exc_value = sys.exc_info()[:2]
        rec.text = (u'%s: Errored out %s %s' % (
            rec.text, exc_type, exc_value))
        raise
    finally:
        rec.endtime = datetime.datetime.now()
        rec.save()

        try:
            client = redis_client('default')
            client.decr(OUTSTANDING_INDEX_CHUNKS, 1)
        except RedisError:
            # If Redis isn't running, then we just log that the task
            # was completed.
            log.info('Index task %s completed.', task_name)
Example #5
def reindex_with_progress(waffle_when_done=False):
    """Rebuild elasticsearch index while updating progress bar for admins."""
    # Need to import Record here to prevent circular import
    from search.models import Record
    try:
        rec = Record(
            starttime=datetime.datetime.now(),
            text=u'Reindexing into %s' % settings.ES_WRITE_INDEXES['default'])
        rec.save()

        # Init progress bar stuff:
        cache.set(ES_REINDEX_PROGRESS, 0.001)  # An iota so it tests true in
                                               # the template
        if waffle_when_done:
            cache.set(ES_WAFFLE_WHEN_DONE, True)
        else:
            # Clear it in case there was already one there somehow:
            cache.delete(ES_WAFFLE_WHEN_DONE)

        # Reindex:
        start = time()
        for ratio in es_reindex_with_progress():
            now = time()
            if now > start + settings.ES_REINDEX_PROGRESS_BAR_INTERVAL:
                # Update memcached only every so often.
                start = now
                # Format the string to avoid exponential notation, which seems
                # to be understood by JS but makes me nervous:
                cache.set(ES_REINDEX_PROGRESS, '%.5f' % ratio)

        if cache.get(ES_WAFFLE_WHEN_DONE):
            # Just go ahead and crash if the flag isn't there.
            flag = Flag.objects.get(name='elasticsearch')
            flag.everyone = True
            flag.save()

        rec.endtime = datetime.datetime.now()
        rec.save()
    finally:
        cache.delete(ES_REINDEX_PROGRESS)
        cache.delete(ES_WAFFLE_WHEN_DONE)
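
A minimal sketch of the consumer side of the ES_REINDEX_PROGRESS key these
examples write, assuming a plain Django view and a hypothetical template name;
only the cache key and the fact that it is deleted when the reindex finishes
come from the examples.

# Hypothetical admin view that polls the progress value written above.
# The view, URL, and template are assumptions; ES_REINDEX_PROGRESS is the
# same module-level cache key used in the examples.
from django.core.cache import cache
from django.shortcuts import render


def search_reindex_status(request):
    # Once the finally block deletes the key, cache.get() returns None,
    # which the template can treat as "no reindex in progress".
    progress = cache.get(ES_REINDEX_PROGRESS)
    return render(request, 'search/reindex_status.html',
                  {'progress': progress})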