Beispiel #1
0
def reconcile_task(write_index, batch_id, mapping_type_name):
    """Reconciles the data in the index with what's in the db

    This pulls the list of ids from the db and the list of ids from
    the index. Then it unindexes everything that shouldn't be in the
    index.

    A ``Record`` is created before the work starts. Its text is
    updated with the outcome and its end time is saved in the
    ``finally`` clause, so it is finalized whether ``reconcile_chunk``
    succeeds or fails.

    :arg write_index: the index being reconciled; in this function it
        is only used in the record text
    :arg batch_id: id of the batch this task belongs to; in this
        function it is only used in the record text
    :arg mapping_type_name: name of mapping type to reconcile

    :raises IndexingTaskError: if ``reconcile_chunk`` raises; the
        original exception type and value are noted in the record text

    """
    # Need to import Record here to prevent circular import
    from kitsune.search.models import Record

    rec = Record.objects.create(
        starttime=datetime.datetime.now(),
        text=(u'Batch: {0} Task: {1}: Reconciling {2}'.format(
            batch_id, mapping_type_name, write_index)))

    # get_indexable returns a list of tuples, but since we're only
    # passing one mapping type name, we only get one result back.
    # NOTE(review): this runs outside the try block, so a failure here
    # leaves the record without an end time -- confirm that is intended.
    cls, db_id_list = get_indexable(mapping_types=[mapping_type_name])[0]

    try:
        total = reconcile_chunk(cls, db_id_list, reraise=True)
        rec.text = u'{0}: Total reconciled: {1}'.format(
            rec.text, total)

    except Exception as exc:
        # Report the exception we actually caught. sys.exc_type and
        # sys.exc_value are deprecated, not thread-safe, and no longer
        # exist in Python 3.
        rec.text = u'{0}: Errored out {1} {2}'.format(
            rec.text, type(exc), exc)
        raise IndexingTaskError()

    finally:
        # Always stamp and persist the record, success or failure.
        rec.endtime = datetime.datetime.now()
        rec.save()
Beispiel #2
0
def reconcile_task(write_index, batch_id, mapping_type_name):
    """Reconciles the data in the index with what's in the db

    This pulls the list of ids from the db and the list of ids from
    the index. Then it unindexes everything that shouldn't be in the
    index.

    The outcome is written to a ``Record`` created at the start of the
    task. The record's end time is set and the record saved in the
    ``finally`` clause, regardless of whether ``reconcile_chunk``
    raised.

    :arg write_index: the index being reconciled; only used for the
        record text in this function
    :arg batch_id: id of the batch this task is part of; only used for
        the record text in this function
    :arg mapping_type_name: name of mapping type to reconcile

    :raises IndexingTaskError: if ``reconcile_chunk`` raises; details
        of the original exception go into the record text

    """
    # Need to import Record here to prevent circular import
    from kitsune.search.models import Record

    rec = Record.objects.create(
        starttime=datetime.datetime.now(),
        text=(u'Batch: {0} Task: {1}: Reconciling {2}'.format(
            batch_id, mapping_type_name, write_index)))

    # get_indexable returns a list of tuples, but since we're only
    # passing one mapping type name, we only get one result back.
    cls, db_id_list = get_indexable(mapping_types=[mapping_type_name])[0]

    try:
        total = reconcile_chunk(cls, db_id_list, reraise=True)
        rec.text = u'{0}: Total reconciled: {1}'.format(rec.text, total)

    except Exception as error:
        # Bind the exception in the except clause instead of reading
        # sys.exc_type/sys.exc_value: those globals are deprecated,
        # not thread-safe, and were removed in Python 3.
        rec.text = u'{0}: Errored out {1} {2}'.format(rec.text, type(error),
                                                      error)
        raise IndexingTaskError()

    finally:
        # Runs on both paths so the record never stays open-ended.
        rec.endtime = datetime.datetime.now()
        rec.save()
Beispiel #3
0
def reindex(mapping_type_names):
    """Kick off celery tasks that reindex the given mapping types.

    Nothing is started while records from an earlier run are still
    outstanding. Otherwise the indexable ids are split into chunks of
    CHUNK_SIZE per class; each chunk gets its own Record and its own
    index_chunk_task.

    :arg mapping_type_names: list of mapping types to reindex

    :raises ReindexError: if there are outstanding records

    """
    pending = Record.objects.outstanding().count()
    if pending > 0:
        raise ReindexError('There are %s outstanding chunks.' % pending)

    batch_id = create_batch_id()

    # Build the complete work list up front -- grouped by class, then
    # split by chunk size -- before any task gets queued.
    work = [
        (cls, id_chunk)
        for cls, indexable in get_indexable(mapping_types=mapping_type_names)
        for id_chunk in chunked(indexable, CHUNK_SIZE)
    ]

    for cls, ids in work:
        target_index = cls.get_index()
        # The record name shows the first and last id of the chunk.
        label = 'Indexing: %s %d -> %d' % (
            cls.get_mapping_type_name(), ids[0], ids[-1])
        record = Record.objects.create(batch_id=batch_id, name=label)
        index_chunk_task.delay(
            target_index, batch_id, record.id, (to_class_path(cls), ids))
Beispiel #4
0
def reindex(mapping_type_names):
    """Queue celery tasks to reindex every instance of the mapping types.

    Bails out if any records are still outstanding. Otherwise each
    class's indexable ids are cut into CHUNK_SIZE pieces, and every
    piece is recorded in a Record and handed to index_chunk_task.

    :arg mapping_type_names: list of mapping types to reindex

    :raises ReindexError: if there are outstanding records

    """
    still_running = Record.objects.outstanding().count()
    if still_running > 0:
        raise ReindexError(
            'There are %s outstanding chunks.' % still_running)

    batch_id = create_batch_id()

    # Collect (class, ids) pairs: one class at a time, then one chunk
    # of ids at a time. Nothing is queued until the list is complete.
    pieces = []
    for cls, indexable in get_indexable(mapping_types=mapping_type_names):
        for id_chunk in chunked(indexable, CHUNK_SIZE):
            pieces.append((cls, id_chunk))

    for piece in pieces:
        cls, id_list = piece
        index = cls.get_index()
        name_args = (cls.get_mapping_type_name(), id_list[0], id_list[-1])
        rec = Record.objects.create(
            batch_id=batch_id, name='Indexing: %s %d -> %d' % name_args)
        index_chunk_task.delay(
            index, batch_id, rec.id, (to_class_path(cls), id_list))
Beispiel #5
0
def reindex_with_scoreboard(mapping_type_names):
    """Queue celery tasks to reindex the given mapping types.

    The OUTSTANDING_INDEX_CHUNKS key in Redis acts as a scoreboard of
    outstanding chunks, so that two reindex jobs don't run on top of
    each other. When Redis raises a RedisError, a warning is logged and
    the reindex carries on without the scoreboard.

    :arg mapping_type_names: list of mapping types to reindex

    :raises ReindexError: if the scoreboard shows outstanding chunks
    """
    # TODO: If the scoreboard ends up in a bad state, the count may be
    # stuck above zero and we'd really want to ignore it. Need the
    # ability to ignore it.
    try:
        scoreboard = redis_client('default')
        outstanding = scoreboard.get(OUTSTANDING_INDEX_CHUNKS)
        busy = outstanding is not None and int(outstanding) > 0
        if busy:
            raise ReindexError(
                'There are %s outstanding chunks.' % outstanding)

        # The real chunk count isn't known yet, so put a placeholder in
        # to keep another reindex request from sliding in and kicking
        # off its own chunks.
        #
        # There is a race condition here.
        scoreboard.set(OUTSTANDING_INDEX_CHUNKS, 1)
    except RedisError:
        log.warning(
            'Redis not running. Can not check if there are outstanding '
            'tasks.')

    batch_id = create_batch_id()

    # Split everything we want to index by class and then by chunk
    # size. A reconcile task is queued for each class along the way.
    pending_chunks = []
    for cls, indexable in get_indexable(mapping_types=mapping_type_names):
        for id_chunk in chunked(indexable, CHUNK_SIZE):
            pending_chunks.append((cls, id_chunk))

        reconcile_task.delay(
            cls.get_index(), batch_id, cls.get_mapping_type_name())

    total_chunks = len(pending_chunks)

    try:
        scoreboard = redis_client('default')
        scoreboard.set(OUTSTANDING_INDEX_CHUNKS, total_chunks)
    except RedisError:
        log.warning("Redis not running. Can't denote outstanding tasks.")

    for pending in pending_chunks:
        chunk_cls = pending[0]
        index_chunk_task.delay(chunk_cls.get_index(), batch_id, pending)
Beispiel #6
0
def reindex_with_scoreboard(mapping_type_names):
    """Reindex the given mapping types through celery tasks.

    Outstanding chunks are tracked in Redis under
    OUTSTANDING_INDEX_CHUNKS so that a second job does not start while
    one is in flight. A RedisError is logged as a warning and otherwise
    ignored, in which case the reindex proceeds untracked.

    :arg mapping_type_names: list of mapping types to reindex

    :raises ReindexError: if Redis reports outstanding chunks
    """
    # TODO: If the counter gets into a bad state it could stay non-zero
    # even though nothing is running, and we'd want to just ignore it.
    # Need the ability to ignore it.
    try:
        redis = redis_client('default')
        count = redis.get(OUTSTANDING_INDEX_CHUNKS)
        if count is not None:
            if int(count) > 0:
                raise ReindexError(
                    'There are %s outstanding chunks.' % count)

        # We can't tell yet how many chunks there will be, but we still
        # want to block another reindex request from slipping in and
        # starting a pile of chunks.
        #
        # There is a race condition here.
        redis.set(OUTSTANDING_INDEX_CHUNKS, 1)
    except RedisError:
        log.warning('Redis not running. '
                    'Can not check if there are outstanding tasks.')

    batch_id = create_batch_id()

    # Chunkify by class, then by chunk size, and queue one reconcile
    # task per class while we are at it.
    work = []
    for cls, indexable in get_indexable(mapping_types=mapping_type_names):
        work += [(cls, ids) for ids in chunked(indexable, CHUNK_SIZE)]

        index_name = cls.get_index()
        reconcile_task.delay(
            index_name, batch_id, cls.get_mapping_type_name())

    try:
        redis = redis_client('default')
        redis.set(OUTSTANDING_INDEX_CHUNKS, len(work))
    except RedisError:
        log.warning('Redis not running. Can\'t denote outstanding tasks.')

    for item in work:
        item_index = item[0].get_index()
        index_chunk_task.delay(item_index, batch_id, item)
Beispiel #7
0
def handle_reindex(request):
    """Calculates and kicks off indexing tasks

    When the POST data carries a truthy ``delete_index`` value,
    ``mapping_types=None`` is passed to ``get_indexable`` (reindex all
    models) and the index is recreated before the tasks are queued.
    Otherwise only the mapping types named by ``check_<name>`` POST
    keys are reindexed.

    :arg request: the request whose POST data selects what to reindex

    :returns: an HttpResponseRedirect back to ``request.path``

    :raises ReindexError: if Redis reports outstanding index chunks

    """
    # This is truthy if the user wants us to delete and recreate
    # the index first.
    delete_index_first = bool(request.POST.get('delete_index'))

    if delete_index_first:
        # Coming from the delete form, so we reindex all models.
        mapping_types_to_index = None
    else:
        # Coming from the reindex form, so we reindex whatever we're
        # told. Only the leading 'check_' is stripped: str.replace
        # would also remove any later 'check_' inside the mapping type
        # name itself.
        prefix = 'check_'
        mapping_types_to_index = [name[len(prefix):]
                                  for name in request.POST.keys()
                                  if name.startswith(prefix)]

    # TODO: If this gets into a bad state, then it's possible this
    # could be non-zero and we really want to just ignore it. Need the
    # ability to ignore it.
    try:
        client = redis_client('default')
        val = client.get(OUTSTANDING_INDEX_CHUNKS)
        if val is not None and int(val) > 0:
            raise ReindexError('There are %s outstanding chunks.' % val)

        # We don't know how many chunks we're building, but we do want
        # to make sure another reindex request doesn't slide in here
        # and kick off a bunch of chunks.
        #
        # There is a race condition here.
        client.set(OUTSTANDING_INDEX_CHUNKS, 1)
    except RedisError:
        log.warning('Redis not running. Can not check if there are '
                    'outstanding tasks.')

    batch_id = create_batch_id()

    # Break up all the things we want to index into chunks. This
    # chunkifies by class then by chunk size.
    chunks = []
    for cls, indexable in get_indexable(mapping_types=mapping_types_to_index):
        chunks.extend(
            (cls, chunk) for chunk in chunked(indexable, CHUNK_SIZE))

    if delete_index_first:
        # The previous lines do a lot of work and take some time to
        # execute.  So we wait until here to wipe and rebuild the
        # index. That reduces the time that there is no index by a little.
        recreate_index()

    chunks_count = len(chunks)

    try:
        client = redis_client('default')
        client.set(OUTSTANDING_INDEX_CHUNKS, chunks_count)
    except RedisError:
        log.warning('Redis not running. Can\'t denote outstanding tasks.')

    for chunk in chunks:
        index_chunk_task.delay(write_index(), batch_id, chunk)

    return HttpResponseRedirect(request.path)
Beispiel #8
0
def handle_reindex(request):
    """Calculates and kicks off indexing tasks

    A truthy ``delete_index`` value in the POST data means the request
    came from the delete form: ``get_indexable`` is called with
    ``mapping_types=None`` (reindex all models) and the index is
    recreated before any task is queued. Without it, the mapping types
    to reindex are taken from the POST keys of the form
    ``check_<name>``.

    :arg request: the request whose POST data selects what to reindex

    :returns: an HttpResponseRedirect back to ``request.path``

    :raises ReindexError: if Redis reports outstanding index chunks

    """
    # This is truthy if the user wants us to delete and recreate
    # the index first.
    delete_index_first = bool(request.POST.get('delete_index'))

    if delete_index_first:
        # Coming from the delete form, so we reindex all models.
        mapping_types_to_index = None
    else:
        # Coming from the reindex form, so we reindex whatever we're
        # told. Slice off just the 'check_' prefix; str.replace would
        # also strip 'check_' from the middle of a mapping type name.
        prefix_len = len('check_')
        mapping_types_to_index = [
            name[prefix_len:] for name in request.POST.keys()
            if name.startswith('check_')
        ]

    # TODO: If this gets into a bad state, then it's possible this
    # could be non-zero and we really want to just ignore it. Need the
    # ability to ignore it.
    try:
        client = redis_client('default')
        val = client.get(OUTSTANDING_INDEX_CHUNKS)
        if val is not None and int(val) > 0:
            raise ReindexError('There are %s outstanding chunks.' % val)

        # We don't know how many chunks we're building, but we do want
        # to make sure another reindex request doesn't slide in here
        # and kick off a bunch of chunks.
        #
        # There is a race condition here.
        client.set(OUTSTANDING_INDEX_CHUNKS, 1)
    except RedisError:
        log.warning('Redis not running. Can not check if there are '
                    'outstanding tasks.')

    batch_id = create_batch_id()

    # Break up all the things we want to index into chunks. This
    # chunkifies by class then by chunk size.
    chunks = []
    for cls, indexable in get_indexable(mapping_types=mapping_types_to_index):
        chunks.extend((cls, chunk) for chunk in chunked(indexable, CHUNK_SIZE))

    if delete_index_first:
        # The previous lines do a lot of work and take some time to
        # execute.  So we wait until here to wipe and rebuild the
        # index. That reduces the time that there is no index by a little.
        recreate_index()

    chunks_count = len(chunks)

    try:
        client = redis_client('default')
        client.set(OUTSTANDING_INDEX_CHUNKS, chunks_count)
    except RedisError:
        log.warning('Redis not running. Can\'t denote outstanding tasks.')

    for chunk in chunks:
        index_chunk_task.delay(write_index(), batch_id, chunk)

    return HttpResponseRedirect(request.path)