Example 1
def count_out_of_date():
    """
    Count number of synonyms that differ between the database and ES.

    :returns: A 2-tuple where the first element is the number of synonyms
        that are in the DB but not in ES, and the second element is the
        number of synonyms in ES that are not in the DB.
    """
    es = es_utils.get_es()

    index_name = es_utils.write_index('default')
    settings = (es.indices.get_settings(index_name)
                .get(index_name, {})
                .get('settings', {}))

    synonym_key_re = re.compile(
        r'index\.analysis\.filter\.synonyms-.*\.synonyms\.\d+')

    synonyms_in_es = set()
    for key, val in settings.items():
        if synonym_key_re.match(key):
            synonyms_in_es.add(val)

    synonyms_in_db = set(unicode(s) for s in Synonym.uncached.all())

    synonyms_to_add = synonyms_in_db - synonyms_in_es
    synonyms_to_remove = synonyms_in_es - synonyms_in_db

    if synonyms_to_remove == set(['firefox => firefox']):
        synonyms_to_remove = set()

    return (len(synonyms_to_add), len(synonyms_to_remove))
Example 2
def count_out_of_date():
    """
    Count number of synonyms that differ between the database and ES.

    :returns: A 2-tuple where the first element is the number of synonyms
        that are in the DB but not in ES, and the second element is the
        number of synonyms in ES that are not in the DB.
    """
    es = es_utils.get_es()

    index_name = es_utils.write_index("default")
    settings = es.indices.get_settings(index_name).get(index_name,
                                                       {}).get("settings", {})

    synonym_key_re = re.compile(
        r"index\.analysis\.filter\.synonyms-.*\.synonyms\.\d+")

    synonyms_in_es = set()
    for key, val in list(settings.items()):
        if synonym_key_re.match(key):
            synonyms_in_es.add(val)

    synonyms_in_db = set(str(s) for s in Synonym.objects.all())

    synonyms_to_add = synonyms_in_db - synonyms_in_es
    synonyms_to_remove = synonyms_in_es - synonyms_in_db

    if synonyms_to_remove == {"firefox => firefox"}:
        synonyms_to_remove = set()

    return (len(synonyms_to_add), len(synonyms_to_remove))
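The two variants above (the first targeting Python 2, the second Python 3) return the same 2-tuple described in the docstring. A minimal, hypothetical usage sketch follows; the caller and its output format are assumptions for illustration, not code from the project.

# Hypothetical caller (illustration only): report whether the ES
# synonym filter is stale relative to the database.
to_add, to_remove = count_out_of_date()
if to_add or to_remove:
    print("Synonyms out of date: %d to add, %d to remove" % (to_add, to_remove))
else:
    print("Synonyms are up to date.")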
Example 3
def handle_recreate_index(request):
    """Deletes an index, recreates it, and reindexes it."""
    groups = [name.replace("check_", "") for name in request.POST.keys() if name.startswith("check_")]

    indexes = [write_index(group) for group in groups]
    recreate_indexes(indexes=indexes)

    mapping_types_names = [mt.get_mapping_type_name() for mt in get_mapping_types() if mt.get_index_group() in groups]
    reindex_with_scoreboard(mapping_types_names)

    return HttpResponseRedirect(request.path)
Example 4
    def refresh(self, run_tasks=True):
        index = es_utils.write_index()

        if run_tasks:
            # Any time we're doing a refresh, we're making sure that
            # the index is ready to be queried. Given that, it's
            # almost always the case that we want to run all the
            # generated tasks, then refresh.
            generate_tasks()

        get_es().refresh(index)
        get_es().health(wait_for_status='yellow')
Example 5
    def refresh(self, run_tasks=True):
        index = es_utils.write_index()

        if run_tasks:
            # Any time we're doing a refresh, we're making sure that
            # the index is ready to be queried. Given that, it's
            # almost always the case that we want to run all the
            # generated tasks, then refresh.
            generate_tasks()

        get_es().refresh(index)
        get_es().health(wait_for_status='yellow')
Example 6
def handle_recreate_index(request):
    """Deletes an index, recreates it, and reindexes it."""
    groups = [name.replace('check_', '')
              for name in request.POST.keys()
              if name.startswith('check_')]

    indexes = [write_index(group) for group in groups]
    recreate_indexes(indexes=indexes)

    mapping_types_names = [mt.get_mapping_type_name()
                           for mt in get_mapping_types()
                           if mt.get_index_group() in groups]
    reindex(mapping_types_names)

    return HttpResponseRedirect(request.path)
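Both handle_recreate_index variants (Examples 3 and 6) derive the index groups to rebuild from checkbox names in the POST data. A hedged sketch of that convention, using placeholder group names that are not necessarily the project's real index groups:

# Hypothetical POST payload (placeholder group names): each ticked
# checkbox arrives as a "check_<group>" key.
post_data = {"check_default": "on", "check_questions": "on"}
groups = [name.replace("check_", "") for name in post_data if name.startswith("check_")]
# groups == ["default", "questions"]; the write index for each group
# would then be recreated and reindexed.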
Example 7
def update_synonyms_task():
    es = get_es()

    # Close the index, update the settings, then re-open it.
    # This will cause search to be unavailable for a few seconds.
    # This updates all of the analyzer settings, which is kind of overkill,
    # but will make sure everything stays consistent.
    index = write_index("default")
    analysis = get_analysis()

    # if anything goes wrong, it is very important to re-open the index.
    try:
        es.indices.close(index)
        es.indices.put_settings(index=index, body={"analysis": analysis,})
    finally:
        es.indices.open(index)
Example 8
def update_synonyms_task():
    es = get_es()

    # Close the index, update the settings, then re-open it.
    # This will cause search to be unavailable for a few seconds.
    # This updates all of the analyzer settings, which is kind of overkill,
    # but will make sure everything stays consistent.
    index = write_index('default')
    analysis = get_analysis()

    # if anything goes wrong, it is very important to re-open the index.
    try:
        es.indices.close(index)
        es.indices.put_settings(index=index, body={
            'analysis': analysis,
        })
    finally:
        es.indices.open(index)
Example 9
def handle_reindex(request):
    """Caculates and kicks off indexing tasks"""
    # This is truthy if the user wants us to delete and recreate
    # the index first.
    delete_index_first = bool(request.POST.get('delete_index'))

    if delete_index_first:
        # Coming from the delete form, so we reindex all models.
        mapping_types_to_index = None
    else:
        # Coming from the reindex form, so we reindex whatever we're
        # told.
        mapping_types_to_index = [name.replace('check_', '')
                                  for name in request.POST.keys()
                                  if name.startswith('check_')]

    # TODO: If this gets fux0rd, then it's possible this could be
    # non-zero and we really want to just ignore it. Need the ability
    # to ignore it.
    try:
        client = redis_client('default')
        val = client.get(OUTSTANDING_INDEX_CHUNKS)
        if val is not None and int(val) > 0:
            raise ReindexError('There are %s outstanding chunks.' % val)

        # We don't know how many chunks we're building, but we do want
        # to make sure another reindex request doesn't slide in here
        # and kick off a bunch of chunks.
        #
        # There is a race condition here.
        client.set(OUTSTANDING_INDEX_CHUNKS, 1)
    except RedisError:
        log.warning('Redis not running. Can not check if there are '
                    'outstanding tasks.')

    batch_id = create_batch_id()

    # Break up all the things we want to index into chunks. This
    # chunkifies by class then by chunk size.
    chunks = []
    for cls, indexable in get_indexable(mapping_types=mapping_types_to_index):
        chunks.extend(
            (cls, chunk) for chunk in chunked(indexable, CHUNK_SIZE))

    if delete_index_first:
        # The previous lines do a lot of work and take some time to
        # execute.  So we wait until here to wipe and rebuild the
        # index. That reduces the time that there is no index by a little.
        recreate_index()

    chunks_count = len(chunks)

    try:
        client = redis_client('default')
        client.set(OUTSTANDING_INDEX_CHUNKS, chunks_count)
    except RedisError:
        log.warning('Redis not running. Can\'t denote outstanding tasks.')

    for chunk in chunks:
        index_chunk_task.delay(write_index(), batch_id, chunk)

    return HttpResponseRedirect(request.path)
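handle_reindex splits each model's indexable items into fixed-size batches via a chunked() helper before queueing index_chunk_task for each batch. A minimal sketch of such a helper, assuming it simply yields successive lists of at most n items (the project's actual implementation may differ):

def chunked(iterable, n):
    """Yield successive lists of at most n items from iterable.

    Sketch only, assuming plain batching semantics.
    """
    chunk = []
    for item in iterable:
        chunk.append(item)
        if len(chunk) == n:
            yield chunk
            chunk = []
    if chunk:
        yield chunk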
Example 10
    try:
        # TODO: SUMO has a single ES_URL and that's the ZLB and does
        # the balancing. If that ever changes and we have multiple
        # ES_URLs, then this should get fixed.
        es_deets = requests.get(settings.ES_URLS[0]).json()
    except requests.exceptions.RequestException:
        pass

    try:
        stats = get_doctype_stats(read_index())
    except ES_EXCEPTIONS:
        pass

    try:
        write_stats = get_doctype_stats(write_index())
    except ES_EXCEPTIONS:
        pass

    try:
        indexes = get_indexes()
        indexes.sort(key=lambda m: m[0])
    except ES_EXCEPTIONS as e:
        error_messages.append('Error: {0}'.format(repr(e)))

    try:
        client = redis_client('default')
        outstanding_chunks = int(client.get(OUTSTANDING_INDEX_CHUNKS))
    except (RedisError, TypeError):
        pass
Example 11
    def get_index(cls):
        return es_utils.write_index(cls.get_index_group())
Example 12
    def teardown_indexes(self):
        """Tear down write index"""
        es_utils.delete_index(es_utils.write_index())
Example 13
def search(request):
    """Render the admin view containing search tools"""
    if not request.user.has_perm('search.reindex'):
        raise PermissionDenied

    error_messages = []
    stats = {}

    if 'reset' in request.POST:
        try:
            return handle_reset(request)
        except ReindexError as e:
            error_messages.append(u'Error: %s' % e.message)

    if 'reindex' in request.POST:
        try:
            return handle_reindex(request)
        except ReindexError as e:
            error_messages.append(u'Error: %s' % e.message)

    if 'recreate_index' in request.POST:
        try:
            return handle_recreate_index(request)
        except ReindexError as e:
            error_messages.append(u'Error: %s' % e.message)

    if 'delete_index' in request.POST:
        try:
            return handle_delete(request)
        except DeleteError as e:
            error_messages.append(u'Error: %s' % e.message)
        except ES_EXCEPTIONS as e:
            error_messages.append('Error: {0}'.format(repr(e)))

    stats = None
    write_stats = None
    es_deets = None
    indexes = []
    outstanding_chunks = None

    try:
        # TODO: SUMO has a single ES_URL and that's the ZLB and does
        # the balancing. If that ever changes and we have multiple
        # ES_URLs, then this should get fixed.
        es_deets = requests.get(settings.ES_URLS[0]).json()
    except requests.exceptions.RequestException:
        pass

    stats = {}
    for index in all_read_indexes():
        try:
            stats[index] = get_doctype_stats(index)
        except ES_EXCEPTIONS:
            stats[index] = None

    write_stats = {}
    for index in all_write_indexes():
        try:
            write_stats[index] = get_doctype_stats(index)
        except ES_EXCEPTIONS:
            write_stats[index] = None

    try:
        indexes = get_indexes()
        indexes.sort(key=lambda m: m[0])
    except ES_EXCEPTIONS as e:
        error_messages.append('Error: {0}'.format(repr(e)))

    try:
        client = redis_client('default')
        outstanding_chunks = int(client.get(OUTSTANDING_INDEX_CHUNKS))
    except (RedisError, TypeError):
        pass

    recent_records = Record.uncached.order_by('-starttime')[:100]

    outstanding_records = (Record.uncached.filter(endtime__isnull=True)
                                          .order_by('-starttime'))

    index_groups = set(settings.ES_INDEXES.keys())
    index_groups |= set(settings.ES_WRITE_INDEXES.keys())

    index_group_data = [[group, read_index(group), write_index(group)]
                        for group in index_groups]

    return render(
        request,
        'admin/search_maintenance.html',
        {'title': 'Search',
         'es_deets': es_deets,
         'doctype_stats': stats,
         'doctype_write_stats': write_stats,
         'indexes': indexes,
         'index_groups': index_groups,
         'index_group_data': index_group_data,
         'read_indexes': all_read_indexes,
         'write_indexes': all_write_indexes,
         'error_messages': error_messages,
         'recent_records': recent_records,
         'outstanding_records': outstanding_records,
         'outstanding_chunks': outstanding_chunks,
         'now': datetime.now(),
         'read_index': read_index,
         'write_index': write_index,
         })
Example 14
def search(request):
    """Render the admin view containing search tools"""
    if not request.user.has_perm('search.reindex'):
        raise PermissionDenied

    error_messages = []
    stats = {}

    if 'reset' in request.POST:
        try:
            return handle_reset(request)
        except ReindexError as e:
            error_messages.append('Error: %s' % e.message)

    if 'reindex' in request.POST:
        try:
            return handle_reindex(request)
        except ReindexError as e:
            error_messages.append('Error: %s' % e.message)

    if 'recreate_index' in request.POST:
        try:
            return handle_recreate_index(request)
        except ReindexError as e:
            error_messages.append('Error: %s' % e.message)

    if 'delete_index' in request.POST:
        try:
            return handle_delete(request)
        except DeleteError as e:
            error_messages.append('Error: %s' % e.message)
        except ES_EXCEPTIONS as e:
            error_messages.append('Error: {0}'.format(repr(e)))

    stats = None
    write_stats = None
    es_deets = None
    indexes = []

    try:
        # TODO: SUMO has a single ES_URL and that's the ZLB and does
        # the balancing. If that ever changes and we have multiple
        # ES_URLs, then this should get fixed.
        es_deets = requests.get(settings.ES_URLS[0]).json()
    except requests.exceptions.RequestException:
        pass

    stats = {}
    for index in all_read_indexes():
        try:
            stats[index] = get_doctype_stats(index)
        except ES_EXCEPTIONS:
            stats[index] = None

    write_stats = {}
    for index in all_write_indexes():
        try:
            write_stats[index] = get_doctype_stats(index)
        except ES_EXCEPTIONS:
            write_stats[index] = None

    try:
        indexes = get_indexes()
        indexes.sort(key=lambda m: m[0])
    except ES_EXCEPTIONS as e:
        error_messages.append('Error: {0}'.format(repr(e)))

    recent_records = Record.objects.all()[:100]
    outstanding_records = Record.objects.outstanding()

    index_groups = set(settings.ES_INDEXES.keys())
    index_groups |= set(settings.ES_WRITE_INDEXES.keys())

    index_group_data = [[group, read_index(group),
                         write_index(group)] for group in index_groups]

    return render(
        request, 'admin/search_maintenance.html', {
            'title': 'Search',
            'es_deets': es_deets,
            'doctype_stats': stats,
            'doctype_write_stats': write_stats,
            'indexes': indexes,
            'index_groups': index_groups,
            'index_group_data': index_group_data,
            'read_indexes': all_read_indexes,
            'write_indexes': all_write_indexes,
            'error_messages': error_messages,
            'recent_records': recent_records,
            'outstanding_records': outstanding_records,
            'now': datetime.now(),
            'read_index': read_index,
            'write_index': write_index,
        })
Example 15
    def get_index(cls):
        return es_utils.write_index(cls.get_index_group())
Example 16
def search(request):
    """Render the admin view containing search tools"""
    if not request.user.has_perm("search.reindex"):
        raise PermissionDenied

    error_messages = []
    stats = {}

    if "reset" in request.POST:
        try:
            return handle_reset(request)
        except ReindexError as e:
            error_messages.append(u"Error: %s" % e.message)

    if "reindex" in request.POST:
        try:
            return handle_reindex(request)
        except ReindexError as e:
            error_messages.append(u"Error: %s" % e.message)

    if "recreate_index" in request.POST:
        try:
            return handle_recreate_index(request)
        except ReindexError as e:
            error_messages.append(u"Error: %s" % e.message)

    if "delete_index" in request.POST:
        try:
            return handle_delete(request)
        except DeleteError as e:
            error_messages.append(u"Error: %s" % e.message)
        except ES_EXCEPTIONS as e:
            error_messages.append("Error: {0}".format(repr(e)))

    stats = None
    write_stats = None
    es_deets = None
    indexes = []
    outstanding_chunks = None

    try:
        # TODO: SUMO has a single ES_URL and that's the ZLB and does
        # the balancing. If that ever changes and we have multiple
        # ES_URLs, then this should get fixed.
        es_deets = requests.get(settings.ES_URLS[0]).json()
    except requests.exceptions.RequestException:
        pass

    stats = {}
    for index in all_read_indexes():
        try:
            stats[index] = get_doctype_stats(index)
        except ES_EXCEPTIONS:
            stats[index] = None

    write_stats = {}
    for index in all_write_indexes():
        try:
            write_stats[index] = get_doctype_stats(index)
        except ES_EXCEPTIONS:
            write_stats[index] = None

    try:
        indexes = get_indexes()
        indexes.sort(key=lambda m: m[0])
    except ES_EXCEPTIONS as e:
        error_messages.append("Error: {0}".format(repr(e)))

    try:
        client = redis_client("default")
        outstanding_chunks = int(client.get(OUTSTANDING_INDEX_CHUNKS))
    except (RedisError, TypeError):
        pass

    recent_records = Record.objects.order_by("-starttime")[:100]

    outstanding_records = Record.objects.filter(endtime__isnull=True).order_by("-starttime")

    index_groups = set(settings.ES_INDEXES.keys())
    index_groups |= set(settings.ES_WRITE_INDEXES.keys())

    index_group_data = [[group, read_index(group), write_index(group)] for group in index_groups]

    return render(
        request,
        "admin/search_maintenance.html",
        {
            "title": "Search",
            "es_deets": es_deets,
            "doctype_stats": stats,
            "doctype_write_stats": write_stats,
            "indexes": indexes,
            "index_groups": index_groups,
            "index_group_data": index_group_data,
            "read_indexes": all_read_indexes,
            "write_indexes": all_write_indexes,
            "error_messages": error_messages,
            "recent_records": recent_records,
            "outstanding_records": outstanding_records,
            "outstanding_chunks": outstanding_chunks,
            "now": datetime.now(),
            "read_index": read_index,
            "write_index": write_index,
        },
    )
Example 17
    def teardown_indexes(self):
        """Tear down write index"""
        es_utils.delete_index(es_utils.write_index())
Example 18
    def get_index(cls):
        return es_utils.write_index()
Example 19
def handle_reindex(request):
    """Caculates and kicks off indexing tasks"""
    # This is truthy if the user wants us to delete and recreate
    # the index first.
    delete_index_first = bool(request.POST.get('delete_index'))

    if delete_index_first:
        # Coming from the delete form, so we reindex all models.
        mapping_types_to_index = None
    else:
        # Coming from the reindex form, so we reindex whatever we're
        # told.
        mapping_types_to_index = [
            name.replace('check_', '') for name in request.POST.keys()
            if name.startswith('check_')
        ]

    # TODO: If this gets fux0rd, then it's possible this could be
    # non-zero and we really want to just ignore it. Need the ability
    # to ignore it.
    try:
        client = redis_client('default')
        val = client.get(OUTSTANDING_INDEX_CHUNKS)
        if val is not None and int(val) > 0:
            raise ReindexError('There are %s outstanding chunks.' % val)

        # We don't know how many chunks we're building, but we do want
        # to make sure another reindex request doesn't slide in here
        # and kick off a bunch of chunks.
        #
        # There is a race condition here.
        client.set(OUTSTANDING_INDEX_CHUNKS, 1)
    except RedisError:
        log.warning('Redis not running. Can not check if there are '
                    'outstanding tasks.')

    batch_id = create_batch_id()

    # Break up all the things we want to index into chunks. This
    # chunkifies by class then by chunk size.
    chunks = []
    for cls, indexable in get_indexable(mapping_types=mapping_types_to_index):
        chunks.extend((cls, chunk) for chunk in chunked(indexable, CHUNK_SIZE))

    if delete_index_first:
        # The previous lines do a lot of work and take some time to
        # execute.  So we wait until here to wipe and rebuild the
        # index. That reduces the time that there is no index by a little.
        recreate_index()

    chunks_count = len(chunks)

    try:
        client = redis_client('default')
        client.set(OUTSTANDING_INDEX_CHUNKS, chunks_count)
    except RedisError:
        log.warning('Redis not running. Can\'t denote outstanding tasks.')

    for chunk in chunks:
        index_chunk_task.delay(write_index(), batch_id, chunk)

    return HttpResponseRedirect(request.path)
Example 20
    try:
        # TODO: SUMO has a single ES_URL and that's the ZLB and does
        # the balancing. If that ever changes and we have multiple
        # ES_URLs, then this should get fixed.
        es_deets = requests.get(settings.ES_URLS[0]).json()
    except requests.exceptions.RequestException:
        pass

    try:
        stats = get_doctype_stats(read_index())
    except ES_EXCEPTIONS:
        pass

    try:
        write_stats = get_doctype_stats(write_index())
    except ES_EXCEPTIONS:
        pass

    try:
        indexes = get_indexes()
        indexes.sort(key=lambda m: m[0])
    except ES_EXCEPTIONS as e:
        error_messages.append('Error: {0}'.format(repr(e)))

    try:
        client = redis_client('default')
        outstanding_chunks = int(client.get(OUTSTANDING_INDEX_CHUNKS))
    except (RedisError, TypeError):
        pass
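Every example above resolves index names through es_utils.read_index / es_utils.write_index against the ES_INDEXES and ES_WRITE_INDEXES settings referenced in Examples 13, 14, and 16. A rough, hedged sketch of what such helpers could look like, assuming a plain settings lookup (the real implementation may add prefixes or fallbacks):

# Hedged sketch, not the project's actual es_utils code.
from django.conf import settings

def read_index(group="default"):
    """Return the name of the index this group reads from."""
    return settings.ES_INDEXES[group]

def write_index(group="default"):
    """Return the name of the index this group writes to."""
    return settings.ES_WRITE_INDEXES[group]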