Example #1
0
    def test_index_chunk_task(self):
        """Run a chunk through index_chunk_task; check index and record."""
        items = [simple(save=True) for _ in range(10)]

        # With live indexing, creating the items also puts them in the
        # index. index_chunk_task has to be tested against a clean
        # index, so delete and recreate it before going any further.
        self.setup_indexes(empty=True)
        self.refresh()

        # The index should start out empty.
        eq_(len(SimpleIndex.search()), 0)

        # Build the record and the chunk, then hand them to celery.
        batch_id = 'ou812'
        rec = record(batch_id=batch_id, save=True)

        item_ids = [item.id for item in items]
        index_chunk_task.delay(
            get_index(), batch_id, rec.id, (SimpleIndex, item_ids))

        # All ten items should be in the index now.
        eq_(len(SimpleIndex.search()), 10)

        # The record should have been marked as succeeded.
        rec = Record.objects.get(pk=rec.id)
        eq_(rec.status, Record.STATUS_SUCCESS)
Example #2
0
def handle_reindex(request):
    """Calculate chunks and kick off indexing tasks.

    Recreates the index, saves one ``Record`` and queues one
    ``index_chunk_task`` per chunk, then redirects back to
    ``request.path``.
    """
    index = get_index()
    batch_id = create_batch_id()

    # Split everything indexable into chunks: first by class, then by
    # CHUNK_SIZE within each class.
    chunks = []
    for cls, indexable in get_indexable():
        for chunk in chunked(indexable, CHUNK_SIZE):
            chunks.append((cls, chunk))

    # Building the chunks does a lot of work and takes a while, so the
    # index is only wiped and rebuilt at this point. That shortens the
    # window during which there is no index by a little.
    recreate_index()

    for cls, id_list in chunks:
        type_name = cls.get_mapping_type_name()
        rec = Record(
            batch_id=batch_id,
            name='%s %d -> %d' % (type_name, id_list[0], id_list[-1]))
        rec.save()

        # The task receives the class as a class path, not the class.
        chunk_spec = (to_class_path(cls), id_list)
        index_chunk_task.delay(index, batch_id, rec.id, chunk_spec)

    return HttpResponseRedirect(request.path)
Example #3
0
    def test_index_chunk_task(self):
        """Check index_chunk_task indexes a chunk and marks its record."""
        created = [simple(save=True) for _ in range(10)]

        # With live indexing, saving those items already indexes them.
        # To exercise index_chunk_task we need to start from a clean
        # index, so it is deleted and recreated here.
        self.setup_indexes(empty=True)
        self.refresh()

        # Nothing should be in the index yet.
        eq_(len(SimpleIndex.search()), 0)

        # Create the record and the chunk, then run them through
        # celery.
        batch_id = "ou812"
        rec = record(batch_id=batch_id, save=True)

        ids = [obj.id for obj in created]
        index_chunk_task.delay(
            get_index(), batch_id, rec.id, (SimpleIndex, ids))

        # Every item should be in the index now.
        eq_(len(SimpleIndex.search()), 10)

        # Re-fetch the record and check it was marked succeeded.
        rec = Record.objects.get(pk=rec.id)
        eq_(rec.status, Record.STATUS_SUCCESS)
Example #4
0
    def test_index_chunk_task(self):
        """Check index_chunk_task indexes a chunk and marks its record."""
        resps = ResponseFactory.create_batch(10)

        # With live indexing, creating the responses also indexes them.
        # index_chunk_task has to be tested against a clean index, so
        # delete and recreate it first.
        self.setup_indexes(empty=True)

        # Nothing should be in the index at this point.
        eq_(len(ResponseMappingType.search()), 0)

        # Create the record and the chunk, then run them through
        # celery.
        batch_id = 'ou812'
        rec = RecordFactory(batch_id=batch_id)

        mapping_type_path = to_class_path(ResponseMappingType)
        response_ids = [resp.id for resp in resps]
        index_chunk_task.delay(
            get_index(), batch_id, rec.id,
            (mapping_type_path, response_ids))

        ResponseMappingType.refresh_index()

        # All ten responses should be in the index now.
        eq_(len(ResponseMappingType.search()), 10)

        # Re-fetch the record and check it was marked succeeded.
        rec = Record.objects.get(pk=rec.id)
        eq_(rec.status, Record.STATUS_SUCCESS)
Example #5
0
def handle_reindex(request):
    """Calculate chunks and kick off indexing tasks.

    Recreates the index, saves one ``Record`` and queues one
    ``index_chunk_task`` per chunk, then redirects back to
    ``request.path``.
    """
    index = get_index()
    batch_id = create_batch_id()

    # Split everything indexable into chunks: first by class, then by
    # CHUNK_SIZE within each class.
    chunks = []
    for cls, indexable in get_indexable():
        for chunk in chunked(indexable, CHUNK_SIZE):
            chunks.append((cls, chunk))

    # Building the chunks does a lot of work and takes a while, so the
    # index is only wiped and rebuilt at this point. That shortens the
    # window during which there is no index by a little.
    recreate_index()

    for cls, id_list in chunks:
        type_name = cls.get_mapping_type_name()
        rec = Record(
            batch_id=batch_id,
            name='%s %d -> %d' % (type_name, id_list[0], id_list[-1]))
        rec.save()

        # This variant hands the class itself to the task.
        chunk_spec = (cls, id_list)
        index_chunk_task.delay(index, batch_id, rec.id, chunk_spec)

    return HttpResponseRedirect(request.path)
Example #6
0
def search_admin_view(request):
    """Render the admin view containing search tools.

    A POST containing ``reset`` or ``reindex`` is handed to
    ``handle_reset`` / ``handle_reindex`` and that handler's response
    is returned. If the handler raises, the error is added to the
    page's error messages and the view falls through to render.

    The rendered page shows the index stats, the list from
    ``get_indexes()``, the JSON fetched from the first configured ES
    URL, the outstanding records and the 20 most recently created
    records.
    """
    error_messages = []
    stats = None
    es_deets = None
    indexes = []

    reset_requested = 'reset' in request.POST
    if reset_requested:
        try:
            return handle_reset(request)
        except Exception as exc:
            # exc.message is deprecated (PEP 352) and is empty when the
            # exception carries zero or several args, so fall back to
            # the exception itself to keep the error text informative.
            error_messages.append(
                u'Error: %s' % (getattr(exc, 'message', '') or exc))

    reindex_requested = 'reindex' in request.POST
    if reindex_requested:
        try:
            return handle_reindex(request)
        except Exception as exc:
            # Same fallback as for the reset handler.
            error_messages.append(
                u'Error: %s' % (getattr(exc, 'message', '') or exc))

    try:
        # This gets index stats, but also tells us whether ES is in
        # a bad state.
        try:
            stats = get_index_stats()
        except NotFoundError:
            # A missing index just means there are no stats to show.
            stats = None

        indexes = get_indexes()
        indexes.sort(key=lambda m: m[0])

        # TODO: Input has a single ES_URL and that's the ZLB and does
        # the balancing. If that ever changes and we have multiple
        # ES_URLs, then this should get fixed.
        es_deets = requests.get(settings.ES_URLS[0]).json()
    except ConnectionError:
        error_messages.append('Error: Elastic Search is not set up on this '
                              'machine or timed out trying to respond. '
                              '(ConnectionError/Timeout)')
    except NotFoundError:
        error_messages.append('Error: Index is missing. Press the reindex '
                              'button below. (ElasticHttpNotFoundError)')

    outstanding_records = Record.outstanding()
    recent_records = Record.objects.order_by('-creation_time')[:20]

    return render(
        request, 'admin/search_admin_view.html', {
            'title': 'Search',
            'es_deets': es_deets,
            'mapping_type_stats': stats,
            'indexes': indexes,
            'index': get_index(),
            'error_messages': error_messages,
            'recent_records': recent_records,
            'outstanding_records': outstanding_records,
            'now': datetime.now(),
        })
Example #7
0
 def teardown_indexes(self):
     """Delete the index, tolerating one that does not exist."""
     client = get_es()
     try:
         client.indices.delete(get_index())
     except NotFoundError:
         # The index did not exist in the first place, so there is
         # nothing left to delete.
         pass
Example #8
0
 def teardown_indexes(self):
     """Delete the index, tolerating one that does not exist."""
     client = get_es()
     try:
         client.delete_index(get_index())
     except ElasticHttpNotFoundError:
         # The index did not exist in the first place, so there is
         # nothing left to delete.
         pass
Example #9
0
def search_admin_view(request):
    """Render the admin view containing search tools.

    A POST containing ``reset`` or ``reindex`` is handed to
    ``handle_reset`` / ``handle_reindex`` and that handler's response
    is returned. If the handler raises, the error is added to the
    page's error messages and the view falls through to render.

    The rendered page shows the index stats, the list from
    ``get_indexes()``, the JSON fetched from the first configured ES
    URL, the outstanding records and the 20 most recently created
    records.
    """
    error_messages = []
    stats = None
    es_deets = None
    indexes = []

    reset_requested = 'reset' in request.POST
    if reset_requested:
        try:
            return handle_reset(request)
        except Exception as exc:
            # exc.message is deprecated (PEP 352) and is empty when the
            # exception carries zero or several args, so fall back to
            # the exception itself to keep the error text informative.
            error_messages.append(
                u'Error: %s' % (getattr(exc, 'message', '') or exc))

    reindex_requested = 'reindex' in request.POST
    if reindex_requested:
        try:
            return handle_reindex(request)
        except Exception as exc:
            # Same fallback as for the reset handler.
            error_messages.append(
                u'Error: %s' % (getattr(exc, 'message', '') or exc))

    try:
        # This gets index stats, but also tells us whether ES is in
        # a bad state.
        try:
            stats = get_index_stats()
        except NotFoundError:
            # A missing index just means there are no stats to show.
            stats = None

        indexes = get_indexes()
        indexes.sort(key=lambda m: m[0])

        # TODO: Input has a single ES_URL and that's the ZLB and does
        # the balancing. If that ever changes and we have multiple
        # ES_URLs, then this should get fixed.
        es_deets = requests.get(settings.ES_URLS[0]).json()
    except ConnectionError:
        error_messages.append('Error: Elastic Search is not set up on this '
                              'machine or timed out trying to respond. '
                              '(ConnectionError/Timeout)')
    except NotFoundError:
        error_messages.append('Error: Index is missing. Press the reindex '
                              'button below. (ElasticHttpNotFoundError)')

    outstanding_records = Record.outstanding()
    recent_records = Record.objects.order_by('-creation_time')[:20]

    return render(request, 'admin/search_admin_view.html', {
        'title': 'Search',
        'es_deets': es_deets,
        'mapping_type_stats': stats,
        'indexes': indexes,
        'index': get_index(),
        'error_messages': error_messages,
        'recent_records': recent_records,
        'outstanding_records': outstanding_records,
        'now': datetime.now(),
    })
Example #10
0
def search_admin_view(request):
    """Render the admin view containing search tools.

    A POST containing ``reset`` or ``reindex`` is handed to
    ``handle_reset`` / ``handle_reindex`` and that handler's response
    is returned. If the handler raises, the error is added to the
    page's error messages and the view falls through to render.

    The rendered page shows the index stats, the list from
    ``get_indexes()``, the outstanding records and the 20 most
    recently created records.
    """
    error_messages = []
    stats = None
    indexes = []

    reset_requested = 'reset' in request.POST
    if reset_requested:
        try:
            return handle_reset(request)
        except Exception as exc:
            # exc.message is deprecated (PEP 352) and is empty when the
            # exception carries zero or several args, so fall back to
            # the exception itself to keep the error text informative.
            error_messages.append(
                u'Error: %s' % (getattr(exc, 'message', '') or exc))

    reindex_requested = 'reindex' in request.POST
    if reindex_requested:
        try:
            return handle_reindex(request)
        except Exception as exc:
            # Same fallback as for the reset handler.
            error_messages.append(
                u'Error: %s' % (getattr(exc, 'message', '') or exc))

    try:
        # This gets index stats, but also tells us whether ES is in
        # a bad state.
        try:
            stats = get_index_stats()
        except pyes.exceptions.IndexMissingException:
            # A missing index just means there are no stats to show.
            stats = None
        indexes = get_indexes()
        indexes.sort(key=lambda m: m[0])

    except pyes.urllib3.MaxRetryError:
        error_messages.append('Error: Elastic Search is not set up on this '
                              'machine or is not responding. (MaxRetryError)')
    except pyes.exceptions.IndexMissingException:
        error_messages.append('Error: Index is missing. Press the reindex '
                              'button below. (IndexMissingException)')
    except pyes.urllib3.TimeoutError:
        error_messages.append('Error: Connection to Elastic Search timed out. '
                              '(TimeoutError)')

    outstanding_records = Record.outstanding()
    recent_records = Record.objects.order_by('-creation_time')[:20]

    return render(
        request, 'admin/search_admin_view.html', {
            'title': 'Search',
            'mapping_type_stats': stats,
            'indexes': indexes,
            'index': get_index(),
            'error_messages': error_messages,
            'recent_records': recent_records,
            'outstanding_records': outstanding_records,
            'now': datetime.now(),
        })
Example #11
0
    def refresh(self, timesleep=0):
        """Refresh the index via the indexing ES client.

        ``timesleep`` is passed straight through to that client's
        ``refresh`` call.
        """
        index_name = get_index()

        # A refresh is done to make sure the index is ready to be
        # queried, so it is almost always right to run all the
        # generated tasks first and only then refresh.
        # TODO: uncomment this when we have live indexing.
        # generate_tasks()

        indexing_es = get_indexing_es()
        indexing_es.refresh(index_name, timesleep=timesleep)
Example #12
0
def search_admin_view(request):
    """Render the admin view containing search tools.

    A POST containing ``reset`` or ``reindex`` is handed to
    ``handle_reset`` / ``handle_reindex`` and that handler's response
    is returned. If the handler raises, the error is added to the
    page's error messages and the view falls through to render.

    The rendered page shows the index stats, the list from
    ``get_indexes()``, the outstanding records and the 20 most
    recently created records.
    """
    error_messages = []
    stats = None
    indexes = []

    reset_requested = 'reset' in request.POST
    if reset_requested:
        try:
            return handle_reset(request)
        except Exception as exc:
            # exc.message is deprecated (PEP 352) and is empty when the
            # exception carries zero or several args, so fall back to
            # the exception itself to keep the error text informative.
            error_messages.append(
                u'Error: %s' % (getattr(exc, 'message', '') or exc))

    reindex_requested = 'reindex' in request.POST
    if reindex_requested:
        try:
            return handle_reindex(request)
        except Exception as exc:
            # Same fallback as for the reset handler.
            error_messages.append(
                u'Error: %s' % (getattr(exc, 'message', '') or exc))

    try:
        # This gets index stats, but also tells us whether ES is in
        # a bad state.
        try:
            stats = get_index_stats()
        except pyes.exceptions.IndexMissingException:
            # A missing index just means there are no stats to show.
            stats = None
        indexes = get_indexes()
        indexes.sort(key=lambda m: m[0])

    except pyes.urllib3.MaxRetryError:
        error_messages.append('Error: Elastic Search is not set up on this '
                              'machine or is not responding. (MaxRetryError)')
    except pyes.exceptions.IndexMissingException:
        error_messages.append('Error: Index is missing. Press the reindex '
                              'button below. (IndexMissingException)')
    except pyes.urllib3.TimeoutError:
        error_messages.append('Error: Connection to Elastic Search timed out. '
                              '(TimeoutError)')

    outstanding_records = Record.outstanding()
    recent_records = Record.objects.order_by('-creation_time')[:20]

    return render(request, 'admin/search_admin_view.html', {
        'title': 'Search',
        'mapping_type_stats': stats,
        'indexes': indexes,
        'index': get_index(),
        'error_messages': error_messages,
        'recent_records': recent_records,
        'outstanding_records': outstanding_records,
        'now': datetime.now(),
    })
Example #13
0
File: admin.py Project: rlr/fjord
def timezone_view(request):
    """Admin view showing times and timezones in data.

    Creates a throwaway response, indexes it by hand and then renders
    the server time next to the response's ``created`` value as seen
    on the freshly created object, after re-fetching it from the db,
    and as returned by an Elasticsearch search. The throwaway response
    is deleted again even when one of those steps fails.
    """
    # Note: This is an admin page that gets used once in a blue moon.
    # As such, I'm taking some liberties (hand-indexing the response,
    # time.sleep, etc) that I would never take if it was used more
    # often or was viewable by users. If these two assumptions ever
    # change, then this should be rewritten.

    from elasticutils.contrib.django import get_es

    from fjord.feedback.models import Response, ResponseMappingType
    from fjord.feedback.tests import ResponseFactory
    from fjord.search.index import get_index

    server_time = datetime.now()

    # Create a new response.
    resp = ResponseFactory.create()
    try:
        resp_time = resp.created

        # Index the response by hand so we know it gets to
        # Elasticsearch. Otherwise it gets done by celery and we don't
        # know how long that'll take.
        doc = ResponseMappingType.extract_document(resp.id)
        ResponseMappingType.index(doc, resp.id)

        # Fetch the response from the db.
        resp = Response.objects.get(id=resp.id)
        resp2_time = resp.created

        # Refresh and sleep 5 seconds as a hand-wavey way to make sure
        # that Elasticsearch has had time to refresh the index.
        get_es().indices.refresh(get_index())
        time.sleep(5)

        es_time = ResponseMappingType.search().filter(id=resp.id)[0].created
    finally:
        # Always delete the test response we created, so a failure
        # above (e.g. the search returning nothing) does not leave
        # fake feedback behind in the database.
        resp.delete()

    return render(request, 'admin/timezone_view.html', {
        'server_time': server_time,
        'resp_time': resp_time,
        'resp2_time': resp2_time,
        'es_time': es_time
    })
Example #14
0
def reindex():
    """Calculate indexing chunks and queue an indexing task for each.

    Saves one ``Record`` per chunk and passes its id to the queued
    ``index_chunk_task``.
    """
    index = get_index()
    batch_id = create_batch_id()

    # Split everything indexable into chunks: first by class, then by
    # CHUNK_SIZE within each class.
    chunks = []
    for cls, indexable in get_indexable():
        for chunk in chunked(indexable, CHUNK_SIZE):
            chunks.append((cls, chunk))

    for cls, id_list in chunks:
        type_name = cls.get_mapping_type_name()
        rec = Record(
            batch_id=batch_id,
            name='%s %d -> %d' % (type_name, id_list[0], id_list[-1]))
        rec.save()

        # The task receives the class as a class path, not the class.
        chunk_spec = (to_class_path(cls), id_list)
        index_chunk_task.delay(index, batch_id, rec.id, chunk_spec)
Example #15
0
def monitor_view(request):
    """Render the services monitor page.

    Checks memcached, ElasticSearch and RabbitMQ and collects a list
    of ``(INFO, msg)`` / ``(ERROR, msg)`` tuples for each. The page is
    rendered with status 500 if any component reported an ERROR and
    with status 200 otherwise.
    """
    # Component name -> list of (INFO, msg) or (ERROR, msg) tuples.
    # To add a new component, do its testing and then store its list
    # of output tuples in this dict under the component's name.
    status = {}

    # Check memcached.
    memcache_results = []
    try:
        for _cache_name, cache_props in settings.CACHES.items():
            backend = cache_props['BACKEND']
            location = cache_props['LOCATION']

            # LOCATION can be a string or a list of strings
            if isinstance(location, basestring):
                location = location.split(';')

            if 'memcache' not in backend:
                continue

            for loc in location:
                # TODO: this doesn't handle unix: variant
                ip, port = loc.split(':')
                outcome = test_memcached(ip, int(port))
                memcache_results.append(
                    (INFO, '%s:%s %s' % (ip, port, outcome)))

        # Summarize based on how many servers were tested above.
        server_count = len(memcache_results)
        if server_count == 0:
            verdict = (ERROR, 'memcache is not configured.')
        elif server_count < 2:
            verdict = (
                ERROR, ('You should have at least 2 memcache servers. '
                        'You have %s.' % server_count))
        else:
            verdict = (INFO, 'memcached servers look good.')
        memcache_results.append(verdict)

    except Exception as exc:
        problem = 'Exception while looking at memcached: %s' % str(exc)
        memcache_results.append((ERROR, problem))

    status['memcached'] = memcache_results

    # Check ES.
    es_results = []
    try:
        get_index_stats()
        es_ok = ('Successfully connected to ElasticSearch and index '
                 'exists.')
        es_results.append((INFO, es_ok))

    except (ConnectionError, Timeout) as exc:
        problem = 'Cannot connect to ElasticSearch: %s' % str(exc)
        es_results.append((ERROR, problem))

    except ElasticHttpNotFoundError:
        problem = 'Index "%s" missing.' % get_index()
        es_results.append((ERROR, problem))

    except Exception as exc:
        problem = 'Exception while looking at ElasticSearch: %s' % str(exc)
        es_results.append((ERROR, problem))

    status['ElasticSearch'] = es_results

    # Check RabbitMQ.
    rabbitmq_results = []
    try:
        rabbit_conn = establish_connection(connect_timeout=2)
        rabbit_conn.connect()
        rabbitmq_results.append(
            (INFO, 'Successfully connected to RabbitMQ.'))

    except (socket.error, IOError) as exc:
        problem = 'Error connecting to RabbitMQ: %s' % str(exc)
        rabbitmq_results.append((ERROR, problem))

    except Exception as exc:
        problem = 'Exception while looking at RabbitMQ: %s' % str(exc)
        rabbitmq_results.append((ERROR, problem))

    status['RabbitMQ'] = rabbitmq_results

    # One ERROR entry anywhere marks that component as failing and
    # turns the whole response into a 500.
    status_code = 200
    status_summary = {}
    for component, output in status.items():
        has_error = any(entry[0] == ERROR for entry in output)
        status_summary[component] = not has_error
        if has_error:
            status_code = 500

    context = {
        'component_status': status,
        'status_summary': status_summary,
    }
    return render(request, 'services/monitor.html', context,
                  status=status_code)
Example #16
0
 def teardown_indexes(self):
     """Delete the index through the indexing ES client if it exists."""
     get_indexing_es().delete_index_if_exists(get_index())
Example #17
0
def monitor_view(request):
    """Render the services monitor page.

    Checks memcached, ElasticSearch and RabbitMQ and collects a list
    of ``(INFO, msg)`` / ``(ERROR, msg)`` tuples for each. The page is
    rendered with status 500 if any component reported an ERROR and
    with status 200 otherwise.
    """
    # Component name -> list of (INFO, msg) or (ERROR, msg) tuples.
    # To add a new component, do its testing and then store its list
    # of output tuples in this dict under the component's name.
    status = {}

    # Check memcached.
    memcache_results = []
    try:
        for _cache_name, cache_props in settings.CACHES.items():
            backend = cache_props['BACKEND']
            location = cache_props['LOCATION']

            # LOCATION can be a string or a list of strings
            if isinstance(location, basestring):
                location = location.split(';')

            if 'memcache' not in backend:
                continue

            for loc in location:
                # TODO: this doesn't handle unix: variant
                ip, port = loc.split(':')
                outcome = test_memcached(ip, int(port))
                memcache_results.append(
                    (INFO, '%s:%s %s' % (ip, port, outcome)))

        # Summarize based on how many servers were tested above.
        server_count = len(memcache_results)
        if server_count == 0:
            verdict = (ERROR, 'memcache is not configured.')
        elif server_count < 2:
            verdict = (
                ERROR, ('You should have at least 2 memcache servers. '
                        'You have %s.' % server_count))
        else:
            verdict = (INFO, 'memcached servers look good.')
        memcache_results.append(verdict)

    except Exception as exc:
        problem = 'Exception while looking at memcached: %s' % str(exc)
        memcache_results.append((ERROR, problem))

    status['memcached'] = memcache_results

    # Check ES.
    es_results = []
    try:
        get_index_stats()
        es_ok = ('Successfully connected to ElasticSearch and index '
                 'exists.')
        es_results.append((INFO, es_ok))

    except ConnectionError as exc:
        problem = 'Cannot connect to ElasticSearch: %s' % str(exc)
        es_results.append((ERROR, problem))

    except NotFoundError:
        problem = 'Index "%s" missing.' % get_index()
        es_results.append((ERROR, problem))

    except Exception as exc:
        problem = 'Exception while looking at ElasticSearch: %s' % str(exc)
        es_results.append((ERROR, problem))

    status['ElasticSearch'] = es_results

    # Check RabbitMQ.
    rabbitmq_results = []
    try:
        rabbit_conn = establish_connection(connect_timeout=2)
        rabbit_conn.connect()
        rabbitmq_results.append(
            (INFO, 'Successfully connected to RabbitMQ.'))

    except (socket.error, IOError) as exc:
        problem = 'Error connecting to RabbitMQ: %s' % str(exc)
        rabbitmq_results.append((ERROR, problem))

    except Exception as exc:
        problem = 'Exception while looking at RabbitMQ: %s' % str(exc)
        rabbitmq_results.append((ERROR, problem))

    status['RabbitMQ'] = rabbitmq_results

    # One ERROR entry anywhere marks that component as failing and
    # turns the whole response into a 500.
    status_code = 200
    status_summary = {}
    for component, output in status.items():
        has_error = any(entry[0] == ERROR for entry in output)
        status_summary[component] = not has_error
        if has_error:
            status_code = 500

    context = {
        'component_status': status,
        'status_summary': status_summary,
    }
    return render(request, 'services/monitor.html', context,
                  status=status_code)