def test_index_chunk_task(self):
    """index_chunk_task indexes the chunk and marks the record succeeded."""
    created_items = [simple(save=True) for _ in range(10)]

    # With live indexing, that'll create items in the index. Since
    # we want to test index_chunk_test, we need a clean index to
    # start with so we delete and recreate it.
    self.setup_indexes(empty=True)
    self.refresh()

    # The freshly recreated index must be empty.
    eq_(len(SimpleIndex.search()), 0)

    # Build the tracking record and the chunk, then push it through
    # celery.
    batch_id = 'ou812'
    rec = record(batch_id=batch_id, save=True)

    item_ids = [item.id for item in created_items]
    index_chunk_task.delay(
        get_index(), batch_id, rec.id, (SimpleIndex, item_ids))

    # All ten items should now be searchable.
    eq_(len(SimpleIndex.search()), 10)

    # The record should have been flipped to the success status.
    rec = Record.objects.get(pk=rec.id)
    eq_(rec.status, Record.STATUS_SUCCESS)
def handle_reindex(request):
    """Calculate chunks and kick off indexing tasks.

    Wipes and recreates the index, then queues one celery task per
    chunk of indexable ids, tracking each chunk with a Record row.

    :arg request: the HTTP POST request that triggered the reindex
    :returns: redirect back to the submitting page
    """
    index = get_index()

    batch_id = create_batch_id()

    # Break up all the things we want to index into chunks. This
    # chunkifies by class then by chunk size.
    chunks = []
    for cls, indexable in get_indexable():
        chunks.extend(
            (cls, chunk) for chunk in chunked(indexable, CHUNK_SIZE))

    # The previous lines do a lot of work and take some time to
    # execute. So we wait until here to wipe and rebuild the
    # index. That reduces the time that there is no index by a little.
    recreate_index()

    for cls, id_list in chunks:
        # Name the record after the mapping type and the id span it covers.
        chunk_name = '%s %d -> %d' % (cls.get_mapping_type_name(),
                                      id_list[0], id_list[-1])
        rec = Record(batch_id=batch_id, name=chunk_name)
        rec.save()
        # Pass the class path (not the class) so the task args serialize.
        index_chunk_task.delay(index, batch_id, rec.id,
                               (to_class_path(cls), id_list))

    return HttpResponseRedirect(request.path)
def test_index_chunk_task(self):
    """Running index_chunk_task fills the index and marks success."""
    items = [simple(save=True) for _ in range(10)]

    # With live indexing, that'll create items in the index. Since
    # we want to test index_chunk_test, we need a clean index to
    # start with so we delete and recreate it.
    self.setup_indexes(empty=True)
    self.refresh()

    # Nothing should be indexed yet.
    eq_(len(SimpleIndex.search()), 0)

    # Create the tracking record plus the chunk and run it through
    # celery.
    batch_id = "ou812"
    rec = record(batch_id=batch_id, save=True)

    chunk = (SimpleIndex, [item.id for item in items])
    index_chunk_task.delay(get_index(), batch_id, rec.id, chunk)

    # All ten items should be in the index now.
    eq_(len(SimpleIndex.search()), 10)

    # Reload the record and check it was marked succeeded.
    rec = Record.objects.get(pk=rec.id)
    eq_(rec.status, Record.STATUS_SUCCESS)
def test_index_chunk_task(self):
    """index_chunk_task indexes responses and marks the record succeeded."""
    responses = ResponseFactory.create_batch(10)

    # With live indexing, that'll create items in the index. Since
    # we want to test index_chunk_test, we need a clean index to
    # start with so we delete and recreate it.
    self.setup_indexes(empty=True)

    # The recreated index starts out empty.
    eq_(len(ResponseMappingType.search()), 0)

    # Build the tracking record and the chunk, then run the task
    # through celery.
    batch_id = 'ou812'
    rec = RecordFactory(batch_id=batch_id)

    response_ids = [item.id for item in responses]
    chunk = (to_class_path(ResponseMappingType), response_ids)
    index_chunk_task.delay(get_index(), batch_id, rec.id, chunk)

    ResponseMappingType.refresh_index()

    # All ten responses should be searchable now.
    eq_(len(ResponseMappingType.search()), 10)

    # The record should have been marked succeeded.
    rec = Record.objects.get(pk=rec.id)
    eq_(rec.status, Record.STATUS_SUCCESS)
def handle_reindex(request):
    """Calculate chunks and kick off indexing tasks.

    Wipes and recreates the index, then queues one celery task per
    chunk of indexable ids, tracking each chunk with a Record row.

    :arg request: the HTTP POST request that triggered the reindex
    :returns: redirect back to the submitting page
    """
    index = get_index()

    batch_id = create_batch_id()

    # Break up all the things we want to index into chunks. This
    # chunkifies by class then by chunk size.
    chunks = []
    for cls, indexable in get_indexable():
        chunks.extend((cls, chunk) for chunk in chunked(indexable, CHUNK_SIZE))

    # The previous lines do a lot of work and take some time to
    # execute. So we wait until here to wipe and rebuild the
    # index. That reduces the time that there is no index by a little.
    recreate_index()

    for cls, id_list in chunks:
        # Name the record after the mapping type and the id span it covers.
        chunk_name = '%s %d -> %d' % (cls.get_mapping_type_name(),
                                      id_list[0], id_list[-1])
        rec = Record(batch_id=batch_id, name=chunk_name)
        rec.save()
        # NOTE(review): this passes the mapping type class itself in the
        # task args; confirm the celery serializer in use can handle class
        # objects (other call sites pass to_class_path(cls) instead).
        index_chunk_task.delay(index, batch_id, rec.id,
                               (cls, id_list))

    return HttpResponseRedirect(request.path)
def search_admin_view(request):
    """Render the admin view containing search tools"""
    error_messages = []
    stats = None
    es_deets = None
    indexes = []

    if 'reset' in request.POST:
        try:
            return handle_reset(request)
        except Exception as exc:
            error_messages.append(u'Error: %s' % exc.message)

    if 'reindex' in request.POST:
        try:
            return handle_reindex(request)
        except Exception as exc:
            error_messages.append(u'Error: %s' % exc.message)

    try:
        # This gets index stats, but also tells us whether ES is in
        # a bad state.
        try:
            stats = get_index_stats()
        except NotFoundError:
            stats = None

        indexes = get_indexes()
        indexes.sort(key=lambda m: m[0])

        # TODO: Input has a single ES_URL and that's the ZLB and does
        # the balancing. If that ever changes and we have multiple
        # ES_URLs, then this should get fixed.
        es_deets = requests.get(settings.ES_URLS[0]).json()
    except ConnectionError:
        error_messages.append('Error: Elastic Search is not set up on this '
                              'machine or timed out trying to respond. '
                              '(ConnectionError/Timeout)')
    except NotFoundError:
        error_messages.append('Error: Index is missing. Press the reindex '
                              'button below. (ElasticHttpNotFoundError)')

    outstanding_records = Record.outstanding()
    recent_records = Record.objects.order_by('-creation_time')[:20]

    context = {
        'title': 'Search',
        'es_deets': es_deets,
        'mapping_type_stats': stats,
        'indexes': indexes,
        'index': get_index(),
        'error_messages': error_messages,
        'recent_records': recent_records,
        'outstanding_records': outstanding_records,
        'now': datetime.now(),
    }
    return render(request, 'admin/search_admin_view.html', context)
def teardown_indexes(self):
    """Delete the test index, tolerating its absence."""
    es_client = get_es()
    try:
        es_client.indices.delete(get_index())
    except NotFoundError:
        # The index didn't exist in the first place, so there's
        # nothing for us to delete.
        pass
def teardown_indexes(self):
    """Delete the test index, tolerating its absence."""
    es_client = get_es()
    try:
        es_client.delete_index(get_index())
    except ElasticHttpNotFoundError:
        # The index didn't exist in the first place, so there's
        # nothing for us to delete.
        pass
def search_admin_view(request):
    """Render the admin view containing search tools"""
    error_messages = []
    stats = None
    es_deets = None
    indexes = []

    wants_reset = 'reset' in request.POST
    if wants_reset:
        try:
            return handle_reset(request)
        except Exception as exc:
            error_messages.append(u'Error: %s' % exc.message)

    wants_reindex = 'reindex' in request.POST
    if wants_reindex:
        try:
            return handle_reindex(request)
        except Exception as exc:
            error_messages.append(u'Error: %s' % exc.message)

    try:
        # This gets index stats, but also tells us whether ES is in
        # a bad state.
        try:
            stats = get_index_stats()
        except NotFoundError:
            stats = None

        indexes = get_indexes()
        indexes.sort(key=lambda m: m[0])

        # TODO: Input has a single ES_URL and that's the ZLB and does
        # the balancing. If that ever changes and we have multiple
        # ES_URLs, then this should get fixed.
        es_deets = requests.get(settings.ES_URLS[0]).json()
    except ConnectionError:
        error_messages.append('Error: Elastic Search is not set up on this '
                              'machine or timed out trying to respond. '
                              '(ConnectionError/Timeout)')
    except NotFoundError:
        error_messages.append('Error: Index is missing. Press the reindex '
                              'button below. (ElasticHttpNotFoundError)')

    outstanding_records = Record.outstanding()
    recent_records = Record.objects.order_by('-creation_time')[:20]

    return render(
        request,
        'admin/search_admin_view.html',
        {
            'title': 'Search',
            'es_deets': es_deets,
            'mapping_type_stats': stats,
            'indexes': indexes,
            'index': get_index(),
            'error_messages': error_messages,
            'recent_records': recent_records,
            'outstanding_records': outstanding_records,
            'now': datetime.now(),
        })
def search_admin_view(request):
    """Render the admin view containing search tools"""
    error_messages = []
    stats = None
    indexes = []

    if 'reset' in request.POST:
        try:
            return handle_reset(request)
        except Exception as exc:
            error_messages.append(u'Error: %s' % exc.message)

    if 'reindex' in request.POST:
        try:
            return handle_reindex(request)
        except Exception as exc:
            error_messages.append(u'Error: %s' % exc.message)

    try:
        # This gets index stats, but also tells us whether ES is in
        # a bad state.
        try:
            stats = get_index_stats()
        except pyes.exceptions.IndexMissingException:
            stats = None

        indexes = get_indexes()
        indexes.sort(key=lambda m: m[0])
    except pyes.urllib3.MaxRetryError:
        error_messages.append('Error: Elastic Search is not set up on this '
                              'machine or is not responding. (MaxRetryError)')
    except pyes.exceptions.IndexMissingException:
        error_messages.append('Error: Index is missing. Press the reindex '
                              'button below. (IndexMissingException)')
    except pyes.urllib3.TimeoutError:
        error_messages.append('Error: Connection to Elastic Search timed out. '
                              '(TimeoutError)')

    outstanding_records = Record.outstanding()
    recent_records = Record.objects.order_by('-creation_time')[:20]

    context = {
        'title': 'Search',
        'mapping_type_stats': stats,
        'indexes': indexes,
        'index': get_index(),
        'error_messages': error_messages,
        'recent_records': recent_records,
        'outstanding_records': outstanding_records,
        'now': datetime.now(),
    }
    return render(request, 'admin/search_admin_view.html', context)
def refresh(self, timesleep=0):
    """Refresh the index so queued writes become searchable."""
    index = get_index()

    # Any time we're doing a refresh, we're making sure that the
    # index is ready to be queried. Given that, it's almost
    # always the case that we want to run all the generated tasks,
    # then refresh.
    # TODO: uncomment this when we have live indexing.
    # generate_tasks()

    get_indexing_es().refresh(index, timesleep=timesleep)
def search_admin_view(request):
    """Render the admin view containing search tools"""
    error_messages = []
    stats = None
    indexes = []

    wants_reset = 'reset' in request.POST
    if wants_reset:
        try:
            return handle_reset(request)
        except Exception as exc:
            error_messages.append(u'Error: %s' % exc.message)

    wants_reindex = 'reindex' in request.POST
    if wants_reindex:
        try:
            return handle_reindex(request)
        except Exception as exc:
            error_messages.append(u'Error: %s' % exc.message)

    try:
        # This gets index stats, but also tells us whether ES is in
        # a bad state.
        try:
            stats = get_index_stats()
        except pyes.exceptions.IndexMissingException:
            stats = None

        indexes = get_indexes()
        indexes.sort(key=lambda m: m[0])
    except pyes.urllib3.MaxRetryError:
        error_messages.append('Error: Elastic Search is not set up on this '
                              'machine or is not responding. (MaxRetryError)')
    except pyes.exceptions.IndexMissingException:
        error_messages.append('Error: Index is missing. Press the reindex '
                              'button below. (IndexMissingException)')
    except pyes.urllib3.TimeoutError:
        error_messages.append('Error: Connection to Elastic Search timed out. '
                              '(TimeoutError)')

    outstanding_records = Record.outstanding()
    recent_records = Record.objects.order_by('-creation_time')[:20]

    return render(request, 'admin/search_admin_view.html', {
        'title': 'Search',
        'mapping_type_stats': stats,
        'indexes': indexes,
        'index': get_index(),
        'error_messages': error_messages,
        'recent_records': recent_records,
        'outstanding_records': outstanding_records,
        'now': datetime.now(),
    })
def timezone_view(request):
    """Admin view showing times and timezones in data."""
    # Note: This is an admin page that gets used once in a blue moon.
    # As such, I'm taking some liberties (hand-indexing the response,
    # time.sleep, etc) that I would never take if it was used more
    # often or was viewable by users. If these two assumptions ever
    # change, then this should be rewritten.
    from elasticutils.contrib.django import get_es
    from fjord.feedback.models import Response, ResponseMappingType
    from fjord.feedback.tests import ResponseFactory
    from fjord.search.index import get_index

    server_time = datetime.now()

    # Create a new response.
    resp = ResponseFactory.create()
    resp_time = resp.created

    # Index the response by hand so we know it gets to Elasticsearch. Otherwise
    # it gets done by celery and we don't know how long that'll take.
    doc = ResponseMappingType.extract_document(resp.id)
    ResponseMappingType.index(doc, resp.id)

    # Fetch the response from the db.
    resp = Response.objects.get(id=resp.id)
    resp2_time = resp.created

    # Refresh and sleep 5 seconds as a hand-wavey way to make sure
    # that Elasticsearch has had time to refresh the index.
    get_es().indices.refresh(get_index())
    time.sleep(5)

    es_time = ResponseMappingType.search().filter(id=resp.id)[0].created

    # Delete the test response we created.
    resp.delete()

    context = {
        'server_time': server_time,
        'resp_time': resp_time,
        'resp2_time': resp2_time,
        'es_time': es_time
    }
    return render(request, 'admin/timezone_view.html', context)
def reindex():
    """Calculates and creates indexing chunks"""
    index = get_index()

    batch_id = create_batch_id()

    # Break up all the things we want to index into chunks. This
    # chunkifies by class then by chunk size.
    chunks = []
    for cls, indexable in get_indexable():
        chunks.extend(
            (cls, chunk) for chunk in chunked(indexable, CHUNK_SIZE))

    for cls, id_list in chunks:
        # Name the record after the mapping type and the id span it covers.
        chunk_name = '%s %d -> %d' % (cls.get_mapping_type_name(),
                                      id_list[0], id_list[-1])
        rec = Record(batch_id=batch_id, name=chunk_name)
        rec.save()
        # Serialize the class as a path so the task args are picklable.
        index_chunk_task.delay(index, batch_id, rec.id,
                               (to_class_path(cls), id_list))
def monitor_view(request):
    """View for services monitor."""
    # Dict of infrastructure name -> list of output tuples of (INFO,
    # msg) or (ERROR, msg)
    status = {}

    # Note: To add a new component, do your testing and then add a
    # name -> list of output tuples map to status.

    # Check memcached.
    memcache_results = []
    try:
        for cache_props in settings.CACHES.values():
            result = True
            backend = cache_props['BACKEND']
            location = cache_props['LOCATION']

            # LOCATION can be a string or a list of strings
            if isinstance(location, basestring):
                location = location.split(';')

            if 'memcache' in backend:
                for loc in location:
                    # TODO: this doesn't handle unix: variant
                    ip, port = loc.split(':')
                    result = test_memcached(ip, int(port))
                    memcache_results.append(
                        (INFO, '%s:%s %s' % (ip, port, result)))

        if not memcache_results:
            memcache_results.append((ERROR, 'memcache is not configured.'))
        elif len(memcache_results) < 2:
            memcache_results.append(
                (ERROR, ('You should have at least 2 memcache servers. '
                         'You have %s.' % len(memcache_results))))
        else:
            memcache_results.append((INFO, 'memcached servers look good.'))
    except Exception as exc:
        memcache_results.append(
            (ERROR, 'Exception while looking at memcached: %s' % str(exc)))

    status['memcached'] = memcache_results

    # Check ES.
    es_results = []
    try:
        get_index_stats()
        es_results.append(
            (INFO, ('Successfully connected to ElasticSearch and index '
                    'exists.')))
    except (ConnectionError, Timeout) as exc:
        es_results.append(
            (ERROR, 'Cannot connect to ElasticSearch: %s' % str(exc)))
    except ElasticHttpNotFoundError:
        es_results.append(
            (ERROR, 'Index "%s" missing.' % get_index()))
    except Exception as exc:
        es_results.append(
            (ERROR, 'Exception while looking at ElasticSearch: %s' % str(exc)))

    status['ElasticSearch'] = es_results

    # Check RabbitMQ.
    rabbitmq_results = []
    try:
        rabbit_conn = establish_connection(connect_timeout=2)
        rabbit_conn.connect()
        rabbitmq_results.append(
            (INFO, 'Successfully connected to RabbitMQ.'))
    except (socket.error, IOError) as exc:
        rabbitmq_results.append(
            (ERROR, 'Error connecting to RabbitMQ: %s' % str(exc)))
    except Exception as exc:
        rabbitmq_results.append(
            (ERROR, 'Exception while looking at RabbitMQ: %s' % str(exc)))

    status['RabbitMQ'] = rabbitmq_results

    # Any component with an ERROR line makes the whole page a 500.
    status_code = 200
    status_summary = {}
    for component, output in status.items():
        has_error = any(item[0] == ERROR for item in output)
        if has_error:
            status_code = 500
        status_summary[component] = not has_error

    return render(request, 'services/monitor.html',
                  {'component_status': status,
                   'status_summary': status_summary},
                  status=status_code)
def teardown_indexes(self):
    """Remove the test index if it exists."""
    # delete_index_if_exists is a no-op when the index is absent, so
    # no exception handling is needed here.
    get_indexing_es().delete_index_if_exists(get_index())
def monitor_view(request):
    """View for services monitor."""
    # Dict of infrastructure name -> list of output tuples of (INFO,
    # msg) or (ERROR, msg)
    status = {}

    # Note: To add a new component, do your testing and then add a
    # name -> list of output tuples map to status.

    # Check memcached.
    memcache_results = []
    try:
        for cache_props in settings.CACHES.values():
            result = True
            backend = cache_props['BACKEND']
            location = cache_props['LOCATION']

            # LOCATION can be a string or a list of strings
            if isinstance(location, basestring):
                location = location.split(';')

            if 'memcache' in backend:
                for loc in location:
                    # TODO: this doesn't handle unix: variant
                    ip, port = loc.split(':')
                    result = test_memcached(ip, int(port))
                    memcache_results.append(
                        (INFO, '%s:%s %s' % (ip, port, result)))

        if not memcache_results:
            memcache_results.append((ERROR, 'memcache is not configured.'))
        elif len(memcache_results) < 2:
            memcache_results.append(
                (ERROR, ('You should have at least 2 memcache servers. '
                         'You have %s.' % len(memcache_results))))
        else:
            memcache_results.append((INFO, 'memcached servers look good.'))
    except Exception as exc:
        memcache_results.append(
            (ERROR, 'Exception while looking at memcached: %s' % str(exc)))

    status['memcached'] = memcache_results

    # Check ES.
    es_results = []
    try:
        get_index_stats()
        es_results.append(
            (INFO, ('Successfully connected to ElasticSearch and index '
                    'exists.')))
    except ConnectionError as exc:
        es_results.append(
            (ERROR, 'Cannot connect to ElasticSearch: %s' % str(exc)))
    except NotFoundError:
        es_results.append(
            (ERROR, 'Index "%s" missing.' % get_index()))
    except Exception as exc:
        es_results.append(
            (ERROR, 'Exception while looking at ElasticSearch: %s' % str(exc)))

    status['ElasticSearch'] = es_results

    # Check RabbitMQ.
    rabbitmq_results = []
    try:
        rabbit_conn = establish_connection(connect_timeout=2)
        rabbit_conn.connect()
        rabbitmq_results.append(
            (INFO, 'Successfully connected to RabbitMQ.'))
    except (socket.error, IOError) as exc:
        rabbitmq_results.append(
            (ERROR, 'Error connecting to RabbitMQ: %s' % str(exc)))
    except Exception as exc:
        rabbitmq_results.append(
            (ERROR, 'Exception while looking at RabbitMQ: %s' % str(exc)))

    status['RabbitMQ'] = rabbitmq_results

    # Any component with an ERROR line makes the whole page a 500.
    status_code = 200
    status_summary = {}
    for component, output in status.items():
        has_error = any(item[0] == ERROR for item in output)
        if has_error:
            status_code = 500
        status_summary[component] = not has_error

    return render(request, 'services/monitor.html',
                  {'component_status': status,
                   'status_summary': status_summary},
                  status=status_code)