def elasticsearch_status(request):
    """Return a JSON summary of elasticsearch node disk/heap usage, all
    indices, and errors for seqr projects that reference missing indices."""
    client = get_es_client()

    # Per-node disk allocation, keyed by node name so heap stats can be merged in.
    allocation_rows = _get_es_meta(
        client, 'allocation', ['node', 'shards', 'disk.avail', 'disk.used', 'disk.percent'])
    node_disk_stats = {row['node']: row for row in allocation_rows}

    # Fold heap usage into the matching node's disk record.
    heap_rows = _get_es_meta(
        client, 'nodes', ['name', 'heap.percent'],
        filter_rows=lambda row: row['name'] in node_disk_stats)
    for row in heap_rows:
        node_disk_stats[row.pop('name')].update(row)

    indices, seqr_index_projects = _get_es_indices(client)

    # Any index still referenced by active samples but absent from ES is an error.
    errors = []
    for index_name, project_individuals in seqr_index_projects.items():
        if not project_individuals:
            continue
        project_summary = ', '.join(
            '{} ({} samples)'.format(project.name, len(individual_guids))
            for project, individual_guids in project_individuals.items())
        errors.append('{} does not exist and is used by project(s) {}'.format(
            index_name, project_summary))

    return create_json_response({
        'indices': indices,
        'diskStats': list(node_disk_stats.values()),
        'elasticsearchHost': ELASTICSEARCH_SERVER,
        'errors': errors,
    })
def get_elasticsearch_index_samples(elasticsearch_index):
    """Return (sample_ids, index_metadata) for the given elasticsearch index.

    Sample ids are collected via a terms aggregation on the
    'samples_num_alt_1' field (no documents are fetched, only aggregations).
    """
    client = get_es_client()
    metadata = get_index_metadata(elasticsearch_index, client).get(elasticsearch_index)

    # size=0: skip document hits entirely, we only need the aggregation buckets.
    search = elasticsearch_dsl.Search(using=client, index=elasticsearch_index).params(size=0)
    sample_terms = elasticsearch_dsl.A('terms', field='samples_num_alt_1', size=10000)
    search.aggs.bucket('sample_ids', sample_terms)

    buckets = search.execute().aggregations.sample_ids.buckets
    sample_ids = [bucket['key'] for bucket in buckets]
    return sample_ids, metadata
def get_elasticsearch_index_samples(elasticsearch_index, dataset_type=Sample.DATASET_TYPE_VARIANT_CALLS):
    """Return (sample_ids, index_metadata) for the given elasticsearch index.

    The sample-id field queried depends on the dataset type, looked up via
    SAMPLE_FIELDS_MAP. Only aggregation buckets are fetched (size=0).
    """
    client = get_es_client()
    metadata = get_index_metadata(elasticsearch_index, client).get(elasticsearch_index)

    # The field holding sample ids differs per dataset type.
    sample_field = SAMPLE_FIELDS_MAP[dataset_type]

    search = elasticsearch_dsl.Search(using=client, index=elasticsearch_index).params(size=0)
    search.aggs.bucket('sample_ids', elasticsearch_dsl.A('terms', field=sample_field, size=10000))

    buckets = search.execute().aggregations.sample_ids.buckets
    sample_ids = [bucket['key'] for bucket in buckets]
    return sample_ids, metadata
def elasticsearch_status(request):
    """Return a JSON summary of elasticsearch state: per-node disk stats, all
    visible indices (annotated with the seqr projects that use them), and
    errors for projects whose samples reference indices that no longer exist.
    """
    client = get_es_client()

    # Per-node disk allocation from the cat API; keys are camelCased with dots
    # collapsed (e.g. 'disk.avail' -> 'diskAvail') for the JSON response.
    disk_fields = ['node', 'shards', 'disk.avail', 'disk.used', 'disk.percent']
    disk_status = [{
        _to_camel_case(field.replace('.', '_')): disk[field] for field in disk_fields
    } for disk in client.cat.allocation(format="json", h=','.join(disk_fields))]

    # All indices, excluding system indices ('.'-prefixed) and the
    # operations-log index.
    index_fields = ['index', 'docs.count', 'store.size', 'creation.date.string']
    indices = [{
        _to_camel_case(field.replace('.', '_')): index[field] for field in index_fields
    } for index in client.cat.indices(format="json", h=','.join(index_fields))
        if all(not index['index'].startswith(omit_prefix) for omit_prefix in ['.', 'index_operations_log'])]

    # alias name -> list of concrete index names it points at.
    aliases = defaultdict(list)
    for alias in client.cat.aliases(format="json", h='alias,index'):
        aliases[alias['alias']].append(alias['index'])

    # use_cache=False: always reflect the live cluster state in this admin view.
    index_metadata = get_index_metadata('_all', client, use_cache=False)

    active_samples = Sample.objects.filter(is_active=True).select_related('individual__family__project')

    # index name (or name prefix) -> project -> set of individual guids.
    seqr_index_projects = defaultdict(lambda: defaultdict(set))
    es_projects = set()
    for sample in active_samples:
        # A sample may reference multiple comma-separated indices.
        for index_name in sample.elasticsearch_index.split(','):
            project = sample.individual.family.project
            es_projects.add(project)
            if index_name in aliases:
                # Resolve aliases to the concrete indices behind them.
                for aliased_index_name in aliases[index_name]:
                    seqr_index_projects[aliased_index_name][project].add(sample.individual.guid)
            else:
                # Trailing '*' indicates a wildcard/prefix match on index names.
                seqr_index_projects[index_name.rstrip('*')][project].add(sample.individual.guid)

    # Annotate each live index with its metadata and owning projects; matched
    # entries are popped so only unmatched (missing) indices remain afterwards.
    for index in indices:
        index_name = index['index']
        index.update(index_metadata[index_name])
        projects_for_index = []
        # Iterate a snapshot of keys since matches are popped during the loop.
        for index_prefix in list(seqr_index_projects.keys()):
            if index_name.startswith(index_prefix):
                projects_for_index += list(seqr_index_projects.pop(index_prefix).keys())
        index['projects'] = [{'projectGuid': project.guid, 'projectName': project.name} for project in projects_for_index]

    # Whatever is left in seqr_index_projects was never matched to a live
    # index, i.e. active samples point at an index that does not exist.
    errors = ['{} does not exist and is used by project(s) {}'.format(
        index, ', '.join(['{} ({} samples)'.format(p.name, len(indivs)) for p, indivs in
                          project_individuals.items()])
    ) for index, project_individuals in seqr_index_projects.items() if project_individuals]

    return create_json_response({
        'indices': indices,
        'diskStats': disk_status,
        'elasticsearchHost': ELASTICSEARCH_SERVER,
        'errors': errors,
    })
def get_elasticsearch_index_samples(elasticsearch_index):
    """Return (sample_ids, index_metadata) for the given elasticsearch index.

    The sample-id field varies by dataset type, so the first field from
    SAMPLE_FIELDS_LIST that exists in the index mapping is used. Only
    aggregation buckets are fetched (size=0), not documents.

    Args:
        elasticsearch_index (str): name of the index to query.
    Returns:
        tuple: (list of sample id strings, index metadata dict)
    Raises:
        ValueError: if the index mapping contains none of the known sample fields.
    """
    es_client = get_es_client()
    index_metadata = get_index_metadata(
        elasticsearch_index, es_client, include_fields=True).get(elasticsearch_index)

    # Bug fix: the original bare next() raised an opaque StopIteration when no
    # known sample field was present in the mapping (which PEP 479 turns into
    # RuntimeError inside generators). Use a default and raise a clear error.
    sample_field = next(
        (field for field in SAMPLE_FIELDS_LIST if field in index_metadata['fields']), None)
    if sample_field is None:
        raise ValueError('Index "{}" has no known sample field (expected one of: {})'.format(
            elasticsearch_index, ', '.join(SAMPLE_FIELDS_LIST)))

    s = elasticsearch_dsl.Search(using=es_client, index=elasticsearch_index)
    s = s.params(size=0)
    s.aggs.bucket('sample_ids', elasticsearch_dsl.A('terms', field=sample_field, size=10000))
    response = s.execute()
    return [agg['key'] for agg in response.aggregations.sample_ids.buckets], index_metadata
def status_view(request):
    """Status endpoint for monitoring app availability.

    Pings each dependent service (databases, redis, elasticsearch, kibana);
    responds 200 when all are reachable, 400 otherwise.
    """
    def _check_elasticsearch():
        # A falsy ping means ES answered but is unhealthy - surface it as an error.
        if not get_es_client(timeout=3, max_retries=0).ping():
            raise ValueError('No response from elasticsearch ping')

    dependent_services_ok = True

    # Test database connection
    for db_connection_key in DATABASES.keys():
        try:
            connections[db_connection_key].cursor()
        except Exception as e:
            dependent_services_ok = False
            logger.error('Database "{}" connection error: {}'.format(db_connection_key, e))

    # Remaining services share the same check/log pattern: (callable, message template).
    service_checks = [
        (lambda: redis.StrictRedis(host=REDIS_SERVICE_HOSTNAME, socket_connect_timeout=3).ping(),
         'Redis connection error: {}'),
        (_check_elasticsearch,
         'Elasticsearch connection error: {}'),
        (lambda: requests.head('http://{}/status'.format(KIBANA_SERVER), timeout=3).raise_for_status(),
         'Kibana connection error: {}'),
    ]
    for check, error_template in service_checks:
        try:
            check()
        except Exception as e:
            dependent_services_ok = False
            logger.error(error_template.format(str(e)))

    return create_json_response(
        {'version': SEQR_VERSION, 'dependent_services_ok': dependent_services_ok},
        status=200 if dependent_services_ok else 400)
def delete_index(request):
    """Delete an elasticsearch index, refusing (403) if any active seqr
    samples still reference it. Returns the refreshed index list on success."""
    index = json.loads(request.body)['index']

    # Guard: never delete an index that active samples still point at.
    active_index_samples = Sample.objects.filter(is_active=True, elasticsearch_index=index)
    if active_index_samples:
        project_names = {
            sample.individual.family.project.name
            for sample in active_index_samples.select_related('individual__family__project')
        }
        message = 'Index "{}" is still used by: {}'.format(index, ', '.join(project_names))
        return create_json_response({'error': message}, status=403)

    client = get_es_client()
    client.indices.delete(index)

    updated_indices, _ = _get_es_indices(client)
    return create_json_response({'indices': updated_indices})