def create_locus_list_handler(request):
    """Create a new LocusList owned by the requesting user.

    Rejects the request when no name is supplied, or when any requested
    gene/interval fails to parse (unless ``ignoreInvalidItems`` is set).
    Returns the created list and its parsed genes as JSON.
    """
    request_json = json.loads(request.body)

    if not request_json.get('name'):
        return create_json_response({}, status=400, reason='"Name" is required')

    parsed_genes, parsed_intervals, invalid_items = parse_locus_list_items(request_json)
    should_reject = invalid_items and not request_json.get('ignoreInvalidItems')
    if should_reject:
        return create_json_response(
            {'invalidLocusListItems': invalid_items}, status=400, reason=INVALID_ITEMS_ERROR)

    new_list = LocusList.objects.create(
        name=request_json['name'],
        description=request_json.get('description') or '',
        is_public=request_json.get('isPublic') or False,
        created_by=request.user,
    )
    _update_locus_list_items(new_list, parsed_genes, parsed_intervals, request_json)

    response_body = {
        'locusListsByGuid': {new_list.guid: get_json_for_locus_list(new_list, request.user)},
        'genesById': parsed_genes,
    }
    return create_json_response(response_body)
def create_locus_list_handler(request):
    """Create a LocusList via create_model_from_json.

    Validates the name and parsed items, and returns a 400 error rather
    than a server error when a duplicate list triggers an IntegrityError.
    """
    request_json = json.loads(request.body)

    if not request_json.get('name'):
        return create_json_response({}, status=400, reason='"Name" is required')

    genes_by_id, intervals, invalid_items = parse_locus_list_items(request_json)
    if invalid_items and not request_json.get('ignoreInvalidItems'):
        return create_json_response(
            {'invalidLocusListItems': invalid_items}, status=400, reason=INVALID_ITEMS_ERROR)

    list_fields = {
        'name': request_json['name'],
        'description': request_json.get('description') or '',
        'is_public': request_json.get('isPublic') or False,
    }
    try:
        locus_list = create_model_from_json(LocusList, list_fields, request.user)
    except IntegrityError:
        # Duplicate list for this user - surface as a client error
        return create_json_response({'error': 'This list already exists'}, status=400)

    _update_locus_list_items(locus_list, genes_by_id, intervals, request_json, request.user)

    list_json = get_json_for_locus_list(locus_list, request.user)
    return create_json_response({
        'locusListsByGuid': {locus_list.guid: list_json},
        'genesById': genes_by_id,
    })
def update_locus_list_handler(request, locus_list_guid):
    """Update the metadata and (optionally) the items of a user-created LocusList.

    Requires edit permission on the list; rejects the update if any requested
    item fails to parse unless ``ignoreInvalidItems`` is set.
    """
    locus_list = LocusList.objects.get(guid=locus_list_guid)
    check_user_created_object_permissions(locus_list, request.user, permission_level=CAN_EDIT)

    request_json = json.loads(request.body)
    genes_by_id, intervals, invalid_items = parse_locus_list_items(request_json)
    if invalid_items and not request_json.get('ignoreInvalidItems'):
        return create_json_response(
            {'invalidLocusListItems': invalid_items}, status=400, reason=INVALID_ITEMS_ERROR)

    update_model_from_json(locus_list, request_json, allow_unknown_keys=True)

    # A None genes_by_id means the request did not include item changes
    if genes_by_id is not None:
        _update_locus_list_items(locus_list, genes_by_id, intervals, request_json)

    return create_json_response({
        'locusListsByGuid': {locus_list.guid: get_json_for_locus_list(locus_list, request.user)},
        'genesById': {} if not genes_by_id else genes_by_id,
    })
def update_locus_list_handler(request, locus_list_guid):
    """Apply edits from the request body to an existing LocusList.

    Checks edit permissions, updates model fields, and replaces the list's
    items when the payload contains item changes.
    """
    locus_list = LocusList.objects.get(guid=locus_list_guid)
    check_object_permissions(locus_list, request.user, permission_level=CAN_EDIT)

    request_json = json.loads(request.body)

    genes_by_id, intervals, invalid_items = parse_locus_list_items(request_json)
    has_blocking_invalid_items = bool(invalid_items) and not request_json.get('ignoreInvalidItems')
    if has_blocking_invalid_items:
        return create_json_response(
            {'invalidLocusListItems': invalid_items}, status=400, reason=INVALID_ITEMS_ERROR)

    update_model_from_json(locus_list, request_json, allow_unknown_keys=True)

    # genes_by_id is None when no item updates were requested
    if genes_by_id is not None:
        _update_locus_list_items(locus_list, genes_by_id, intervals, request_json, request.user)

    list_json = get_json_for_locus_list(locus_list, request.user)
    return create_json_response({
        'locusListsByGuid': {locus_list.guid: list_json},
        'genesById': genes_by_id if genes_by_id else {},
    })
def create_locus_list_handler(request):
    """Create a LocusList through create_seqr_model and grant the creator access.

    Validates the name and parsed items, populates the list's items, then
    assigns user permissions before returning the serialized list.
    """
    request_json = json.loads(request.body)

    name = request_json.get('name')
    if not name:
        return create_json_response({}, status=400, reason='"Name" is required')

    genes_by_id, intervals, invalid_items = parse_locus_list_items(request_json)
    should_reject = invalid_items and not request_json.get('ignoreInvalidItems')
    if should_reject:
        return create_json_response(
            {'invalidLocusListItems': invalid_items}, status=400, reason=INVALID_ITEMS_ERROR)

    new_list = create_seqr_model(
        LocusList,
        name=request_json['name'],
        description=request_json.get('description') or '',
        is_public=request_json.get('isPublic') or False,
        created_by=request.user,
    )
    _update_locus_list_items(new_list, genes_by_id, intervals, request_json, request.user)
    add_locus_list_user_permissions(new_list)

    response_body = {
        'locusListsByGuid': {new_list.guid: get_json_for_locus_list(new_list, request.user)},
        'genesById': genes_by_id,
    }
    return create_json_response(response_body)
def get_es_variants(search_model, es_search_cls=EsSearch, sort=XPOS_SORT_KEY, **kwargs):
    """Run (or resume) an Elasticsearch variant search for the given search model.

    Results are cached in redis keyed by search GUID + sort; a fully cached page
    is returned without re-querying. Returns (variant_results, total_results).

    NOTE(review): the exact schema of `search_model.variant_search.search` and the
    shape of the returned variants are defined by project helpers not visible here.
    """
    # Cache key includes the sort so differently-sorted result sets don't collide
    cache_key = 'search_results__{}__{}'.format(search_model.guid, sort or XPOS_SORT_KEY)
    previous_search_results = safe_redis_get_json(cache_key) or {}
    previously_loaded_results, search_kwargs = es_search_cls.process_previous_results(
        previous_search_results, **kwargs)
    # Short-circuit: the requested page was already loaded on a previous call
    if previously_loaded_results is not None:
        return previously_loaded_results, previous_search_results.get('total_results')

    search = search_model.variant_search.search

    # Validate locus filters up front so a bad gene/interval fails fast
    genes, intervals, invalid_items = parse_locus_list_items(search.get('locus', {}))
    if invalid_items:
        raise Exception('Invalid genes/intervals: {}'.format(', '.join(invalid_items)))

    rs_ids, variant_ids, invalid_items = _parse_variant_items(search.get('locus', {}))
    if invalid_items:
        raise Exception('Invalid variants: {}'.format(', '.join(invalid_items)))

    es_search = es_search_cls(
        search_model.families.all(),
        previous_search_results=previous_search_results,
        skip_unaffected_families=search.get('inheritance'),
    )

    # Optional raw query clauses supplied by the caller; accept a single dict or a list
    if search.get('customQuery'):
        custom_q = search['customQuery']
        if not isinstance(custom_q, list):
            custom_q = [custom_q]
        for q_dict in custom_q:
            es_search.filter(Q(q_dict))

    if sort:
        es_search.sort(sort)

    if genes or intervals or rs_ids or variant_ids:
        es_search.filter_by_location(
            genes=genes, intervals=intervals, rs_ids=rs_ids, variant_ids=variant_ids,
            locus=search['locus'])
        # When searching only for specific variant/rs ids (not excluded regions),
        # the result count is bounded by the number of requested ids
        if (variant_ids or rs_ids) and not (genes or intervals) and not search['locus'].get('excludeLocations'):
            search_kwargs['num_results'] = len(variant_ids) + len(rs_ids)

    if search.get('freqs'):
        es_search.filter_by_frequency(search['freqs'])

    es_search.filter_by_annotation_and_genotype(
        search.get('inheritance'), quality_filter=search.get('qualityFilter'),
        annotations=search.get('annotations'),
        annotations_secondary=search.get('annotations_secondary'),
        pathogenicity=search.get('pathogenicity'))

    # Some search classes support per-gene aggregation; apply it when available
    if hasattr(es_search, 'aggregate_by_gene'):
        es_search.aggregate_by_gene()

    variant_results = es_search.search(**search_kwargs)

    # Persist accumulated results so subsequent pages can be served from cache
    safe_redis_set_json(cache_key, es_search.previous_search_results)

    return variant_results, es_search.previous_search_results['total_results']
def get_es_variants(search_model, page=1, num_results=100):
    """Page through Elasticsearch variant results for a saved search model.

    Serves previously-loaded pages from `search_model.results` when possible;
    otherwise builds and executes the ES query, merging compound-het results
    for COMPOUND_HET/RECESSIVE inheritance searches. Returns the requested
    page of variants (a list).

    NOTE(review): result shapes and filter semantics come from project helpers
    (_location_filter, _genotype_filter, etc.) not visible in this chunk.
    """
    start_index = (page - 1) * num_results
    end_index = page * num_results
    if search_model.total_results is not None:
        # Don't request past the known end of the result set
        end_index = min(end_index, search_model.total_results)

    previous_search_results = search_model.results or {}
    loaded_results = previous_search_results.get('all_results') or []
    if len(loaded_results) >= end_index:
        # The whole requested page is already cached
        if previous_search_results.get('compound_het_results'):
            results, _ = _get_compound_het_page(loaded_results, page, num_results)
            return results
        return loaded_results[start_index:end_index]
    elif len(loaded_results):
        # Partially cached: only fetch the portion not yet loaded
        start_index = max(start_index, len(loaded_results))

    search = search_model.variant_search.search
    sort = search_model.sort

    genes, intervals, invalid_items = parse_locus_list_items(search.get('locus', {}))
    if invalid_items:
        raise Exception('Invalid genes/intervals: {}'.format(', '.join(invalid_items)))

    es_search, family_samples_by_id, elasticsearch_index = _get_es_search_for_families(
        search_model.families.all(), elasticsearch_index=search_model.es_index)

    if genes or intervals:
        es_search = es_search.filter(_location_filter(genes, intervals, search['locus']))

    # Pathogencicity and transcript consequences act as "OR" filters instead of the usual "AND"
    pathogenicity_annotations_filter = _pathogenicity_filter(search.get('pathogenicity', {}))
    allowed_consequences = None
    if search.get('annotations'):
        consequences_filter, allowed_consequences = _annotations_filter(search['annotations'])
        if pathogenicity_annotations_filter:
            pathogenicity_annotations_filter |= consequences_filter
        else:
            pathogenicity_annotations_filter = consequences_filter
    if pathogenicity_annotations_filter:
        es_search = es_search.filter(pathogenicity_annotations_filter)

    if search.get('freqs'):
        es_search = es_search.filter(_frequency_filter(search['freqs']))

    genotypes_q, inheritance_mode, compound_het_q = _genotype_filter(
        search.get('inheritance'), family_samples_by_id,
        quality_filter=search.get('qualityFilter'))

    # Compound-het candidates are searched separately from the main genotype query
    compound_het_search = None
    if compound_het_q:
        compound_het_search = es_search.filter(compound_het_q)

    es_search = es_search.filter(genotypes_q)

    if inheritance_mode == RECESSIVE:
        # recessive results are merged with compound het results so need to load all results through the end of the requested page,
        # not just a single page's worth of results (i.e. when skipping pages need to load middle pages as well)
        start_index = len(previous_search_results.get('variant_results') or [])

    sort = _get_sort(sort)
    variant_results = []
    total_results = 0
    if inheritance_mode != COMPOUND_HET:
        es_search = es_search.sort(*sort)
        logger.info('Searching in elasticsearch index: {}'.format(elasticsearch_index))
        variant_results, total_results = _execute_search(
            es_search, family_samples_by_id, start_index=start_index, end_index=end_index)

    compound_het_results = previous_search_results.get('compound_het_results')
    total_compound_het_results = None
    if inheritance_mode in [COMPOUND_HET, RECESSIVE] and compound_het_results is None:
        # For compound het search get results from aggregation instead of top level hits
        compound_het_search = compound_het_search[:0] if compound_het_search else es_search[:0]
        compound_het_search.aggs.bucket(
            'genes', 'terms', field='geneIds', min_doc_count=2, size=10000
        ).metric(
            'vars_by_gene', 'top_hits', size=100, sort=sort, _source=QUERY_FIELD_NAMES)
        logger.info('Searching in elasticsearch index: {}'.format(elasticsearch_index))
        logger.debug(json.dumps(compound_het_search.to_dict(), indent=2))
        response = compound_het_search.execute()
        compound_het_results, total_compound_het_results = _parse_compound_het_hits(
            response, allowed_consequences, family_samples_by_id)
        logger.info('Total compound het hits: {}'.format(total_compound_het_results))

    if compound_het_results:
        previous_search_results['compound_het_results'] = compound_het_results
        # Merge all previously-loaded plain variant results with this batch
        variant_results += previous_search_results.get('variant_results', [])
        previous_search_results['variant_results'] = variant_results
        if total_compound_het_results is not None:
            total_results += total_compound_het_results
        else:
            # Compound het totals were already folded into the saved total
            total_results = search_model.total_results
        grouped_variants = [[var] for var in variant_results]
        grouped_variants = compound_het_results + grouped_variants
        # Sort merged result sets
        grouped_variants = sorted(grouped_variants, key=lambda variants: tuple(variants[0]['_sort']))
        # Only return the requested page of variants
        variant_results, end_index = _get_compound_het_page(grouped_variants, page, num_results)
        previous_search_results['all_results'] = grouped_variants[:end_index]
    # Only save contiguous pages of results
    elif len(loaded_results) == start_index:
        previous_search_results['all_results'] = loaded_results + variant_results

    # Persist cached results and metadata for the next page request
    search_model.results = previous_search_results
    search_model.total_results = total_results
    search_model.es_index = elasticsearch_index
    search_model.save()

    return variant_results
def get_es_variants(search_model, sort=XPOS_SORT_KEY, page=1, num_results=100, load_all=False):
    """Run a paged Elasticsearch variant search, caching results in redis.

    Cached pages (and compound-het grouped pages) are served without
    re-querying ES. Returns (variant_results, total_results).

    NOTE(review): if the redis connection fails, the search still runs but
    results are not cached; `redis_client` stays None and the later `.set`
    raises AttributeError, which is swallowed by the broad except below.
    """
    cache_key = 'search_results__{}__{}'.format(search_model.guid, sort)
    redis_client = None
    previous_search_results = {}
    try:
        redis_client = redis.StrictRedis(host=settings.REDIS_SERVICE_HOSTNAME, socket_connect_timeout=3)
        previous_search_results = json.loads(redis_client.get(cache_key) or '{}')
    except Exception as e:
        # Best-effort cache: log and continue with an empty previous-results dict
        logger.warn("Unable to connect to redis host: {}".format(settings.REDIS_SERVICE_HOSTNAME) + str(e))

    total_results = previous_search_results.get('total_results')
    if load_all:
        # Load everything in one page; 10000 is the fallback when the total is unknown
        num_results = total_results or 10000
    start_index = (page-1)*num_results
    end_index = page * num_results
    if previous_search_results.get('total_results') is not None:
        end_index = min(end_index, previous_search_results['total_results'])

    loaded_results = previous_search_results.get('all_results') or []
    if len(loaded_results) >= end_index:
        # Entire requested page is already cached
        return loaded_results[start_index:end_index], total_results

    grouped_results = previous_search_results.get('grouped_results')
    if grouped_results:
        # Compound-het style grouped results may also satisfy the page
        results = _get_compound_het_page(grouped_results, start_index, end_index)
        if results is not None:
            return results, total_results

    search = search_model.variant_search.search

    genes, intervals, invalid_items = parse_locus_list_items(search.get('locus', {}))
    if invalid_items:
        raise Exception('Invalid genes/intervals: {}'.format(', '.join(invalid_items)))

    es_search = EsSearch(search_model.families.all(), previous_search_results=previous_search_results)

    es_search.sort(sort)

    if genes or intervals:
        es_search.filter(_location_filter(genes, intervals, search['locus']))

    # Pathogencicity and transcript consequences act as "OR" filters instead of the usual "AND"
    pathogenicity_filter = _pathogenicity_filter(search.get('pathogenicity', {}))
    if search.get('annotations'):
        es_search.filter_by_annotations(search['annotations'], pathogenicity_filter)
    elif pathogenicity_filter:
        es_search.filter(pathogenicity_filter)

    if search.get('freqs'):
        es_search.filter(_frequency_filter(search['freqs']))

    es_search.filter_by_genotype(
        search.get('inheritance'),
        quality_filter=search.get('qualityFilter'),
    )

    variant_results = es_search.search(page=page, num_results=num_results)

    try:
        # Cache accumulated results for subsequent page requests
        redis_client.set(cache_key, json.dumps(es_search.previous_search_results))
    except Exception as e:
        logger.warn("Unable to write to redis: {}".format(settings.REDIS_SERVICE_HOSTNAME) + str(e))

    search_model.save()

    return variant_results, es_search.previous_search_results['total_results']