def _find_snp_position(snp_track, name):
    ''' Look up a marker by name and return its position.

    When snp_track is None the 'MARKER' index is searched on the "id" field;
    otherwise snp_track is split on "-" into (group, track) and the matching
    'CP_STATS_<GROUP>' index is searched on the "name" field.

    Returns a dict with keys 'chr', 'start', 'end' and 'name' built from the
    first hit, or a dict with a single 'error' key when there is no hit.
    '''
    if snp_track is None:
        marker_query = ElasticQuery.query_match("id", name)
        marker_search = Search(marker_query, idx=ElasticSettings.idx('MARKER'))
    else:
        track_match = re.match(r"(.*)-(.*)", snp_track)
        (group, track) = track_match.group(1, 2)
        try:
            track_idx = ElasticSettings.idx('CP_STATS_'+group.upper(), snp_track.upper())
        except SettingsError:
            # No dedicated index type configured: address the track by path instead.
            track_idx = ElasticSettings.idx('CP_STATS_'+group.upper())+"/"+track
        marker_query = ElasticQuery.query_match("name", name)
        marker_search = Search(marker_query, idx=track_idx)

    hits = marker_search.get_json_response()['hits']['hits']
    if len(hits) > 0:
        first = hits[0]['_source']
        end = first['start']
        return {'chr': first['seqid'].replace('chr', ""),
                'start': (end-1),
                'end': end,
                'name': name}

    return {'error': 'Marker '+name+' does not exist in the currently selected dataset'}
def filter_queryset(self, request, queryset, view):
    ''' Request from elastic only the page of documents that is needed.

    The page size and offset come from the view's paginator. GET parameters
    named in the view's 'filter_fields' are turned into search filters by
    self._build_filters. The total hit count is stored on view.es_count and
    the hits are returned as a list of ElasticObject instances.
    '''
    page_size = view.paginator.get_limit(request)
    page_offset = view.paginator.get_offset(request)
    allowed = getattr(view, 'filter_fields', [])
    filters = {key: value for key, value in request.GET.items() if key in allowed}

    search_filters = self._build_filters(filters=filters)
    if search_filters is None:
        es_query = ElasticQuery(Query.match_all())
    else:
        es_query = ElasticQuery.filtered(Query.match_all(), search_filters)

    search = Search(search_query=es_query, idx=view.idx, size=page_size, search_from=page_offset)
    response = search.get_json_response()

    documents = []
    for hit in response['hits']['hits']:
        document = ElasticObject(initial=hit['_source'])
        document.uuid = hit['_id']
        documents.append(document)

    view.es_count = response['hits']['total']
    return documents
def filter_queryset(self, request, queryset, view):
    ''' Override this method to request just the documents required from elastic.

    The index (or comma separated list of indices) is resolved from the
    'feature_type' GET parameter, defaulting to 'GENE_CRITERIA'. The total
    hit count is stored on view.es_count and the requested page of hits is
    returned as a list of ElasticObject instances carrying 'uuid' and
    'criteria_type' attributes.
    '''
    q_size = view.paginator.get_limit(request)
    q_from = view.paginator.get_offset(request)
    filterable = getattr(view, 'filter_fields', [])
    filters = {k: v for k, v in request.GET.items() if k in filterable}

    criteria_idx = self._get_index(filters.get('feature_type', 'GENE_CRITERIA'))
    if isinstance(criteria_idx, list):
        # Several index keys: search them all in one request.
        idx = ','.join(ElasticSettings.idx(name) for name in criteria_idx)
    else:
        idx = ElasticSettings.idx(criteria_idx)

    q = ElasticQuery(Query.match_all())
    s = Search(search_query=q, idx=idx, size=q_size, search_from=q_from)
    json_results = s.get_json_response()

    results = []
    for result in json_results['hits']['hits']:
        new_obj = ElasticObject(initial=result['_source'])
        new_obj.uuid = result['_id']
        new_obj.criteria_type = result['_type']
        results.append(new_obj)

    view.es_count = json_results['hits']['total']
    return results
def get_object(self):
    ''' Fetch the single document whose id is given by the lookup field.

    Returns an ElasticObject built from the first hit, with its 'uuid' set
    to the document id. Raises Http404 when a TypeError, ValueError or
    IndexError occurs while reading the hit (an empty hit list raises
    IndexError) or while checking object permissions.
    '''
    id_query = ElasticQuery(Query.ids(self.kwargs[self.lookup_field]))
    search = Search(search_query=id_query, idx=self.idx)
    try:
        hit = search.get_json_response()['hits']['hits'][0]
        found = ElasticObject(initial=hit['_source'])
        found.uuid = hit['_id']
        # May raise a permission denied
        self.check_object_permissions(self.request, found)
    except (TypeError, ValueError, IndexError):
        raise Http404
    else:
        return found
def get_object(self):
    ''' Fetch the single criteria document whose id is given by the lookup field.

    Returns an ElasticObject built from the first hit, with 'uuid' set to
    the document id and 'criteria_type' set to the hit's type. Raises
    Http404 when a TypeError, ValueError or IndexError occurs while reading
    the hit (an empty hit list raises IndexError) or while checking object
    permissions.
    '''
    id_query = ElasticQuery(Query.ids(self.kwargs[self.lookup_field]))
    search = Search(search_query=id_query, idx=self.idx)
    try:
        hit = search.get_json_response()['hits']['hits'][0]
        found = ElasticObject(initial=hit['_source'])
        found.uuid = hit['_id']
        found.criteria_type = hit['_type']
        # May raise a permission denied
        self.check_object_permissions(self.request, found)
    except (TypeError, ValueError, IndexError):
        raise Http404
    else:
        return found
def post(self, request, *args, **kwargs):
    ''' Get study hits for an ensembl ID, a marker or a list of markers.

    Reads 'ens_id', 'marker' and 'markers[]' from the POST data; the first
    one that is non-empty (in that order) selects the filter. Gene ids in
    each hit are replaced by an {ensembl id: symbol} dict and each pmid by a
    dict with the pmid, last name of the first author and the journal.

    Returns a JsonResponse with the (augmented) hits, or a JsonResponse with
    status 400 when none of the three parameters was supplied.
    '''
    ens_id = self.request.POST.get('ens_id')
    marker = self.request.POST.get('marker')
    markers = self.request.POST.getlist('markers[]')

    if ens_id:
        sfilter = Filter(Query.query_string(ens_id, fields=["genes"]).query_wrap())
    elif marker:
        sfilter = Filter(Query.query_string(marker, fields=["marker"]).query_wrap())
    elif markers:
        sfilter = Filter(Query.query_string(' '.join(markers), fields=["marker"]).query_wrap())
    else:
        # Without any search term there is no filter to build; report it to
        # the client rather than failing on an unbound variable.
        return JsonResponse({'error': 'One of ens_id, marker or markers[] is required.'}, status=400)

    query = ElasticQuery.filtered(Query.match_all(), sfilter)
    elastic = Search(query, idx=ElasticSettings.idx('REGION', 'STUDY_HITS'), size=500)
    study_hits = elastic.get_json_response()['hits']

    # Collect every gene id and pmid first so each lookup is done once.
    gene_ids = []
    pmids = []
    for hit in study_hits['hits']:
        source = hit['_source']
        if 'pmid' in source:
            pmids.append(source['pmid'])
        if 'genes' in source:
            gene_ids.extend(source['genes'])
    docs = utils.get_gene_docs_by_ensembl_id(gene_ids, ['symbol'])
    pub_docs = PublicationDocument.get_pub_docs_by_pmid(pmids, sources=['authors.name', 'journal'])

    for hit in study_hits['hits']:
        source = hit['_source']
        genes = {}
        if 'genes' in source:
            for gene_id in source['genes']:
                try:
                    genes[gene_id] = getattr(docs[gene_id], 'symbol')
                except KeyError:
                    # Unknown gene: fall back to the id, keeping the symbols
                    # already resolved for this hit.
                    genes[gene_id] = gene_id
        source['genes'] = genes

        if 'pmid' in source:
            pmid = source['pmid']
            try:
                authors = getattr(pub_docs[pmid], 'authors')
                journal = getattr(pub_docs[pmid], 'journal')
                source['pmid'] = \
                    {'pmid': pmid,
                     'author': authors[0]['name'].rsplit(None, 1)[-1] if authors else "",
                     'journal': journal}
            except KeyError:
                source['pmid'] = {'pmid': pmid}
    return JsonResponse(study_hits)
def _build_frags_query(frags_idx, chrom, segmin, segmax):
    ''' Fetch the fragments of a chromosome segment from frags_idx.

    Matches documents whose "seqid" is chrom (with or without the "chr"
    prefix) and whose "end" lies within [segmin, segmax], then passes their
    sources through utils.makeRelative with the 'start' and 'end' fields.
    '''
    seqid_query = Query.terms("seqid", [chrom, str("chr"+chrom)])
    end_filter = Filter(RangeQuery("end", gte=segmin, lte=segmax))
    es_query = ElasticQuery.filtered(seqid_query, end_filter, utils.bedFields)

    search = Search(search_query=es_query, search_from=0, size=10000, idx=frags_idx)
    response = search.get_json_response()

    fragments = [hit['_source'] for hit in response['hits']['hits']]
    return utils.makeRelative(int(segmin), int(segmax), ['start', 'end'], fragments)
def get_criteria_details(cls, feature_id, idx, idx_type, criteria_id=None):
    '''Function to get the criteria hits for a given feature_id.

    Searches idx/idx_type for documents whose "qid" term equals feature_id
    and returns the 'hits' section of the json response.

    @type  feature_id: string
    @keyword feature_id: Id of the feature (gene => gene_id, region=>region_id)
    @type  criteria_id: string
    @keyword criteria_id: criteria_id eg: cand_gene_in_study, gene_in_region.
                          NOTE: accepted but not used by this function.
    @type  idx: string
    @param idx: name of the index
    @type  idx_type: string
    @param idx_type: name of the idx type, each criteria is an index type
    '''
    qid_query = ElasticQuery(Query.term("qid", feature_id))
    response = Search(qid_query, idx=idx, idx_type=idx_type).get_json_response()
    return response['hits']
def _build_hic_query(query, targetIdx, segmin=0, segmax=0):
    ''' Run query against the 'CP_TARGET_<targetIdx>' index.

    Returns a tuple (hic, segmin, segmax). When the response contains
    "error", hic is a dict with an 'error' message. Otherwise hic holds the
    hit sources passed through utils.makeRelative. If segmin or segmax is 0
    the coordinates are taken from utils.segCoords and widened by 5% of the
    span on each side.
    '''
    search = Search(query, idx=ElasticSettings.idx('CP_TARGET_'+targetIdx), search_from=0, size=2000)
    response = search.get_json_response()
    if "error" in response:
        return {'error': 'No search results found. Please try again.'}, segmin, segmax

    hits = response['hits']['hits']
    if len(hits) == 0:
        return [], segmin, segmax

    interactions = [hit['_source'] for hit in hits]
    if segmin == 0 or segmax == 0:
        # No segment given: derive it from the interactions and pad it.
        (segmin, segmax) = utils.segCoords(interactions)
        padding = int(0.05*(segmax-segmin))
        segmin = segmin - padding
        segmax = segmax + padding

    interactions = utils.makeRelative(int(segmin), int(segmax),
                                      ['baitStart', 'baitEnd', 'oeStart', 'oeEnd'], interactions)
    return interactions, segmin, segmax
def _build_exon_query(chrom, segmin, segmax, genes):
    ''' Get the exonic structure of each gene in this section.

    For every entry of genes an exon search restricted to "seqid" == chrom
    is run on its "gene_id". Returns a dict mapping gene_id to its exons,
    made relative to (segmin, segmax) and sorted by "start".
    '''
    exons_by_gene = dict()
    seqid_bool = BoolQuery()
    seqid_bool.must([Query.term("seqid", chrom)])

    if len(genes) == 0:
        return exons_by_gene

    for gene in genes:
        exon_query = ElasticQuery.filtered_bool(Query.query_string(gene["gene_id"], fields=["name"]),
                                                seqid_bool, sources=utils.snpFields)
        search = Search(exon_query, idx=chicp_settings.CP_GENE_IDX+'/exons/',
                        search_from=0, size=2000)
        response = search.get_json_response()
        exons = [hit['_source'] for hit in response['hits']['hits']]
        exons = utils.makeRelative(int(segmin), int(segmax), ['start', 'end'], exons)
        exons_by_gene[gene["gene_id"]] = sorted(exons, key=operator.itemgetter("start"))
    return exons_by_gene
def interaction_details(request):
    """ Return the interactions of the gene given by the POSTed "ens_id".

    Each interactor in the hits gets a "symbol" entry: the gene symbol when
    a gene document is found for it, otherwise the interactor id itself.
    """
    ens_id = request.POST.get("ens_id")
    parent_query = ElasticQuery.has_parent("gene", Query.ids(ens_id))
    search = Search(parent_query, idx=ElasticSettings.idx("GENE", "INTERACTIONS"), size=500)
    interaction_hits = search.get_json_response()["hits"]

    interactor_ids = [
        interactor["interactor"]
        for hit in interaction_hits["hits"]
        for interactor in hit["_source"]["interactors"]
    ]
    docs = _get_gene_docs_by_ensembl_id(interactor_ids, ["symbol"])

    for hit in interaction_hits["hits"]:
        for interactor in hit["_source"]["interactors"]:
            interactor_id = interactor["interactor"]
            try:
                symbol = docs[interactor_id].symbol
            except KeyError:
                symbol = interactor_id
            interactor["symbol"] = symbol
    return JsonResponse(interaction_hits)
def interaction_details(request):
    ''' Return the interactions of the gene given by the POSTed 'ens_id'.

    Each interactor in the hits gets a 'symbol' entry: the gene symbol when
    a gene document is found for it, otherwise the interactor id itself.
    '''
    ens_id = request.POST.get('ens_id')
    parent_query = ElasticQuery.has_parent('gene', Query.ids(ens_id))
    search = Search(parent_query, idx=ElasticSettings.idx('GENE', 'INTERACTIONS'), size=500)
    interaction_hits = search.get_json_response()['hits']

    interactor_ids = [
        interactor['interactor']
        for hit in interaction_hits['hits']
        for interactor in hit['_source']['interactors']
    ]
    docs = utils.get_gene_docs_by_ensembl_id(interactor_ids, ['symbol'])

    for hit in interaction_hits['hits']:
        for interactor in hit['_source']['interactors']:
            interactor_id = interactor['interactor']
            try:
                symbol = docs[interactor_id].symbol
            except KeyError:
                symbol = interactor_id
            interactor['symbol'] = symbol
    return JsonResponse(interaction_hits)
def studies_details(request):
    """ Get studies for a given ensembl ID.

    Reads "ens_id" from the POST data and searches the study hits whose
    "genes" field matches it. In each hit the gene ids are replaced by an
    {ensembl id: symbol} dict (the id itself is used when no gene document
    is found) and the pmid by a dict with the pmid, the last name of the
    first author (empty string when there are no authors) and the journal.
    Returns the augmented hits as a JsonResponse.
    """
    ens_id = request.POST.get("ens_id")
    sfilter = Filter(Query.query_string(ens_id, fields=["genes"]).query_wrap())
    query = ElasticQuery.filtered(Query.match_all(), sfilter)
    elastic = Search(query, idx=ElasticSettings.idx("REGION", "STUDY_HITS"), size=500)
    study_hits = elastic.get_json_response()["hits"]

    # Collect every gene id and pmid first so each lookup is done once.
    gene_ids = []
    pmids = []
    for hit in study_hits["hits"]:
        source = hit["_source"]
        if "pmid" in source:
            pmids.append(source["pmid"])
        gene_ids.extend(source["genes"])
    docs = _get_gene_docs_by_ensembl_id(gene_ids, ["symbol"])
    pub_docs = _get_pub_docs_by_pmid(pmids, sources=["authors.name", "journal"])

    for hit in study_hits["hits"]:
        source = hit["_source"]
        genes = {}
        for gene_id in source["genes"]:
            try:
                genes[gene_id] = getattr(docs[gene_id], "symbol")
            except KeyError:
                # Unknown gene: fall back to the id, keeping the symbols
                # already resolved for this hit.
                genes[gene_id] = gene_id
        source["genes"] = genes

        if "pmid" in source:
            pmid = source["pmid"]
            try:
                authors = getattr(pub_docs[pmid], "authors")
                journal = getattr(pub_docs[pmid], "journal")
                source["pmid"] = {
                    "pmid": pmid,
                    # A publication without authors must not break the response.
                    "author": authors[0]["name"].rsplit(None, 1)[-1] if authors else "",
                    "journal": journal,
                }
            except KeyError:
                source["pmid"] = {"pmid": pmid}
    return JsonResponse(study_hits)
def genesets_details(request):
    ''' Return the pathway gene sets matching the POSTed 'ens_id'.

    In each hit the 'gene_sets' list is replaced by an {ensembl id: symbol}
    dict; the id itself is used when no gene document is found for it.
    '''
    ens_id = request.POST.get('ens_id')
    geneset_filter = Filter(Query.query_string(ens_id, fields=["gene_sets"]).query_wrap())
    es_query = ElasticQuery.filtered(Query.match_all(), geneset_filter)
    search = Search(es_query, idx=ElasticSettings.idx('GENE', 'PATHWAY'), size=500)
    genesets_hits = search.get_json_response()['hits']

    member_ids = [
        gene_id
        for hit in genesets_hits['hits']
        for gene_id in hit['_source']['gene_sets']
    ]
    docs = utils.get_gene_docs_by_ensembl_id(member_ids, ['symbol'])

    for hit in genesets_hits['hits']:
        symbols = {}
        for gene_id in hit['_source']['gene_sets']:
            try:
                symbol = docs[gene_id].symbol
            except KeyError:
                symbol = gene_id
            symbols[gene_id] = symbol
        hit['_source']['gene_sets'] = symbols
    return JsonResponse(genesets_hits)
def genesets_details(request):
    """ Return the pathway gene sets matching the POSTed "ens_id".

    In each hit the "gene_sets" list is replaced by an {ensembl id: symbol}
    dict; the id itself is used when no gene document is found for it.
    """
    ens_id = request.POST.get("ens_id")
    geneset_filter = Filter(Query.query_string(ens_id, fields=["gene_sets"]).query_wrap())
    es_query = ElasticQuery.filtered(Query.match_all(), geneset_filter)
    search = Search(es_query, idx=ElasticSettings.idx("GENE", "PATHWAY"), size=500)
    genesets_hits = search.get_json_response()["hits"]

    member_ids = [
        gene_id
        for hit in genesets_hits["hits"]
        for gene_id in hit["_source"]["gene_sets"]
    ]
    docs = _get_gene_docs_by_ensembl_id(member_ids, ["symbol"])

    for hit in genesets_hits["hits"]:
        symbols = {}
        for gene_id in hit["_source"]["gene_sets"]:
            try:
                symbol = docs[gene_id].symbol
            except KeyError:
                symbol = gene_id
            symbols[gene_id] = symbol
        hit["_source"]["gene_sets"] = symbols
    return JsonResponse(genesets_hits)
def _build_snp_query(snp_track, chrom, segmin, segmax):
    ''' Get the SNPs of a chromosome segment for the given SNP track.

    snp_track has the form "<group>-<track>". Documents whose "seqid" is
    chrom (with or without the "chr" prefix) and whose "end" lies within
    [segmin, segmax] are fetched from the matching 'CP_STATS_<GROUP>' index
    and made relative to the segment.

    Returns a tuple (snps, snpMeta). snpMeta is a copy of the STUDY_DEFAULTS
    entry for the group's data type with 'max' set to the highest SNP score
    (-1 when there are no SNPs). When snp_track is empty or the string
    'None', ([], {}) is returned.
    '''
    snps = []
    snpMeta = {}
    if not snp_track or snp_track == 'None':
        return snps, snpMeta

    # get SNPs based on this segment
    mo = re.match(r"(.*)-(.*)", snp_track)
    (group, track) = mo.group(1, 2)
    try:
        snp_track_idx = ElasticSettings.idx('CP_STATS_'+group.upper(), snp_track.upper())
    except SettingsError:
        # No dedicated index type configured: address the track by path instead.
        snp_track_idx = ElasticSettings.idx('CP_STATS_'+group.upper())+"/"+track

    query = ElasticQuery.filtered(Query.terms("seqid", [chrom, str("chr"+chrom)]),
                                  Filter(RangeQuery("end", gte=segmin, lte=segmax)),
                                  utils.snpFields)
    snpQuery = Search(search_query=query, search_from=0, size=10000, idx=snp_track_idx)
    snpResult = snpQuery.get_json_response()
    snps = [hit['_source'] for hit in snpResult['hits']['hits']]
    snps = utils.makeRelative(int(segmin), int(segmax), ['start', 'end'], snps)

    data_type = ElasticSettings.get_label('CP_STATS_'+group.upper(), None, "data_type")
    snpSettings = getattr(chicp_settings, 'STUDY_DEFAULTS').get(data_type)

    maxScore = -1
    for s in snps:
        score = float(s['score'])
        if score > maxScore:
            maxScore = score

    # Copy before adding 'max' so the shared settings dict is never modified
    # by a request.
    snpMeta = dict(snpSettings)
    snpMeta['max'] = maxScore
    return snps, snpMeta
def get_all_criteria_disease_tags(cls, qids, idx, idx_type):
    ''' Get the disease tags of every criteria for the given feature ids.

    @type  qids: list or None
    @param qids: feature ids to restrict the search to; None matches all documents
    @type  idx: string
    @param idx: name of the index
    @type  idx_type: string
    @param idx_type: name of the idx type(s) to search

    Returns a dict keyed by qid. Each value maps a criteria type to its
    disease tags and additionally holds 'all' (the result of
    get_all_criteria_disease_tags_aggregated for that qid) and 'meta_info'
    (meta descriptions of the criteria types seen in the hits).
    '''
    if qids is None:
        query = ElasticQuery(Query.match_all(), sources=['disease_tags', 'qid'])
    else:
        query = ElasticQuery(Query.terms("qid", qids), sources=['disease_tags', 'qid'])

    search = Search(query, idx=idx, idx_type=idx_type)
    hits = search.get_json_response()['hits']['hits']

    meta_info = {}
    criteria_disease_tags = {}
    for hit in hits:
        # Only hits that come from exactly this index are considered.
        if idx != hit['_index']:
            continue
        qid = hit['_source']['qid']
        criteria_desc = hit['_type']
        meta_desc = cls.get_meta_desc(idx, [criteria_desc])
        meta_info[criteria_desc] = meta_desc[idx][criteria_desc]
        if qid not in criteria_disease_tags:
            criteria_disease_tags[qid] = {}
        criteria_disease_tags[qid][criteria_desc] = hit['_source']['disease_tags']

    for fid, fvalue in criteria_disease_tags.items():
        # Aggregate per feature: pass the id of the feature being processed,
        # not the qid left over from the last hit of the loop above.
        fvalue['all'] = cls.get_all_criteria_disease_tags_aggregated(fid, fvalue)
        fvalue['meta_info'] = meta_info

    return criteria_disease_tags
def _add_diseases():
    ''' Return up to 100 hits from the 'disease' index (match-all query). '''
    match_all = ElasticQuery(Query.match_all())
    disease_search = Search(search_query=match_all, size=100, idx='disease')
    response = disease_search.get_json_response()
    return response['hits']['hits']
def pub_details(request):
    ''' Return the publication hits for the POSTed list of "pmids[]".

    The search size equals the number of pmids supplied.
    '''
    pmids = request.POST.getlist("pmids[]")
    id_query = ElasticQuery(Query.ids(pmids))
    publication_idx = ElasticSettings.idx('PUBLICATION', 'PUBLICATION')
    search = Search(id_query, idx=publication_idx, size=len(pmids))
    response = search.get_json_response()
    return JsonResponse(response['hits'])
def filter_queryset(self, request, queryset, view):
    ''' Override this method to request just the documents required from elastic.

    GET parameters named in the view's 'filter_fields' select the mode:

    - feature_id with aggregate == 'true': returns a single ElasticObject
      holding the disease tag codes for that feature.
    - feature_id with detail == 'true': returns one ElasticObject per
      criteria with its disease tags and feature details.
    - otherwise: returns the requested page of criteria documents and
      stores the total hit count on view.es_count.
    '''
    q_size = view.paginator.get_limit(request)
    q_from = view.paginator.get_offset(request)
    filterable = getattr(view, 'filter_fields', [])
    filters = {k: v for k, v in request.GET.items() if k in filterable}

    criteria_idx = self._get_index(filters.get('feature_type', 'GENE_CRITERIA'))
    feature_type = filters.get('feature_type')
    feature_id = filters.get('feature_id')
    aggregate = filters.get('aggregate')
    detail = filters.get('detail')

    if isinstance(criteria_idx, list):
        # Several index keys: search them all in one request.
        idx = ','.join(ElasticSettings.idx(name) for name in criteria_idx)
    else:
        idx = ElasticSettings.idx(criteria_idx)

    results = []
    if feature_id and aggregate == 'true':
        disease_doc_tags = Criteria.get_disease_tags(feature_id, idx=idx)
        new_obj = ElasticObject()
        new_obj.qid = feature_id
        new_obj.disease_tags = [getattr(d, 'code') for d in disease_doc_tags]
        new_obj.criteria_type = None
        results.append(new_obj)
        return results

    if feature_id and detail == 'true':
        (idx, idx_types) = Criteria.get_feature_idx_n_idxtypes(feature_type.lower())
        criteria_details = Criteria.get_criteria_details(feature_id, idx=idx, idx_type=idx_types)
        criteria_list = idx_types.split(',')
        criteria_details_expanded = Criteria.add_meta_info(idx, criteria_list, criteria_details)
        feature_details = self._get_feature_details(criteria_details_expanded)
        for criteria, details in feature_details.items():
            new_obj = ElasticObject()
            new_obj.qid = details['qid']
            new_obj.criteria_type = criteria
            new_obj.disease_tags = details['disease_tags']
            new_obj.feature_details = list(details['fdetails'])
            results.append(new_obj)
        return results

    q = ElasticQuery(Query.match_all())
    s = Search(search_query=q, idx=idx, size=q_size, search_from=q_from)
    json_results = s.get_json_response()
    for result in json_results['hits']['hits']:
        new_obj = ElasticObject(initial=result['_source'])
        new_obj.uuid = result['_id']
        new_obj.criteria_type = result['_type']
        results.append(new_obj)
    view.es_count = json_results['hits']['total']
    return results
def pub_details(request):
    """ Return the publication hits for the POSTed list of "pmids[]".

    The search size equals the number of pmids supplied.
    """
    pmids = request.POST.getlist("pmids[]")
    id_query = ElasticQuery(Query.ids(pmids))
    publication_idx = ElasticSettings.idx("PUBLICATION", "PUBLICATION")
    search = Search(id_query, idx=publication_idx, size=len(pmids))
    response = search.get_json_response()
    return JsonResponse(response["hits"])