def filter_queryset(self, request, queryset, view):
    ''' Override this method to request just the documents required from elastic. '''
    size = view.paginator.get_limit(request)
    offset = view.paginator.get_offset(request)
    filterable = getattr(view, 'filter_fields', [])
    # keep only the GET parameters the view declares as filterable
    filters = {k: v for k, v in request.GET.items() if k in filterable}

    search_filters = self._build_filters(filters=filters)
    if search_filters is None:
        query = ElasticQuery(Query.match_all())
    else:
        query = ElasticQuery.filtered(Query.match_all(), search_filters)

    search = Search(search_query=query, idx=getattr(view, 'idx'),
                    size=size, search_from=offset)
    response = search.get_json_response()

    docs = []
    for hit in response['hits']['hits']:
        obj = ElasticObject(initial=hit['_source'])
        obj.uuid = hit['_id']
        docs.append(obj)
    # expose the total hit count for the paginator
    view.es_count = response['hits']['total']
    return docs
def get_interaction_doc(self, interaction_source='intact', parent_id=None):
    ''' Fetch an interaction document for the given source (random, or the
    child of parent_id when given) together with its parent gene doc.

    @type interaction_source: string
    @keyword interaction_source: interaction data source, e.g. 'intact'
    @type parent_id: string
    @keyword parent_id: optional parent gene id to restrict the lookup
    @return: (interaction doc, parent gene doc) tuple
    '''
    idx_key = 'GENE'
    idx_type_key = 'INTERACTIONS'
    parent_idx_key = 'GENE'
    idx = ElasticSettings.idx(idx_key, idx_type_key)
    (idx, idx_type) = idx.split('/')

    if parent_id:
        qbool_intact = BoolQuery().must([Query.term("interaction_source", interaction_source),
                                         Query.term("_parent", parent_id)])
    else:
        qbool_intact = BoolQuery().should([Query.term("interaction_source", interaction_source)])

    # Get random doc or specific if id is passed in query
    docs_by_geneid = DataIntegrityUtils.get_rdm_docs(idx, idx_type, qbool=qbool_intact,
                                                     sources=[], size=1)
    doc = docs_by_geneid[0]

    # Get parent doc
    parent_id = doc.parent()
    parent_docs = DataIntegrityUtils.fetch_from_elastic(idx_key, parent_idx_key, [parent_id])
    if parent_docs:
        self.assertTrue(len(parent_docs) >= 1, "Found 1 parent")
        parent_doc = parent_docs[0]
        return doc, parent_doc
    else:
        # BUG FIX: the retry previously hard-coded 'intact', silently dropping
        # the caller's interaction_source on recursion.
        return self.get_interaction_doc(interaction_source, parent_id)
def test_region_attributes(self):
    ''' test region attributes '''
    idx = ElasticSettings.idx(RegionDataTest.IDX_KEY, 'REGION')
    (idx, idx_type) = idx.split('/')
    docs = ElasticUtils.get_rdm_docs(idx, idx_type, qbool=Query.match_all(),
                                     sources=[], size=1)
    padded = utils.Region.pad_region_doc(docs[0])

    # each region attribute should resolve fully against its own index
    checks = [
        ("genes", ('GENE', 'GENE'), "All genes on region found in GENE index"),
        ("studies", ('STUDY', 'STUDY'), "All study ids for region found in STUDY index"),
        ("pmids", ('PUBLICATION', 'PUBLICATION'), "All PMIDs for region found in PUBLICATION index"),
    ]
    for attr, idx_keys, msg in checks:
        ids = getattr(padded, attr)
        if len(ids) > 0:
            query = ElasticQuery(Query.ids(ids))
            result = Search(query, idx=ElasticSettings.idx(*idx_keys),
                            size=len(ids)).search()
            self.assertEqual(len(ids), result.hits_total, msg)
def show_es_gene_section(gene_symbol=None, seqid=None, start_pos=None, end_pos=None):
    ''' Template inclusion tag to render a gene section given a chado gene feature.

    @keyword gene_symbol: gene symbol to match (takes precedence when given)
    @keyword seqid: chromosome, with or without a 'chr' prefix
    @keyword start_pos: feature start (or SNP position when end_pos is None)
    @keyword end_pos: feature end
    '''
    # Normalise to a 'chr'-prefixed string. NOTE: as in the original, a None
    # seqid becomes 'chrNone' here; gene_symbol queries never use seqid.
    if not (isinstance(seqid, str) and seqid.startswith("chr")):
        seqid = 'chr' + str(seqid)

    if gene_symbol is not None:
        # gene symbol query
        query = ElasticQuery.query_match("gene_symbol", gene_symbol)
    elif end_pos is None:
        # start and end are the same: point (SNP) overlap query
        query_bool = BoolQuery(must_arr=[Query.match("seqid", seqid),
                                         RangeQuery("featureloc.start", lte=start_pos),
                                         RangeQuery("featureloc.end", gte=start_pos)])
        query = ElasticQuery.bool(query_bool)
    else:
        # distinct start/end: genes fully contained in the range
        # (fixes the copy-pasted 'start and end are same' comment)
        query_bool = BoolQuery(must_arr=[Query.match("seqid", seqid),
                                         RangeQuery("featureloc.start", gte=start_pos),
                                         RangeQuery("featureloc.end", lte=end_pos)])
        query = ElasticQuery.bool(query_bool)
    elastic = Search(query, idx=ElasticSettings.idx(name='GENE'))
    return {'es_genes': elastic.search().docs}
def association_stats(request, sources=None):
    ''' Get association statistics for a given marker ID. '''
    # NOTE: request is expected to carry 'chr' and 'idx_type' GET parameters
    seqid = request.GET.get('chr').replace('chr', '')
    idx_type = request.GET.get('idx_type').upper()
    start = request.GET.get('start')
    end = request.GET.get('end')
    data = []

    def get_stats(resp_json):
        # collect one record per hit from each scan-and-scroll page
        for hit in resp_json['hits']['hits']:
            doc = Document(hit)
            data.append({
                "CHROM": getattr(doc, 'seqid'),
                "POS": getattr(doc, 'position'),
                "PVALUE": getattr(doc, 'p_value'),
                "DBSNP_ID": getattr(doc, 'marker')
            })

    if start is not None and end is not None:
        query = ElasticQuery(BoolQuery(must_arr=[Query.query_string(seqid, fields=["seqid"]),
                                                 RangeQuery("position", gte=start, lte=end)]),
                             sources=sources)
    else:
        query = ElasticQuery(Query.query_string(seqid, fields=["seqid"]), sources=sources)
    ScanAndScroll.scan_and_scroll(ElasticSettings.idx('IC_STATS', idx_type),
                                  call_fun=get_stats, query=query)
    return JsonResponse({"variants": data})
def get_hits_by_study_id(cls, study_id, sources=None):
    ''' Get visible/authenticated hits for a study.

    @type study_id: string
    @param study_id: study identifier (dil_study_id)
    @keyword sources: optional list of source fields to return
    @return: list of hit documents with genes resolved to symbols and a
             'loc'/'encoded_loc' attribute added for the default build
    '''
    if sources is None:  # was a mutable default argument
        sources = []
    hits_query = ElasticQuery(BoolQuery(must_arr=Query.term('dil_study_id', study_id),
                                        b_filter=Filter(Query.missing_terms("field", "group_name"))),
                              sources=sources)
    docs = Search(hits_query, idx=ElasticSettings.idx('REGION', 'STUDY_HITS'),
                  size=1000).search().docs
    # gather every ensembl id referenced by the hits and resolve to symbols
    ens_ids = [gene for doc in docs if getattr(doc, 'genes')
               for gene in getattr(doc, 'genes')]
    gene_docs = utils.get_gene_docs_by_ensembl_id(ens_ids, ['symbol'])
    for doc in docs:
        if getattr(doc, 'genes'):
            genes = {}
            for ens_id in getattr(doc, 'genes'):
                try:
                    genes[ens_id] = getattr(gene_docs[ens_id], 'symbol')
                except KeyError:
                    # NOTE(review): this resets the dict to a single entry rather
                    # than adding ens_id -> ens_id; kept as-is since the same
                    # pattern appears elsewhere in this module — confirm intent.
                    genes = {ens_id: ens_id}
            setattr(doc, 'genes', genes)
        build_info = getattr(doc, 'build_info')
        for bi in build_info:
            if bi['build'] == settings.DEFAULT_BUILD:
                # human-readable and URL-encoded location strings
                setattr(doc, "loc", "chr" + bi['seqid'] + ":" +
                        str(locale.format("%d", bi['start'], grouping=True)) + "-" +
                        str(locale.format("%d", bi['end'], grouping=True)))
                setattr(doc, "encoded_loc", "chr" + bi['seqid'] + "%3A" +
                        str(bi['start']) + ".." + str(bi['end']))
    return docs
def show_es_gene_section(gene_symbol=None, seqid=None, start_pos=None, end_pos=None):
    ''' Template inclusion tag to render a gene section given a chado gene feature. '''
    seqid = str(seqid).replace('chr', '')
    if gene_symbol is not None:
        # look the gene up directly by symbol
        query = ElasticQuery.query_match("symbol", gene_symbol)
    else:
        if end_pos is None:
            # point query (SNP position): genes spanning start_pos
            must_arr = [Query.match("chromosome", seqid),
                        RangeQuery("start", lte=start_pos),
                        RangeQuery("stop", gte=start_pos)]
        else:
            # range query: genes contained within [start_pos, end_pos]
            must_arr = [Query.match("chromosome", seqid),
                        RangeQuery("start", gte=start_pos),
                        RangeQuery("stop", lte=end_pos)]
        query = ElasticQuery.bool(BoolQuery(must_arr=must_arr))
    elastic = Search(query, idx=ElasticSettings.idx(name='GENE'))
    return {'es_genes': elastic.search().docs}
def fetch_overlapping_features(cls, build, seqid, start, end, idx=None, idx_type=None, disease_id=None):
    ''' function to create fetch overlapping features for a given stretch of region
    the build info is stored as nested document..so nested query is build
    @type build: string
    @param build: build info eg: 'GRCh38'
    @type seqid: string
    @param seqid: chromosome number
    @type start: string
    @param start: region start
    @type end: string
    @param end: region end
    @type idx: string
    @param idx: name of the index
    @type idx_type: string
    @param idx_type: name of the idx type, each criteria is an index type
    @type disease_id: string
    @param disease_id: disease code
    '''
    nbuild = build
    start_range = start
    end_range = end
    # overlap condition 1: the feature completely spans the requested region
    bool_range = BoolQuery()
    bool_range.must(RangeQuery("build_info.start", lte=start_range)) \
        .must(RangeQuery("build_info.end", gte=end_range))
    # ... or condition 2/3: the feature's start or end falls inside the region
    or_filter = OrFilter(RangeQuery("build_info.start", gte=start_range, lte=end_range))
    or_filter.extend(RangeQuery("build_info.end", gte=start_range, lte=end_range)) \
        .extend(bool_range)
    bool_query = BoolQuery()
    if disease_id:
        # NOTE(review): the nested query here wraps the *empty* bool_query just
        # created above, so the build/seqid/overlap constraints are not applied
        # in the disease branch — confirm this is intended.
        qnested_buildinfo = Query.nested('build_info', bool_query)
        bool_query = BoolQuery()
        bool_query.must(Query.term("disease", disease_id.lower())).must(qnested_buildinfo)
        qnested = ElasticQuery(bool_query, sources=['build_info.*',
                                                    'disease_locus',
                                                    'disease',
                                                    'chr_band',
                                                    'species'])
    else:
        # constrain to the requested build/chromosome and the overlap filter
        bool_query.must(Query.term("build_info.build", nbuild)) \
            .must(Query.term("build_info.seqid", seqid)) \
            .filter(or_filter)
        qnested = ElasticQuery(Query.nested('build_info', bool_query),
                               sources=['build_info.*',
                                        'disease_locus',
                                        'disease',
                                        'chr_band',
                                        'species'])
    elastic = Search(qnested, idx=idx, idx_type=idx_type)
    res = elastic.search()
    return res.docs
def test_bool_filtered_query(self):
    ''' Test building and running a filtered boolean query. '''
    bool_q = BoolQuery(must_not_arr=[Query.term("seqid", 2)],
                       should_arr=[RangeQuery("start", gte=10050)])
    bool_q.must([Query.term("id", "rs768019142")])
    bool_q.should(RangeQuery("start", gte=10054))
    query = ElasticQuery.filtered_bool(Query.match_all(), bool_q,
                                       sources=["id", "seqid"])
    search = Search(query, idx=ElasticSettings.idx('DEFAULT'))
    self.assertTrue(search.search().hits_total == 1,
                    "Elastic filtered query retrieved marker (rs768019142)")
def test_and_filtered_query(self):
    ''' Test building and running a filtered query. '''
    bool_q = BoolQuery(must_arr=[RangeQuery("start", gte=1)])
    and_filter = AndFilter(bool_q)
    and_filter.extend(RangeQuery("start", gte=1))
    and_filter.extend(Query.term("seqid", 1))
    query = ElasticQuery.filtered(Query.term("seqid", 1), and_filter)
    search = Search(query, idx=ElasticSettings.idx('DEFAULT'))
    self.assertTrue(search.search().hits_total >= 1,
                    "Elastic filtered query retrieved marker(s)")
def test_url_rotate(self):
    ''' Test the url rotates from http://xxx:9200 to correct url. '''
    sfilter = Filter(Query.term("id", "rs768019142"))
    query = ElasticQuery.filtered(Query.term("seqid", 1), sfilter)
    search = Search(query, idx=ElasticSettings.idx('DEFAULT'))
    self.assertTrue(search.search().hits_total == 1,
                    "Elastic filtered query retrieved marker")
    # trigger a lookup against a non-existent index to exercise rotation
    Search.index_exists('test', 'test2')
    ElasticUrl.URL_INDEX = 0  # reset
def test_term_query(self):
    ''' Test building and running a match query. '''
    # exact-id lookup should hit a single marker
    search = Search(ElasticQuery(Query.term("id", "rs2476601")),
                    idx=ElasticSettings.idx('DEFAULT'))
    self.assertTrue(len(search.search().docs) == 1,
                    "Elastic string query retrieved marker (rs2476601)")
    # boosted term query on the chromosome should return many markers
    search = Search(ElasticQuery(Query.term("seqid", "1", boost=3.0)),
                    idx=ElasticSettings.idx('DEFAULT'))
    self.assertTrue(len(search.search().docs) > 1,
                    "Elastic string query retrieved markers on chr1")
def test_bool_filtered_query2(self):
    ''' Test building and running a filtered boolean query. '''
    bool_q = BoolQuery()
    bool_q.should(RangeQuery("start", lte=20000))
    bool_q.should(Query.term("seqid", 2))
    bool_q.must(Query.term("seqid", 1))
    query_string = Query.query_string("rs768019142", fields=["id", "seqid"])
    query = ElasticQuery.filtered_bool(query_string, bool_q,
                                       sources=["id", "seqid", "start"])
    search = Search(query, idx=ElasticSettings.idx('DEFAULT'))
    self.assertTrue(search.search().hits_total == 1,
                    "Elastic filtered query retrieved marker (rs768019142)")
def _auth_arr(user):
    ''' Get authentication array for BoolQuery for retrieving public and
    authenticated documents. '''
    # all public documents
    auth_arr = [Query.missing_terms("field", "group_name")]
    try:
        groups = [gp.lower() for gp in get_user_groups(user)]
        # all documents in the user's groups
        auth_arr.append(Query.terms("group_name", groups).query_wrap())
    except Http404:
        pass  # not logged in
    return auth_arr
def get_studies(cls, study_ids=None, disease_code=None, sources=None, split_name=True):
    ''' Get study documents, optionally restricted by disease code or ids.

    @keyword study_ids: optional list of study ids to fetch
    @keyword disease_code: optional disease code; takes precedence over study_ids
    @keyword sources: optional list of source fields to return
    @keyword split_name: trim the study name at the first ':' when True
    @return: study docs sorted alphanumerically by study_id
    '''
    if sources is None:  # was a mutable default argument
        sources = []
    if disease_code is not None:
        studies_query = ElasticQuery(BoolQuery(must_arr=Query.term("diseases", disease_code)),
                                     sources=sources)
    elif study_ids:
        studies_query = ElasticQuery(Query.ids(study_ids), sources=sources)
    else:
        studies_query = ElasticQuery(Query.match_all(), sources=sources)
    studies = Search(studies_query, idx=ElasticSettings.idx('STUDY', 'STUDY'),
                     size=200).search().docs
    for doc in studies:
        if split_name and getattr(doc, 'study_name') is not None:
            setattr(doc, 'study_name', getattr(doc, 'study_name').split(':', 1)[0])
    return Document.sorted_alphanum(studies, "study_id")
def test_or_filtered_query(self):
    ''' Test building and running a filtered query. '''
    highlight = Highlight(["id", "seqid"])
    bool_q = BoolQuery(must_arr=[RangeQuery("start", lte=1),
                                 RangeQuery("end", gte=100000)])
    or_filter = OrFilter(RangeQuery("start", gte=1, lte=100000))
    or_filter.extend(bool_q)
    or_filter.extend(Query.query_string("rs*", fields=["id", "seqid"]).query_wrap())
    query = ElasticQuery.filtered(Query.term("seqid", 1), or_filter,
                                  highlight=highlight)
    search = Search(query, idx=ElasticSettings.idx('DEFAULT'))
    self.assertTrue(search.search().hits_total >= 1,
                    "Elastic filtered query retrieved marker(s)")
def test_query_ids(self):
    ''' Test by query ids. '''
    search = Search(ElasticQuery(Query.ids(['1', '2'])),
                    idx=ElasticSettings.idx('DEFAULT'), size=5)
    docs = search.search().docs
    self.assertTrue(len(docs) == 2, "Elastic string query retrieved marker (rs*)")
    # repeat with a single id restricted to the type of the first hit
    idx_type = docs[0].type()
    search = Search(ElasticQuery(Query.ids('2', types=idx_type)),
                    idx=ElasticSettings.idx('DEFAULT'), size=5)
    docs = search.search().docs
    self.assertTrue(len(docs) == 1, "Elastic string query retrieved marker (rs*)")
def test_bool_nested_filter(self):
    ''' Test combined Bool filter '''
    inner = BoolQuery()
    inner.must(Query.match("id", "rs768019142").query_wrap())
    inner.must(Query.term("seqid", 1))
    outer = BoolQuery()
    outer.should(inner)
    outer.should(Query.term("seqid", 2))
    query = ElasticQuery.filtered_bool(Query.match_all(), outer,
                                       sources=["id", "seqid", "start"])
    search = Search(query, idx=ElasticSettings.idx('DEFAULT'))
    self.assertTrue(search.search().hits_total >= 1, "Nested bool filter query")
def test_bool_filtered_query4(self):
    ''' Test building and running a filtered boolean query.
    Note: ElasticQuery used to wrap match in a query object. '''
    bool_q = BoolQuery()
    bool_q.should(RangeQuery("start", lte=20000))
    bool_q.should(Query.term("seqid", 2))
    bool_q.must(Query.match("id", "rs768019142").query_wrap())
    bool_q.must(Query.term("seqid", 1))
    query = ElasticQuery.filtered_bool(Query.match_all(), bool_q,
                                       sources=["id", "seqid", "start"])
    search = Search(query, idx=ElasticSettings.idx('DEFAULT'))
    self.assertTrue(search.search().hits_total == 1,
                    "Elastic filtered query retrieved marker (rs768019142)")
def test_bool_query(self):
    ''' Test a bool query. '''
    highlight = Highlight(["id", "seqid"])
    bool_q = BoolQuery()
    bool_q.must(Query.term("id", "rs768019142"))
    bool_q.must(RangeQuery("start", gt=1000))
    bool_q.must_not(Query.match("seqid", "2"))
    bool_q.should(Query.match("seqid", "3"))
    bool_q.should(Query.match("seqid", "1"))
    query = ElasticQuery.bool(bool_q, highlight=highlight)
    search = Search(query, idx=ElasticSettings.idx('DEFAULT'))
    self.assertTrue(len(search.search().docs) == 1,
                    "Elastic string query retrieved marker (rs768019142)")
def get_elastic_query(cls, section=None, config=None):
    ''' function to build the elastic query object
    @type  section: string
    @keyword section: The section in the criteria.ini file
    @type  config: string
    @keyword config: The config object initialized from criteria.ini.
    @return: L{Query}
    '''
    section_config = config[section]
    source_fields = []
    if 'source_fields' in section_config:
        source_fields_str = section_config['source_fields']
        source_fields = source_fields_str.split(',')
    # MHC sections share fixed chr6 coordinates and ini parameter names;
    # the variables below are only read by the 'mhc' branches that follow
    if 'mhc' in section:
        seqid = '6'
        start_range = 25000000
        end_range = 35000000
        seqid_param = section_config['seqid_param']
        start_param = section_config['start_param']
        end_param = section_config['end_param']
    if section == 'is_gene_in_mhc':
        # for region you should make a different query
        # Defined MHC region as chr6:25,000,000..35,000,000
        query = ElasticUtils.range_overlap_query(seqid, start_range, end_range,
                                                 source_fields, seqid_param,
                                                 start_param, end_param)
    elif section == 'is_marker_in_mhc':
        # markers are points: start must lie within the MHC region
        query_bool = BoolQuery()
        query_bool.must(RangeQuery("start", lte=end_range)) \
            .must(RangeQuery("start", gte=start_range)) \
            .must(Query.term("seqid", seqid))
        query = ElasticQuery.filtered_bool(Query.match_all(), query_bool,
                                           sources=["id", "seqid", "start"])
    elif section == 'is_region_in_mhc':
        query = ElasticQuery(Query.term("region_name", "MHC"))
    elif section == 'marker_is_gwas_significant_in_ic':
        # build a range query: genome-wide significance threshold 5e-8
        gw_sig_p = 0.00000005
        query = ElasticQuery(RangeQuery("p_value", lte=gw_sig_p))
    else:
        # default: match everything, restricted to source_fields if given
        if len(source_fields) > 0:
            query = ElasticQuery(Query.match_all(), sources=source_fields)
        else:
            # query = ElasticQuery(Query.match_all())
            return None
    return query
def test_update_doc(self):
    ''' Update with a partial document. '''
    idx = IDX['MARKER']['indexName']
    query = ElasticQuery(Query.term("id", "rs2476601"), sources=['id'])
    docs = Search(query, idx=idx).search().docs
    self.assertEquals(len(docs), 1, "rs2476601 document")
    # apply a partial update and refresh so it is visible to search
    Update.update_doc(docs[0], {"doc": {"start": 100, "end": 200}})
    Search.index_refresh(IDX['MARKER']['indexName'])
    docs = Search(ElasticQuery(Query.term("id", "rs2476601")), idx=idx).search().docs
    self.assertEquals(len(docs), 1, "rs2476601 document")
    self.assertEquals(getattr(docs[0], 'start'), 100, "rs2476601 start")
    self.assertEquals(getattr(docs[0], 'end'), 200, "rs2476601 end")
def show_disease(disease, scores, text=True, selected=None, href="/disease/"):
    ''' Template inclusion tag to render disease bar. '''
    if isinstance(disease, str):
        if disease == 'OD':
            # pseudo-disease for 'other diseases'; not stored in the index
            disease = Document({"_source": {"code": "OD", "colour": "grey",
                                            "name": "Other Diseases"}})
        else:
            should = [Query.term('code', disease.lower()),
                      Query.term('name', disease.lower())]
            query = ElasticQuery(BoolQuery(should_arr=should))
            disease = Search(query, idx=ElasticSettings.idx('DISEASE'),
                             size=1).search().docs[0]
    score = scores[0] if scores != '' else ''
    return {'disease': disease, 'score': score, 'text': text,
            'selected': selected, 'href': href}
def post(self, request, *args, **kwargs):
    ''' Return study hits (as JSON) for a gene, a marker or a list of markers,
    with gene ids resolved to symbols and PMIDs expanded to author/journal. '''
    ens_id = self.request.POST.get('ens_id')
    marker = self.request.POST.get('marker')
    markers = self.request.POST.getlist('markers[]')
    # NOTE(review): if none of ens_id/marker/markers is supplied, sfilter is
    # unbound and the ElasticQuery call below raises UnboundLocalError.
    if ens_id:
        sfilter = Filter(Query.query_string(ens_id, fields=["genes"]).query_wrap())
    elif marker:
        sfilter = Filter(Query.query_string(marker, fields=["marker"]).query_wrap())
    elif markers:
        sfilter = Filter(Query.query_string(' '.join(markers), fields=["marker"]).query_wrap())
    query = ElasticQuery.filtered(Query.match_all(), sfilter)
    elastic = Search(query, idx=ElasticSettings.idx('REGION', 'STUDY_HITS'), size=500)
    study_hits = elastic.get_json_response()['hits']

    ens_ids = []
    pmids = []
    # first pass: gather all gene ids and pmids referenced by the hits
    for hit in study_hits['hits']:
        if 'pmid' in hit['_source']:
            pmids.append(hit['_source']['pmid'])
        if 'genes' in hit['_source']:
            for ens_id in hit['_source']['genes']:
                ens_ids.append(ens_id)
    docs = utils.get_gene_docs_by_ensembl_id(ens_ids, ['symbol'])
    pub_docs = PublicationDocument.get_pub_docs_by_pmid(pmids, sources=['authors.name', 'journal'])

    # second pass: substitute gene symbols and publication details into hits
    for hit in study_hits['hits']:
        genes = {}
        if 'genes' in hit['_source']:
            for ens_id in hit['_source']['genes']:
                try:
                    genes[ens_id] = getattr(docs[ens_id], 'symbol')
                except KeyError:
                    # NOTE(review): this discards previously collected symbols
                    # for the hit, keeping only the unmatched id — confirm intent.
                    genes = {ens_id: ens_id}
        hit['_source']['genes'] = genes
        if 'pmid' in hit['_source']:
            pmid = hit['_source']['pmid']
            try:
                authors = getattr(pub_docs[pmid], 'authors')
                journal = getattr(pub_docs[pmid], 'journal')
                # surname of the first author only
                hit['_source']['pmid'] = \
                    {'pmid': pmid,
                     'author': authors[0]['name'].rsplit(None, 1)[-1] if authors else "",
                     'journal': journal}
            except KeyError:
                hit['_source']['pmid'] = {'pmid': pmid}
    return JsonResponse(study_hits)
def _build_exon_query(chrom, segmin, segmax, genes):
    ''' Get exonic structure for genes in this section, keyed by gene id
    with exon coordinates made relative to [segmin, segmax]. '''
    gene_exons = dict()
    query_bool = BoolQuery()
    query_bool.must([Query.term("seqid", chrom)])
    if len(genes) > 0:
        for gene in genes:
            query = ElasticQuery.filtered_bool(
                Query.query_string(gene["gene_id"], fields=["name"]),
                query_bool, sources=utils.snpFields)
            elastic = Search(query,
                             idx=getattr(chicp_settings, 'CP_GENE_IDX') + '/exons/',
                             search_from=0, size=2000)
            exons = elastic.get_result()['data']
            exons = utils.makeRelative(int(segmin), int(segmax), ['start', 'end'], exons)
            gene_exons[gene["gene_id"]] = sorted(exons, key=operator.itemgetter("start"))
    return gene_exons
def pad_region_doc(cls, region):
    '''Adds details of disease_loci & hits for a given region doc'''
    hits_idx = ElasticSettings.idx('REGION', 'STUDY_HITS')
    disease_loci = getattr(region, "disease_loci")
    # aggregate min start / max end over build-38 entries of the nested
    # build_info documents to derive the overall region extent
    locus_start = Agg('region_start', 'min', {'field': 'build_info.start'})
    locus_end = Agg('region_end', 'max', {'field': 'build_info.end'})
    match_agg = Agg('filtered_result', 'filter',
                    Query.match("build_info.build", 38).query_wrap(),
                    sub_agg=[locus_start, locus_end])
    build_info_agg = Agg('build_info', 'nested', {"path": 'build_info'},
                         sub_agg=[match_agg])
    # public (no group_name) study hits for this region's disease loci
    query = ElasticQuery(FilteredQuery(Query.terms("disease_locus", disease_loci),
                                       Filter(BoolQuery(should_arr=[Query.missing_terms("field", "group_name")]
                                                        ))))
    resultObj = Search(search_query=query, idx=hits_idx,
                       aggs=Aggs(build_info_agg)).search()
    hit_ids = []
    markers = []
    genes = []
    studies = []
    pmids = []
    # collect attributes across all hit docs
    for doc in resultObj.docs:
        hit_ids.append(doc.doc_id())
        markers.append(getattr(doc, "marker"))
        if hasattr(doc, "genes") and getattr(doc, "genes") != None:
            genes.extend([g for g in getattr(doc, "genes")])
        studies.append(getattr(doc, "dil_study_id"))
        pmids.append(getattr(doc, "pmid"))
    # pull the aggregated extent out of the nested/filter aggregation
    build_info = getattr(resultObj.aggs['build_info'], 'filtered_result')
    region_start = int(build_info['region_start']['value'])
    region_end = int(build_info['region_end']['value'])
    build_info = {
        'build': 38,
        'seqid': getattr(region, "seqid"),
        'start': region_start,
        'end': region_end
    }
    setattr(region, "build_info", build_info)
    setattr(region, "hits", hit_ids)
    setattr(region, "markers", list(set(markers)))
    setattr(region, "genes", list(set(genes)))
    setattr(region, "studies", list(set(studies)))
    setattr(region, "pmids", list(set(pmids)))
    return region
def test_pubs_disease_tags(self):
    ''' Check the number of disease publications against the number of
    tags.disease and report differences`. '''
    count = True
    msg = ''
    for disease in DiseasePublicationTest.DISEASES:
        pmids = self._get_pmids(disease)
        disease_code = disease.lower()
        elastic = Search(search_query=ElasticQuery(
            BoolQuery(b_filter=Filter(Query.term('tags.disease', disease_code))),
            sources=['pmid']),
            idx=ElasticSettings.idx('PUBLICATION'), size=len(pmids)*2)
        res = elastic.get_count()
        msg += disease_code+'\tINDEX: '+str(res['count'])+'\tNCBI: '+str(len(pmids))
        if res['count'] != len(pmids):
            count = False
            # report the symmetric difference between index and NCBI PMIDs
            idx_pmids = [getattr(doc, 'pmid') for doc in elastic.search().docs]
            extra = [pmid for pmid in idx_pmids if pmid not in pmids]
            missing = [pmid for pmid in pmids if pmid not in idx_pmids]
            if len(extra) > 0:
                msg += '\textra PMIDs: '+str(extra)
            if len(missing) > 0:
                msg += '\tmissing PMIDs: '+str(missing)
        msg += '\n'
    print(msg)
    self.assertTrue(count, 'Count for disease tags')
def filter_queryset(self, request, queryset, view):
    ''' Override this method to request just the documents required from Rserve. '''
    try:
        filterable = getattr(view, 'filter_fields', [])
        filters = dict([(k, v) for k, v in request.GET.items() if k in filterable])
        mid1 = filters.get('marker', 'rs2476601')
        dataset = filters.get('dataset', 'EUR').replace('-', '')
        # look up the marker's chromosome from the elastic marker index
        query = ElasticQuery(BoolQuery(must_arr=[Query.term("id", mid1)]),
                             sources=['seqid', 'start'])
        elastic = Search(search_query=query,
                         idx=ElasticSettings.idx('MARKER', 'MARKER'), size=1)
        doc = elastic.search().docs[0]
        seqid = getattr(doc, 'seqid')
        # ask Rserve for population data for this marker/dataset
        rserve = getattr(settings, 'RSERVE')
        conn = pyRserve.connect(host=rserve.get('HOST'), port=rserve.get('PORT'))
        pop_str = conn.r.get_pop(dataset, seqid, mid1)
        pops = json.loads(str(pop_str))
        populations = []
        for pop in pops:
            # tag each entry with its population name before flattening
            pops[pop]['population'] = pop
            populations.append(pops[pop])
        conn.close()
        return [ElasticObject(initial={'populations': populations, 'marker': mid1})]
    except (TypeError, ValueError, IndexError, ConnectionError):
        # NOTE(review): if the failure happens before mid1 is assigned, this
        # handler itself raises UnboundLocalError; also conn is not closed on
        # error — confirm whether that is acceptable here.
        return [ElasticObject(initial={'populations': None, 'marker': mid1})]
def test_error(self):
    ''' Check QueryError is raised for bad function-score arguments. '''
    score_function = ScoreFunction.create_score_function('field_value_factor', field='start')
    # first argument must be a query, second a list of score functions
    self.assertRaises(QueryError, FunctionScoreQuery, 'test_not_query', [score_function])
    self.assertRaises(QueryError, FunctionScoreQuery, Query.match_all(),
                      ['test_not_function_score'])
    # unknown function type, unknown keyword, badly typed field
    self.assertRaises(QueryError, ScoreFunction.create_score_function, 'blah')
    self.assertRaises(QueryError, ScoreFunction.create_score_function,
                      'field_value_factor', random_scoress='val')
    self.assertRaises(QueryError, ScoreFunction.create_score_function,
                      'field_value_factor', field=10)
def get_disease_tags(cls, feature_id, idx=None, idx_type=None):
    ''' function to get the aggregated list of disease_tags for a given
    feature id, aggregated from all criteria_types for a feature type
    @type feature_id: string
    @keyword feature_id: Id of the feature (gene => gene_id, region=>region_id)
    @type idx: string
    @param idx: name of the index
    @type idx_type: string
    @param idx_type: name of the idx type, each criteria is an index type
    '''
    query = ElasticQuery(Query.term("qid", feature_id))
    # "size": 0 returns all buckets for this terms aggregation
    agg = Agg("criteria_disease_tags", "terms", {"field": "disease_tags", "size": 0})
    aggs = Aggs(agg)
    if idx_type:
        search = Search(query, aggs=aggs, idx=idx, idx_type=idx_type)
    else:
        search = Search(query, aggs=aggs, idx=idx)

    disease_tags = []
    try:
        r_aggs = search.search().aggs
        buckets = r_aggs['criteria_disease_tags'].get_buckets()
        disease_tags = [dis_dict['key'].lower() for dis_dict in buckets]
    except Exception:
        # BUG FIX: was a bare 'except:' which also swallowed SystemExit /
        # KeyboardInterrupt.
        return []

    # resolve the tags to disease docs (core site diseases first)
    if (len(disease_tags) > 0):
        (core, other) = Disease.get_site_diseases(dis_list=disease_tags)
        diseases = list(core)
        diseases.extend(other)
        return diseases
    else:
        # NOTE(review): returns None here but [] on search failure — confirm
        # callers handle both.
        return None
def get_gene_docs_by_ensembl_id(cls, ens_ids, sources=None):
    ''' Get the gene documents for the corresponding array of ensembl IDs.
    A dictionary is returned with the key being the ensembl ID and the
    value the gene document. '''
    search = Search(ElasticQuery(Query.ids(ens_ids), sources=sources),
                    idx=ElasticSettings.idx('GENE', idx_type='GENE'),
                    size=len(ens_ids))
    return {doc.doc_id(): doc for doc in search.search().docs}
def test_missing_terms_filtered_query(self):
    ''' Test filtered query with a missing terms filter. '''
    # documents without a group_name are the public ones
    terms_filter = TermsFilter.get_missing_terms_filter("field", "group_name")
    query = ElasticQuery.filtered(Query.match_all(), terms_filter)
    docs = Search(query, idx=ElasticSettings.idx('DEFAULT')).search().docs
    self.assertTrue(len(docs) == 3, "Elastic string query retrieved all public docs")
def filter_queryset(self, request, queryset, view):
    ''' Override this method to request just the documents required from elastic. '''
    q_size = view.paginator.get_limit(request)
    q_from = view.paginator.get_offset(request)
    filterable = getattr(view, 'filter_fields', [])
    # BUG FIX: removed leftover debug print() calls
    filters = {k: v for k, v in request.GET.items() if k in filterable}

    criteria_idx = self._get_index(filters.get('feature_type', 'GENE_CRITERIA'))
    # a feature type may map to several criteria indices
    if isinstance(criteria_idx, list):
        idx = ','.join(ElasticSettings.idx(name) for name in criteria_idx)
    else:
        idx = ElasticSettings.idx(criteria_idx)

    s = Search(search_query=ElasticQuery(Query.match_all()), idx=idx,
               size=q_size, search_from=q_from)
    json_results = s.get_json_response()
    results = []
    for result in json_results['hits']['hits']:
        new_obj = ElasticObject(initial=result['_source'])
        new_obj.uuid = result['_id']
        new_obj.criteria_type = result['_type']
        results.append(new_obj)
    # expose the total hit count for the paginator
    view.es_count = json_results['hits']['total']
    return results
def gene_mgi_parse(cls, gene_pubs, idx):
    ''' Parse Ensembl and MGI data from JAX. '''
    orthogenes_mgi = {}
    for gene_mgi in gene_pubs:
        parts = gene_mgi.split('\t')
        if 'MGI:' not in parts[0]:
            raise PipelineError('MGI not found '+parts[0])
        if 'ENSMUSG' not in parts[5]:
            raise PipelineError('ENSMUSG not found '+parts[5])
        # map mouse ensembl id -> MGI accession (numeric part only)
        orthogenes_mgi[parts[5]] = parts[0].replace('MGI:', '')

    orthogene_keys = list(orthogenes_mgi.keys())
    # update the gene index in chunks of 450 ids per bulk request
    chunk_size = 450
    for i in range(0, len(orthogene_keys), chunk_size):
        chunk_gene_keys = orthogene_keys[i:i+chunk_size]
        json_data = ''
        query = ElasticQuery.filtered(Query.match_all(),
                                      TermsFilter.get_terms_filter("dbxrefs.orthologs.mmusculus.ensembl",
                                                                   chunk_gene_keys))
        docs = Search(query, idx=idx, size=chunk_size).search().docs
        for doc in docs:
            ens_id = doc.doc_id()
            idx_type = doc.type()
            # attach the MGI accession to the existing mouse ortholog dbxref
            mm = getattr(doc, 'dbxrefs')['orthologs']['mmusculus']
            mm['MGI'] = orthogenes_mgi[mm['ensembl']]
            dbxrefs = {"dbxrefs": {'orthologs': {"mmusculus": mm}}}
            # bulk-update action line followed by the partial doc line
            doc_data = {"update": {"_id": ens_id, "_type": idx_type,
                                   "_index": idx, "_retry_on_conflict": 3}}
            json_data += json.dumps(doc_data) + '\n'
            json_data += json.dumps({'doc': dbxrefs}) + '\n'
        if json_data != '':
            Loader().bulk_load(idx, idx_type, json_data)
def gene2ensembl_parse(cls, gene2ens, idx, idx_type):
    ''' Parse gene2ensembl file from NCBI and add entrez to gene index. '''
    genes = {}
    for line in gene2ens:
        # human records only (taxonomy id 9606)
        if not line.startswith('9606\t'):
            continue
        parts = line.split('\t')
        ens_id = parts[2]
        if ens_id not in genes:
            genes[ens_id] = {'dbxrefs': {'entrez': parts[1]}}

    query = ElasticQuery(Query.ids(list(genes.keys())))
    docs = Search(query, idx=idx, idx_type=idx_type, size=80000).search().docs
    # bulk-update the matched gene docs in chunks of 450
    chunk_size = 450
    for start in range(0, len(docs), chunk_size):
        json_data = ''
        for doc in docs[start:start+chunk_size]:
            ens_id = doc._meta['_id']
            idx_type = doc.type()
            action = {"update": {"_id": ens_id, "_type": idx_type,
                                 "_index": idx, "_retry_on_conflict": 3}}
            json_data += json.dumps(action) + '\n'
            json_data += json.dumps({'doc': genes[ens_id]}) + '\n'
        if json_data != '':
            Loader().bulk_load(idx, idx_type, json_data)
def gene2ensembl_parse(cls, gene2ens, idx, idx_type):
    ''' Parse gene2ensembl file from NCBI and add entrez to gene index. '''
    genes = {}
    for line in gene2ens:
        # human records only (taxonomy id 9606)
        if line.startswith('9606\t'):
            parts = line.split('\t')
            ens_id = parts[2]
            if ens_id not in genes:
                genes[ens_id] = {'dbxrefs': {'entrez': parts[1]}}

    def process_hits(resp_json):
        # bulk-update each scan-and-scroll page of hits in chunks of 450
        docs = [Document(hit) for hit in resp_json['hits']['hits']]
        chunk_size = 450
        for start in range(0, len(docs), chunk_size):
            json_data = ''
            for doc in docs[start:start+chunk_size]:
                ens_id = doc._meta['_id']
                idx_type = doc.type()
                action = {"update": {"_id": ens_id, "_type": idx_type,
                                     "_index": idx, "_retry_on_conflict": 3}}
                json_data += json.dumps(action) + '\n'
                json_data += json.dumps({'doc': genes[ens_id]}) + '\n'
            if json_data != '':
                Loader().bulk_load(idx, idx_type, json_data)

    query = ElasticQuery(Query.ids(list(genes.keys())))
    ScanAndScroll.scan_and_scroll(idx, idx_type=idx_type, call_fun=process_hits,
                                  query=query)
def get_criteria(docs, doc_type, doc_attr, idx_type_key):
    """ Return a dictionary of gene name:criteria. """
    genes = [getattr(doc, doc_attr).lower() for doc in docs if doc.type() == doc_type]
    query = Query.terms("Name", genes)
    sources = {"exclude": ["Primary id", "Object class", "Total score"]}
    if ElasticSettings.idx("CRITERIA", idx_type_key) is None:
        return {}
    res = Search(ElasticQuery(query, sources=sources),
                 idx=ElasticSettings.idx("CRITERIA", idx_type_key),
                 size=len(genes)).search()

    criteria = {}
    skip = ("Name", "_meta", "OD_Hs")
    for doc in res.docs:
        # iterate the doc's attributes in a stable (sorted) order
        ordered = collections.OrderedDict(sorted(doc.__dict__.items(), key=lambda t: t[0]))
        gene_name = getattr(doc, "Name")
        # keep only criteria with a non-zero score; strip the '_Hs' suffix
        criteria[gene_name] = [{attr.replace("_Hs", ""): value.split(":")}
                               for attr, value in ordered.items()
                               if attr not in skip and not value.startswith("0")]
        # 'OD' (other diseases) is appended separately under its own key
        if hasattr(doc, "OD_Hs") and not getattr(doc, "OD_Hs").startswith("0"):
            criteria.setdefault(gene_name, []).append({"OD": getattr(doc, "OD_Hs").split(":")})
    return criteria
def _get_pub_docs_by_pmid(pmids, sources=None):
    """ Look up publication documents for an array of PMIDs.
    Returns a dictionary keyed by document id with the publication doc as value. """
    search = Search(ElasticQuery(Query.ids(pmids), sources=sources),
                    idx=ElasticSettings.idx("PUBLICATION"), size=len(pmids))
    return dict((d.doc_id(), d) for d in search.search().docs)
def _get_random_marker(self):
    ''' Pick a random marker document from the dbSNP elastic index and return its id. '''
    (idx, idx_type) = ElasticSettings.idx('MARKER', 'MARKER').split('/')
    # Restrict to a random chromosome (1-10) and reasonably weighted markers.
    chromosome = random.randint(1, 10)
    qbool = BoolQuery(must_arr=[Query.term("seqid", chromosome),
                                RangeQuery("tags.weight", gte=80)])
    rdm = ElasticUtils.get_rdm_docs(idx, idx_type, qbool=qbool,
                                    sources=['id', 'start'], size=1)
    return getattr(rdm[0], 'id')
def _check_gene_history(cls, gene_sets, config):
    ''' Look up the given gene ids in the gene history index.
    Returns a tuple of (mapping of discontinued id -> replacement id,
    list of ids discontinued with no replacement). '''
    section = config['GENE_HISTORY']
    replaced = {}
    retired = []

    def collect(resp_json):
        ''' Accumulate replacement/retired ids from one scroll page. '''
        for hit in resp_json['hits']['hits']:
            doc = Document(hit)
            new_id = getattr(doc, 'geneid')
            old_id = str(getattr(doc, 'discontinued_geneid'))
            if new_id is None:
                retired.append(old_id)
            else:
                replaced[old_id] = str(new_id)

    query = ElasticQuery.filtered(Query.match_all(),
                                  TermsFilter.get_terms_filter("discontinued_geneid", gene_sets),
                                  sources=['geneid', 'discontinued_geneid'])
    ScanAndScroll.scan_and_scroll(section['index'], idx_type=section['index_type'],
                                  call_fun=collect, query=query)
    return (replaced, retired)
def test_bulk(self):
    ''' Test Bulk.load() index, delete and update actions.
    Uses assertEqual/assertNotEqual (the *Equals aliases are deprecated). '''
    self.set_up()
    idx = IDX['MARKER']['indexName']
    elastic = Search(ElasticQuery(Query.match_all()), idx=idx)
    hits_total1 = elastic.get_count()['count']

    # Build a single 'index' action plus its document.
    json_data = '{"index": {"_index": "%s", "_type": "%s"}}\n' % \
        (idx, 'marker')
    json_data += json.dumps({"alt": "G", "start": 946, "seqid": "1", "filter": ".",
                             "ref": "A", "id": "rsXXXXX", "qual": ".", "info": "RS=XXXXX"})
    # The bulk API requires a trailing newline; without it the load fails.
    resp = Bulk.load(idx, '', json_data)
    self.assertNotEqual(resp.status_code, 200)

    Bulk.load(idx, '', json_data + '\n')
    Search.index_refresh(idx)
    hits_total2 = elastic.get_count()['count']
    self.assertEqual(hits_total2, hits_total1 + 1, "contains documents")

    # Produce errors by deleting/updating a doc id that does not exist.
    json_data += '{"delete": {"_index": "%s", "_type": "%s", "_id": "%s"}}\n' % \
        (idx, 'marker', 'XYZ')
    json_data += '{"update": {"_index": "%s", "_type": "%s", "_id": "%s"}}\n' % \
        (idx, 'marker', 'XYZ')
    json_data += '{"doc": {"start": 100, "end": 200}}\n'
    resp = Bulk.load(idx, '', json_data)
    self.assertTrue('errors' in resp.json() and resp.json()['errors'])
def _ensembl_entrez_lookup(cls, ensembl_gene_sets, section):
    ''' Build a dictionary mapping ensembl gene id to entrez id. '''
    terms = TermsFilter.get_terms_filter("dbxrefs.ensembl", ensembl_gene_sets)
    equery = ElasticQuery.filtered(Query.match_all(), terms,
                                   sources=['dbxrefs.ensembl', 'dbxrefs.entrez'])
    search = Search(equery, idx=section['index'], size=len(ensembl_gene_sets))
    return dict((doc.doc_id(), getattr(doc, 'dbxrefs')['entrez'])
                for doc in search.search().docs)
def test_doc(self):
    ''' Ensure the search returns GeneDocument instances for the gene index. '''
    idx = PydginTestSettings.IDX['GENE']['indexName']
    idx_type = PydginTestSettings.IDX['GENE']['indexType']
    query = ElasticQuery(Query.match_all(), sources=['symbol'])
    res = Search(search_query=query, idx=idx, idx_type=idx_type, size=2).search()
    for doc in res.docs:
        self.assertTrue(isinstance(doc, GeneDocument))
def get_overlapping_hits(self, build, seqid, start, end):
    ''' Search for study hits (tier <= 2) overlapping [start, end] on the
    given chromosome/build. '''
    # Hit fully spans the region...
    spans_region = BoolQuery(must_arr=[RangeQuery("build_info.start", lte=start),
                                       RangeQuery("build_info.end", gte=end)])
    # ...or either endpoint falls inside the region.
    overlap = OrFilter(RangeQuery("build_info.start", gte=start, lte=end))
    overlap.extend(RangeQuery("build_info.end", gte=start, lte=end)).extend(spans_region)

    on_build = BoolQuery(must_arr=[Query.term("build_info.seqid", seqid),
                                   Query.term("build_info.build", build)])
    nested = Query.nested("build_info", FilteredQuery(on_build, overlap))
    query = ElasticQuery.filtered_bool(
        nested,
        BoolQuery(must_arr=[RangeQuery("tier", lte=2)]),
        # sources=["disease", "marker", "chr_band", "tier", "build_info", "disease_locus"]
    )
    search = Search(search_query=query, idx=ElasticSettings.idx('REGION', 'STUDY_HITS'))
    return search.search().docs
def get_publications(cls, pmids, sources=None):
    ''' Get publication documents for the given list of PMIDs.

    @type  pmids: list
    @param pmids: PubMed ids to fetch.
    @type  sources: list
    @keyword sources: _source filtering for the returned docs (default: all fields).
    @return: list of publication docs, or None if no PMIDs given.
    '''
    if pmids is None or not pmids:
        return None
    # Avoid a mutable default argument; preserve the old [] behaviour.
    if sources is None:
        sources = []
    from elastic.search import Search, ElasticQuery
    # Request as many results as ids queried; the previous fixed size=2
    # silently truncated the result set for longer PMID lists.
    pubs = Search(ElasticQuery(Query.ids(pmids), sources=sources),
                  idx=ElasticSettings.idx('PUBLICATION', 'PUBLICATION'),
                  size=len(pmids)).search().docs
    return pubs
def _get_query_filters(q_dict, user):
    ''' Build query bool filter. If biotypes are specified add them to the
    filter and allow for other non-gene types.
    @type q_dict: dict
    @param q_dict: request dictionary.
    @return: L{Filter} or None when no biotypes were requested.
    '''
    biotypes = q_dict.getlist("biotypes")
    if not biotypes:
        return None

    query_bool = BoolQuery()
    query_bool.should(Query.terms("biotype", biotypes))
    # Allow results of the requested non-gene categories regardless of biotype.
    type_filter = [Query.query_type_for_filter(ElasticSettings.search_props(c.upper(), user)['idx_type'])
                   for c in q_dict.getlist("categories") if c != "gene"]
    if len(type_filter) > 0:
        query_bool.should(type_filter)
    return Filter(query_bool)
def get_rdm_feature_id(cls, idx, idx_type, qbool=None, sources=None, field=None):
    ''' Get a random feature id (or a named field from a random doc) from the index.

    @keyword qbool: query restricting the candidate docs (default: match all).
    @keyword sources: _source filtering (default: none).
    @keyword field: attribute to return instead of the document id.
    '''
    # None sentinels avoid defaults evaluated once at definition time
    # (a shared Query object / mutable list); behaviour is unchanged.
    if qbool is None:
        qbool = Query.match_all()
    if sources is None:
        sources = []
    doc = cls.get_rdm_docs(idx, idx_type, qbool=qbool, sources=sources, size=1)[0]
    if field is not None:
        return getattr(doc, field)
    return doc.doc_id()
def test_sort_query(self):
    ''' Test sorting of query results by string and dictionary sort definitions. '''
    query = ElasticQuery(Query.match_all())

    # Sort defined as a string.
    search = Search(query, idx=ElasticSettings.idx('DEFAULT'), qsort=Sort('start:asc,_score'))
    self._check_sort_order(search.search().docs)

    # Sort defined as a dictionary.
    dict_sort = Sort({"sort": [{"start": {"order": "asc", "mode": "avg"}}]})
    search = Search(query, idx=ElasticSettings.idx('DEFAULT'), qsort=dict_sort)
    self._check_sort_order(search.search().docs)

    # An invalid sort definition raises an error.
    self.assertRaises(QueryError, Sort, 1)
def query_string(cls, query_term, sources=None, highlight=None, query_filter=None, **string_opts):
    ''' Factory method for creating an elastic Query String Query.
    @type  query_term: string
    @param query_term: The string to use in the query.
    @type  sources: array of result fields
    @keyword sources: The _source filtering to be used (default: None).
    @type  highlight: Highlight
    @keyword highlight: Define the highlighting of results (default: None).
    @type  query_filter: Filter
    @keyword query_filter: Optional filter for query.
    @return: L{ElasticQuery}
    '''
    base_query = Query.query_string(query_term, **string_opts)
    query = base_query if query_filter is None else FilteredQuery(base_query, query_filter)
    return cls(query, sources, highlight)
def test_get_rdm_feature_id(self):
    ''' A random feature id can be fetched and resolves to exactly one doc. '''
    idx = IDX['GFF_GENERIC']['indexName']
    idx_type = IDX['GFF_GENERIC']['indexType']
    doc_id = ElasticUtils.get_rdm_feature_id(idx, idx_type)
    self.assertTrue(isinstance(doc_id, str), 'Document id')
    found = Search(ElasticQuery(Query.ids(doc_id)), idx=idx).search().docs
    self.assertTrue(len(found) == 1, 'Document retrieved')
def test_delete_docs_by_query(self):
    ''' Test deleting docs using a query: first a single doc by term,
    then all remaining docs with the default match-all query.
    Uses assertEqual (the assertEquals alias is deprecated). '''
    self.set_up()
    idx = IDX['MARKER']['indexName']
    elastic = Search(ElasticQuery(Query.match_all()), idx=idx)
    hits_total1 = elastic.get_count()['count']
    self.assertGreater(hits_total1, 0, "contains documents")

    # Delete a single doc.
    Delete.docs_by_query(idx, query=Query.term("id", "rs2476601"))
    Search.index_refresh(idx)
    hits_total2 = elastic.get_count()['count']
    self.assertEqual(hits_total2, hits_total1 - 1, "contains documents")

    # Delete the remaining docs.
    Delete.docs_by_query(idx, 'marker')
    Search.index_refresh(idx)
    self.assertEqual(elastic.get_count()['count'], 0, "contains no documents")
def test_terms_query(self):
    ''' Test building and running a terms query with highlighting. '''
    wanted = ["rs2476601", "rs768019142"]
    query = ElasticQuery(Query.terms("id", wanted), highlight=Highlight(["id"]))
    docs = Search(query, idx=ElasticSettings.idx('DEFAULT')).search().docs
    self.assertTrue(len(docs) == 2,
                    "Elastic string query retrieved markers (rs2476601, rs768019142)")
    self.assertTrue(getattr(docs[0], 'seqid'), "Hit attribute found")
    self.assertTrue(docs[0].highlight() is not None, "highlighting found")
def _build_frags_query(frags_idx, chrom, segmin, segmax):
    ''' Query the fragments index for fragments overlapping [segmin, segmax]
    on the given chromosome and return them with coordinates made relative
    to the segment. '''
    # Match both bare ("1") and prefixed ("chr1") sequence ids;
    # the redundant str() wrapper around the concatenation was removed.
    query = ElasticQuery.filtered(Query.terms("seqid", [chrom, "chr" + chrom]),
                                  Filter(RangeQuery("end", gte=segmin, lte=segmax)),
                                  utils.bedFields)
    fragsQuery = Search(search_query=query, search_from=0, size=2000000, idx=frags_idx)
    frags = fragsQuery.get_result()['data']
    frags = utils.makeRelative(int(segmin), int(segmax), ['start', 'end'], frags)
    return frags
def test_search_count(self):
    ''' Full index count should exceed a count restricted by a must-not query. '''
    idx = IDX['GFF_GENERIC']['indexName']
    idx_type = IDX['GFF_GENERIC']['indexType']
    total = ElasticUtils.get_docs_count(idx, idx_type)
    self.assertGreater(total, 0, 'index count')

    not_chr1 = ElasticQuery(BoolQuery(must_not_arr=[Query.term('seqid', 'chr1')]))
    restricted = ElasticUtils.get_docs_count(idx, idx_type, search_query=not_chr1)
    self.assertGreater(total, restricted, 'search query count')
def test_function_score_query(self):
    ''' Test a function score query with a query (using the start position
    as the score; reciprocal modifier, so smaller starts score higher). '''
    score_fn = ScoreFunction.create_score_function('field_value_factor',
                                                   field='start', modifier='reciprocal')
    qs = Query.query_string("rs*", fields=["id", "seqid"])
    query = ElasticQuery(FunctionScoreQuery(qs, [score_fn], boost_mode='replace'))
    docs = Search(query, idx=ElasticSettings.idx('DEFAULT')).search().docs
    self.assertGreater(len(docs), 1, str(len(docs)))

    # With reciprocal scoring the docs come back ordered by ascending start.
    previous = 0
    for doc in docs:
        current = getattr(doc, 'start')
        self.assertLess(previous, current)
        previous = current
def docs_by_query(cls, idx, idx_type='', query=Query.match_all()):
    ''' Delete all documents matching the given Query via bulk delete actions. '''
    def bulk_delete(resp_json):
        ''' Issue a bulk delete for one scroll page of hits. '''
        lines = ['{"delete": {"_index": "%s", "_type": "%s", "_id": "%s"}}\n' %
                 (hit['_index'], hit['_type'], hit['_id'])
                 for hit in resp_json['hits']['hits']]
        Bulk.load(idx, idx_type, ''.join(lines))

    # Only the _id is needed to build the delete actions.
    scan_query = ElasticQuery(query, sources='_id')
    ScanAndScroll.scan_and_scroll(idx, idx_type=idx_type, call_fun=bulk_delete,
                                  query=scan_query)
def _get_current_build_info(self, seqid, position):
    ''' Get upper & lower boundaries for a hit given the position of the marker.
    Returns a dict with 'build', 'seqid', 'start' and 'end'. '''
    def first_hapmap_doc(range_query, order):
        ''' Return the first HapMap doc on this seqid matching range_query,
        sorted by position in the given order ('asc'/'desc'). '''
        query = ElasticQuery(
            BoolQuery(must_arr=[range_query, Query.match("seqid", seqid)]))
        result = Search(query, idx=ElasticSettings.idx('HAPMAP', 'HAPMAP'),
                        qsort=Sort('position:' + order), size=1).search()
        return result.docs[0]

    # Genetic map position of the first HapMap entry at/after the marker position.
    doc = first_hapmap_doc(RangeQuery("position", gte=position), 'asc')
    genetic_map_position = getattr(doc, "genetic_map_position")

    # Boundaries: nearest entries at least 0.1 cM away on either side
    # (same queries as before, deduplicated into the helper above).
    start_doc = first_hapmap_doc(
        RangeQuery("genetic_map_position", gte=(genetic_map_position + 0.1)), 'asc')
    start = int(getattr(start_doc, "position"))
    end_doc = first_hapmap_doc(
        RangeQuery("genetic_map_position", lte=(genetic_map_position - 0.1)), 'desc')
    end = int(getattr(end_doc, "position"))

    return {'build': 38, 'seqid': seqid, 'start': start, 'end': end}
def get_object(self):
    ''' Fetch a single document by id and return it as an ElasticObject.
    Raises Http404 when the document cannot be retrieved. '''
    query = ElasticQuery(Query.ids(self.kwargs[self.lookup_field]))
    search = Search(search_query=query, idx=getattr(self, 'idx'))
    try:
        hit = search.get_json_response()['hits']['hits'][0]
        obj = ElasticObject(initial=hit['_source'])
        obj.uuid = hit['_id']
        # May raise a permission denied.
        self.check_object_permissions(self.request, obj)
        return obj
    except (TypeError, ValueError, IndexError):
        raise Http404
def test_mapping_parent_child(self):
    ''' Test creating a mapping with a parent/child relationship and
    querying it with has_parent/has_child queries.
    Uses assertEqual (the assertEquals alias is deprecated). '''
    gene_mapping = MappingProperties("gene")
    gene_mapping.add_property("symbol", "string", analyzer="full_name")
    inta_mapping = MappingProperties("publication", "gene")
    load = Loader()
    idx = "test__mapping__"+SEARCH_SUFFIX
    options = {"indexName": idx, "shards": 1}
    requests.delete(ElasticSettings.url() + '/' + idx)

    # Add child mappings first.
    status = load.mapping(inta_mapping, "publication", analyzer=Loader.KEYWORD_ANALYZER, **options)
    self.assertTrue(status, "mapping inteactions")
    status = load.mapping(gene_mapping, "gene", analyzer=Loader.KEYWORD_ANALYZER, **options)
    self.assertTrue(status, "mapping genes")

    ''' load docs and test has parent query'''
    json_data = '{"index": {"_index": "%s", "_type": "gene", "_id" : "1"}}\n' % idx
    json_data += json.dumps({"symbol": "PAX1"}) + '\n'
    json_data += '{"index": {"_index": "%s", "_type": "publication", "_id" : "2", "parent": "1"}}\n' % idx
    json_data += json.dumps({"pubmed": 1234}) + '\n'
    Bulk.load(idx, '', json_data)
    Search.index_refresh(idx)

    query = ElasticQuery.has_parent('gene', Query.match('symbol', 'PAX1'))
    elastic = Search(query, idx=idx, idx_type='publication', size=500)
    docs = elastic.search().docs
    self.assertEqual(len(docs), 1)
    self.assertEqual(getattr(docs[0], 'pubmed'), 1234)
    self.assertEqual(docs[0].parent(), '1')
    self.assertRaises(QueryError, ElasticQuery.has_parent, 'gene', 'xxxxx')

    ''' test has child query '''
    query = ElasticQuery.has_child('publication', Query.match('pubmed', 1234))
    elastic = Search(query, idx=idx, idx_type='gene', size=500)
    docs = elastic.search().docs
    self.assertEqual(len(docs), 1)
    self.assertEqual(getattr(docs[0], 'symbol'), 'PAX1')
    self.assertEqual(docs[0].parent(), None)
    requests.delete(ElasticSettings.url() + '/' + idx)