def test_region_attributes(self):
    ''' Test region attributes: the genes, studies and PMIDs padded onto a
    region document must all be resolvable in their respective indices. '''
    idx = ElasticSettings.idx(RegionDataTest.IDX_KEY, 'REGION')
    (idx, idx_type) = idx.split('/')
    docs = ElasticUtils.get_rdm_docs(idx, idx_type, qbool=Query.match_all(), sources=[], size=1)
    newRegion = utils.Region.pad_region_doc(docs[0])

    # (region attribute, (index key, index type), assertion message) — the three
    # original copy-pasted stanzas collapsed into one data-driven loop
    checks = [
        ("genes", ('GENE', 'GENE'),
         "All genes on region found in GENE index"),
        ("studies", ('STUDY', 'STUDY'),
         "All study ids for region found in STUDY index"),
        ("pmids", ('PUBLICATION', 'PUBLICATION'),
         "All PMIDs for region found in PUBLICATION index"),
    ]
    for attr, (idx_key, type_key), msg in checks:
        ids = getattr(newRegion, attr)
        if len(ids) > 0:
            query = ElasticQuery(Query.ids(ids))
            resultObject = Search(query, idx=ElasticSettings.idx(idx_key, type_key),
                                  size=len(ids)).search()
            self.assertEqual(len(ids), resultObject.hits_total, msg)
def test_query_ids(self):
    ''' Test retrieval of documents by id, with and without a type filter. '''
    search = Search(ElasticQuery(Query.ids(['1', '2'])),
                    idx=ElasticSettings.idx('DEFAULT'), size=5)
    docs = search.search().docs
    self.assertTrue(len(docs) == 2, "Elastic string query retrieved marker (rs*)")

    # repeat with a single id, restricted to the type of the first hit
    idx_type = docs[0].type()
    search = Search(ElasticQuery(Query.ids('2', types=idx_type)),
                    idx=ElasticSettings.idx('DEFAULT'), size=5)
    docs = search.search().docs
    self.assertTrue(len(docs) == 1, "Elastic string query retrieved marker (rs*)")
def gene2ensembl_parse(cls, gene2ens, idx, idx_type):
    ''' Parse gene2ensembl file from NCBI and add entrez to gene index. '''
    # map ensembl id -> partial doc carrying the entrez id; only human rows
    # (NCBI taxonomy id 9606) are considered, first entrez id per ensembl id wins
    genes = {}
    for gene in gene2ens:
        if gene.startswith('9606\t'):
            parts = gene.split('\t')
            gene_id = parts[1]
            ens_id = parts[2]
            # prot_acc = parts[5]
            if ens_id not in genes:
                genes[ens_id] = {'dbxrefs': {'entrez': gene_id}}

    def process_hits(resp_json):
        # scan-and-scroll callback: bulk-update matching docs in chunks of 450
        hits = resp_json['hits']['hits']
        docs = [Document(hit) for hit in hits]
        chunk_size = 450
        for i in range(0, len(docs), chunk_size):
            docs_chunk = docs[i:i+chunk_size]
            json_data = ''
            for doc in docs_chunk:
                ens_id = doc._meta['_id']
                # NOTE(review): rebinds the enclosing idx_type to this doc's
                # type; the bulk_load below therefore uses the LAST doc's type
                idx_type = doc.type()
                doc_data = {"update": {"_id": ens_id, "_type": idx_type,
                                       "_index": idx, "_retry_on_conflict": 3}}
                json_data += json.dumps(doc_data) + '\n'
                json_data += json.dumps({'doc': genes[ens_id]}) + '\n'
            if json_data != '':
                Loader().bulk_load(idx, idx_type, json_data)

    query = ElasticQuery(Query.ids(list(genes.keys())))
    ScanAndScroll.scan_and_scroll(idx, idx_type=idx_type, call_fun=process_hits, query=query)
def _get_pub_docs_by_pmid(pmids, sources=None):
    """ Fetch publication documents for the given PMIDs.

    Returns a dictionary keyed by the document id (PMID) with the
    publication document as the value. """
    search = Search(ElasticQuery(Query.ids(pmids), sources=sources),
                    idx=ElasticSettings.idx("PUBLICATION"), size=len(pmids))
    result = {}
    for document in search.search().docs:
        result[document.doc_id()] = document
    return result
def get_gene_docs_by_ensembl_id(cls, ens_ids, sources=None):
    ''' Fetch gene documents for an array of ensembl IDs.

    Returns a dictionary keyed by ensembl ID with the gene document
    as the value. '''
    search_query = ElasticQuery(Query.ids(ens_ids), sources=sources)
    search = Search(search_query, idx=ElasticSettings.idx('GENE', idx_type='GENE'),
                    size=len(ens_ids))
    return {document.doc_id(): document for document in search.search().docs}
def gene2ensembl_parse(cls, gene2ens, idx, idx_type):
    ''' Parse gene2ensembl file from NCBI and add entrez to gene index. '''
    # map ensembl id -> partial doc carrying the entrez id; only human rows
    # (NCBI taxonomy id 9606), first entrez id per ensembl id wins
    genes = {}
    for gene in gene2ens:
        if gene.startswith('9606\t'):
            parts = gene.split('\t')
            gene_id = parts[1]
            ens_id = parts[2]
            # prot_acc = parts[5]
            if ens_id not in genes:
                genes[ens_id] = {'dbxrefs': {'entrez': gene_id}}
    # NOTE(review): single search with size=80000 assumes fewer matching
    # docs than that — confirm against index size
    query = ElasticQuery(Query.ids(list(genes.keys())))
    docs = Search(query, idx=idx, idx_type=idx_type, size=80000).search().docs
    # bulk-update the index in chunks of 450 documents
    chunk_size = 450
    for i in range(0, len(docs), chunk_size):
        docs_chunk = docs[i:i+chunk_size]
        json_data = ''
        for doc in docs_chunk:
            ens_id = doc._meta['_id']
            # NOTE(review): rebinds the idx_type parameter to this doc's type;
            # bulk_load below uses the LAST doc's type for the chunk
            idx_type = doc.type()
            doc_data = {"update": {"_id": ens_id, "_type": idx_type,
                                   "_index": idx, "_retry_on_conflict": 3}}
            json_data += json.dumps(doc_data) + '\n'
            json_data += json.dumps({'doc': genes[ens_id]}) + '\n'
        if json_data != '':
            Loader().bulk_load(idx, idx_type, json_data)
def get_publications(cls, pmids, sources=[]):
    ''' Get publications from the list of PMIDs.

    :param pmids: list of PubMed ids; None is returned if empty or None.
    :param sources: document fields to return.
    :return: list of publication documents, or None.
    '''
    if pmids is None or not pmids:
        return None
    from elastic.search import Search, ElasticQuery
    # size must cover the whole id list; it was previously hard-coded to 2,
    # silently truncating the result to at most two publications
    pubs = Search(ElasticQuery(Query.ids(pmids), sources=sources),
                  idx=ElasticSettings.idx('PUBLICATION', 'PUBLICATION'),
                  size=len(pmids)).search().docs
    return pubs
def ensmart_gene_parse(cls, ensmart_f, idx, idx_type):
    ''' For those gene docs missing a dbxrefs.entrez use Ensembl Mart to fill in. '''
    # build ensembl id -> dbxrefs (entrez / swissprot / trembl) from the mart dump
    genes = {}
    for ensmart in ensmart_f:
        parts = ensmart.split('\t')
        ens_id = parts[0]
        gene_id = parts[1]
        swissprot = parts[2].strip()
        trembl = parts[3].strip()
        if gene_id == '':
            continue
        if ens_id in genes:
            if genes[ens_id]['dbxrefs']['entrez'] != gene_id:
                # conflicting entrez ids for the same ensembl id: invalidate
                genes[ens_id]['dbxrefs']['entrez'] = None
            else:
                if swissprot != '':
                    cls._add_to_dbxref(genes[ens_id], 'swissprot', swissprot)
                if trembl != '':
                    cls._add_to_dbxref(genes[ens_id], 'trembl', trembl)
        else:
            genes[ens_id] = {'dbxrefs': {'entrez': gene_id}}
            if swissprot != '':
                genes[ens_id]['dbxrefs'].update({'swissprot': swissprot})
            if trembl != '':
                genes[ens_id]['dbxrefs'].update({'trembl': trembl})

    ''' search for the entrez ids '''
    def process_hits(resp_json):
        # scan-and-scroll callback: bulk-update matching docs in chunks of 450
        hits = resp_json['hits']['hits']
        docs = [Document(hit) for hit in hits]
        chunk_size = 450
        for i in range(0, len(docs), chunk_size):
            docs_chunk = docs[i:i+chunk_size]
            json_data = ''
            for doc in docs_chunk:
                ens_id = doc._meta['_id']
                if 'dbxrefs' in doc.__dict__:
                    dbxrefs = getattr(doc, 'dbxrefs')
                else:
                    dbxrefs = {}
                # skip docs whose already-indexed entrez id conflicts with
                # the mart-derived value
                if ('entrez' in genes[ens_id]['dbxrefs'] and 'entrez' in dbxrefs and
                        dbxrefs['entrez'] != genes[ens_id]['dbxrefs']['entrez']):
                    logger.warn('Multiple entrez ids for ensembl id: '+ens_id)
                    continue
                # NOTE(review): rebinds idx_type to this doc's type; bulk_load
                # below uses the LAST doc's type for the chunk
                idx_type = doc.type()
                doc_data = {"update": {"_id": ens_id, "_type": idx_type,
                                       "_index": idx, "_retry_on_conflict": 3}}
                json_data += json.dumps(doc_data) + '\n'
                json_data += json.dumps({'doc': genes[ens_id]}) + '\n'
            if json_data != '':
                Loader().bulk_load(idx, idx_type, json_data)

    query = ElasticQuery(Query.ids(list(genes.keys())))
    ScanAndScroll.scan_and_scroll(idx, idx_type=idx_type, call_fun=process_hits, query=query)
def test_get_rdm_feature_id(self):
    ''' Test that a random feature id can be fetched and its document retrieved. '''
    idx = IDX['GFF_GENERIC']['indexName']
    idx_type = IDX['GFF_GENERIC']['indexType']
    doc_id = ElasticUtils.get_rdm_feature_id(idx, idx_type)
    self.assertTrue(isinstance(doc_id, str), 'Document id')
    result_docs = Search(ElasticQuery(Query.ids(doc_id)), idx=idx).search().docs
    self.assertTrue(len(result_docs) == 1, 'Document retrieved')
def is_region_for_disease(cls, hit, section=None, config=None, result_container=None):
    ''' Populate the result container with the diseases associated with a
    region hit and return it.

    Fixes: the mutable default argument ``result_container={}`` (shared
    across calls and passed on to populate_container) is replaced by the
    None-sentinel idiom; inner loop variables no longer shadow the ``hit``
    parameter or rebind ``disease_loci`` while it is being iterated.

    NOTE(review): the early returns on status != 'N' / locus 'tbc' abort
    processing of ALL remaining loci for the region — confirm this is
    intended rather than a per-hit skip. '''
    if result_container is None:
        result_container = {}
    result_container_populated = result_container
    feature_doc = hit['_source']
    feature_doc['_id'] = hit['_id']
    disease_loci = feature_doc['disease_loci']
    region_id = feature_doc['region_id']
    diseases = set()
    for disease_locus_id in disease_loci:
        query = ElasticQuery(Query.ids([disease_locus_id]), sources=['hits'])
        elastic = Search(query, idx=ElasticSettings.idx('REGION', idx_type='DISEASE_LOCUS'))
        disease_locus_hits = elastic.search().docs
        for disease_locus_hit in disease_locus_hits:
            for study_hit_id in getattr(disease_locus_hit, 'hits'):
                query = ElasticQuery(Query.ids([study_hit_id]))
                elastic = Search(query, idx=ElasticSettings.idx('REGION', idx_type='STUDY_HITS'))
                hit_doc = elastic.search().docs[0]
                disease = getattr(hit_doc, "disease")
                status = getattr(hit_doc, "status")
                if status != 'N':
                    return result_container
                disease_locus_name = getattr(hit_doc, "disease_locus").lower()
                if disease_locus_name == 'tbc':
                    return result_container
                diseases.add(disease)
    for disease in diseases:
        result_container_populated = cls.populate_container(disease, disease, fnotes=None,
                                                            features=[region_id],
                                                            diseases=[disease],
                                                            result_container=result_container_populated)
    return result_container_populated
def get_disease_loci(self):
    ''' Return the disease locus documents for this hit's disease_loci ids,
    or None (with a warning logged) when there are none. '''
    locus_ids = getattr(self, "disease_loci")
    if len(locus_ids) == 0:
        logger.warning("no disease_locus attributes found/given")
        return
    regions_idx = ElasticSettings.idx('REGION', 'DISEASE_LOCUS')
    result = Search(search_query=ElasticQuery(Query.ids(locus_ids)),
                    idx=regions_idx).search()
    return result.docs
def test_chrom(self):
    ''' Check correct number of chromosomes. '''
    # sex chromosomes followed by autosomes 1-22
    ids = ['X', 'Y'] + [n for n in range(1, 23)]
    idx = ElasticSettings.idx('BAND', idx_type='CHROM')
    docs = Search(ElasticQuery(Query.ids(ids)), idx=idx, size=len(ids)).search().docs
    self.assertEqual(len(ids), len(docs), 'Check for chromosomes')
    for doc in docs:
        self.assertGreater(getattr(doc, 'length'), 1000000, 'Chromosome length')
def get_studies(cls, study_ids=None, disease_code=None, sources=[], split_name=True):
    ''' Retrieve study documents, filtered by disease code or by a list of
    study ids (disease code takes precedence); optionally truncate the
    study_name at the first colon. Results are sorted alphanumerically
    by study_id. '''
    if disease_code is not None:
        studies_query = ElasticQuery(BoolQuery(must_arr=Query.term("diseases", disease_code)),
                                     sources=sources)
    elif study_ids:
        studies_query = ElasticQuery(Query.ids(study_ids), sources=sources)
    else:
        studies_query = ElasticQuery(Query.match_all(), sources=sources)
    studies = Search(studies_query, idx=ElasticSettings.idx('STUDY', 'STUDY'),
                     size=200).search().docs
    if split_name:
        for doc in studies:
            name = getattr(doc, 'study_name')
            if name is not None:
                setattr(doc, 'study_name', name.split(':', 1)[0])
    return Document.sorted_alphanum(studies, "study_id")
def get_object(self):
    ''' Fetch a single document by the lookup-field id, wrap it as an
    ElasticObject and check object permissions; raises Http404 when the
    document cannot be retrieved. '''
    search = Search(search_query=ElasticQuery(Query.ids(self.kwargs[self.lookup_field])),
                    idx=getattr(self, 'idx'))
    try:
        hit = search.get_json_response()['hits']['hits'][0]
        obj = ElasticObject(initial=hit['_source'])
        obj.uuid = hit['_id']
        # May raise a permission denied
        self.check_object_permissions(self.request, obj)
        return obj
    except (TypeError, ValueError, IndexError):
        raise Http404
def get_gene_docs_by_ensembl_id(ens_ids, sources=None):
    ''' Fetch gene documents for an array of ensembl IDs via scan-and-scroll.

    Returns a dictionary keyed by ensembl ID with the gene document
    as the value. '''
    genes = {}

    def collect(resp_json):
        # accumulate each hit as a GeneDocument keyed by its id
        for hit in resp_json['hits']['hits']:
            genes[hit['_id']] = GeneDocument(hit)

    ScanAndScroll.scan_and_scroll(ElasticSettings.idx('GENE'), call_fun=collect,
                                  query=ElasticQuery(Query.ids(ens_ids), sources=sources))
    return genes
def get_object(self):
    ''' Fetch a single document by the lookup-field id, wrap it as an
    ElasticObject (recording its index type as criteria_type) and check
    object permissions; raises Http404 when not retrievable. '''
    search = Search(search_query=ElasticQuery(Query.ids(self.kwargs[self.lookup_field])),
                    idx=getattr(self, 'idx'))
    try:
        hit = search.get_json_response()['hits']['hits'][0]
        obj = ElasticObject(initial=hit['_source'])
        obj.uuid = hit['_id']
        obj.criteria_type = hit['_type']
        # May raise a permission denied
        self.check_object_permissions(self.request, obj)
        return obj
    except (TypeError, ValueError, IndexError):
        raise Http404
def get_pub_docs_by_pmid(cls, pmids, sources=None):
    ''' Get the publication documents for a list of PMIDs.

    Returns a dictionary keyed by PMID with the publication document
    as the value. '''
    from elastic.search import ElasticQuery, ScanAndScroll
    pubs = {}

    def collect(resp_json):
        # accumulate each hit as a PublicationDocument keyed by its id
        for hit in resp_json['hits']['hits']:
            pubs[hit['_id']] = PublicationDocument(hit)

    ScanAndScroll.scan_and_scroll(ElasticSettings.idx('PUBLICATION'), call_fun=collect,
                                  query=ElasticQuery(Query.ids(pmids), sources=sources))
    return pubs
def study_page(request, study):
    ''' Renders a study page for one or more comma-separated study ids.
    Raises Http404 when no id is given, none are found, or 9+ match. '''
    if study is None:
        messages.error(request, 'No study id given.')
        raise Http404()
    query = ElasticQuery(Query.ids(study.split(',')))
    # size raised from 5 to 9 to match the hits_total < 9 render condition
    # below; previously 6-8 matching studies were rendered with docs missing
    elastic = Search(query, idx=ElasticSettings.idx('STUDY', 'STUDY'), size=9)
    res = elastic.search(obj_document=StudyDocument)
    if res.hits_total == 0:
        messages.error(request, 'Study(s) '+study+' not found.')
    elif res.hits_total < 9:
        names = ', '.join([getattr(doc, 'study_name') for doc in res.docs])
        context = {'features': res.docs, 'title': names}
        return render(request, 'study/study.html', context, content_type='text/html')
    raise Http404()
def test_term(self):
    ''' Terms aggregation, alone and combined with an ids query. '''
    agg_name = "test"
    aggs = Aggs(Agg(agg_name, "terms", {"field": "seqid", "size": 0}))
    r_aggs = Search(aggs=aggs, idx=ElasticSettings.idx('DEFAULT')).search().aggs
    self.assertTrue(agg_name in r_aggs, "returned test aggregations")

    ''' Ids Query with Terms Aggregation'''
    ids_query = ElasticQuery(Query.ids(['1', '2']))
    r_aggs = Search(search_query=ids_query, aggs=aggs,
                    idx=ElasticSettings.idx('DEFAULT'), size=5).search().aggs
    self.assertTrue(len(r_aggs[agg_name].get_buckets()) > 0,
                    "returned test aggregation buckets")
    self.assertTrue(getattr(r_aggs[agg_name], 'buckets')[0]['doc_count'] >= 0,
                    "bucket document count")
def region_page(request, region):
    ''' Renders a region page for one or more comma-separated region ids.
    Raises Http404 when no region is given, none are found, or 9+ match. '''
    if region is None:
        messages.error(request, 'No region given.')
        raise Http404()
    query = ElasticQuery(Query.ids(region.split(',')))
    # size raised from 5 to 9 to match the hits_total < 9 render condition
    # below; previously 6-8 matching regions were rendered with docs missing
    elastic = Search(query, idx=ElasticSettings.idx('REGION', 'REGION'), size=9)
    res = elastic.search()
    if res.hits_total == 0:
        messages.error(request, 'Region(s) '+region+' not found.')
    elif res.hits_total < 9:
        names = ', '.join([getattr(doc, 'region_name') for doc in res.docs])
        REGIONS = [Region.pad_region_doc(doc) for doc in res.docs]
        context = {'features': REGIONS, 'title': names}
        return render(request, 'region/index.html', context, content_type='text/html')
    raise Http404()
def gene_page(request):
    """ Renders a gene page for the comma-separated gene ids in the 'g'
    query parameter. Raises Http404 when no gene is given, none are
    found, or 9+ match. """
    query_dict = request.GET
    gene = query_dict.get("g")
    if gene is None:
        messages.error(request, "No gene name given.")
        raise Http404()
    query = ElasticQuery(Query.ids(gene.split(",")))
    # size raised from 5 to 9 to match the hits_total < 9 render condition
    # below; previously 6-8 matching genes were rendered with docs missing
    elastic = Search(query, idx=ElasticSettings.idx("GENE", "GENE"), size=9)
    res = elastic.search()
    if res.hits_total == 0:
        messages.error(request, "Gene(s) " + gene + " not found.")
    elif res.hits_total < 9:
        symbols = ", ".join([getattr(doc, "symbol") for doc in res.docs])
        context = {"genes": res.docs, "title": symbols,
                   "criteria": get_criteria(res.docs, "gene", "symbol", "GENE")}
        return render(request, "gene/gene.html", context, content_type="text/html")
    raise Http404()
def get_gene(cls, request, gene, context):
    ''' Populate the view context with gene documents for the given
    comma-separated gene ids; raises Http404 when no gene is given,
    none are found, or 9+ match. '''
    if gene is None:
        messages.error(request, 'No gene name given.')
        raise Http404()
    res = Search(search_query=ElasticQuery(Query.ids(gene.split(','))),
                 idx=ElasticSettings.idx('GENE', 'GENE'), size=9).search()
    if res.hits_total == 0:
        messages.error(request, 'Gene(s) '+gene+' not found.')
    elif res.hits_total < 9:
        docs = res.docs
        context['features'] = docs
        context['criteria'] = GeneView.criteria_disease_tags(
            request, [doc.doc_id() for doc in docs])
        context['title'] = ', '.join([getattr(doc, 'symbol') for doc in docs])
        context['jbrowse_tracks'] = "PydginRegions%2CEnsemblGenes"
        return context
    raise Http404()
def test_gene_pubs(self):
    ''' Check the difference between the pubs indexed and those from the gene_pub
    file from the NCBI. If the publication pipeline has not been run recently
    there is likely to be a difference. This is allowed for with the NUM_DIFF
    variable. If there is a larger difference than this then the publication
    pipeline should be run. '''
    ini = IniParser()
    config = ini.read_ini('publications.ini')
    section = config['GENE']
    file_name = 'gene_pub_test.tmp'
    download_file = os.path.join(DiseasePublicationTest.TEST_DATA_DIR, file_name)
    success = FTPDownload().download(urljoin(section['location'], section['files']),
                                     DiseasePublicationTest.TEST_DATA_DIR,
                                     file_name=file_name)
    self.assertTrue(success, 'downloaded gene publications file')
    # collect the distinct PMIDs for human rows (taxonomy id 9606)
    pmids = set()
    with gzip.open(download_file, 'rt') as outf:
        seen_add = pmids.add
        for x in outf:
            if not x.startswith('9606\t'):
                continue
            pmid = re.split('\t', x)[2].strip()
            if pmid not in pmids:
                seen_add(pmid)
    pmids = list(pmids)
    # count how many of these PMIDs are indexed and allow NUM_DIFF slack
    elastic = Search(search_query=ElasticQuery(BoolQuery(b_filter=Filter(Query.ids(pmids))),
                                               sources=['pmid']),
                     idx=ElasticSettings.idx('PUBLICATION'), size=len(pmids)*2)
    self.assertLess(len(pmids)-elastic.get_count()['count'], GenePublicationTest.NUM_DIFF,
                    'Count for gene publications')
    # check for differences in pmids
    # pmids_in_idx = []
    #
    # def get_pmids(resp_json):
    #     hits = resp_json['hits']['hits']
    #     pmids_in_idx.extend([getattr(Document(h), "pmid") for h in hits])
    #
    # ScanAndScroll.scan_and_scroll(idx=ElasticSettings.idx('PUBLICATION'), call_fun=get_pmids,
    #                               query=ElasticQuery(BoolQuery(b_filter=Filter(Query.ids(pmids))),
    #                                                  sources=['pmid']),
    #                               time_to_keep_scoll=30)
    # pmids_diff = list(set(pmids) - set(pmids_in_idx))
    # self.assertLess(len(pmids_diff), GenePublicationTest.NUM_DIFF)
    os.remove(download_file)
def fetch_disease_locus(cls, hits_docs):
    ''' Fetch the distinct disease_locus documents referenced by the given
    hit documents; a critical message is logged for any locus id that does
    not resolve to exactly one document. '''
    region_index = ElasticSettings.idx('REGIONS', idx_type='DISEASE_LOCUS')
    disease_loc_docs = []
    locus_id_set = set()
    for doc in hits_docs.docs:
        locus_id = getattr(doc, 'disease_locus')
        # skip locus ids already fetched
        if locus_id in locus_id_set:
            continue
        locus_id_set.add(locus_id)
        elastic = Search(ElasticQuery(Query.ids([locus_id])), idx=region_index)
        disease_loc = elastic.search().docs
        if len(disease_loc) == 1:
            disease_loc_docs.append(disease_loc[0])
        else:
            # fixed typo in the log message: 'for it' -> 'for id'
            logger.critical('disease_locus doc not found for id ' + locus_id)
    return disease_loc_docs
def ensmart_homolog_parse(cls, ensmart_f, attrs, idx, idx_type):
    ''' Add homolog information. '''
    genes = {}
    # homolog species keys derived from the requested mart attributes,
    # e.g. 'mmusculus_homolog_ensembl_gene' -> 'mmusculus'
    homologs = [a.strip().replace('_homolog_ensembl_gene', '')
                for a in attrs.split(',') if a.strip() != 'ensembl_gene_id']
    for ensmart in ensmart_f:
        parts = ensmart.split('\t')
        ens_id = parts[0]
        # more columns than homolog attributes + id => ambiguous row, skip
        if len(parts) > len(homologs)+1:
            logger.warn('IGNORE ORTHOLOGS '+ens_id+' :: '+ensmart)
            continue
        dbxrefs = {}
        for i in range(1, len(parts)):
            if parts[i].strip() != '':
                dbxrefs[homologs[i-1]] = {"ensembl": parts[i].strip()}
        if len(dbxrefs) > 0:
            genes[ens_id] = dbxrefs

    ''' search for the entrez ids '''
    def process_hits(resp_json):
        # scan-and-scroll callback: merge ortholog dbxrefs into the indexed
        # docs, bulk-updating in chunks of 450
        hits = resp_json['hits']['hits']
        docs = [Document(hit) for hit in hits]
        chunk_size = 450
        for i in range(0, len(docs), chunk_size):
            docs_chunk = docs[i:i+chunk_size]
            json_data = ''
            for doc in docs_chunk:
                ens_id = doc._meta['_id']
                if 'dbxrefs' in doc.__dict__:
                    dbxrefs = getattr(doc, 'dbxrefs')
                else:
                    dbxrefs = {}
                dbxrefs['orthologs'] = genes[ens_id]
                # NOTE(review): rebinds idx_type to this doc's type; bulk_load
                # below uses the LAST doc's type for the chunk
                idx_type = doc.type()
                doc_data = {"update": {"_id": ens_id, "_type": idx_type,
                                       "_index": idx, "_retry_on_conflict": 3}}
                json_data += json.dumps(doc_data) + '\n'
                json_data += json.dumps({'doc': {'dbxrefs': dbxrefs}}) + '\n'
            if json_data != '':
                Loader().bulk_load(idx, idx_type, json_data)

    query = ElasticQuery(Query.ids(list(genes.keys())))
    ScanAndScroll.scan_and_scroll(idx, idx_type=idx_type, call_fun=process_hits, query=query)
def get_pmids(resp_json):
    # NOTE(review): references self.assert* although 'self' is not a
    # parameter — presumably a scan-and-scroll callback nested inside a
    # test method (closure over self); confirm against the caller.
    # collect the distinct pmid values from the hits
    pmids = []
    for hit in resp_json['hits']['hits']:
        doc = Document(hit)
        pmids.append(getattr(doc, "pmid"))
    pmids = list(set(pmids))
    # look the pmids up in the publication index
    elastic = Search(search_query=ElasticQuery(BoolQuery(b_filter=Filter(Query.ids(pmids))),
                                               sources=['pmid']),
                     idx=ElasticSettings.idx('PUBLICATION'), size=len(pmids)*2)
    if len(pmids) != elastic.get_count()['count']:
        # check for differences in pmids
        docs = elastic.search().docs
        pmids_in_pub_idx = [getattr(doc, 'pmid') for doc in docs]
        pmids_diff = list(set(pmids) - set(pmids_in_pub_idx))
        self.assertListEqual([], pmids_diff, "PMIDs list empty ("+str(pmids_diff)+")")
    self.assertEqual(len(pmids), elastic.get_count()['count'],
                     'Count for region publications')
def get_region(cls, request, region, context):
    ''' Populate the view context with padded region documents for the given
    comma-separated region ids; raises Http404 when no region is given,
    none are found, or 9+ match. '''
    if region is None:
        messages.error(request, 'No region given.')
        raise Http404()
    query = ElasticQuery(Query.ids(region.split(',')))
    # size raised from 5 to 9 to match the hits_total < 9 condition below;
    # previously 6-8 matching regions were rendered with docs missing
    elastic = Search(query, idx=ElasticSettings.idx('REGION', 'REGION'), size=9)
    res = elastic.search()
    if res.hits_total == 0:
        messages.error(request, 'Region(s) '+region+' not found.')
    elif res.hits_total < 9:
        context['features'] = [Region.pad_region_doc(doc) for doc in res.docs]
        fids = [doc.doc_id() for doc in res.docs]
        criteria_disease_tags = RegionView.criteria_disease_tags(request, fids)
        context['criteria'] = criteria_disease_tags
        context['title'] = ', '.join([getattr(doc, 'region_name') for doc in res.docs])
        return context
    raise Http404()
def test_pub_disease_counts(self):
    ''' Check all publications exist in the publication index. '''
    for disease in DiseasePublicationTest.DISEASES:
        pmids = self._get_pmids(disease)
        disease_code = disease.lower()
        count_search = Search(
            search_query=ElasticQuery(BoolQuery(b_filter=Filter(Query.ids(pmids)))),
            idx=ElasticSettings.idx('PUBLICATION'), size=len(pmids)*2)
        self.assertEqual(count_search.get_count()['count'], len(pmids),
                         'Count for '+disease_code)

        # check for differences in pmids
        pmids_in_idx = []

        def collect_pmids(resp_json):
            pmids_in_idx.extend([getattr(Document(h), "pmid")
                                 for h in resp_json['hits']['hits']])

        ScanAndScroll.scan_and_scroll(idx=ElasticSettings.idx('PUBLICATION'),
                                      call_fun=collect_pmids,
                                      query=ElasticQuery(BoolQuery(b_filter=Filter(Query.ids(pmids))),
                                                         sources=['pmid']))
        self.assertEqual(len(list(set(pmids) - set(pmids_in_idx))), 0)
def interaction_details(request):
    ''' Get interaction details for a given ensembl ID. '''
    ens_id = request.POST.get('ens_id')
    elastic = Search(ElasticQuery.has_parent('gene', Query.ids(ens_id)),
                     idx=ElasticSettings.idx('GENE', 'INTERACTIONS'), size=500)
    interaction_hits = elastic.get_json_response()['hits']
    # gather all interactor ids so their symbols can be looked up in one call
    interactor_ids = [interactor['interactor']
                      for hit in interaction_hits['hits']
                      for interactor in hit['_source']['interactors']]
    docs = utils.get_gene_docs_by_ensembl_id(interactor_ids, ['symbol'])
    # annotate each interactor with its gene symbol, falling back to the id
    for hit in interaction_hits['hits']:
        for interactor in hit['_source']['interactors']:
            iid = interactor['interactor']
            try:
                interactor['symbol'] = getattr(docs[iid], 'symbol')
            except KeyError:
                interactor['symbol'] = iid
    return JsonResponse(interaction_hits)
def interaction_details(request):
    """ Get interaction details for a given ensembl ID. """
    ens_id = request.POST.get("ens_id")
    elastic = Search(ElasticQuery.has_parent("gene", Query.ids(ens_id)),
                     idx=ElasticSettings.idx("GENE", "INTERACTIONS"), size=500)
    interaction_hits = elastic.get_json_response()["hits"]
    # gather all interactor ids so their symbols can be looked up in one call
    interactor_ids = [interactor["interactor"]
                      for hit in interaction_hits["hits"]
                      for interactor in hit["_source"]["interactors"]]
    docs = _get_gene_docs_by_ensembl_id(interactor_ids, ["symbol"])
    # annotate each interactor with its gene symbol, falling back to the id
    for hit in interaction_hits["hits"]:
        for interactor in hit["_source"]["interactors"]:
            iid = interactor["interactor"]
            try:
                interactor["symbol"] = getattr(docs[iid], "symbol")
            except KeyError:
                interactor["symbol"] = iid
    return JsonResponse(interaction_hits)
def get_comparison_results(cls, criteria_idx, criteria_idx_type, old_criteria_results,
                           primary_id_type, criteria_sub_class):
    ''' Compare the docs in the criteria index against the previously
    computed results and return the list of non-empty comparison results. '''
    query = ElasticQuery(Query.ids(list(old_criteria_results.keys())))
    criteria_docs = Search(query, idx=criteria_idx, idx_type=criteria_idx_type,
                           size=len(old_criteria_results)).search().docs
    print('Number of docs from new criteria elastic index for criteria type ' +
          criteria_idx_type + ' ' + str(len(criteria_docs)))
    comparison_result_list = []
    for counter, criteria_doc in enumerate(criteria_docs, start=1):
        print('==========' + str(counter) + '==========')
        print(criteria_doc.__dict__)
        current_id = getattr(criteria_doc, 'qid')
        comparison_result = cls.compare_dicts(criteria_doc.__dict__,
                                              old_criteria_results[current_id],
                                              primary_id_type, criteria_sub_class,
                                              criteria_idx_type)
        if len(comparison_result) > 0:
            comparison_result_list.append(comparison_result)
    return comparison_result_list
def fetch_from_elastic(cls, idx, idx_type, feature_ids):
    '''Lookup documents in the pydgin elastic index by feature id.

    The search size now covers the whole id list; it was previously
    hard-coded to 5, silently truncating results when more than five
    feature ids were requested.'''
    query = ElasticQuery(Query.ids(feature_ids))
    elastic = Search(query, idx=ElasticSettings.idx(idx, idx_type=idx_type),
                     size=len(feature_ids))
    return elastic.search().docs
def pub_details(request):
    ''' Get PMID details. '''
    pmid_list = request.POST.getlist("pmids[]")
    search = Search(ElasticQuery(Query.ids(pmid_list)),
                    idx=ElasticSettings.idx('PUBLICATION', 'PUBLICATION'),
                    size=len(pmid_list))
    return JsonResponse(search.get_json_response()['hits'])
def get_genes(cls, ens_ids, sources=[]):
    ''' Get gene document(s) from a list of ensembl IDs. '''
    search = Search(ElasticQuery(Query.ids(ens_ids), sources=sources),
                    idx=ElasticSettings.idx('GENE', 'GENE'), size=len(ens_ids))
    return search.search().docs