def test_create_idx_type_model_permissions(self):
    elastic_settings_before = ElasticSettings.attrs().get('IDX')
    user_types_before = elastic_settings_before['CP_STATS_UD']['idx_type']
    self.assertEqual({}, user_types_before, 'CP_STATS_UD idx_type is empty')

    idx = "cp:hg19_userdata_bed"
    new_upload_file = "tmp_newly_uploaded_file"
    idx_type = new_upload_file
    os.system("curl -XPUT " + ElasticSettings.url() + "/" + idx + "/_mapping/" + idx_type +
              " -d '{\"" + idx_type + "\":{ \"properties\" : {\"message\" : {\"type\" : \"string\", \"store\" : true } } }}'")
    os.system("curl -XPUT " + ElasticSettings.url() + "/" + idx + "/" + idx_type +
              "/_meta -d '{\"label\": \"" + new_upload_file + "\", \"owner\": \"" + self.user.username +
              "\", \"uploaded\": \"" + str(timezone.now()) + "\"}'")

    elastic_settings_after = elastic_factory.create_idx_type_model_permissions(
        self.user, indexKey='CP_STATS_UD',
        indexTypeKey='UD-' + new_upload_file.upper(),
        new_upload_file="tmp_newly_uploaded_file")
    # elastic_settings_after = elastic_factory.get_elastic_settings_with_user_uploads(elastic_settings_before)

    user_types_after = elastic_settings_after['CP_STATS_UD']['idx_type']
    self.assertTrue(len(user_types_after) > 0, "Has user idx_types")
    self.assertTrue('UD-TMP_NEWLY_UPLOADED_FILE' in user_types_after)
    self.assertEqual(user_types_after['UD-TMP_NEWLY_UPLOADED_FILE']['type'],
                     'tmp_newly_uploaded_file')
def tearDown(self):
    ''' Remove loaded test indices and test repository. '''
    key = 'PRIVATE_REGIONS_GFF'
    if key in IDX:
        print(ElasticSettings.url() + '/' + IDX[key]['indexName'])
        requests.delete(ElasticSettings.url() + '/' + IDX[key]['indexName'])
def tearDownModule():
    if os.path.exists(TEST_DATA_DIR + "/STAGE"):
        shutil.rmtree(TEST_DATA_DIR + "/STAGE")
    # remove indices created
    INI_CONFIG = IniParser().read_ini(MY_INI_FILE)
    requests.delete(ElasticSettings.url() + "/" + INI_CONFIG["GENE_HISTORY"]["index"])
    requests.delete(ElasticSettings.url() + "/" + INI_CONFIG["DBSNP"]["index"])
    os.remove(MY_INI_FILE)
    ens_dir = os.path.join(TEST_DATA_DIR, "DOWNLOAD", "ENSMART_GENE")
    if os.path.exists(ens_dir):
        shutil.rmtree(ens_dir)
def scan_and_scroll(self, idx, call_fun=None, idx_type='', url=None,
                    time_to_keep_scroll=1, query=None):
    ''' Scan and scroll an index and optionally provide a function
    argument to process the hits. '''
    if url is None:
        url = ElasticSettings.url()
    url_search_scan = (idx + '/' + idx_type + '/_search?search_type=scan&scroll=' +
                       str(time_to_keep_scroll) + 'm')
    if query is None:
        query = {"query": {"match_all": {}}, "size": 1000}
    else:
        if not isinstance(query, ElasticQuery):
            raise QueryError("not an ElasticQuery")
        query = query.query

    response = Search.elastic_request(url, url_search_scan, data=json.dumps(query))
    _scroll_id = response.json()['_scroll_id']
    url_scan_scroll = '_search/scroll?scroll=' + str(time_to_keep_scroll) + 'm'
    count = 0
    while True:
        response = Search.elastic_request(url, url_scan_scroll, data=_scroll_id)
        _scroll_id = response.json()['_scroll_id']
        hits = response.json()['hits']['hits']
        nhits = len(hits)
        if nhits == 0:
            break
        count += nhits
        if call_fun is not None:
            call_fun(response.json())
    logger.debug("Scanned No. Docs ( " + idx + "/" + idx_type + " ) = " + str(count))
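# Hedged usage sketch for scan_and_scroll: the callback receives each raw
# scroll response (a dict) and can accumulate hits, mirroring the process_hits
# callback used by process_criteria later in this section. The index name is
# a placeholder; the call-on-class style matches usage elsewhere in this code.
doc_ids = []

def collect_ids(resp_json):
    # each response carries a page of hits under ['hits']['hits']
    for hit in resp_json['hits']['hits']:
        doc_ids.append(hit['_id'])

ScanAndScroll.scan_and_scroll('cp:hg19_userdata_bed', call_fun=collect_ids)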
def chicpeaFileUpload(request, url):
    filesDict = request.FILES
    files = filesDict.getlist("files[]")
    print(files)
    snpTracks = list()
    idx = getattr(chicp_settings, 'CHICP_IDX').get('userdata').get('INDEX')
    for f in files:
        lines = f.readlines()  # read once; a second readlines() would return []
        line = lines[0].decode()
        if line.startswith("#"):
            line = lines[1].decode()
        parts = re.split(r"\t", line)
        if re.match(r"\s", line):
            parts = re.split(r"\s", line)
        if len(parts) != 5:
            logger.warning("WARNING: unexpected number of columns (" + str(len(parts)) + "): " + line)
            continue
        f.seek(0)
        bedFile = NamedTemporaryFile(delete=False)
        bedFile.write(f.read())
        bedFile.close()
        idx_type = os.path.basename(bedFile.name)
        snpTracks.append({"value": idx_type, "text": f.name})
        os.system("curl -XDELETE '" + ElasticSettings.url() + "/" + idx + "/" + idx_type + "'")
        call_command("index_search", indexName=idx, indexType=idx_type, indexBED=bedFile.name)
        logger.debug("--indexName " + idx + " --indexType " + idx_type + " --indexBED " + bedFile.name)
        os.remove(bedFile.name)  # remove the temp file; `bedFile.delete` was a no-op attribute access
    context = dict()
    context['userSNPTracks'] = snpTracks
    return HttpResponse(json.dumps(context), content_type="application/json")
def tearDownModule():
    # remove index created
    INI_CONFIG = IniParser().read_ini(MY_PUB_INI_FILE)
    requests.delete(ElasticSettings.url() + '/' + INI_CONFIG['DISEASE']['index'])
    os.remove(MY_PUB_INI_FILE)
    if os.path.exists(TEST_DATA_DIR + '/STAGE'):
        shutil.rmtree(TEST_DATA_DIR + '/STAGE')
def delete_repository(cls, repo):
    url = ElasticSettings.url() + '/_snapshot/' + repo
    resp = requests.delete(url)
    if resp.status_code != 200:
        logger.error("Status (" + url + "): " + str(resp.status_code) +
                     " :: " + str(resp.json()["error"]))
        return False
    return True
def get_count(self):
    ''' Return the elastic count for a query result. '''
    url = self.idx + '/' + self.idx_type + '/_count?'
    data = {}
    if hasattr(self, 'query'):
        data = json.dumps(self.query)
    response = Search.elastic_request(ElasticSettings.url(), url, data=data)
    return response.json()
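# Hedged usage sketch: get_count() returns the raw _count response, so the
# document total lives under the 'count' key (the same access pattern used by
# get_models_to_delete later in this section). Index/type names are placeholders.
ndocs = Search(idx='cp:hg19_userdata_bed', idx_type='some_idx_type').get_count()['count']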
def exists(cls, repo, snapshot):
    ''' Test if the repository/snapshot exists. '''
    url = ElasticSettings.url() + '/_snapshot/' + repo + '/' + snapshot
    resp = requests.get(url)
    return resp.status_code == 200
def load(self, idx, idx_type, json_data, elastic_url=None):
    ''' Bulk load documents. '''
    if elastic_url is None:
        elastic_url = ElasticSettings.url()
    # use the resolved elastic_url (the original ignored the parameter)
    resp = requests.put(elastic_url + '/' + idx + '/' + idx_type + '/_bulk', data=json_data)
    if resp.status_code != 200:
        logger.error('ERROR: ' + idx + ' load status: ' + str(resp.status_code) +
                     ' ' + str(resp.content))
    # report errors found during loading
    if 'errors' in resp.json() and resp.json()['errors']:
        logger.error("ERROR: bulk load error found")
        for item in resp.json()['items']:
            for key in item.keys():
                if 'error' in item[key]:
                    logger.error("ERROR LOADING:")
                    logger.error(item)
    return resp
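# Hedged sketch of building a newline-delimited bulk payload for load(),
# mirroring the payload built in test_mapping_parent_child later in this
# section. Index, type and document values are placeholders.
json_data = '{"index": {"_index": "test__idx", "_type": "gene", "_id": "1"}}\n'
json_data += json.dumps({"symbol": "PAX1"}) + '\n'
Bulk.load('test__idx', '', json_data)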
def __init__(self, search_query=None, aggs=None, search_from=0, size=20,
             search_type=None, idx=ElasticSettings.idx('DEFAULT'), idx_type='',
             qsort=None, elastic_url=None):
    ''' Set up parameters to use in the search. L{ElasticQuery} is used to
    define a search query.
    @type  search_query: L{ElasticQuery}
    @keyword search_query: The elastic query to search (default: None).
    @type  aggs: L{Aggs}
    @keyword aggs: Aggregations used in the search.
    @type  search_from: integer
    @keyword search_from: Offset used in pagination (default: 0).
    @type  size: integer
    @keyword size: Maximum number of hits to return (default: 20).
    @type  search_type: bool
    @keyword search_type: Set search type = count for aggregations.
    @type  idx: string
    @keyword idx: Index to search (default: default index defined in settings).
    @type  idx_type: string
    @keyword idx_type: Index type (default: '').
    @type  qsort: Sort
    @keyword qsort: Defines sorting for the query.
    @type  elastic_url: string
    @keyword elastic_url: Elastic URL (default: default cluster URL).
    '''
    if search_query is not None:
        if not isinstance(search_query, ElasticQuery):
            raise QueryError("not an ElasticQuery")
        self.query = search_query.query
    if aggs is not None:
        if hasattr(self, 'query'):
            self.query.update(aggs.aggs)
        else:
            self.query = aggs.aggs
    if qsort is not None:
        if not isinstance(qsort, Sort):
            raise QueryError("not a Sort")
        if hasattr(self, 'query'):
            self.query.update(qsort.qsort)
        else:
            logger.error("no query to sort")
    if elastic_url is None:
        elastic_url = ElasticSettings.url()

    self.size = size
    self.search_from = search_from
    self.search_type = search_type
    self.idx = idx
    self.idx_type = idx_type
    self.elastic_url = elastic_url
    if self.search_type is None:
        self.url = (self.idx + '/' + self.idx_type + '/_search?size=' + str(self.size) +
                    '&from=' + str(self.search_from))
    else:
        self.url = (self.idx + '/' + self.idx_type + '/_search?search_type=' + search_type)
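# Hedged usage sketch for the Search constructor, matching the calls made
# elsewhere in this section (e.g. test_mapping_parent_child and
# marker_is_gwas_significant_in_ic). The index name is a placeholder.
query = ElasticQuery(Query.match('symbol', 'PAX1'))
elastic = Search(search_query=query, idx='test__idx', idx_type='gene', size=10)
docs = elastic.search().docs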
def get_elastic_settings_with_user_uploads(cls, elastic_dict=None, new_upload_file=None):
    ''' Get the updated elastic settings with user uploaded idx_types. '''
    idx_key = 'CP_STATS_UD'
    idx = ElasticSettings.idx(idx_key)
    # Check if an index type exists in elastic and later check there is a
    # contenttype/model for the given elastic index type.
    elastic_url = ElasticSettings.url()
    url = idx + '/_mapping'
    response = Search.elastic_request(elastic_url, url, is_post=False)
    # why don't we use Search.get_mapping? I guess it's not a class method
    # logger.debug(response.json())
    if "error" in response.json():
        logger.warning(response.json())
        return None

    # get idx_types from _mapping
    elastic_mapping = json.loads(response.content.decode("utf-8"))
    # If aliasing is used the index key in the mapping can differ from idx,
    # so take the key from the mapping rather than hard-coding it.
    idx = list(elastic_mapping.keys())[0]
    idx_types = list(elastic_mapping[idx]['mappings'].keys())

    if elastic_dict is None:
        elastic_dict = ElasticSettings.attrs().get('IDX')

    idx_type_dict = {}
    existing_ct = [ct.name for ct in
                   ContentType.objects.filter(app_label=cls.PERMISSION_MODEL_APP_NAME)]
    for idx_type in idx_types:
        idx_type_with_suffix = idx_type + cls.PERMISSION_MODEL_TYPE_SUFFIX
        for ct in existing_ct:
            if ct.endswith(idx_type_with_suffix):
                meta_url = idx + '/' + idx_type + '/_meta/_source'
                meta_response = Search.elastic_request(elastic_url, meta_url, is_post=False)
                try:
                    elastic_meta = json.loads(meta_response.content.decode("utf-8"))
                    label = elastic_meta['label']
                except (ValueError, KeyError):
                    label = "UD-" + idx_type
                idx_type_dict['UD-' + idx_type.upper()] = {'label': label, 'type': idx_type}

    if new_upload_file is not None:
        idx_type = new_upload_file
        label = "UD-" + idx_type
        idx_type_dict['UD-' + idx_type.upper()] = {'label': label, 'type': idx_type}

    elastic_dict['CP_STATS_UD']['idx_type'] = idx_type_dict
    return elastic_dict
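# Hedged usage sketch: refresh the settings dict with the user-uploaded
# idx_types (this mirrors the commented-out call in the permissions test
# earlier in this section).
elastic_settings = ElasticSettings.attrs().get('IDX')
elastic_settings = elastic_factory.get_elastic_settings_with_user_uploads(elastic_settings)
user_types = elastic_settings['CP_STATS_UD']['idx_type']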
def index_exists(cls, idx, idx_type='', url=None):
    ''' Check if an index exists. '''
    if url is None:
        url = ElasticSettings.url()
    # build the mapping path separately so the caller-supplied elastic URL is
    # used (the original raised NameError when url was provided)
    mapping_url = idx + '/' + idx_type + '/_mapping'
    response = Search.elastic_request(url, mapping_url, is_post=False)
    if "error" in response.json():
        logger.warning(response.json())
        return False
    return True
def index_refresh(cls, idx, url=None):
    ''' Refresh to make all operations performed since the last refresh
    available for search. '''
    if url is None:
        url = ElasticSettings.url()
    response = Search.elastic_request(url, idx + '/_refresh')
    if "error" in response.json():
        logger.warning(response.content.decode("utf-8"))
        return False
    return True
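# Hedged usage sketch: after loading documents, refresh the index so they are
# visible to search, then confirm the index exists (the same sequence appears
# in the snapshot test later in this section). IDX follows the test-settings
# layout used throughout these snippets.
Search.index_refresh(IDX['MARKER']['indexName'])
assert Search.index_exists(IDX['MARKER']['indexName'])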
def get_meta_info(cls, idx, idx_type):
    elastic_url = ElasticSettings.url()
    meta_url = idx + '/' + idx_type + '/_mapping'
    # print(elastic_url + meta_url)
    meta_response = Search.elastic_request(elastic_url, meta_url, is_post=False)
    try:
        elastic_meta = json.loads(meta_response.content.decode("utf-8"))
        return elastic_meta[idx]['mappings'][idx_type]['_meta']
    except (ValueError, KeyError):
        return None
def is_running(cls, repo=''):
    url = ElasticSettings.url() + '/_snapshot/' + repo + '/_status'
    resp = requests.get(url)
    if resp.status_code != 200:
        logger.debug(url + ' :: ' + str(resp.status_code))
    else:
        json_resp = resp.json()
        try:
            return len(json_resp['snapshots']) > 0
        except Exception as e:
            logger.error(e)
    return False
def chicpeaFileUpload(request, url):
    filesDict = request.FILES
    user = request.user
    files = filesDict.getlist("files[]")
    snpTracks = list()
    idx = ElasticSettings.idx('CP_STATS_UD')
    for f in files:
        lines = f.readlines()  # read once; a second readlines() would return []
        line = lines[0].decode()
        if line.startswith("#"):
            line = lines[1].decode()
        parts = re.split(r"\t", line)
        if re.match(r"\s", line):
            parts = re.split(r"\s", line)
        if len(parts) != 5:
            logger.warning("WARNING: unexpected number of columns (" + str(len(parts)) + "): " + line)
            continue
        f.seek(0)
        bedFile = NamedTemporaryFile(delete=False)
        bedFile.write(f.read())
        bedFile.close()
        idx_type = os.path.basename(bedFile.name)
        snpTracks.append({"value": "ud-" + idx_type, "text": f.name})
        os.system("curl -XDELETE '" + ElasticSettings.url() + "/" + idx + "/" + idx_type + "'")
        call_command("index_search", indexName=idx, indexType=idx_type, indexBED=bedFile.name)
        logger.debug("index_search --indexName " + idx + " --indexType " + idx_type +
                     " --indexBED " + bedFile.name)
        os.system("curl -XPUT " + ElasticSettings.url() + "/" + idx + "/" + idx_type +
                  "/_meta -d '{\"label\": \"" + f.name + "\", \"owner\": \"" + user.username +
                  "\", \"uploaded\": \"" + str(timezone.now()) + "\"}'")
        os.remove(bedFile.name)  # remove the temp file; `bedFile.delete` was a no-op attribute access
        elastic_factory.create_idx_type_model_permissions(user, indexKey='CP_STATS_UD',
                                                          indexTypeKey='UD-' + idx_type.upper())
    context = dict()
    context['userSNPTracks'] = snpTracks
    return HttpResponse(json.dumps(context), content_type="application/json")
def update_doc(cls, doc, part_doc, elastic_url=None):
    ''' Update a document with a partial document. '''
    if elastic_url is None:
        elastic_url = ElasticSettings.url()
    url = doc._meta['_index'] + '/' + doc.type() + '/' + doc._meta['_id'] + '/_update'
    response = Search.elastic_request(elastic_url, url, data=json.dumps(part_doc))
    logger.debug("curl -XPOST '" + elastic_url + '/' + url + "' -d '" + json.dumps(part_doc) + "'")
    if response.status_code != 200:
        logger.warning("Error: elastic response not 200: " + url)
        logger.warning(response.json())
    return response.json()
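# Hedged usage sketch: apply a partial document update to a document returned
# by Search.search(). The partial-doc format ({"doc": {...}}) matches the
# _update curl used later in this section; the owning class name (Update) is
# an assumption for illustration.
part_doc = {"doc": {"disease_locus": "TBC"}}
Update.update_doc(doc, part_doc)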
def test_mapping_parent_child(self):
    ''' Test creating mapping with parent child relationship. '''
    gene_mapping = MappingProperties("gene")
    gene_mapping.add_property("symbol", "string", analyzer="full_name")
    inta_mapping = MappingProperties("publication", "gene")
    load = Loader()
    idx = "test__mapping__" + SEARCH_SUFFIX
    options = {"indexName": idx, "shards": 1}
    requests.delete(ElasticSettings.url() + '/' + idx)

    # add child mappings first
    status = load.mapping(inta_mapping, "publication", analyzer=Loader.KEYWORD_ANALYZER, **options)
    self.assertTrue(status, "mapping interactions")
    status = load.mapping(gene_mapping, "gene", analyzer=Loader.KEYWORD_ANALYZER, **options)
    self.assertTrue(status, "mapping genes")

    # load docs and test has parent query
    json_data = '{"index": {"_index": "%s", "_type": "gene", "_id" : "1"}}\n' % idx
    json_data += json.dumps({"symbol": "PAX1"}) + '\n'
    json_data += '{"index": {"_index": "%s", "_type": "publication", "_id" : "2", "parent": "1"}}\n' % idx
    json_data += json.dumps({"pubmed": 1234}) + '\n'
    Bulk.load(idx, '', json_data)
    Search.index_refresh(idx)

    query = ElasticQuery.has_parent('gene', Query.match('symbol', 'PAX1'))
    elastic = Search(query, idx=idx, idx_type='publication', size=500)
    docs = elastic.search().docs
    self.assertEqual(len(docs), 1)
    self.assertEqual(getattr(docs[0], 'pubmed'), 1234)
    self.assertEqual(docs[0].parent(), '1')
    self.assertRaises(QueryError, ElasticQuery.has_parent, 'gene', 'xxxxx')

    # test has child query
    query = ElasticQuery.has_child('publication', Query.match('pubmed', 1234))
    elastic = Search(query, idx=idx, idx_type='gene', size=500)
    docs = elastic.search().docs
    self.assertEqual(len(docs), 1)
    self.assertEqual(getattr(docs[0], 'symbol'), 'PAX1')
    self.assertEqual(docs[0].parent(), None)
    requests.delete(ElasticSettings.url() + '/' + idx)
def show(cls, repo, snapshots, all_repos):
    ''' Show the information for the named snapshots. '''
    if all_repos:
        repo = ''
        snapshots = ''
    url = ElasticSettings.url() + '/_snapshot/' + repo + '/' + snapshots
    resp = requests.get(url)
    if resp.status_code != 200:
        logger.error("Returned status (for " + url + "): " + str(resp.status_code))
        logger.error(resp.json()["error"])
        return False
    print(json.dumps(resp.json(), indent=4))
    return True
def get_mapping(self, mapping_type=None):
    ''' Return the mappings for an index (host:port/{index}/_mapping/{type}). '''
    self.mapping_url = self.idx + '/_mapping'
    if mapping_type is not None:
        self.mapping_url += '/' + mapping_type
    elif self.idx_type is not None:
        self.mapping_url += '/' + self.idx_type
    response = Search.elastic_request(ElasticSettings.url(), self.mapping_url, is_post=False)
    if response.status_code != 200:
        json_err = json.dumps({"error": response.status_code,
                               "response": response.content.decode("utf-8"),
                               "url": self.mapping_url})
        logger.warning(json_err)
        return json_err
    return response.json()
def get_criteria_index_types(cls, idx_key):
    idx = ElasticSettings.idx(idx_key)
    elastic_url = ElasticSettings.url()
    url = idx + '/_mappings'
    response = Search.elastic_request(elastic_url, url, is_post=False)
    if "error" in response.json():
        logger.warning(response.json())
        return None
    # get idx_types from _mapping
    elastic_mapping = json.loads(response.content.decode("utf-8"))
    idx_types = list(elastic_mapping[idx]['mappings'].keys())
    return idx_types
def suggest(cls, term, idx, elastic_url=None, name='data', field='suggest',
            context=None, size=5):
    ''' Auto completion suggestions for a given term. '''
    if elastic_url is None:
        elastic_url = ElasticSettings.url()
    url = idx + '/_suggest'
    suggest = {
        name: {
            "text": term,
            "completion": {
                "field": field,
                "size": size
            }
        }
    }
    if context is not None:
        suggest[name]['completion'].update(context)
    response = Search.elastic_request(elastic_url, url, data=json.dumps(suggest))
    logger.debug("curl -XPOST '" + elastic_url + '/' + url + "' -d '" + json.dumps(suggest) + "'")
    if response.status_code != 200:
        logger.warning("Suggester error: elastic response not 200: " + url)
        logger.warning(response.json())
    return response.json()
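# Hedged usage sketch for the completion suggester, assuming the standard
# elasticsearch _suggest response layout (entries keyed by the suggester name
# with candidate completions under 'options'). The owning class name (Suggest)
# and the index name are assumptions for illustration.
resp = Suggest.suggest('PAX', 'genes_idx', name='data', field='suggest', size=5)
options = resp['data'][0]['options']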
def create_repository(self, repo, location):
    url = ElasticSettings.url() + '/_snapshot/' + repo
    if Snapshot.exists(repo, ''):
        logger.error("Repository " + repo + " already exists!")
        return False
    parent = os.path.abspath(os.path.join(location, ".."))
    if not os.path.isdir(parent):
        logger.warning("Check directory exists: " + parent)
    data = {"type": "fs", "settings": {"location": location}}
    resp = requests.put(url, data=json.dumps(data))
    if resp.status_code != 200:
        logger.error("Status (" + url + "): " + str(resp.status_code) +
                     " :: " + str(resp.json()["error"]))
        return False
    return True
def test_criteria_mappings(self, idx, idx_types):
    (main_codes, other_codes) = CriteriaManager.get_available_diseases()
    site_enabled_diseases = main_codes + other_codes
    elastic_url = ElasticSettings.url()
    for idx_type in idx_types:
        url = idx + '/' + idx_type + '/_mapping'
        response = Search.elastic_request(elastic_url, url, is_post=False)
        elastic_type_mapping = json.loads(response.content.decode("utf-8"))
        property_keys = list(elastic_type_mapping[idx]['mappings'][idx_type]['properties'].keys())
        # check that score, disease_tags and qid are in the mapping
        self.assertIn('score', property_keys)
        self.assertIn('disease_tags', property_keys)
        self.assertIn('qid', property_keys)
        # check that all the enabled diseases are there
        for disease in site_enabled_diseases:
            self.assertIn(disease, property_keys)
def get_models_to_delete(self):
    ''' Get models to delete. '''
    idx_key = 'CP_STATS_UD'
    idx = ElasticSettings.idx(idx_key)
    # Check if an index type exists in elastic and later check there is a
    # contenttype/model for the given elastic index type.
    elastic_url = ElasticSettings.url()
    url = idx + '/_mapping'
    response = Search.elastic_request(elastic_url, url, is_post=False)
    if "error" in response.json():
        logger.warning(response.json())
        return None

    # get idx_types from _mapping
    elastic_mapping = json.loads(response.content.decode("utf-8"))
    # fix needed if we deploy aliasing for indices
    idx = list(elastic_mapping.keys())[0]
    idx_types = list(elastic_mapping[idx]['mappings'].keys())

    models2go = []
    expire_days = 7  # 1 week
    for idx_type in idx_types:
        # add idx_types that have no docs
        ndocs = Search(idx=idx, idx_type=idx_type).get_count()['count']
        if ndocs == 0:
            models2go.append(idx_type)
        # add idx_types that were not accessed for a given time period
        url = idx + '/' + idx_type + '/_meta'
        response = Search.elastic_request(elastic_url, url, is_post=False)
        elastic_meta = json.loads(response.content.decode("utf-8"))
        if '_source' in elastic_meta:
            uploaded_str_date = elastic_meta['_source']['uploaded']
            yymmdd_str = uploaded_str_date.split()[0]  # format: 2015-11-03 14:43:54.099645+00:00
            from datetime import datetime as dt
            uploaded_date = dt.strptime(yymmdd_str, '%Y-%m-%d').date()
            d1 = datetime.date.today()
            d2 = d1 - datetime.timedelta(days=expire_days)
            if uploaded_date < d2:
                models2go.append(idx_type)
    return models2go
def get_models_to_delete(self):
    """Get models to delete."""
    idx_key = "CP_STATS_UD"
    idx = ElasticSettings.idx(idx_key)
    # Check if an index type exists in elastic and later check there is a
    # contenttype/model for the given elastic index type.
    elastic_url = ElasticSettings.url()
    url = idx + "/_mapping"
    response = Search.elastic_request(elastic_url, url, is_post=False)
    if "error" in response.json():
        logger.warning(response.json())
        return None

    # get idx_types from _mapping
    elastic_mapping = json.loads(response.content.decode("utf-8"))
    idx_types = list(elastic_mapping[idx]["mappings"].keys())

    models2go = []
    expire_days = 7  # 1 week
    for idx_type in idx_types:
        # add idx_types that have no docs
        ndocs = Search(idx=idx, idx_type=idx_type).get_count()["count"]
        if ndocs <= 1:
            models2go.append(idx_type)
        # add idx_types that were not accessed for a given time period
        url = idx + "/" + idx_type + "/_meta"
        response = Search.elastic_request(elastic_url, url, is_post=False)
        elastic_meta = json.loads(response.content.decode("utf-8"))
        if "_source" in elastic_meta:
            uploaded_str_date = elastic_meta["_source"]["uploaded"]
            yymmdd_str = uploaded_str_date.split()[0]  # format: 2015-11-03 14:43:54.099645+00:00
            from datetime import datetime as dt
            uploaded_date = dt.strptime(yymmdd_str, "%Y-%m-%d").date()
            d1 = datetime.date.today()
            d2 = d1 - datetime.timedelta(days=expire_days)
            if uploaded_date < d2:
                models2go.append(idx_type)
    return models2go
def setupIdx(cls, idx_name_arr):
    ''' Setup indices in the given array of key names (e.g. ['GENE', 'DISEASE', ...]). '''
    idx_settings = {
        "settings": {
            "analysis": {
                "analyzer": {
                    "full_name": {"filter": ["standard", "lowercase"], "tokenizer": "keyword"}
                }
            },
            "number_of_shards": 1
        }
    }
    IDX = PydginTestSettings.IDX
    for name in idx_name_arr:
        requests.put(ElasticSettings.url() + '/' + IDX[name]['indexName'],
                     data=json.dumps(idx_settings))
        call_command('index_search', **IDX[name])
    for name in idx_name_arr:
        # wait for the elastic load to finish
        Search.index_refresh(IDX[name]['indexName'])
def add_disease_locus(seqid, locus_id, regionName, disease, tier, species, weight, doc_ids):
    data = {
        "region_name": disease + " " + regionName,
        "disease": disease,
        "tier": tier,
        "species": species,
        "tags": {"weight": weight},
        "locus_id": locus_id,
        "seqid": seqid,
        "hits": doc_ids
        # "suggest": {"input": [disease+" "+regionName, regionName], "weight": weight}
    }
    resp = requests.put(ElasticSettings.url() + '/' + idx + '/disease_locus/' + locus_id,
                        data=json.dumps(data))
    if resp.status_code != 201:
        print(str(resp.content))
        print("Problem loading " + disease + " " + regionName)
    else:
        print("Loaded " + locus_id + " - " + regionName)
def create_snapshot(cls, repo, snapshot, indices):
    ''' Create a snapshot for the specified indices or all if indices is None. '''
    url = ElasticSettings.url() + '/_snapshot/' + repo + '/' + snapshot + '?wait_for_completion=true'
    resp = requests.get(url)
    if resp.status_code == 200:
        logger.error("Snapshot " + snapshot + " already exists!")
        return False
    data = {}
    if indices is not None:
        data = {"indices": indices}
    resp = requests.put(url, data=json.dumps(data))
    if resp.status_code != 200:
        logger.error("Snapshot " + snapshot + " create error! :: " + str(resp.json()["error"]))
        return False
    return True
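# Hedged sketch of the snapshot lifecycle defined above: create a repository,
# snapshot an index, wait for it, then clean up. The call-on-class style and
# wait_for_snapshot match the snapshot test later in this section; repo and
# snapshot names and the filesystem location are placeholders.
Snapshot.create_repository('test_repo', '/tmp/test_snapshots')
Snapshot.create_snapshot('test_repo', 'snapshot_1', IDX['MARKER']['indexName'])
Snapshot.wait_for_snapshot('test_repo', 'snapshot_1')
Snapshot.delete_repository('test_repo')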
def get_context_models_to_delete(self, *args, **options):
    ''' Get models to delete. '''
    ct = options['content_type']
    retDict = dict()
    retDict['acknowledged'] = 0
    logger.debug(ct)
    idx_key = 'CP_STATS_UD'
    idx = ElasticSettings.idx(idx_key)
    # Check if an index type exists in elastic and later check there is a
    # contenttype/model for the given elastic index type.
    elastic_url = ElasticSettings.url()
    url = idx + '/_mapping'
    response = Search.elastic_request(elastic_url, url, is_post=False)
    if "error" in response.json():
        logger.warning(response.json())
        retDict['errorMsg'] = response.json()
        self.stdout.write(json.dumps(retDict))
        return

    # get idx_types from _mapping
    elastic_mapping = json.loads(response.content.decode("utf-8"))
    # fix needed if we deploy aliasing for indices
    idx = list(elastic_mapping.keys())[0]
    idx_types = list(elastic_mapping[idx]['mappings'].keys())
    logger.debug(idx_types)

    for idx_type in idx_types:
        if idx_type != ct:
            continue
        logger.debug("Found " + idx_type + " equal to " + ct)
        ndocs = Search(idx=idx, idx_type=idx_type).get_count()['count']
        # logger.debug(Search(idx=idx, idx_type=idx_type).get_json_response())
        logger.debug("We have " + str(ndocs))
        if ndocs > 0:
            for cnt in ContentType.objects.filter():
                if str(cnt.name).endswith(ct + '_idx_type'):
                    logger.debug('Matched, finding permissions for %s %s' % (str(cnt.name), str(cnt.id)))
                    logger.debug("deleting %s" % ct)
                    cnt.delete()
                    retDict['acknowledged'] = 1
    # logger.debug(retDict)
    self.stdout.write(json.dumps(retDict))
def add_arguments(self, parser):
    parser.add_argument('snapshot', type=str, help='Snapshot to restore.')
    parser.add_argument('--url', dest='url', default=ElasticSettings.url(),
                        metavar="ELASTIC_URL", help='Elastic URL to restore to.')
    parser.add_argument('--repo', dest='repo', default=ElasticSettings.getattr('REPOSITORY'),
                        metavar=ElasticSettings.getattr('REPOSITORY'), help='Repository name')
    parser.add_argument('--indices', dest='indices', default=None, metavar="idx1,idx2",
                        help='Indices (comma separated) to be restored from a snapshot (default all).')
def test_server(self):
    ''' Test elasticsearch server is running and its status. '''
    try:
        url = ElasticSettings.url() + '/_cluster/health/'
        resp = requests.get(url)
        self.assertEqual(resp.status_code, 200, "Health page status code")
        if resp.json()['status'] == 'red':  # allow status to recover if necessary
            for _ in range(3):
                time.sleep(1)
                resp = requests.get(url)
                if resp.json()['status'] != 'red':
                    break
        self.assertFalse(resp.json()['status'] == 'red', 'Health report - red')
    except requests.exceptions.Timeout:
        self.assertTrue(False, 'timeout exception')
    except requests.exceptions.TooManyRedirects:
        self.assertTrue(False, 'too many redirects exception')
    except requests.exceptions.ConnectionError:
        self.assertTrue(False, 'request connection exception')
    except requests.exceptions.RequestException:
        self.assertTrue(False, 'request exception')
def get_elastic_settings_with_user_uploads(cls, elastic_dict=None):
    ''' Get the updated elastic settings with user uploaded idx_types. '''
    idx_key = 'CP_STATS_UD'
    idx = ElasticSettings.idx(idx_key)
    # check if the index exists
    elastic_url = ElasticSettings.url()
    url = idx + '/_mapping'
    response = Search.elastic_request(elastic_url, url, is_post=False)
    if "error" in response.json():
        logger.warning(response.json())
        return None

    # get idx_types from _mapping
    elastic_mapping = json.loads(response.content.decode("utf-8"))
    idx_types = list(elastic_mapping[idx]['mappings'].keys())

    if elastic_dict is None:
        elastic_dict = ElasticSettings.attrs().get('IDX')

    idx_type_dict = {}
    for idx_type in idx_types:
        meta_url = idx + '/' + idx_type + '/_meta/_source'
        meta_response = Search.elastic_request(elastic_url, meta_url, is_post=False)
        try:
            elastic_meta = json.loads(meta_response.content.decode("utf-8"))
            label = elastic_meta['label']
        except (ValueError, KeyError):
            label = "UD-" + idx_type
        idx_type_dict['UD-' + idx_type.upper()] = {'label': label, 'type': idx_type}

    elastic_dict['CP_STATS_UD']['idx_type'] = idx_type_dict
    return elastic_dict
def test_create_restore_delete_snapshot(self):
    self.wait_for_running_snapshot()
    snapshot = 'test_' + ElasticSettings.getattr('TEST')
    repo = SnapshotTest.TEST_REPO

    # create a snapshot
    call_command('snapshot', snapshot, indices=IDX['MARKER']['indexName'], repo=repo)
    Snapshot.wait_for_snapshot(repo, snapshot)
    self.assertTrue(Snapshot.exists(repo, snapshot), "Created snapshot " + snapshot)
    # snapshot already exists so return false
    self.assertFalse(Snapshot.create_snapshot(repo, snapshot, IDX['MARKER']['indexName']))

    # delete index
    requests.delete(ElasticSettings.url() + '/' + IDX['MARKER']['indexName'])
    self.assertFalse(Search.index_exists(IDX['MARKER']['indexName']), "Removed index")

    # restore from snapshot
    call_command('restore_snapshot', snapshot, repo=repo, indices=IDX['MARKER']['indexName'])
    Search.index_refresh(IDX['MARKER']['indexName'])
    self.assertTrue(Search.index_exists(IDX['MARKER']['indexName']), "Restored index exists")

    # remove snapshot
    call_command('snapshot', snapshot, delete=True, repo=repo)
    Snapshot.wait_for_snapshot(repo, snapshot, delete=True, count=10)
    self.assertFalse(Snapshot.exists(repo, snapshot), "Deleted snapshot " + snapshot)
def idx(cls, disease_f, idx, idx_type):
    ''' Parse and load disease data. '''
    for line in disease_f:
        line = line.strip()
        if line.startswith("#"):
            continue
        parts = re.split(r'\t', line)
        data = {
            "name": parts[0],
            "code": parts[2].lower(),
            "description": parts[1],
            "colour": parts[3],
            "tier": int(parts[4])
        }
        data['suggest'] = {}
        data['suggest']["input"] = [parts[2].lower(), parts[0]]
        data['suggest']["weight"] = 250
        resp = requests.put(ElasticSettings.url() + '/' + idx + '/' + idx_type + '/' + parts[2].lower(),
                            data=json.dumps(data))
        if resp.status_code == 201:
            logger.debug("Loaded " + parts[0])
        else:
            logger.error("Problem loading " + parts[0])
def tearDownModule():
    ''' Remove loaded test indices and test repository. '''
    for key in IDX:
        requests.delete(ElasticSettings.url() + '/' + IDX[key]['indexName'])
    call_command('repository', SnapshotTest.TEST_REPO, delete=True)
def tearDownModule():
    ''' Remove test indices. '''
    requests.delete(ElasticSettings.url() + '/' + IDX['MARKER']['indexName'])
def chicpeaDeleteUD(request, url):
    queryDict = request.POST
    idx_type = queryDict.get("userDataIdx")
    idx = getattr(chicp_settings, 'CHICP_IDX').get('userdata').get('INDEX')
    output = subprocess.check_output(
        "curl -XDELETE '" + ElasticSettings.url() + "/" + idx + "/" + idx_type + "'", shell=True)
    return HttpResponse(output, content_type="application/json")
from elastic.elastic_settings import ElasticSettings

OVERRIDE_SETTINGS_CHICP = \
    {'default': {
        'ELASTIC_URL': ElasticSettings.url(),
        'IDX': {
            'CP_STATS_UD': {
                'name': 'cp:hg19_userdata_bed',
                'label': 'User Data',
                'idx_type': {},
            },
            'CP_STATS_IC': {
                'name': 'cp:hg19_immunochip_bed',
                'label': 'ImmunoChip',
                'idx_type': {
                    'IC-ATD_COOPER': {'label': "ATD - Cooper et al.", 'type': 'atd_cooper', 'auth_public': True},
                    'IC-CEL_TRYNKA': {'label': "CEL - Trynka et al.", 'type': 'cel_trynka', 'auth_public': True},
                    'IC-JIA_HINKS_UK': {'label': "JIA - Hinks et al. UK", 'type': 'jia_hinks_uk'},
                    'IC-MS_IMSGC': {'label': "MS - IMSGC et al.", 'type': 'ms_imsgc'},
                    'IC-NAR_FARACO': {'label': "NAR - Faraco et al.", 'type': 'nar_faraco'},
                    'IC-PBC_LIU': {'label': "PBC - Liu et al.", 'type': 'pbc_liu', 'auth_public': True},
                    'IC-RA_EYRE': {'label': "RA - Eyre et al.", 'type': 'ra_eyre', 'auth_public': True},
                    'IC-T1D_ONENGUT': {'label': 'T1D - Onengut et al.', 'type': 't1d_onengut', 'auth_public': True},
                },
                'auth_public': True,
            },
            'CP_STATS_GWAS': {
                'name': 'cp:hg19_gwas_bed',
                'label': 'GWAS Statistic',
                'idx_type': {
                    'GWAS-DUBOIS': {'label': 'CEL - Dubois et al.', 'type': 'cel_dubois',
def test_settings(self):
    ''' Test elastic server is running. '''
    resp = requests.get(ElasticSettings.url())
    self.assertEqual(resp.status_code, 200)
def marker_is_gwas_significant_in_ic(cls, hit, section=None, config=None, result_container=None):
    """ e.g. /hg38_gwas_statistics,hg38_ic_statistics/_search?pretty
        -d '{"query":{"range":{"p_value":{"lt": 0.00000005}}}}' """
    if result_container is None:  # avoid a shared mutable default argument
        result_container = {}
    gw_sig_p = 5e-08  # genome-wide significance threshold
    feature_doc = hit["_source"]
    feature_doc["_id"] = hit["_id"]
    idx = hit["_index"]
    idx_type = hit["_type"]

    # get meta data: study id and disease
    elastic_url = ElasticSettings.url()
    meta_url = idx + "/" + idx_type + "/_mapping"
    meta_response = Search.elastic_request(elastic_url, meta_url, is_post=False)
    try:
        elastic_meta = json.loads(meta_response.content.decode("utf-8"))
        meta_info = elastic_meta[idx]["mappings"][idx_type]["_meta"]
        disease = meta_info["disease"]
        dil_study_id = meta_info["study"]
    except (ValueError, KeyError):
        disease = None
        dil_study_id = None

    marker = feature_doc.get("marker")
    if marker is None or disease is None:
        return result_container
    p_val = feature_doc["p_value"]
    if p_val is None:
        return result_container

    global counter
    counter = counter + 1
    p_val_to_compare = float(p_val)
    if p_val_to_compare >= gw_sig_p:
        return result_container

    if dil_study_id is None or dil_study_id == "None":
        first_author = "NA"
        dil_study_id = "NA"
    else:
        query = ElasticQuery(Query.ids([dil_study_id]))
        elastic = Search(search_query=query, idx=ElasticSettings.idx("STUDY", "STUDY"), size=1)
        study_doc = elastic.search().docs[0]
        author = getattr(study_doc, "authors")[0]
        first_author = author["name"] + " " + author["initials"]

    fnotes = {
        "linkdata": "pval",
        "linkvalue": p_val_to_compare,
        "linkid": dil_study_id,
        "linkname": first_author,
    }
    return cls.populate_container(dil_study_id, first_author, fnotes=fnotes,
                                  features=[marker], diseases=[disease],
                                  result_container=result_container)
def filter_queryset(self, request, queryset, view):
    ''' Get disease regions. '''
    try:
        filterable = getattr(view, 'filter_fields', [])
        filters = dict([(k, v) for k, v in request.GET.items() if k in filterable])
        dis = filters.get('disease', 'T1D')
        show_genes = filters.get('genes', False)
        show_markers = filters.get('markers', False)
        show_regions = filters.get('regions', True)
        build = self._get_build(filters.get('build', settings.DEFAULT_BUILD))
        docs = DiseaseLocusDocument.get_disease_loci_docs(dis)
        if len(docs) == 0:
            messages.error(request, 'No regions found for ' + dis + '.')

        visible_hits = DiseaseLocusDocument.get_hits([h for r in docs for h in getattr(r, 'hits')])
        regions = []
        all_markers = []
        all_genes = []
        ens_all_cand_genes = []
        for r in docs:
            region = r.get_disease_region(visible_hits, build=build)
            if region is not None:
                ens_all_cand_genes.extend(region['ens_cand_genes'])
                all_markers.extend(region['markers'])
                region['hits'] = [self._study_hit_obj(s, region) for s in
                                  StudyHitDocument.process_hits(r.hit_docs, region['all_diseases'])]

                (all_coding, all_non_coding) = views.get_genes_for_region(
                    getattr(r, "seqid"), region['rstart'] - 500000, region['rstop'] + 500000)
                (region_coding, coding_up, coding_down) = views._region_up_down(
                    all_coding, region['rstart'], region['rstop'])
                (region_non_coding, non_coding_up, non_coding_down) = views._region_up_down(
                    all_non_coding, region['rstart'], region['rstop'])
                region['genes'] = {
                    'upstream': {'coding': [g.doc_id() for g in coding_up],
                                 'non_coding': [g.doc_id() for g in non_coding_up]},
                    'region': {'coding': [g.doc_id() for g in region_coding],
                               'non_coding': [g.doc_id() for g in region_non_coding]},
                    'downstream': {'coding': [g.doc_id() for g in coding_down],
                                   'non_coding': [g.doc_id() for g in non_coding_down]},
                }
                all_genes.extend(region['genes']['region']['coding'])
                all_genes.extend(region['genes']['region']['non_coding'])
                regions.append(region)

        # look for pleiotropy by looking for diseases for the markers in IC_STATS and other study hits
        stats_query = ElasticQuery.filtered(Query.terms("marker", all_markers),
                                            Filter(RangeQuery("p_value", lte=5E-08)))
        stats_docs = Search(stats_query, idx=ElasticSettings.idx("IC_STATS"),
                            size=len(all_markers)).search().docs
        meta_response = Search.elastic_request(ElasticSettings.url(),
                                               ElasticSettings.idx("IC_STATS") + '/_mapping',
                                               is_post=False)

        extra_markers = []
        for region in regions:
            # add diseases from IC/GWAS stats
            (study_ids, region['marker_stats']) = views._process_stats(stats_docs,
                                                                       region['markers'], meta_response)
            region['all_diseases'].extend([getattr(mstat, 'disease') for mstat in region['marker_stats']])

            other_hits_query = ElasticQuery(
                BoolQuery(must_arr=[RangeQuery("tier", lte=2), Query.terms("marker", region['markers'])],
                          must_not_arr=[Query.terms("dil_study_id", study_ids)]))
            other_hits = Search(other_hits_query, idx=ElasticSettings.idx('REGION', 'STUDY_HITS'),
                                size=100).search()
            region['extra_markers'] = [self._study_hit_obj(s, region) for s in
                                       StudyHitDocument.process_hits(other_hits.docs,
                                                                     region['all_diseases'])]
            region['all_diseases'] = list(set(region['all_diseases']))
            extra_markers.extend([m['marker_id'] for m in region['extra_markers']])

        # get markers
        marker_objs = []
        if show_markers:
            query = ElasticQuery(Query.terms("id", all_markers), sources=['id', 'start'])
            marker_docs = Search(search_query=query, idx=ElasticSettings.idx('MARKER', 'MARKER'),
                                 size=len(all_markers)).search().docs
            mids = {getattr(m, 'id'): getattr(m, 'start') for m in marker_docs}
            marker_objs = [h for r in regions for h in r['hits']]
            marker_objs.extend([h for r in regions for h in r['extra_markers']])
            for m in marker_objs:
                m['start'] = mids[m['marker_id']]

        # get genes
        gene_objs = []
        if show_genes:
            all_genes.extend(ens_all_cand_genes)
            gene_docs = GeneDocument.get_genes(all_genes,
                                               sources=['start', 'stop', 'chromosome', 'symbol', 'biotype'])
            for doc in Document.sorted_alphanum(gene_docs, 'chromosome'):
                ensembl_id = doc.doc_id()
                region_name = ''
                candidate_gene = 0
                for region in regions:
                    if ('genes' in region and
                        (ensembl_id in region['genes']['region']['coding'] or
                         ensembl_id in region['genes']['region']['non_coding'] or
                         ensembl_id in region['ens_cand_genes'])):
                        region_name = region['region_name']
                        candidate_gene = 1 if ensembl_id in region['ens_cand_genes'] else 0
                        break
                gene_objs.append({
                    'ensembl_id': ensembl_id,
                    'seqid': 'chr' + getattr(doc, 'chromosome'),
                    'start': getattr(doc, 'start'),
                    'end': getattr(doc, 'stop'),
                    'symbol': getattr(doc, 'symbol'),
                    'biotype': getattr(doc, 'biotype'),
                    'region_name': region_name,
                    'candidate_gene': candidate_gene
                })

        if show_regions == 'false':
            regions = []
        regions.extend(gene_objs)
        regions.extend(marker_objs)
        return regions
    except (TypeError, ValueError, IndexError, ConnectionError) as e:
        print(e)
        raise Http404
def get_disease(cls, request, disease, context):
    if disease is None:
        messages.error(request, 'No disease given.')
        raise Http404()
    disease = disease.lower()
    query = ElasticQuery(Query.terms("code", disease.split(',')))
    elastic = Search(query, idx=ElasticSettings.idx('DISEASE', 'DISEASE'), size=5)
    res = elastic.search()
    if res.hits_total == 0:
        messages.error(request, 'Disease(s) ' + disease + ' not found.')
    elif res.hits_total < 9:
        disease_docs = res.docs
        names = ', '.join([getattr(doc, 'name') for doc in disease_docs])
        meta_response = Search.elastic_request(ElasticSettings.url(),
                                               ElasticSettings.idx("IC_STATS") + '/_mapping',
                                               is_post=False)
        elastic_meta = json.loads(meta_response.content.decode("utf-8"))
        for dis in disease_docs:
            dis_code = getattr(dis, 'code').upper()
            docs = DiseaseLocusDocument.get_disease_loci_docs(dis_code)
            regions = []
            ens_all_cand_genes = []
            all_markers = []
            for r in docs:
                region = r.get_disease_region()
                if region is not None:
                    regions.append(region)
                    ens_all_cand_genes.extend(region['ens_cand_genes'])
                    all_markers.extend(region['markers'])
            # get ensembl to gene symbol mapping for all candidate genes
            all_cand_genes = gene.utils.get_gene_docs_by_ensembl_id(ens_all_cand_genes)
            for region in regions:
                region['cand_genes'] = {cg: all_cand_genes[cg]
                                        for cg in region.pop("ens_cand_genes", None)}
            setattr(dis, 'regions', regions)

            # look for pleiotropy by looking for diseases for the markers in IC_STATS and other study hits
            stats_query = ElasticQuery.filtered(Query.terms("marker", all_markers),
                                                Filter(RangeQuery("p_value", lte=5E-08)),
                                                sources=['marker'])
            stats_docs = Search(stats_query, idx=ElasticSettings.idx("IC_STATS"),
                                size=len(all_markers)).search().docs
            other_hits_query = ElasticQuery(
                BoolQuery(must_arr=[RangeQuery("tier", lte=2), Query.terms("marker", all_markers)]),
                sources=['marker', 'disease'])
            other_hits = Search(other_hits_query, idx=ElasticSettings.idx('REGION', 'STUDY_HITS'),
                                size=5000).search().docs
            for region in regions:
                diseases = [dis_code]
                for doc in stats_docs:
                    if getattr(doc, 'marker') in region['markers']:
                        meta_info = elastic_meta[doc.index()]['mappings'][doc.type()]['_meta']
                        if meta_info['disease'] not in diseases:
                            diseases.append(meta_info['disease'])
                for doc in other_hits:
                    if getattr(doc, 'marker') in region['markers']:
                        if doc.disease is not None and doc.disease not in diseases:
                            diseases.append(doc.disease)
                region['diseases'] = diseases

            studies = StudyDocument.get_studies(disease_code=dis_code)
            for doc in studies:
                setattr(doc, 'study_id', getattr(doc, 'study_id').replace('GDXHsS00', ''))
                pmid = getattr(doc, 'principal_paper')
                pubs = PublicationDocument.get_publications(pmid, sources=['date', 'authors.name', 'journal'])
                if len(pubs) > 0:
                    authors = getattr(pubs[0], 'authors')
                    setattr(doc, 'date', getattr(pubs[0], 'date'))
                    setattr(doc, 'journal', getattr(pubs[0], 'journal'))
                    setattr(doc, 'author', authors[0]['name'].rsplit(None, 1)[-1] if authors else "")
            setattr(dis, 'studies', studies)
        context['features'] = disease_docs
        context['title'] = names
        return context
    raise Http404()
def tearDownIdx(cls, idx_name_arr):
    ''' Remove indices by their key names (e.g. ['GENE', 'DISEASE', ...]). '''
    for name in idx_name_arr:
        requests.delete(ElasticSettings.url() + '/' + PydginTestSettings.IDX[name]['indexName'])
def get_regions(cls, request, dis, context):
    # is_authenticated = False
    elastic_url = ElasticSettings.url()
    (core, other) = Disease.get_site_diseases(dis_list=dis.upper().split(','))
    if len(core) == 0 and len(other) == 0:
        messages.error(request, 'Disease ' + dis + ' not found.')
        raise Http404()
    disease = core[0] if len(core) > 0 else other[0]
    context['title'] = getattr(disease, "name") + " Regions"

    docs = DiseaseLocusDocument.get_disease_loci_docs(dis)
    if len(docs) == 0:
        messages.error(request, 'No regions found for ' + dis + '.')
        raise Http404()

    visible_hits = DiseaseLocusDocument.get_hits([h for r in docs for h in getattr(r, 'hits')])
    meta_response = Search.elastic_request(elastic_url,
                                           ElasticSettings.idx("IC_STATS") + '/_mapping',
                                           is_post=False)
    regions = []
    ens_all_cand_genes = []
    all_markers = []
    for r in docs:
        region = r.get_disease_region(visible_hits)
        if region is not None:
            ens_all_cand_genes.extend(region['ens_cand_genes'])
            all_markers.extend(region['markers'])
            region['hits'] = StudyHitDocument.process_hits(r.hit_docs, region['all_diseases'])

            (all_coding, all_non_coding) = get_genes_for_region(
                getattr(r, "seqid"), region['rstart'] - 500000, region['rstop'] + 500000)
            (region_coding, coding_up, coding_down) = _region_up_down(
                all_coding, region['rstart'], region['rstop'])
            (region_non_coding, non_coding_up, non_coding_down) = _region_up_down(
                all_non_coding, region['rstart'], region['rstop'])
            region['genes'] = {
                'upstream': {'coding': coding_up, 'non_coding': non_coding_up},
                'region': {'coding': region_coding, 'non_coding': region_non_coding},
                'downstream': {'coding': coding_down, 'non_coding': non_coding_down},
            }
            regions.append(region)

    # look for pleiotropy by looking for diseases for the markers in IC_STATS and other study hits
    stats_query = ElasticQuery.filtered(Query.terms("marker", all_markers),
                                        Filter(RangeQuery("p_value", lte=5E-08)))
    stats_docs = Search(stats_query, idx=ElasticSettings.idx("IC_STATS"),
                        size=len(all_markers)).search().docs

    # get ensembl to gene symbol mapping for all candidate genes
    all_cand_genes = gene.utils.get_gene_docs_by_ensembl_id(ens_all_cand_genes)
    for region in regions:
        region['cand_genes'] = {cg: all_cand_genes[cg] for cg in region.pop("ens_cand_genes", None)}
        (study_ids, region['marker_stats']) = _process_stats(stats_docs, region['markers'], meta_response)
        # add diseases from IC/GWAS stats
        region['all_diseases'].extend([getattr(mstat, 'disease') for mstat in region['marker_stats']])

        other_hits_query = ElasticQuery(
            BoolQuery(must_arr=[RangeQuery("tier", lte=2), Query.terms("marker", region['markers'])],
                      must_not_arr=[Query.terms("dil_study_id", study_ids)]))
        other_hits = Search(other_hits_query, idx=ElasticSettings.idx('REGION', 'STUDY_HITS'),
                            size=100).search()
        region['extra_markers'] = StudyHitDocument.process_hits(other_hits.docs, region['all_diseases'])

    context['regions'] = regions
    context['disease_code'] = [dis]
    context['disease'] = getattr(disease, "name")
    return context
# fragment: the construction of query2 (and the enclosing loop over disease
# buckets) precedes this snippet
elastic2 = Search(search_query=query2, idx=idx, idx_type='hits',
                  size=int(disease_bucket['doc_count'] + 1), qsort=Sort(buildSort))
results = elastic2.search()
minPos = 0
maxPos = 0
tier = 4
weight = 0
regionCount = 1
regionName = ''
species = ''
doc_ids = []
if len(results.docs) > 0:
    for doc in results.docs:
        # print(doc)
        os.system("curl -XPOST '" + ElasticSettings.url() + "/" + idx + "/hits/" + doc.doc_id() +
                  "/_update?pretty' -d '{\"doc\": {\"disease_locus\": \"TBC\"}}' > /dev/null 2>&1")
        build_info = None
        for b in getattr(doc, 'build_info'):
            if b['build'] == build:
                build_info = b
        if build_info is None:
            print("ERROR - no build information found for b" + str(build))
            continue
        # print(getattr(doc, "disease")+"\t"+getattr(doc, "marker")+"\t"+getattr(doc, "chr_band")+"\t" +
        #       build_info['seqid']+"\t"+str(build_info['start'])+"\t"+str(build_info['end']))
        if minPos == 0 and maxPos == 0:
            minPos = build_info['start']
            maxPos = build_info['end']
def tearDownModule():
    ''' Remove test indices. '''
    requests.delete(ElasticSettings.url() + '/' + IDX['MARKER']['indexName'])
    requests.delete(ElasticSettings.url() + '/' + IDX['GFF_GENERIC']['indexName'])
    requests.delete(ElasticSettings.url() + '/' + IDX['JSON_NESTED']['indexName'])
def process_criteria(cls, feature, section, config, sub_class, test=False):
    ''' Top level function that calls the right criteria implementation based
    on the subclass passed. Iterates over all the documents using
    ScanAndScroll, and the hits are processed by the inner function
    process_hits. The entire result is stored in result_container (a dict),
    and at the end of processing the result is loaded into the elastic index
    after creating the mapping.
    @type  feature: string
    @param feature: feature type, could be 'gene', 'region', 'marker' etc.
    @type  section: string
    @param section: The section in the criteria.ini file.
    @type  config: string
    @param config: The config object initialized from criteria.ini.
    @type  sub_class: string
    @param sub_class: The name of the inherited sub_class where the actual
                      implementation is.
    '''
    global gl_result_container
    gl_result_container = {}
    test_mode = test
    if config is None:
        if test_mode:
            config = CriteriaManager().get_criteria_config(ini_file='test_criteria.ini')
        else:
            config = CriteriaManager().get_criteria_config(ini_file='criteria.ini')

    section_config = config[section]
    source_idx = section_config['source_idx']
    if ',' in source_idx:
        idxs = source_idx.split(',')
        idx_all = [ElasticSettings.idx(idx) for idx in idxs]
        source_idx = ','.join(idx_all)
    else:
        source_idx = ElasticSettings.idx(section_config['source_idx'])

    source_idx_type = None
    if 'source_idx_type' in section_config:
        source_idx_type = section_config['source_idx_type']
    if source_idx_type is not None:
        source_idx = ElasticSettings.idx(section_config['source_idx'],
                                         idx_type=section_config['source_idx_type'])
    else:
        source_idx_type = ''
    logger.warning(source_idx + ' ' + source_idx_type)

    def process_hits(resp_json):
        global gl_result_container
        global hit_counter
        hits = resp_json['hits']['hits']
        for hit in hits:
            hit_counter = hit_counter + 1
            result_container = sub_class.tag_feature_to_disease(
                hit, section, config, result_container=gl_result_container)
            gl_result_container = result_container
            if test_mode:
                if gl_result_container is not None and len(gl_result_container) > 5:
                    return

    query = cls.get_elastic_query(section, config)
    if test_mode:
        result_size = len(gl_result_container)
        from_ = 0
        size_ = 20
        while result_size < 1:
            from_ = from_ + size_
            url = ElasticSettings.url()
            if 'mhc' in section:
                url_search = source_idx + '/_search'
            else:
                url_search = source_idx + '/_search?from=' + str(from_) + '&size=' + str(size_)
            if query is None:
                query = {"query": {"match_all": {}}, "size": 20}
                response = Search.elastic_request(url, url_search, data=json.dumps(query))
                query = None
            else:
                # print(query)
                response = Search.elastic_request(url, url_search, data=json.dumps(query.query))
            process_hits(response.json())
            if gl_result_container is not None:
                result_size = len(gl_result_container)
    else:
        ScanAndScroll.scan_and_scroll(source_idx, call_fun=process_hits, query=query)
    cls.map_and_load(feature, section, config, gl_result_container)
def chicpeaDeleteUD(request, url):
    queryDict = request.POST
    idx_type = queryDict.get("userDataIdx")
    idx = ElasticSettings.idx('CP_STATS_UD')
    output = subprocess.check_output(
        "curl -XDELETE '" + ElasticSettings.url() + "/" + idx + "/" + idx_type + "'", shell=True)
    return HttpResponse(output, content_type="application/json")