def test_create_idx_type_model_permissions(self):
        elastic_settings_before = ElasticSettings.attrs().get('IDX')
        user_types_before = elastic_settings_before['CP_STATS_UD']['idx_type']
        self.assertEqual({}, user_types_before,
                         'CP_STATS_UD idx_type is empty')

        idx = "cp:hg19_userdata_bed"
        new_upload_file = "tmp_newly_uploaded_file"
        idx_type = new_upload_file

        os.system(
            "curl -XPUT " + ElasticSettings.url() + "/" + idx + "/_mapping/" +
            idx_type + " -d '{\"" + idx_type +
            "\":{ \"properties\" : {\"message\" : {\"type\" : \"string\", \"store\" : true } } }}'"
        )

        os.system("curl -XPUT " + ElasticSettings.url() + "/" + idx + "/" +
                  idx_type + "/_meta -d '{\"label\": \"" + new_upload_file +
                  "\", \"owner\": \"" + self.user.username +
                  "\", \"uploaded\": \"" + str(timezone.now()) + "\"}'")

        elastic_settings_after = elastic_factory.create_idx_type_model_permissions(
            self.user,
            indexKey='CP_STATS_UD',
            indexTypeKey='UD-' + new_upload_file.upper(),  # @IgnorePep8
            new_upload_file="tmp_newly_uploaded_file")  # @IgnorePep8

        # elastic_settings_after = elastic_factory.get_elastic_settings_with_user_uploads(elastic_settings_before)
        user_types_after = elastic_settings_after['CP_STATS_UD']['idx_type']
        self.assertTrue(len(user_types_after) > 0, "Has user idx_types")
        self.assertTrue('UD-TMP_NEWLY_UPLOADED_FILE' in user_types_after)
        self.assertEqual(
            user_types_after['UD-TMP_NEWLY_UPLOADED_FILE']['type'],
            'tmp_newly_uploaded_file')
    def tearDown(self):
        ''' Remove loaded test indices and test repository. '''

        key = 'PRIVATE_REGIONS_GFF'
        if key in IDX.keys():
            print(ElasticSettings.url() + '/' + IDX[key]['indexName'])
            requests.delete(ElasticSettings.url() + '/' + IDX[key]['indexName'])
Example #4
def tearDownModule():
    if os.path.exists(TEST_DATA_DIR + "/STAGE"):
        shutil.rmtree(TEST_DATA_DIR + "/STAGE")
    # remove index created
    INI_CONFIG = IniParser().read_ini(MY_INI_FILE)
    requests.delete(ElasticSettings.url() + "/" + INI_CONFIG["GENE_HISTORY"]["index"])
    requests.delete(ElasticSettings.url() + "/" + INI_CONFIG["DBSNP"]["index"])
    os.remove(MY_INI_FILE)
    ens_dir = os.path.join(TEST_DATA_DIR, "DOWNLOAD", "ENSMART_GENE")
    if os.path.exists(ens_dir):
        shutil.rmtree(ens_dir)
Example #6
    def scan_and_scroll(self, idx, call_fun=None, idx_type='', url=None,
                        time_to_keep_scroll=1, query=None):
        ''' Scan and scroll an index and optionally provide a function argument to
        process the hits. '''
        if url is None:
            url = ElasticSettings.url()

        url_search_scan = (idx + '/' + idx_type + '/_search?search_type=scan&scroll=' +
                           str(time_to_keep_scroll) + 'm')
        if query is None:
            query = {
                "query": {"match_all": {}},
                "size":  1000
            }
        else:
            if not isinstance(query, ElasticQuery):
                raise QueryError("not a Query")
            query = query.query

        response = Search.elastic_request(url, url_search_scan, data=json.dumps(query))
        _scroll_id = response.json()['_scroll_id']
        url_scan_scroll = '_search/scroll?scroll=' + str(time_to_keep_scroll) + 'm'

        count = 0
        while True:
            response = Search.elastic_request(url, url_scan_scroll, data=_scroll_id)
            _scroll_id = response.json()['_scroll_id']
            hits = response.json()['hits']['hits']
            nhits = len(hits)
            if nhits == 0:
                break
            count += nhits
            if call_fun is not None:
                call_fun(response.json())
        logger.debug("Scanned No. Docs ( "+idx+"/"+idx_type+" ) = "+str(count))
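
A minimal usage sketch for the method above. The index/type names and the reader instance are hypothetical; any callable taking the scroll-page JSON works as call_fun:

    def print_ids(resp_json):
        ''' Callback invoked once per scroll page of hits. '''
        for hit in resp_json['hits']['hits']:
            print(hit['_id'])

    reader.scan_and_scroll('test_idx', call_fun=print_ids, idx_type='gene')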
Example #7
def chicpeaFileUpload(request, url):
    filesDict = request.FILES
    files = filesDict.getlist("files[]")
    print(files)
    snpTracks = list()
    idx = getattr(chicp_settings, 'CHICP_IDX').get('userdata').get('INDEX')

    for f in files:
        lines = f.readlines()
        line = lines[0].decode()
        if line.startswith("#"):
            line = lines[1].decode()

        parts = re.split(r"\t", line)
        if re.match(r"\s", line):
            parts = re.split(r"\s", line)

        if len(parts) != 5:
            logger.warning("WARNING: unexpected number of columns (" + str(len(parts)) + "): " + line)
            continue

        f.seek(0)
        bedFile = NamedTemporaryFile(delete=False)
        bedFile.write(f.read())
        bedFile.close()
        idx_type = os.path.basename(bedFile.name)
        snpTracks.append({"value": idx_type, "text":  f.name})
        os.system("curl -XDELETE '"+ElasticSettings.url()+"/"+idx+"/"+idx_type+"'")
        call_command("index_search", indexName=idx, indexType=idx_type, indexBED=bedFile.name)
        logger.debug("--indexName "+idx+" --indexType "+idx_type+" --indexBED "+bedFile.name)
        os.remove(bedFile.name)  # remove the temporary BED file

    context = dict()
    context['userSNPTracks'] = snpTracks
    return HttpResponse(json.dumps(context), content_type="application/json")
def tearDownModule():
    # remove index created
    INI_CONFIG = IniParser().read_ini(MY_PUB_INI_FILE)
    requests.delete(ElasticSettings.url() + '/' + INI_CONFIG['DISEASE']['index'])
    os.remove(MY_PUB_INI_FILE)
    if os.path.exists(TEST_DATA_DIR + '/STAGE'):
        shutil.rmtree(TEST_DATA_DIR + '/STAGE')
Example #9
 def delete_repository(cls, repo):
     url = ElasticSettings.url() + '/_snapshot/' + repo
     resp = requests.delete(url)
     if resp.status_code != 200:
         logger.error("Status (" + url + "): " + str(resp.status_code) +
                      " :: " + str(resp.json()["error"]))
         return False
     return True
Example #10
 def get_count(self):
     ''' Return the elastic count for a query result '''
     url = self.idx + '/' + self.idx_type + '/_count?'
     data = {}
     if hasattr(self, 'query'):
         data = json.dumps(self.query)
     response = Search.elastic_request(ElasticSettings.url(), url, data=data)
     return response.json()
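
A short usage sketch (index and type names hypothetical); the _count endpoint returns a JSON body whose 'count' field is read the same way elsewhere on this page:

    ndocs = Search(idx='test_idx', idx_type='gene').get_count()['count']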
Example #11
 def exists(cls, repo, snapshot):
     ''' Test if the repository/snapshot exists. '''
     url = ElasticSettings.url() + '/_snapshot/' + repo + '/' + snapshot
     resp = requests.get(url)
     if resp.status_code != 200:
         return False
     else:
         return True
Example #12
    def load(self, idx, idx_type, json_data, elastic_url=None):
        ''' Bulk load documents. '''
        if elastic_url is None:
            elastic_url = ElasticSettings.url()
        resp = requests.put(elastic_url + '/' + idx + '/' + idx_type + '/_bulk',
                            data=json_data)
        if resp.status_code != 200:
            logger.error('ERROR: ' + idx + ' load status: ' + str(resp.status_code) + ' ' + str(resp.content))

        # report errors found during loading
        if 'errors' in resp.json() and resp.json()['errors']:
            logger.error("ERROR: bulk load error found")
            for item in resp.json()['items']:
                for key in item.keys():
                    if 'error' in item[key]:
                        logger.error("ERROR LOADING:")
                        logger.error(item)
        return resp
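
The json_data argument is Elasticsearch newline-delimited bulk syntax. A minimal sketch, assuming the method is exposed as Bulk.load as in the parent/child mapping test below (index name hypothetical):

    idx = 'test_idx'
    json_data = '{"index": {"_index": "%s", "_type": "gene", "_id": "1"}}\n' % idx
    json_data += json.dumps({"symbol": "PAX1"}) + '\n'
    Bulk.load(idx, '', json_data)  # the type is taken from the action metadata line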
Example #13
    def __init__(self, search_query=None, aggs=None, search_from=0, size=20,
                 search_type=None, idx=ElasticSettings.idx('DEFAULT'), idx_type='',
                 qsort=None, elastic_url=None):
        ''' Set up parameters to use in the search. L{ElasticQuery} is used to
        define a search query.
        @type  search_query: L{ElasticQuery}
        @keyword search_query: The elastic query to search (default: None).
        @type  aggs: L{Aggs}
        @keyword aggs: Aggregations used in the search.
        @type  search_from: integer
        @keyword search_from: Offset used in paginations (default: 0).
        @type  size: integer
        @keyword size: maximum number of hits to return (default: 20).
        @type  search_type: string
        @keyword search_type: Set search_type=count for aggregations.
        @type  idx: string
        @keyword idx: index to search (default: default index defined in settings).
        @type  idx_type: string
        @keyword idx_type: index type (default: '').
        @type  qsort: Sort
        @keyword qsort: defines sorting for the query.
        @type  elastic_url: string
        @keyword elastic_url: Elastic URL (default: default cluster URL).
        '''
        if search_query is not None:
            if not isinstance(search_query, ElasticQuery):
                raise QueryError("not an ElasticQuery")
            self.query = search_query.query

        if aggs is not None:
            if hasattr(self, 'query'):
                self.query.update(aggs.aggs)
            else:
                self.query = aggs.aggs

        if qsort is not None:
            if not isinstance(qsort, Sort):
                raise QueryError("not a Sort")
            if hasattr(self, 'query'):
                self.query.update(qsort.qsort)
            else:
                logger.error("no query to sort")

        if elastic_url is None:
            elastic_url = ElasticSettings.url()

        self.size = size
        self.search_from = search_from
        self.search_type = search_type
        self.idx = idx
        self.idx_type = idx_type
        self.elastic_url = elastic_url
        if self.search_type is None:
            self.url = (self.idx + '/' + self.idx_type +
                        '/_search?size=' + str(self.size) + '&from='+str(self.search_from))
        else:
            self.url = (self.idx + '/' + self.idx_type + '/_search?search_type='+search_type)
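
A usage sketch of the constructor, following the pattern used in the tests below (index and type names hypothetical):

    query = ElasticQuery(Query.match('symbol', 'PAX1'))
    elastic = Search(search_query=query, idx='test_idx', idx_type='gene', size=20)
    docs = elastic.search().docs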
    def get_elastic_settings_with_user_uploads(cls, elastic_dict=None, new_upload_file=None):
        '''Get the updated elastic settings with user uploaded idx_types'''

        idx_key = 'CP_STATS_UD'
        idx = ElasticSettings.idx(idx_key)

        ''' Check if an index type exists in elastic and later check there is a contenttype/model for the given elastic index type. '''  # @IgnorePep8
        elastic_url = ElasticSettings.url()
        url = idx + '/_mapping'
        response = Search.elastic_request(elastic_url, url, is_post=False)
        ''' Why don't we use Search.get_mapping? It is not a class method. '''
        # logger.debug(response.json())
        if "error" in response.json():
            logger.warning(response.json())
            return None

        # get idx_types from _mapping
        elastic_mapping = json.loads(response.content.decode("utf-8"))
        # if aliasing is used the actual index name can differ from the configured
        # one, which causes problems as the name is effectively hardcoded; taking
        # the name from the mapping response handles deployed aliases
        idx = list(elastic_mapping.keys())[0]
        idx_types = list(elastic_mapping[idx]['mappings'].keys())

        if elastic_dict is None:
            elastic_dict = ElasticSettings.attrs().get('IDX')

        idx_type_dict = {}

        existing_ct = [ct.name for ct in ContentType.objects.filter(app_label=cls.PERMISSION_MODEL_APP_NAME)]

        for idx_type in idx_types:

            idx_type_with_suffix = idx_type + cls.PERMISSION_MODEL_TYPE_SUFFIX

            for ct in existing_ct:
                if ct.endswith(idx_type_with_suffix):

                    meta_url = idx + '/' + idx_type + '/_meta/_source'
                    meta_response = Search.elastic_request(elastic_url, meta_url, is_post=False)

                    try:
                        elastic_meta = json.loads(meta_response.content.decode("utf-8"))
                        label = elastic_meta['label']
                    except (ValueError, KeyError):
                        label = "UD-" + idx_type

                    idx_type_dict['UD-' + idx_type.upper()] = {'label': label, 'type': idx_type}

        if new_upload_file is not None:
            idx_type = new_upload_file
            label = "UD-" + idx_type
            idx_type_dict['UD-' + idx_type.upper()] = {'label': label, 'type': idx_type}

        elastic_dict['CP_STATS_UD']['idx_type'] = idx_type_dict
        return elastic_dict
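
For illustration, after an upload named tmp_newly_uploaded_file the new_upload_file branch above produces an entry of this shape (cf. the test at the top of this page):

    # elastic_dict['CP_STATS_UD']['idx_type'] ==
    # {'UD-TMP_NEWLY_UPLOADED_FILE': {'label': 'UD-tmp_newly_uploaded_file',
    #                                 'type': 'tmp_newly_uploaded_file'}}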
Example #15
 def index_exists(cls, idx, idx_type='', url=None):
     ''' Check if an index exists. '''
     elastic_url = url if url is not None else ElasticSettings.url()
     url = idx + '/' + idx_type + '/_mapping'
     response = Search.elastic_request(elastic_url, url, is_post=False)
     if "error" in response.json():
         logger.warning(response.json())
         return False
     return True
Example #16
 def index_refresh(cls, idx, url=None):
     ''' Refresh to make all operations performed since the last refresh
     available for search'''
     elastic_url = url if url is not None else ElasticSettings.url()
     response = Search.elastic_request(elastic_url, idx + '/_refresh')
     if "error" in response.json():
         logger.warning(response.content.decode("utf-8"))
         return False
     return True
Example #17
    def get_meta_info(cls, idx, idx_type):
        elastic_url = ElasticSettings.url()
        meta_url = idx + '/' + idx_type + '/_mapping'
        # print(elastic_url + meta_url)
        meta_response = Search.elastic_request(elastic_url, meta_url, is_post=False)

        try:
            elastic_meta = json.loads(meta_response.content.decode("utf-8"))
            meta_info = elastic_meta[idx]['mappings'][idx_type]['_meta']
            return meta_info
        except (ValueError, KeyError):
            return None
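
The _meta block of a type mapping is free-form; in the criteria example further down this page it carries study metadata, so a returned meta_info might look like (values illustrative):

    # {'disease': 't1d', 'study': 'GDXHsS00004'}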
Example #18
 def is_running(cls, repo=''):
     url = ElasticSettings.url() + '/_snapshot/' + repo + '/_status'
     resp = requests.get(url)
     if resp.status_code != 200:
         logger.debug(url + ' :: ' + str(resp.status_code))
     else:
         json_resp = resp.json()
         try:
             return len(json_resp['snapshots']) > 0
         except Exception as e:
             logger.error(e)
     return False
Example #19
def chicpeaFileUpload(request, url):
    filesDict = request.FILES
    user = request.user
    files = filesDict.getlist("files[]")
    snpTracks = list()
    idx = ElasticSettings.idx('CP_STATS_UD')

    for f in files:
        lines = f.readlines()
        line = lines[0].decode()
        if line.startswith("#"):
            line = lines[1].decode()

        parts = re.split(r"\t", line)
        if re.match(r"\s", line):
            parts = re.split(r"\s", line)

        if len(parts) != 5:
            logger.warning("WARNING: unexpected number of columns (" + str(len(parts)) + "): " + line)
            continue

        f.seek(0)
        bedFile = NamedTemporaryFile(delete=False)
        bedFile.write(f.read())
        bedFile.close()
        idx_type = os.path.basename(bedFile.name)
        snpTracks.append({"value": "ud-"+idx_type, "text":  f.name})
        os.system("curl -XDELETE '"+ElasticSettings.url()+"/"+idx+"/"+idx_type+"'")
        call_command("index_search", indexName=idx, indexType=idx_type, indexBED=bedFile.name)
        logger.debug("index_search --indexName "+idx+" --indexType "+idx_type+" --indexBED "+bedFile.name)
        os.system("curl -XPUT "+ElasticSettings.url()+"/"+idx+"/"+idx_type+"/_meta -d '{\"label\": \"" + f.name +
                  "\", \"owner\": \""+user.username+"\", \"uploaded\": \""+str(timezone.now())+"\"}'")
        os.remove(bedFile.name)  # remove the temporary BED file
        elastic_factory.create_idx_type_model_permissions(user, indexKey='CP_STATS_UD',
                                                          indexTypeKey='UD-'+idx_type.upper())

    context = dict()
    context['userSNPTracks'] = snpTracks

    return HttpResponse(json.dumps(context), content_type="application/json")
Example #21
    def update_doc(cls, doc, part_doc, elastic_url=None):
        ''' Update a document with a partial document.  '''
        if elastic_url is None:
            elastic_url = ElasticSettings.url()
        url = (doc._meta['_index'] + '/' +
               doc.type() + '/' + doc._meta['_id'] + '/_update')
        response = Search.elastic_request(elastic_url, url, data=json.dumps(part_doc))

        logger.debug("curl -XPOST '" + elastic_url + url + "' -d '" + json.dumps(part_doc) + "'")
        if response.status_code != 200:
            logger.warning("Error: elastic response " + str(response.status_code) + ": " + url)
            logger.warning(response.json())
        return response.json()
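
A usage sketch: part_doc follows the standard Elasticsearch partial-update body, doc is a document object returned by a search, and the class name Update is an assumption for the class holding this method:

    doc = elastic.search().docs[0]
    part_doc = {'doc': {'symbol': 'PAX2'}}  # fields to merge into the stored document
    Update.update_doc(doc, part_doc)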
    def test_mapping_parent_child(self):
        ''' Test creating mapping with parent child relationship. '''
        gene_mapping = MappingProperties("gene")
        gene_mapping.add_property("symbol", "string", analyzer="full_name")
        inta_mapping = MappingProperties("publication", "gene")
        load = Loader()
        idx = "test__mapping__"+SEARCH_SUFFIX
        options = {"indexName": idx, "shards": 1}
        requests.delete(ElasticSettings.url() + '/' + idx)

        # add child mappings first
        status = load.mapping(inta_mapping, "publication", analyzer=Loader.KEYWORD_ANALYZER, **options)
        self.assertTrue(status, "mapping interactions")
        status = load.mapping(gene_mapping, "gene", analyzer=Loader.KEYWORD_ANALYZER, **options)
        self.assertTrue(status, "mapping genes")

        ''' load docs and test has parent query'''
        json_data = '{"index": {"_index": "%s", "_type": "gene", "_id" : "1"}}\n' % idx
        json_data += json.dumps({"symbol": "PAX1"}) + '\n'
        json_data += '{"index": {"_index": "%s", "_type": "publication", "_id" : "2", "parent": "1"}}\n' % idx
        json_data += json.dumps({"pubmed": 1234}) + '\n'
        Bulk.load(idx, '', json_data)
        Search.index_refresh(idx)
        query = ElasticQuery.has_parent('gene', Query.match('symbol', 'PAX1'))
        elastic = Search(query, idx=idx, idx_type='publication', size=500)
        docs = elastic.search().docs
        self.assertEqual(len(docs), 1)
        self.assertEqual(getattr(docs[0], 'pubmed'), 1234)
        self.assertEqual(docs[0].parent(), '1')
        self.assertRaises(QueryError, ElasticQuery.has_parent, 'gene', 'xxxxx')

        ''' test has child query '''
        query = ElasticQuery.has_child('publication', Query.match('pubmed', 1234))
        elastic = Search(query, idx=idx, idx_type='gene', size=500)
        docs = elastic.search().docs
        self.assertEqual(len(docs), 1)
        self.assertEqual(getattr(docs[0], 'symbol'), 'PAX1')
        self.assertEqual(docs[0].parent(), None)
        requests.delete(ElasticSettings.url() + '/' + idx)
Example #23
 def show(cls, repo, snapshots, all_repos):
     ''' Show the information for the named snapshots. '''
     if all_repos:
         repo = ''
         snapshots = ''
     url = ElasticSettings.url() + '/_snapshot/' + repo + '/' + snapshots
     resp = requests.get(url)
     if resp.status_code != 200:
         logger.error("Returned status (for " + url + "): " +
                      str(resp.status_code))
         logger.error(resp.json()["error"])
         return False
     print(json.dumps(resp.json(), indent=4))
     return True
Example #24
 def get_mapping(self, mapping_type=None):
     ''' Return the mappings for an index (host:port/{index}/_mapping/{type}). '''
     self.mapping_url = (self.idx + '/_mapping')
     if mapping_type is not None:
         self.mapping_url += '/'+mapping_type
     elif self.idx_type is not None:
         self.mapping_url += '/'+self.idx_type
     response = Search.elastic_request(ElasticSettings.url(), self.mapping_url, is_post=False)
     if response.status_code != 200:
         json_err = json.dumps({"error": response.status_code,
                                "response": response.content.decode("utf-8"),
                                "url": self.mapping_url})
         logger.warning(json_err)
         return json_err
     return response.json()
Example #25
    def get_criteria_index_types(cls, idx_key):

        idx = ElasticSettings.idx(idx_key)
        elastic_url = ElasticSettings.url()
        url = idx + '/_mappings'
        response = Search.elastic_request(elastic_url, url, is_post=False)

        if "error" in response.json():
            logger.warning(response.json())
            return None

        # get idx_types from _mapping
        elastic_mapping = json.loads(response.content.decode("utf-8"))
        idx_types = list(elastic_mapping[idx]['mappings'].keys())
        return idx_types
Example #26
    def suggest(cls, term, idx, elastic_url=None,
                name='data', field='suggest', context=None, size=5):
        ''' Auto completion suggestions for a given term. '''
        if elastic_url is None:
            elastic_url = ElasticSettings.url()

        url = idx + '/_suggest'
        suggest = {
            name: {
                "text": term,
                "completion": {
                    "field": field,
                    "size": size
                }
            }
        }
        if context is not None:
            suggest[name]['completion'].update(context)
        response = Search.elastic_request(elastic_url, url, data=json.dumps(suggest))
        logger.debug("curl -XPOST '" + elastic_url + '/' + url + "' -d '" + json.dumps(suggest) + "'")
        if response.status_code != 200:
            logger.warning("Suggester error: elastic response " + str(response.status_code) + ": " + url)
            logger.warning(response.json())
        return response.json()
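
A usage sketch, assuming the method is exposed on Search and the index has a completion-mapped field named 'suggest' (index name hypothetical). With an ES 1.x completion suggester the options sit under the chosen name key:

    resp = Search.suggest('pax', 'test_idx', name='data', field='suggest', size=5)
    options = resp['data'][0]['options']  # completion suggestions for 'pax'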
Example #27
    def create_repository(self, repo, location):
        url = ElasticSettings.url() + '/_snapshot/' + repo
        if Snapshot.exists(repo, ''):
            logger.error("Repository " + repo + " already exists!")
            return False
        parent = os.path.abspath(os.path.join(location, ".."))

        if not os.path.isdir(parent):
            logger.warning("Check directory exists: " + parent)

        data = {"type": "fs", "settings": {"location": location}}
        resp = requests.put(url, data=json.dumps(data))
        if resp.status_code != 200:
            logger.error("Status (" + url + "): " + str(resp.status_code) +
                         " :: " + str(resp.json()["error"]))
            return False
        return True
Example #28
 def test_criteria_mappings(self, idx, idx_types):
     (main_codes, other_codes) = CriteriaManager.get_available_diseases()
     site_enabled_diseases = main_codes + other_codes
     elastic_url = ElasticSettings.url()
     for idx_type in idx_types:
         url = idx + '/' + idx_type + '/_mapping'
         response = Search.elastic_request(elastic_url, url, is_post=False)
         elastic_type_mapping = json.loads(response.content.decode("utf-8"))
         property_keys = list(elastic_type_mapping[idx]['mappings'][idx_type]['properties'].keys())
         '''check if score and disease_tags and qid are there in mapping'''
         self.assertIn('score', property_keys)
         self.assertIn('disease_tags', property_keys)
         self.assertIn('qid', property_keys)
         '''check if all the enabled diseases are there'''
         for disease in site_enabled_diseases:
             self.assertIn(disease, property_keys)
    def get_models_to_delete(self):
        '''Get models to delete'''
        idx_key = 'CP_STATS_UD'
        idx = ElasticSettings.idx(idx_key)
        ''' Check if an index type exists in elastic and later check there is a contenttype/model for the given elastic index type. '''  # @IgnorePep8
        elastic_url = ElasticSettings.url()
        url = idx + '/_mapping'
        response = Search.elastic_request(elastic_url, url, is_post=False)

        if "error" in response.json():
            logger.warning(response.json())
            return None

        # get idx_types from _mapping
        elastic_mapping = json.loads(response.content.decode("utf-8"))
        # take the index name from the mapping response in case aliasing is deployed
        idx = list(elastic_mapping.keys())[0]
        idx_types = list(elastic_mapping[idx]['mappings'].keys())

        models2go = []
        expire_days = 7  # 1 week

        # add idx_types that have no documents (the _meta doc alone counts as one)
        for idx_type in idx_types:
            ndocs = Search(idx=idx, idx_type=idx_type).get_count()['count']

            if ndocs <= 1:
                models2go.append(idx_type)

            # add idx_types that were not accessed for a given time period
            url = idx + '/' + idx_type + '/_meta'
            response = Search.elastic_request(elastic_url, url, is_post=False)
            elastic_meta = json.loads(response.content.decode("utf-8"))
            if '_source' in elastic_meta:
                uploaded_str_date = elastic_meta['_source']['uploaded']
                yymmdd_str = uploaded_str_date.split()[0]
                # Format: 2015-11-03 14:43:54.099645+00:00
                from datetime import datetime as dt
                uploaded_date = dt.strptime(yymmdd_str, '%Y-%m-%d').date()

                d1 = datetime.date.today()
                d2 = d1 - datetime.timedelta(days=expire_days)
                if uploaded_date < d2:
                    models2go.append(idx_type)

        return models2go
    def get_models_to_delete(self):
        """Get models to delete"""
        idx_key = "CP_STATS_UD"
        idx = ElasticSettings.idx(idx_key)
        """ Check if an index type exists in elastic and later check there is a contenttype/model for the given elastic index type. """  # @IgnorePep8
        elastic_url = ElasticSettings.url()
        url = idx + "/_mapping"
        response = Search.elastic_request(elastic_url, url, is_post=False)

        if "error" in response.json():
            logger.warning(response.json())
            return None

        # get idx_types from _mapping
        elastic_mapping = json.loads(response.content.decode("utf-8"))
        idx_types = list(elastic_mapping[idx]["mappings"].keys())

        models2go = []
        expire_days = 7  # 1 week

        # add idx_types that have no docs
        for idx_type in idx_types:
            ndocs = Search(idx=idx, idx_type=idx_type).get_count()["count"]

            if ndocs <= 1:
                models2go.append(idx_type)

            # add idx_types that were not accessed for a given time period
            url = idx + "/" + idx_type + "/_meta"
            response = Search.elastic_request(elastic_url, url, is_post=False)
            elastic_meta = json.loads(response.content.decode("utf-8"))
            if "_source" in elastic_meta:
                uploaded_str_date = elastic_meta["_source"]["uploaded"]
                yymmdd_str = uploaded_str_date.split()[0]
                # Format: 2015-11-03 14:43:54.099645+00:00
                from datetime import datetime as dt

                uploaded_date = dt.strptime(yymmdd_str, "%Y-%m-%d").date()

                d1 = datetime.date.today()
                d2 = d1 - datetime.timedelta(days=expire_days)
                if uploaded_date < d2:
                    models2go.append(idx_type)

        return models2go
Example #31
 def setupIdx(cls, idx_name_arr):
     ''' Setup indices in the given array of key names (e.g. ['GENE', 'DISEASE', ...]). '''
     idx_settings = {
         "settings": {
             "analysis": {
                 "analyzer": {
                     "full_name": {"filter": ["standard", "lowercase"], "tokenizer": "keyword"}}
             },
             "number_of_shards": 1
         }
     }
     IDX = PydginTestSettings.IDX
     for name in idx_name_arr:
         requests.put(ElasticSettings.url() + '/' + IDX[name]['indexName'], data=json.dumps(idx_settings))
         call_command('index_search', **IDX[name])
     for name in idx_name_arr:
         # wait for the elastic load to finish
         Search.index_refresh(IDX[name]['indexName'])
def add_disease_locus(seqid, locus_id, regionName, disease, tier, species, weight, doc_ids):
    data = {
        "region_name": disease+" "+regionName,
        "disease": disease,
        "tier": tier,
        "species": species,
        "tags": {"weight": weight},
        "locus_id": locus_id,
        "seqid": seqid,
        "hits": doc_ids
    }
    #    "suggest": {"input": [disease+" "+regionName, regionName], "weight": weight}
    resp = requests.put(ElasticSettings.url()+'/' + idx+'/disease_locus/'+locus_id, data=json.dumps(data))
    if resp.status_code != 201:
        print(str(resp.content))
        print("Problem loading " + disease + " " + regionName)
    else:
        print("Loaded "+locus_id+" - "+regionName)
Example #33
    def create_snapshot(cls, repo, snapshot, indices):
        ''' Create a snapshot for the specified indices or all if
        indices is None. '''
        url = (ElasticSettings.url() + '/_snapshot/' + repo + '/' + snapshot +
               '?wait_for_completion=true')
        resp = requests.get(url)
        if resp.status_code == 200:
            logger.error("Snapshot " + snapshot + " already exists!")
            return False

        data = {}
        if indices is not None:
            data = {"indices": indices}
        resp = requests.put(url, data=json.dumps(data))
        if resp.status_code != 200:
            logger.error("Snapshot " + snapshot + " create error! :: " +
                         str(resp.json()["error"]))
            return False
        return True
    def get_context_models_to_delete(self, *args, **options):
        '''Get models to delete'''
        ct = options['content_type']
        retDict = dict()
        retDict['acknowledged'] = 0
        logger.debug(ct)
        idx_key = 'CP_STATS_UD'
        idx = ElasticSettings.idx(idx_key)
        ''' Check if an index type exists in elastic and later check there is a contenttype/model for the given elastic index type. '''  # @IgnorePep8
        elastic_url = ElasticSettings.url()
        url = idx + '/_mapping'
        response = Search.elastic_request(elastic_url, url, is_post=False)
        if "error" in response.json():
            logger.warning(response.json())
            retDict['errorMsg'] = response.json()
            self.stdout.write(json.dumps(retDict))
            return

        # get idx_types from _mapping
        elastic_mapping = json.loads(response.content.decode("utf-8"))
        # take the index name from the mapping response in case aliasing is deployed
        idx = list(elastic_mapping.keys())[0]
        idx_types = list(elastic_mapping[idx]['mappings'].keys())
        logger.debug(idx_types)

        # find the idx_type matching the given content type
        for idx_type in idx_types:
            if idx_type != ct:
                continue
            logger.debug("Found " + idx_type + " equal to " + ct)
            ndocs = Search(idx=idx, idx_type=idx_type).get_count()['count']
            # logger.debug(Search(idx=idx, idx_type=idx_type).get_json_response())
            logger.debug("We have " + str(ndocs) + " docs")
            if ndocs > 0:
                for cnt in ContentType.objects.filter():
                    if str(cnt.name).endswith(ct + '_idx_type'):
                        logger.debug(
                            'Matched, finding permissions for %s  %s' %
                            (str(cnt.name), str(cnt.id)))
                        logger.debug("deleting %s" % ct)
                        cnt.delete()
        retDict['acknowledged'] = 1
        #logger.debug(retDict)
        self.stdout.write(json.dumps(retDict))
 def add_arguments(self, parser):
     parser.add_argument('snapshot', type=str, help='Snapshot to restore.')
     parser.add_argument('--url',
                         dest='url',
                         default=ElasticSettings.url(),
                         metavar="ELASTIC_URL",
                         help='Elastic URL to restore to.')
     parser.add_argument('--repo',
                         dest='repo',
                         default=ElasticSettings.getattr('REPOSITORY'),
                         metavar=ElasticSettings.getattr('REPOSITORY'),
                         help='Repository name')
      parser.add_argument('--indices',
                          dest='indices',
                          default=None,
                          metavar="idx1,idx2",
                          help='Indices (comma separated) to be restored from a snapshot (default all).')
 def test_server(self):
     ''' Test elasticsearch server is running and status '''
     try:
         url = ElasticSettings.url() + '/_cluster/health/'
         resp = requests.get(url)
         self.assertEqual(resp.status_code, 200, "Health page status code")
         if resp.json()['status'] == 'red':  # allow status to recover if necessary
             for _ in range(3):
                 time.sleep(1)
                 resp = requests.get(url)
                 if resp.json()['status'] != 'red':
                     break
         self.assertFalse(resp.json()['status'] == 'red', 'Health report - red')
     except requests.exceptions.Timeout:
         self.assertTrue(False, 'timeout exception')
     except requests.exceptions.TooManyRedirects:
         self.assertTrue(False, 'too many redirects exception')
     except requests.exceptions.ConnectionError:
         self.assertTrue(False, 'request connection exception')
     except requests.exceptions.RequestException:
         self.assertTrue(False, 'request exception')
    def get_elastic_settings_with_user_uploads(cls, elastic_dict=None):
        '''Get the updated elastic settings with user uploaded idx_types'''

        idx_key = 'CP_STATS_UD'
        idx = ElasticSettings.idx(idx_key)

        ''' Check if an index exists. '''
        elastic_url = ElasticSettings.url()
        url = idx + '/_mapping'
        response = Search.elastic_request(elastic_url, url, is_post=False)

        if "error" in response.json():
            logger.warning(response.json())
            return None

        # get idx_types from _mapping
        elastic_mapping = json.loads(response.content.decode("utf-8"))
        idx_types = list(elastic_mapping[idx]['mappings'].keys())

        if elastic_dict is None:
            elastic_dict = ElasticSettings.attrs().get('IDX')

        idx_type_dict = {}

        for idx_type in idx_types:

            meta_url = idx + '/' + idx_type + '/_meta/_source'
            meta_response = Search.elastic_request(elastic_url, meta_url, is_post=False)

            try:
                elastic_meta = json.loads(meta_response.content.decode("utf-8"))
                label = elastic_meta['label']
            except (ValueError, KeyError):
                label = "UD-" + idx_type

            idx_type_dict['UD-' + idx_type.upper()] = {'label': label, 'type': idx_type}

        elastic_dict['CP_STATS_UD']['idx_type'] = idx_type_dict
        return elastic_dict
Example #38
    def test_create_restore_delete_snapshot(self):
        self.wait_for_running_snapshot()
        snapshot = 'test_' + ElasticSettings.getattr('TEST')
        repo = SnapshotTest.TEST_REPO

        # create a snapshot
        call_command('snapshot',
                     snapshot,
                     indices=IDX['MARKER']['indexName'],
                     repo=repo)
        Snapshot.wait_for_snapshot(repo, snapshot)
        self.assertTrue(Snapshot.exists(repo, snapshot),
                        "Created snapshot " + snapshot)
        # snapshot already exist so return false
        self.assertFalse(
            Snapshot.create_snapshot(repo, snapshot,
                                     IDX['MARKER']['indexName']))

        # delete index
        requests.delete(ElasticSettings.url() + '/' +
                        IDX['MARKER']['indexName'])
        self.assertFalse(Search.index_exists(IDX['MARKER']['indexName']),
                         "Removed index")
        # restore from snapshot
        call_command('restore_snapshot',
                     snapshot,
                     repo=repo,
                     indices=IDX['MARKER']['indexName'])
        Search.index_refresh(IDX['MARKER']['indexName'])
        self.assertTrue(Search.index_exists(IDX['MARKER']['indexName']),
                        "Restored index exists")

        # remove snapshot
        call_command('snapshot', snapshot, delete=True, repo=repo)
        Snapshot.wait_for_snapshot(repo, snapshot, delete=True, count=10)
        self.assertFalse(Snapshot.exists(repo, snapshot),
                         "Deleted snapshot " + snapshot)
Example #39
 def idx(cls, disease_f, idx, idx_type):
     ''' Parse and load disease data. '''
     for line in disease_f:
         line = line.strip()
         if line.startswith("#"):
             continue
         parts = re.split('\t', line)
         data = {
             "name": parts[0],
             "code": parts[2].lower(),
             "description": parts[1],
             "colour": parts[3],
             "tier": int(parts[4])
         }
         data['suggest'] = {}
         data['suggest']["input"] = [parts[2].lower(), parts[0]]
         data['suggest']["weight"] = 250
         resp = requests.put(ElasticSettings.url()+'/' +
                             idx+'/'+idx_type+'/'+parts[2].lower(),
                             data=json.dumps(data))
         if resp.status_code == 201:
             logger.debug("Loaded "+parts[0])
         else:
             logger.error("Problem loading "+parts[0])
Example #40
def tearDownModule():
    ''' Remove loaded test indices and test repository. '''
    for key in IDX:
        requests.delete(ElasticSettings.url() + '/' + IDX[key]['indexName'])
    call_command('repository', SnapshotTest.TEST_REPO, delete=True)
def tearDownModule():
    ''' Remove test indices '''
    requests.delete(ElasticSettings.url() + '/' + IDX['MARKER']['indexName'])
Example #42
def chicpeaDeleteUD(request, url):
    queryDict = request.POST
    idx_type = queryDict.get("userDataIdx")
    idx = getattr(chicp_settings, 'CHICP_IDX').get('userdata').get('INDEX')
    output = subprocess.check_output("curl -XDELETE '"+ElasticSettings.url()+"/"+idx+"/"+idx_type+"'", shell=True)
    return HttpResponse(output, content_type="application/json")
Example #43
from elastic.elastic_settings import ElasticSettings

OVERRIDE_SETTINGS_CHICP = \
    {'default': {
        'ELASTIC_URL': ElasticSettings.url(),
        'IDX': {
            'CP_STATS_UD': {
                'name': 'cp:hg19_userdata_bed',
                'label': 'User Data',
                'idx_type': {},
                },
            'CP_STATS_IC': {
                'name': 'cp:hg19_immunochip_bed',
                'label': 'ImmunoChip',
                'idx_type': {
                    'IC-ATD_COOPER': {'label': "ATD - Cooper et al.", 'type': 'atd_cooper', 'auth_public': True},
                    'IC-CEL_TRYNKA': {'label': "CEL - Trynka et al.", 'type': 'cel_trynka', 'auth_public': True},
                    'IC-JIA_HINKS_UK': {'label': "JIA - Hinks et al. UK", 'type': 'jia_hinks_uk'},
                    'IC-MS_IMSGC': {'label': "MS - IMSGC et al.", 'type': 'ms_imsgc'},
                    'IC-NAR_FARACO': {'label': "NAR - Faraco et al.", 'type': 'nar_faraco'},
                    'IC-PBC_LIU': {'label': "PBC - Liu et al.", 'type': 'pbc_liu', 'auth_public': True},
                    'IC-RA_EYRE': {'label': "RA - Eyre et al.", 'type': 'ra_eyre', 'auth_public': True},
                    'IC-T1D_ONENGUT': {'label': 'T1D - Onengut et al.', 'type': 't1d_onengut', 'auth_public': True},
                },
                'auth_public': True,
            },
            'CP_STATS_GWAS': {
                'name': 'cp:hg19_gwas_bed',
                'label': 'GWAS Statistic',
                'idx_type': {
                    'GWAS-DUBOIS': {'label': 'CEL - Dubois et al.', 'type': 'cel_dubois',
Example #44
 def test_settings(self):
     ''' Test elastic server is running. '''
     resp = requests.get(ElasticSettings.url())
     self.assertEqual(resp.status_code, 200)
    def marker_is_gwas_significant_in_ic(cls, hit, section=None, config=None, result_container=None):
        """
        Test if a marker hit is genome-wide significant; equivalent to:
        /hg38_gwas_statistics,hg38_ic_statistics/_search?pretty -d '{"query":{"range":{"p_value":{"lt": 0.00000005}}}}'
        """
        if result_container is None:  # avoid a shared mutable default argument
            result_container = {}

        gw_sig_p = 0.00000005
        feature_doc = hit["_source"]
        feature_doc["_id"] = hit["_id"]

        idx = hit["_index"]
        idx_type = hit["_type"]

        # get meta data
        # studyid and diseaes
        elastic_url = ElasticSettings.url()
        meta_url = idx + "/" + idx_type + "/_mapping"
        meta_response = Search.elastic_request(elastic_url, meta_url, is_post=False)

        try:
            elastic_meta = json.loads(meta_response.content.decode("utf-8"))
            meta_info = elastic_meta[idx]["mappings"][idx_type]["_meta"]
            disease = meta_info["disease"]
            dil_study_id = meta_info["study"]
        except (ValueError, KeyError):
            disease = None
            dil_study_id = None

        marker = None
        if "marker" in feature_doc:
            marker = feature_doc["marker"]

        if marker is None or disease is None:
            return result_container

        p_val = feature_doc["p_value"]
        if p_val is None:
            return result_container
        global counter
        counter += 1

        p_val_to_compare = float(p_val)
        if p_val_to_compare < gw_sig_p:
            if dil_study_id is None or dil_study_id == "None":
                first_author = "NA"
                dil_study_id = "NA"
            else:
                query = ElasticQuery(Query.ids([dil_study_id]))
                elastic = Search(search_query=query, idx=ElasticSettings.idx("STUDY", "STUDY"), size=1)
                study_doc = elastic.search().docs[0]
                author = getattr(study_doc, "authors")[0]
                first_author = author["name"] + " " + author["initials"]

            fnotes = {
                "linkdata": "pval",
                "linkvalue": p_val_to_compare,
                "linkid": dil_study_id,
                "linkname": first_author,
            }
            result_container_populated = cls.populate_container(
                dil_study_id,
                first_author,
                fnotes=fnotes,
                features=[marker],
                diseases=[disease],
                result_container=result_container,
            )
            return result_container_populated
        else:
            return result_container
Example #46
    def filter_queryset(self, request, queryset, view):
        ''' Get disease regions. '''
        try:
            filterable = getattr(view, 'filter_fields', [])
            filters = dict([(k, v) for k, v in request.GET.items() if k in filterable])
            dis = filters.get('disease', 'T1D')
            show_genes = filters.get('genes', False)
            show_markers = filters.get('markers', False)
            show_regions = filters.get('regions', True)

            build = self._get_build(filters.get('build', settings.DEFAULT_BUILD))
            docs = DiseaseLocusDocument.get_disease_loci_docs(dis)
            if len(docs) == 0:
                messages.error(request, 'No regions found for '+dis+'.')

            visible_hits = DiseaseLocusDocument.get_hits([h for r in docs for h in getattr(r, 'hits')])
            regions = []
            all_markers = []
            all_genes = []
            ens_all_cand_genes = []
            for r in docs:
                region = r.get_disease_region(visible_hits, build=build)
                if region is not None:
                    ens_all_cand_genes.extend(region['ens_cand_genes'])
                    all_markers.extend(region['markers'])
                    region['hits'] = [self._study_hit_obj(s, region) for s in
                                      StudyHitDocument.process_hits(r.hit_docs, region['all_diseases'])]

                    (all_coding, all_non_coding) = views.get_genes_for_region(getattr(r, "seqid"),
                                                                              region['rstart']-500000,
                                                                              region['rstop']+500000)
                    (region_coding, coding_up, coding_down) = views._region_up_down(all_coding, region['rstart'],
                                                                                    region['rstop'])
                    (region_non_coding, non_coding_up, non_coding_down) = \
                        views._region_up_down(all_non_coding, region['rstart'], region['rstop'])
                    region['genes'] = {
                        'upstream': {'coding': [g.doc_id() for g in coding_up],
                                     'non_coding': [g.doc_id() for g in non_coding_up]},
                        'region': {'coding': [g.doc_id() for g in region_coding],
                                   'non_coding': [g.doc_id() for g in region_non_coding]},
                        'downstream': {'coding': [g.doc_id() for g in coding_down],
                                       'non_coding': [g.doc_id() for g in non_coding_down]},
                    }
                    all_genes.extend(region['genes']['region']['coding'])
                    all_genes.extend(region['genes']['region']['non_coding'])
                    regions.append(region)

            # look for pleiotropy by looking for diseases for the markers in IC_STATS and other study hits
            stats_query = ElasticQuery.filtered(Query.terms("marker", all_markers),
                                                Filter(RangeQuery("p_value", lte=5E-08)))
            stats_docs = Search(stats_query, idx=ElasticSettings.idx("IC_STATS"), size=len(all_markers)).search().docs
            meta_response = Search.elastic_request(ElasticSettings.url(), ElasticSettings.idx("IC_STATS") + '/_mapping',
                                                   is_post=False)
            # get ensembl to gene symbol mapping for all candidate genes
            extra_markers = []
            for region in regions:
                # add diseases from IC/GWAS stats
                (study_ids, region['marker_stats']) = views._process_stats(stats_docs, region['markers'], meta_response)
                region['all_diseases'].extend([getattr(mstat, 'disease') for mstat in region['marker_stats']])

                other_hits_query = ElasticQuery(
                        BoolQuery(must_arr=[RangeQuery("tier", lte=2), Query.terms("marker", region['markers'])],
                                  must_not_arr=[Query.terms("dil_study_id", study_ids)]))
                other_hits = Search(other_hits_query, idx=ElasticSettings.idx('REGION', 'STUDY_HITS'),
                                    size=100).search()
                region['extra_markers'] = [self._study_hit_obj(s, region) for s in
                                           StudyHitDocument.process_hits(other_hits.docs, region['all_diseases'])]
                region['all_diseases'] = list(set(region['all_diseases']))
                extra_markers.extend([m['marker_id'] for m in region['extra_markers']])

            # get markers
            marker_objs = []
            if show_markers:
                query = ElasticQuery(Query.terms("id", all_markers), sources=['id', 'start'])
                marker_docs = Search(search_query=query, idx=ElasticSettings.idx('MARKER', 'MARKER'),
                                     size=len(all_markers)).search().docs
                mids = {getattr(m, 'id'): getattr(m, 'start') for m in marker_docs}
                marker_objs = [h for r in regions for h in r['hits']]
                marker_objs.extend([h for r in regions for h in r['extra_markers']])
                for m in marker_objs:
                    m['start'] = mids[m['marker_id']]

            # get genes
            gene_objs = []
            if show_genes:
                all_genes.extend(ens_all_cand_genes)
                gene_docs = GeneDocument.get_genes(all_genes, sources=['start', 'stop', 'chromosome',
                                                                       'symbol', 'biotype'])
                for doc in Document.sorted_alphanum(gene_docs, 'chromosome'):
                    ensembl_id = doc.doc_id()
                    region_name = ''
                    candidate_gene = 0
                    for region in regions:
                        if ('genes' in region and
                            (ensembl_id in region['genes']['region']['coding'] or
                             ensembl_id in region['genes']['region']['non_coding'] or
                             ensembl_id in region['ens_cand_genes'])):
                            region_name = region['region_name']
                            candidate_gene = 1 if ensembl_id in region['ens_cand_genes'] else 0
                            break
                    gene_objs.append({
                        'ensembl_id': ensembl_id,
                        'seqid': 'chr'+getattr(doc, 'chromosome'),
                        'start': getattr(doc, 'start'),
                        'end': getattr(doc, 'stop'),
                        'symbol': getattr(doc, 'symbol'),
                        'biotype': getattr(doc, 'biotype'),
                        'region_name': region_name,
                        'candidate_gene': candidate_gene
                    })
            if show_regions == 'false':
                regions = []
            regions.extend(gene_objs)
            regions.extend(marker_objs)
            return regions
        except (TypeError, ValueError, IndexError, ConnectionError) as e:
            print(e)
            raise Http404
Example #47
    def get_disease(cls, request, disease, context):
        if disease is None:
            messages.error(request, 'No disease given.')
            raise Http404()
        disease = disease.lower()
        query = ElasticQuery(Query.terms("code", disease.split(',')))
        elastic = Search(query, idx=ElasticSettings.idx('DISEASE', 'DISEASE'), size=5)
        res = elastic.search()
        if res.hits_total == 0:
            messages.error(request, 'Disease(s) '+disease+' not found.')
        elif res.hits_total < 9:
            disease_docs = res.docs
            names = ', '.join([getattr(doc, 'name') for doc in disease_docs])

            meta_response = Search.elastic_request(ElasticSettings.url(), ElasticSettings.idx("IC_STATS") + '/_mapping',
                                                   is_post=False)
            elastic_meta = json.loads(meta_response.content.decode("utf-8"))
            for dis in disease_docs:
                dis_code = getattr(dis, 'code').upper()
                docs = DiseaseLocusDocument.get_disease_loci_docs(dis_code)
                regions = []
                ens_all_cand_genes = []
                all_markers = []
                for r in docs:
                    region = r.get_disease_region()
                    if region is not None:
                        regions.append(region)
                        ens_all_cand_genes.extend(region['ens_cand_genes'])
                        all_markers.extend(region['markers'])

                # get ensembl to gene symbol mapping for all candidate genes
                all_cand_genes = gene.utils.get_gene_docs_by_ensembl_id(ens_all_cand_genes)
                for region in regions:
                    region['cand_genes'] = {cg: all_cand_genes[cg] for cg in region.pop("ens_cand_genes", None)}
                setattr(dis, 'regions', regions)

                # look for pleiotropy by looking for diseases for the markers in IC_STATS and other study hits
                stats_query = ElasticQuery.filtered(Query.terms("marker", all_markers),
                                                    Filter(RangeQuery("p_value", lte=5E-08)), sources=['marker'])
                stats_docs = Search(stats_query, idx=ElasticSettings.idx("IC_STATS"),
                                    size=len(all_markers)).search().docs

                other_hits_query = ElasticQuery(
                        BoolQuery(must_arr=[RangeQuery("tier", lte=2), Query.terms("marker", all_markers)]),
                        sources=['marker', 'disease'])
                other_hits = Search(other_hits_query, idx=ElasticSettings.idx('REGION', 'STUDY_HITS'),
                                    size=5000).search().docs

                for region in regions:
                    diseases = [dis_code]
                    for doc in stats_docs:
                        if getattr(doc, 'marker') in region['markers']:
                            meta_info = elastic_meta[doc.index()]['mappings'][doc.type()]['_meta']
                            if meta_info['disease'] not in diseases:
                                diseases.append(meta_info['disease'])

                    for doc in other_hits:
                        if getattr(doc, 'marker') in region['markers']:
                            if doc.disease is not None and doc.disease not in diseases:
                                diseases.append(doc.disease)
                    region['diseases'] = diseases

                studies = StudyDocument.get_studies(disease_code=dis_code)
                for doc in studies:
                    setattr(doc, 'study_id', getattr(doc, 'study_id').replace('GDXHsS00', ''))
                    pmid = getattr(doc, 'principal_paper')
                    pubs = PublicationDocument.get_publications(pmid, sources=['date', 'authors.name', 'journal'])
                    if len(pubs) > 0:
                        authors = getattr(pubs[0], 'authors')
                        setattr(doc, 'date', getattr(pubs[0], 'date'))
                        setattr(doc, 'journal', getattr(pubs[0], 'journal'))
                        setattr(doc, 'author', authors[0]['name'].rsplit(None, 1)[-1] if authors else "")
                setattr(dis, 'studies', studies)

            context['features'] = disease_docs
            context['title'] = names
            return context
        raise Http404()
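
For context, a minimal sketch of how a classmethod like get_disease might be wired into a Django view; the view class, owning page class (DiseasePage) and template path are illustrative assumptions, not part of the original code.

from django.views.generic import TemplateView

class DiseaseView(TemplateView):
    ''' Hypothetical view delegating to get_disease. '''
    template_name = 'disease/index.html'  # assumed template path

    def get_context_data(self, **kwargs):
        context = super().get_context_data(**kwargs)
        # 'disease' arrives from the URLconf as a comma-separated list of codes.
        return DiseasePage.get_disease(self.request, kwargs.get('disease'), context)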
Example #48
    def tearDownIdx(cls, idx_name_arr):
        ''' Remove indices by their key names (e.g. ['GENE', 'DISEASE', ...]). '''
        for name in idx_name_arr:
            requests.delete(ElasticSettings.url() + '/' + PydginTestSettings.IDX[name]['indexName'])
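
A usage sketch, assuming a test class exposing tearDownIdx and a PydginTestSettings.IDX that defines the listed keys:

# Hypothetical call: drop the indices created for the MARKER and DISEASE fixtures.
SomeTestCase.tearDownIdx(['MARKER', 'DISEASE'])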
Example #49
    def get_regions(cls, request, dis, context):
        # is_authenticated = False
        elastic_url = ElasticSettings.url()

        (core, other) = Disease.get_site_diseases(dis_list=dis.upper().split(','))
        if len(core) == 0 and len(other) == 0:
            messages.error(request, 'Disease '+dis+' not found.')
            raise Http404()

        disease = core[0] if len(core) > 0 else other[0]
        context['title'] = getattr(disease, "name")+" Regions"

        docs = DiseaseLocusDocument.get_disease_loci_docs(dis)
        if len(docs) == 0:
            messages.error(request, 'No regions found for '+dis+'.')
            raise Http404()

        visible_hits = DiseaseLocusDocument.get_hits([h for r in docs for h in getattr(r, 'hits')])
        meta_response = Search.elastic_request(elastic_url, ElasticSettings.idx("IC_STATS") + '/_mapping',
                                               is_post=False)
        regions = []
        ens_all_cand_genes = []
        all_markers = []
        for r in docs:
            region = r.get_disease_region(visible_hits)
            if region is not None:
                ens_all_cand_genes.extend(region['ens_cand_genes'])
                all_markers.extend(region['markers'])
                region['hits'] = StudyHitDocument.process_hits(r.hit_docs, region['all_diseases'])

                (all_coding, all_non_coding) = get_genes_for_region(getattr(r, "seqid"),
                                                                    region['rstart']-500000, region['rstop']+500000)
                (region_coding, coding_up, coding_down) = _region_up_down(all_coding, region['rstart'], region['rstop'])
                (region_non_coding, non_coding_up, non_coding_down) = \
                    _region_up_down(all_non_coding, region['rstart'], region['rstop'])
                region['genes'] = {
                    'upstream': {'coding': coding_up, 'non_coding': non_coding_up},
                    'region': {'coding': region_coding, 'non_coding': region_non_coding},
                    'downstream': {'coding': coding_down, 'non_coding': non_coding_down},
                }
                regions.append(region)

        # look for pleiotropy by looking for diseases for the markers in IC_STATS and other study hits
        stats_query = ElasticQuery.filtered(Query.terms("marker", all_markers),
                                            Filter(RangeQuery("p_value", lte=5E-08)))
        stats_docs = Search(stats_query, idx=ElasticSettings.idx("IC_STATS"), size=len(all_markers)).search().docs

        # get ensembl to gene symbol mapping for all candidate genes
        all_cand_genes = gene.utils.get_gene_docs_by_ensembl_id(ens_all_cand_genes)
        for region in regions:
            region['cand_genes'] = {cg: all_cand_genes[cg] for cg in region.pop("ens_cand_genes", [])}
            (study_ids, region['marker_stats']) = _process_stats(stats_docs, region['markers'], meta_response)

            # add diseases from IC/GWAS stats
            region['all_diseases'].extend([getattr(mstat, 'disease') for mstat in region['marker_stats']])

            other_hits_query = ElasticQuery(
                        BoolQuery(must_arr=[RangeQuery("tier", lte=2), Query.terms("marker", region['markers'])],
                                  must_not_arr=[Query.terms("dil_study_id", study_ids)]))
            other_hits = Search(other_hits_query, idx=ElasticSettings.idx('REGION', 'STUDY_HITS'), size=100).search()
            region['extra_markers'] = StudyHitDocument.process_hits(other_hits.docs, region['all_diseases'])

        context['regions'] = regions
        context['disease_code'] = [dis]
        context['disease'] = getattr(disease, "name")
        return context
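
_region_up_down is not shown in these examples. Judging from the call above, it partitions genes into in-region, upstream and downstream buckets around (rstart, rstop); a minimal sketch under that assumption, with guessed attribute names:

def _region_up_down(genes, rstart, rstop):
    ''' Hypothetical reimplementation: split gene docs into in-region,
        upstream and downstream lists by their coordinates. '''
    in_region, up, down = [], [], []
    for doc in genes:
        if getattr(doc, 'stop') < rstart:    # 'start'/'stop' names are assumptions
            up.append(doc)
        elif getattr(doc, 'start') > rstop:
            down.append(doc)
        else:
            in_region.append(doc)
    return (in_region, up, down)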
    def get_elastic_settings_with_user_uploads(cls,
                                               elastic_dict=None,
                                               new_upload_file=None):
        '''Get the updated elastic settings with user-uploaded idx_types.'''

        idx_key = 'CP_STATS_UD'
        idx = ElasticSettings.idx(idx_key)
        # Check if an index type exists in elastic; later, check there is a
        # contenttype/model for the given elastic index type.
        elastic_url = ElasticSettings.url()
        url = idx + '/_mapping'
        response = Search.elastic_request(elastic_url, url, is_post=False)
        # Note: Search.get_mapping is not used here, presumably because it is
        # not a classmethod.
        # logger.debug(response.json())
        if "error" in response.json():
            logger.warning(response.json())
            return None

        # get idx_types from _mapping
        elastic_mapping = json.loads(response.content.decode("utf-8"))
        # If aliases are deployed, the index name in the _mapping response can
        # differ from the configured (effectively hardcoded) name, so take the
        # name from the mapping response itself.
        idx = list(elastic_mapping.keys())[0]
        idx_types = list(elastic_mapping[idx]['mappings'].keys())

        if elastic_dict is None:
            elastic_dict = ElasticSettings.attrs().get('IDX')

        idx_type_dict = {}

        existing_ct = [
            ct.name for ct in ContentType.objects.filter(
                app_label=cls.PERMISSION_MODEL_APP_NAME)
        ]

        for idx_type in idx_types:

            idx_type_with_suffix = idx_type + cls.PERMISSION_MODEL_TYPE_SUFFIX

            for ct in existing_ct:
                if ct.endswith(idx_type_with_suffix):

                    meta_url = idx + '/' + idx_type + '/_meta/_source'
                    meta_response = Search.elastic_request(elastic_url,
                                                           meta_url,
                                                           is_post=False)

                    try:
                        elastic_meta = json.loads(
                            meta_response.content.decode("utf-8"))
                        label = elastic_meta['label']
                    except (ValueError, KeyError):
                        label = "UD-" + idx_type

                    idx_type_dict['UD-' + idx_type.upper()] = {
                        'label': label,
                        'type': idx_type
                    }

        if new_upload_file is not None:
            idx_type = new_upload_file
            label = "UD-" + idx_type
            idx_type_dict['UD-' + idx_type.upper()] = {
                'label': label,
                'type': idx_type
            }

        elastic_dict['CP_STATS_UD']['idx_type'] = idx_type_dict
        return elastic_dict
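
A usage sketch, assuming the owning factory class (called ElasticFactory here for illustration) is importable and the CP_STATS_UD index exists:

# List the user-uploaded index types discovered from the _mapping response.
elastic_dict = ElasticFactory.get_elastic_settings_with_user_uploads()
if elastic_dict is not None:
    for key, ud_type in elastic_dict['CP_STATS_UD']['idx_type'].items():
        print(key, ud_type['label'], ud_type['type'])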
            )
        elastic2 = Search(search_query=query2, idx=idx, idx_type='hits',
                          size=int(disease_bucket['doc_count']+1), qsort=Sort(buildSort))
        results = elastic2.search()
        minPos = 0
        maxPos = 0
        tier = 4
        weight = 0
        regionCount = 1
        regionName = ''
        species = ''
        doc_ids = []
        if len(results.docs) > 0:
            for doc in results.docs:
                # print(doc)
                os.system("curl -XPOST '"+ElasticSettings.url()+"/"+idx+"/hits/" + doc.doc_id() +
                          "/_update?pretty' -d '{\"doc\": {\"disease_locus\": \"TBC\"}}' > /dev/null 2>&1")
                build_info = None
                for b in getattr(doc, 'build_info'):
                    if b['build'] == build:
                        build_info = b
                if build_info is None:
                    print("ERROR - no build information found for b"+str(build))
                    continue

                # print(getattr(doc, "disease")+"\t"+getattr(doc, "marker")+"\t" + getattr(doc, "chr_band") + "\t" +
                #      build_info['seqid'] + "\t" + str(build_info['start']) + "\t" + str(build_info['end']))
                if minPos == 0 and maxPos == 0:
                    minPos = build_info['start']
                    maxPos = build_info['end']
def tearDownModule():
    ''' Remove test indices '''
    requests.delete(ElasticSettings.url() + '/' + IDX['MARKER']['indexName'])
    requests.delete(ElasticSettings.url() + '/' + IDX['GFF_GENERIC']['indexName'])
    requests.delete(ElasticSettings.url() + '/' + IDX['JSON_NESTED']['indexName'])
Example #53
    def process_criteria(cls, feature, section, config, sub_class, test=False):
        ''' Top-level function that calls the right criteria implementation based on the subclass
            passed. Iterates over all the documents using ScanAndScroll; the hits are processed by
            the inner function process_hits. The entire result is stored in result_container (a
            dict) and, at the end of processing, is loaded into the elastic index after creating
            the mapping.
        @type  feature: string
        @param feature: feature type, e.g. 'gene', 'region', 'marker'.
        @type  section: string
        @param section: The section in the criteria.ini file.
        @type  config: object
        @param config: The config object initialized from criteria.ini.
        @type  sub_class: class
        @param sub_class: The inherited sub_class where the actual implementation is.
        '''
        global gl_result_container
        gl_result_container = {}
        test_mode = test
        if config is None:
            if test_mode:
                config = CriteriaManager().get_criteria_config(ini_file='test_criteria.ini')
            else:
                config = CriteriaManager().get_criteria_config(ini_file='criteria.ini')

        section_config = config[section]
        source_idx = section_config['source_idx']

        if ',' in source_idx:
            idxs = source_idx.split(',')
            idx_all = [ElasticSettings.idx(idx) for idx in idxs]
            source_idx = ','.join(idx_all)
        else:
            source_idx = ElasticSettings.idx(section_config['source_idx'])

        if 'source_idx_type' in section_config:
            source_idx_type = section_config['source_idx_type']
            source_idx = ElasticSettings.idx(section_config['source_idx'], idx_type=source_idx_type)
        else:
            source_idx_type = ''

        logger.warning(source_idx + ' ' + source_idx_type)

        def process_hits(resp_json):
            global gl_result_container
            hits = resp_json['hits']['hits']
            global hit_counter
            for hit in hits:
                hit_counter += 1

                result_container = sub_class.tag_feature_to_disease(hit, section, config,
                                                                    result_container=gl_result_container)
                gl_result_container = result_container

                if test_mode:
                    if gl_result_container is not None and len(gl_result_container) > 5:
                        return

        query = cls.get_elastic_query(section, config)

        if test_mode:
            result_size = len(gl_result_container)
            from_ = 0
            size_ = 20
            while result_size < 1:
                from_ = from_ + size_
                url = ElasticSettings.url()
                if 'mhc' in section:
                    url_search = (source_idx + '/_search')
                else:
                    url_search = (source_idx + '/_search?from=' + str(from_) + '&size=' + str(size_))

                if query is None:
                    query = {"query": {"match_all": {}}, "size": 20}
                    response = Search.elastic_request(url, url_search, data=json.dumps(query))
                    query = None
                else:
                    # print(query)
                    response = Search.elastic_request(url, url_search, data=json.dumps(query.query))

                process_hits(response.json())
                if gl_result_container is not None:
                    result_size = len(gl_result_container)
        else:
            ScanAndScroll.scan_and_scroll(source_idx, call_fun=process_hits, query=query)

        cls.map_and_load(feature, section, config, gl_result_container)
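
To make the flow concrete, a rough sketch of a sub_class; the signature of tag_feature_to_disease matches the call above, but the per-hit logic and config layout are assumptions:

class GeneCriteria(object):  # hypothetical subclass
    @classmethod
    def tag_feature_to_disease(cls, hit, section, config, result_container=None):
        ''' Tag the feature in this hit with the disease configured for the
            section, accumulating results by feature id (assumed scheme). '''
        result_container = result_container if result_container is not None else {}
        feature_id = hit['_id']
        disease = config[section].get('disease', section)  # assumed config layout
        diseases = result_container.setdefault(feature_id, [])
        if disease not in diseases:
            diseases.append(disease)
        return result_container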
def tearDownModule():
    ''' Remove test indices '''
    requests.delete(ElasticSettings.url() + '/' + IDX['MARKER']['indexName'])
Example #55
def chicpeaDeleteUD(request, url):
    queryDict = request.POST
    idx_type = queryDict.get("userDataIdx")
    idx = ElasticSettings.idx('CP_STATS_UD')
    # NB: idx_type comes straight from the request POST data; interpolating it
    # into a shell command is vulnerable to injection (see the requests-based
    # sketch after this example).
    output = subprocess.check_output("curl -XDELETE '"+ElasticSettings.url()+"/"+idx+"/"+idx_type+"'", shell=True)
    return HttpResponse(output, content_type="application/json")
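
Because the curl command interpolates request parameters into a shell string, an equivalent that avoids the shell may be preferable; a sketch using requests (same endpoint, no subprocess):

import requests
from django.http import HttpResponse

def chicpea_delete_ud(request, url):
    ''' Hypothetical alternative to chicpeaDeleteUD: issue the DELETE
        directly with requests instead of shelling out to curl. '''
    idx_type = request.POST.get("userDataIdx")
    idx = ElasticSettings.idx('CP_STATS_UD')
    resp = requests.delete(ElasticSettings.url() + '/' + idx + '/' + idx_type)
    return HttpResponse(resp.content, content_type="application/json")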