コード例 #1
0
    def test_create_idx_type_model_permissions(self):
        ''' Register a new user-upload index type in elastic and check that it
        is listed in the settings returned by
        elastic_factory.create_idx_type_model_permissions. '''
        idx_settings_before = ElasticSettings.attrs().get('IDX')
        self.assertEqual({}, idx_settings_before['CP_STATS_UD']['idx_type'],
                         'CP_STATS_UD idx_type is empty')

        idx = "cp:hg19_userdata_bed"
        new_upload_file = "tmp_newly_uploaded_file"
        idx_type = new_upload_file

        # create a mapping for the new index type
        mapping_url = ElasticSettings.url() + "/" + idx + "/_mapping/" + idx_type
        mapping_json = ('{"' + idx_type + '":{ "properties" : {"message" : '
                        '{"type" : "string", "store" : true } } }}')
        os.system("curl -XPUT " + mapping_url + " -d '" + mapping_json + "'")

        # store the meta data document describing the upload
        meta_url = ElasticSettings.url() + "/" + idx + "/" + idx_type + "/_meta"
        meta_json = ('{"label": "' + new_upload_file +
                     '", "owner": "' + self.user.username +
                     '", "uploaded": "' + str(timezone.now()) + '"}')
        os.system("curl -XPUT " + meta_url + " -d '" + meta_json + "'")

        elastic_settings_after = elastic_factory.create_idx_type_model_permissions(
            self.user,
            indexKey='CP_STATS_UD',
            indexTypeKey='UD-' + new_upload_file.upper(),
            new_upload_file="tmp_newly_uploaded_file")

        user_types_after = elastic_settings_after['CP_STATS_UD']['idx_type']
        expected_key = 'UD-TMP_NEWLY_UPLOADED_FILE'
        self.assertTrue(len(user_types_after) > 0, "Has user idx_types ")
        self.assertTrue(expected_key in user_types_after)
        self.assertEqual(user_types_after[expected_key]['type'],
                         'tmp_newly_uploaded_file')
コード例 #2
0
    def tearDown(self):
        ''' Delete the private regions test index when it is configured. '''
        key = 'PRIVATE_REGIONS_GFF'
        if key not in IDX:
            return
        index_url = ElasticSettings.url() + '/' + IDX[key]['indexName']
        print(index_url)
        requests.delete(index_url)
コード例 #3
0
    def test_create_idx_type_model_permissions(self):
        ''' Check that a newly uploaded index type shows up in the settings
        returned by elastic_factory.create_idx_type_model_permissions. '''
        settings_before = ElasticSettings.attrs().get('IDX')
        types_before = settings_before['CP_STATS_UD']['idx_type']
        self.assertEqual({}, types_before, 'CP_STATS_UD idx_type is empty')

        idx = "cp:hg19_userdata_bed"
        new_upload_file = "tmp_newly_uploaded_file"
        idx_type = new_upload_file

        # PUT the mapping for the new index type
        put_mapping_cmd = "".join([
            "curl -XPUT ", ElasticSettings.url(), "/", idx, "/_mapping/", idx_type,
            " -d '{\"", idx_type,
            "\":{ \"properties\" : {\"message\" : {\"type\" : \"string\", \"store\" : true } } }}'",
        ])
        os.system(put_mapping_cmd)

        # PUT the meta data document of the upload
        put_meta_cmd = "".join([
            "curl -XPUT ", ElasticSettings.url(), "/", idx, "/", idx_type,
            "/_meta -d '{\"label\": \"", new_upload_file,
            "\", \"owner\": \"", self.user.username,
            "\", \"uploaded\": \"", str(timezone.now()), "\"}'",
        ])
        os.system(put_meta_cmd)

        type_key = 'UD-' + new_upload_file.upper()
        elastic_settings_after = elastic_factory.create_idx_type_model_permissions(
            self.user, indexKey='CP_STATS_UD', indexTypeKey=type_key,
            new_upload_file="tmp_newly_uploaded_file")

        user_types_after = elastic_settings_after['CP_STATS_UD']['idx_type']
        self.assertTrue(len(user_types_after) > 0, "Has user idx_types ")
        self.assertTrue('UD-TMP_NEWLY_UPLOADED_FILE' in user_types_after)
        uploaded_type = user_types_after['UD-TMP_NEWLY_UPLOADED_FILE']['type']
        self.assertEqual(uploaded_type, 'tmp_newly_uploaded_file')
コード例 #4
0
    def tearDown(self):
        ''' Remove the private regions test index, if one is configured. '''
        key = 'PRIVATE_REGIONS_GFF'
        if key in IDX:
            target = '/'.join([ElasticSettings.url(), IDX[key]['indexName']])
            print(target)
            requests.delete(target)
コード例 #5
0
def tearDownModule():
    ''' Remove the staged and downloaded test data, the test indices and the
    ini file used by the tests. '''
    stage_dir = TEST_DATA_DIR + "/STAGE"
    if os.path.exists(stage_dir):
        shutil.rmtree(stage_dir)

    # remove the indices created by the tests
    ini_config = IniParser().read_ini(MY_INI_FILE)
    for section in ("GENE_HISTORY", "DBSNP"):
        requests.delete(ElasticSettings.url() + "/" + ini_config[section]["index"])
    os.remove(MY_INI_FILE)

    ens_dir = os.path.join(TEST_DATA_DIR, "DOWNLOAD", "ENSMART_GENE")
    if os.path.exists(ens_dir):
        shutil.rmtree(ens_dir)
コード例 #6
0
    def scan_and_scroll(self, idx, call_fun=None, idx_type='', url=None,
                        time_to_keep_scoll=1, query=None):
        ''' Scan and scroll an index and optionally provide a function argument to
        process the hits.
        @type  idx: string
        @param idx: index to scan.
        @type  call_fun: function
        @keyword call_fun: called with the parsed JSON of every scroll page
        that contains hits (default: None).
        @type  idx_type: string
        @keyword idx_type: index type (default: '').
        @type  url: string
        @keyword url: Elastic URL (default: ElasticSettings.url()).
        @type  time_to_keep_scoll: integer
        @keyword time_to_keep_scoll: minutes used for the scroll parameter (default: 1).
        @type  query: L{ElasticQuery}
        @keyword query: query to scan (default: match_all with size 1000).
        @raise QueryError: if query is given and is not an ElasticQuery.
        '''
        if url is None:
            url = ElasticSettings.url()

        scroll_keep_alive = str(time_to_keep_scoll) + 'm'
        url_search_scan = (idx + '/' + idx_type +
                           '/_search?search_type=scan&scroll=' + scroll_keep_alive)
        if query is None:
            query = {
                "query": {"match_all": {}},
                "size":  1000
            }
        else:
            if not isinstance(query, ElasticQuery):
                raise QueryError("not a Query")
            query = query.query

        response = Search.elastic_request(url, url_search_scan, data=json.dumps(query))
        _scroll_id = response.json()['_scroll_id']
        url_scan_scroll = '_search/scroll?scroll=' + scroll_keep_alive

        count = 0
        while True:
            response = Search.elastic_request(url, url_scan_scroll, data=_scroll_id)
            # parse each page once instead of once per field that is read
            page = response.json()
            _scroll_id = page['_scroll_id']
            hits = page['hits']['hits']
            if not hits:
                break
            count += len(hits)
            if call_fun is not None:
                call_fun(page)
        logger.debug("Scanned No. Docs ( "+idx+"/"+idx_type+" ) = "+str(count))
コード例 #7
0
ファイル: views.py プロジェクト: tcarver/django-chicp
def chicpeaFileUpload(request, url):
    ''' Index the uploaded BED files and return the created tracks as JSON.

    Every file in the "files[]" upload list is checked to have five columns
    on its first line (or on its second line when the first starts with "#"),
    copied to a temporary file and indexed with the index_search management
    command. Files that are empty or fail the column check are skipped.

    @param request: request object providing the uploaded FILES.
    @param url: not used by this function.
    @return: HttpResponse with a JSON body {"userSNPTracks": [...]}.
    '''
    files = request.FILES.getlist("files[]")
    logger.debug("uploaded files: %s", files)
    snpTracks = list()
    idx = getattr(chicp_settings, 'CHICP_IDX').get('userdata').get('INDEX')

    for f in files:
        # Read the upload once: a second readlines() call starts at the end
        # of the file and returns an empty list.
        lines = f.readlines()
        if not lines:
            logger.warning("WARNING: empty file: %s", f.name)
            continue
        line = lines[0].decode()
        if line.startswith("#"):
            if len(lines) < 2:
                logger.warning("WARNING: no data line found: %s", f.name)
                continue
            line = lines[1].decode()

        parts = re.split(r"\t", line)
        # NOTE(review): re.match only tests the start of the line, so the
        # whitespace split is used only for lines with leading whitespace -
        # confirm whether re.search was intended.
        if re.match(r"\s", line):
            parts = re.split(r"\s", line)

        if len(parts) != 5:
            logger.warning("WARNING: unexpected number of columns (" + str(len(parts)) + "): " + line)
            continue

        f.seek(0)
        bedFile = NamedTemporaryFile(delete=False)
        try:
            bedFile.write(f.read())
            bedFile.close()
            idx_type = os.path.basename(bedFile.name)
            snpTracks.append({"value": idx_type, "text":  f.name})
            os.system("curl -XDELETE '"+ElasticSettings.url()+"/"+idx+"/"+idx_type+"'")
            call_command("index_search", indexName=idx, indexType=idx_type, indexBED=bedFile.name)
            logger.debug("--indexName "+idx+" --indexType "+idx_type+" --indexBED "+bedFile.name)
        finally:
            # delete=False keeps the file on disk after close(), so it has to
            # be removed explicitly once indexing is done
            bedFile.close()
            if os.path.exists(bedFile.name):
                os.remove(bedFile.name)

    context = dict()
    context['userSNPTracks'] = snpTracks
    return HttpResponse(json.dumps(context), content_type="application/json")
コード例 #8
0
def tearDownModule():
    ''' Delete the disease test index, the publication ini file and the
    staged test data. '''
    # remove the index created by the tests
    ini_config = IniParser().read_ini(MY_PUB_INI_FILE)
    disease_index = ini_config['DISEASE']['index']
    requests.delete(ElasticSettings.url() + '/' + disease_index)
    os.remove(MY_PUB_INI_FILE)

    stage_dir = TEST_DATA_DIR + '/STAGE'
    if os.path.exists(stage_dir):
        shutil.rmtree(stage_dir)
コード例 #9
0
 def delete_repository(cls, repo):
     ''' Delete the named snapshot repository. Return True if elastic
     answers with status 200, otherwise log the error and return False. '''
     url = ElasticSettings.url() + '/_snapshot/' + repo
     resp = requests.delete(url)
     if resp.status_code == 200:
         return True
     logger.error("Status (" + url + "): " + str(resp.status_code) +
                  " :: " + str(resp.json()["error"]))
     return False
コード例 #10
0
 def get_count(self):
     ''' Return the parsed JSON of the elastic _count request for this
     search (using the query, when one has been set). '''
     count_url = self.idx + '/' + self.idx_type + '/_count?'
     payload = json.dumps(self.query) if hasattr(self, 'query') else {}
     return Search.elastic_request(ElasticSettings.url(), count_url, data=payload).json()
コード例 #11
0
 def exists(cls, repo, snapshot):
     ''' Return True if a GET on the repository/snapshot URL answers with
     status 200, otherwise False. '''
     snapshot_url = '/'.join([ElasticSettings.url() + '/_snapshot', repo, snapshot])
     return requests.get(snapshot_url).status_code == 200
コード例 #12
0
    def load(self, idx, idx_type, json_data, elastic_url=None):
        ''' Bulk load documents.
        @type  idx: string
        @param idx: index to load into.
        @type  idx_type: string
        @param idx_type: index type to load into.
        @type  json_data: string
        @param json_data: body sent to the _bulk end point.
        @type  elastic_url: string
        @keyword elastic_url: Elastic URL (default: ElasticSettings.url()).
        @return: the response of the bulk request.
        '''
        if elastic_url is None:
            elastic_url = ElasticSettings.url()
        # send the request to the URL that was asked for, not always the default
        resp = requests.put(elastic_url + '/' + idx + '/' + idx_type +
                            '/_bulk', data=json_data)
        if resp.status_code != 200:
            logger.error('ERROR: '+idx+' load status: '+str(resp.status_code)+' '+str(resp.content))

        # report errors found during loading
        resp_json = resp.json()
        if 'errors' in resp_json and resp_json['errors']:
            logger.error("ERROR: bulk load error found")
            for item in resp_json['items']:
                for action_result in item.values():
                    if 'error' in action_result:
                        logger.error("ERROR LOADING:")
                        logger.error(item)
        return resp
コード例 #13
0
    def __init__(self, search_query=None, aggs=None, search_from=0, size=20,
                 search_type=None, idx=ElasticSettings.idx('DEFAULT'), idx_type='',
                 qsort=None, elastic_url=None):
        ''' Store the search parameters and build the search URL.
        @type  search_query: L{ElasticQuery}
        @keyword search_query: The elastic query to search (default: None).
        @type  aggs: L{Aggs}
        @keyword aggs: Aggregations merged into (or used as) the query.
        @type  search_from: integer
        @keyword search_from: Offset put in the 'from' URL parameter (default: 0).
        @type  size: integer
        @keyword size: Value of the 'size' URL parameter (default: 20).
        @type  search_type: string
        @keyword search_type: When set, the URL carries only this search_type
        instead of size and from (default: None).
        @type  idx: string
        @keyword idx: index to search (default: ElasticSettings.idx('DEFAULT')).
        @type  idx_type: string
        @keyword idx_type: index type (default: '').
        @type  qsort: Sort
        @keyword qsort: sorting merged into the query.
        @type  elastic_url: string
        @keyword elastic_url: Elastic URL (default: ElasticSettings.url()).
        @raise QueryError: if search_query is not an ElasticQuery or qsort is
        not a Sort.
        '''
        if search_query is not None:
            if not isinstance(search_query, ElasticQuery):
                raise QueryError("not an ElasticQuery")
            self.query = search_query.query

        if aggs is not None:
            if not hasattr(self, 'query'):
                self.query = aggs.aggs
            else:
                self.query.update(aggs.aggs)

        if qsort is not None:
            if not isinstance(qsort, Sort):
                raise QueryError("not a Sort")
            if not hasattr(self, 'query'):
                logger.error("no query to sort")
            else:
                self.query.update(qsort.qsort)

        self.elastic_url = ElasticSettings.url() if elastic_url is None else elastic_url
        self.idx = idx
        self.idx_type = idx_type
        self.search_from = search_from
        self.size = size
        self.search_type = search_type

        search_url = self.idx + '/' + self.idx_type + '/_search?'
        if self.search_type is not None:
            self.url = search_url + 'search_type=' + search_type
        else:
            self.url = search_url + 'size=' + str(self.size) + '&from=' + str(self.search_from)
コード例 #14
0
    def get_elastic_settings_with_user_uploads(cls, elastic_dict=None, new_upload_file=None):
        ''' Get the updated elastic settings with user uploaded idx_types.

        An index type found in the mapping of the CP_STATS_UD index is listed
        when a content type of the permission model app ends with the type
        name plus the permission model suffix.
        @type  elastic_dict: dict
        @keyword elastic_dict: settings to update in place (default: the
        'IDX' entry of ElasticSettings.attrs()).
        @type  new_upload_file: string
        @keyword new_upload_file: index type that is always added (default: None).
        @return: elastic_dict with ['CP_STATS_UD']['idx_type'] replaced, or
        None if the mapping request returns an error.
        '''
        idx_key = 'CP_STATS_UD'
        idx = ElasticSettings.idx(idx_key)

        elastic_url = ElasticSettings.url()
        response = Search.elastic_request(elastic_url, idx + '/_mapping', is_post=False)
        elastic_mapping = response.json()
        if "error" in elastic_mapping:
            logger.warning(elastic_mapping)
            return None

        # With aliasing the index name in the response can differ from the
        # requested one, so take the name from the response.
        idx = list(elastic_mapping.keys())[0]
        idx_types = list(elastic_mapping[idx]['mappings'].keys())

        if elastic_dict is None:
            elastic_dict = ElasticSettings.attrs().get('IDX')

        idx_type_dict = {}
        existing_ct = [ct.name for ct in ContentType.objects.filter(app_label=cls.PERMISSION_MODEL_APP_NAME)]

        for idx_type in idx_types:
            idx_type_with_suffix = idx_type + cls.PERMISSION_MODEL_TYPE_SUFFIX
            # one meta request per index type is enough, however many content
            # types match
            if not any(ct.endswith(idx_type_with_suffix) for ct in existing_ct):
                continue

            meta_url = idx + '/' + idx_type + '/_meta/_source'
            meta_response = Search.elastic_request(elastic_url, meta_url, is_post=False)
            try:
                elastic_meta = json.loads(meta_response.content.decode("utf-8"))
                label = elastic_meta['label']
            except Exception:
                # best effort: fall back to a generated label when the meta
                # document is missing or unreadable
                label = "UD-" + idx_type

            idx_type_dict['UD-' + idx_type.upper()] = {'label': label, 'type': idx_type}

        if new_upload_file is not None:
            idx_type = new_upload_file
            label = "UD-" + idx_type
            idx_type_dict['UD-' + idx_type.upper()] = {'label': label, 'type': idx_type}

        elastic_dict[idx_key]['idx_type'] = idx_type_dict
        return elastic_dict
コード例 #15
0
 def index_exists(cls, idx, idx_type='', url=None):
     ''' Check if an index exists.
     @type  idx: string
     @param idx: index to check.
     @type  idx_type: string
     @keyword idx_type: index type (default: '').
     @type  url: string
     @keyword url: Elastic URL (default: ElasticSettings.url()).
     @return: False if the mapping request returns an error, otherwise True.
     '''
     # use the given URL when there is one; it was previously left unbound
     elastic_url = ElasticSettings.url() if url is None else url
     mapping_url = idx + '/' + idx_type + '/_mapping'
     response = Search.elastic_request(elastic_url, mapping_url, is_post=False)
     response_json = response.json()
     if "error" in response_json:
         logger.warning(response_json)
         return False
     return True
コード例 #16
0
 def index_refresh(cls, idx, url=None):
     ''' Refresh to make all operations performed since the last refresh
     available for search.
     @type  idx: string
     @param idx: index to refresh.
     @type  url: string
     @keyword url: Elastic URL (default: ElasticSettings.url()).
     @return: False if the refresh request returns an error, otherwise True.
     '''
     # use the given URL when there is one; it was previously left unbound
     elastic_url = ElasticSettings.url() if url is None else url
     response = Search.elastic_request(elastic_url, idx + '/_refresh')
     if "error" in response.json():
         logger.warning(response.content.decode("utf-8"))
         return False
     return True
コード例 #17
0
ファイル: criteria.py プロジェクト: D-I-L/django-criteria
    def get_meta_info(cls, idx, idx_type):
        ''' Return the '_meta' section of the mapping of an index type, or
        None when the mapping response cannot be parsed or has no such
        section.
        @type  idx: string
        @param idx: index name, also used as key into the mapping response.
        @type  idx_type: string
        @param idx_type: index type.
        '''
        elastic_url = ElasticSettings.url()
        meta_url = idx + '/' + idx_type + '/_mapping'
        meta_response = Search.elastic_request(elastic_url, meta_url, is_post=False)

        try:
            elastic_meta = json.loads(meta_response.content.decode("utf-8"))
            return elastic_meta[idx]['mappings'][idx_type]['_meta']
        except Exception:
            # best effort lookup; unlike a bare except this no longer hides
            # KeyboardInterrupt or SystemExit
            return None
コード例 #18
0
 def is_running(cls, repo=''):
     ''' Return True if the snapshot status of the repository lists at least
     one snapshot; False otherwise, including when the status request does
     not answer with 200.
     @type  repo: string
     @keyword repo: repository name (default: '').
     '''
     url = ElasticSettings.url() + '/_snapshot/' + repo + '/_status'
     resp = requests.get(url)
     if resp.status_code != 200:
         # status_code is an int and has to be converted before concatenation
         logger.debug(url + ' :: ' + str(resp.status_code))
         return False

     json_resp = resp.json()
     try:
         return len(json_resp['snapshots']) > 0
     except Exception as e:
         logger.error(e)
     return False
コード例 #19
0
def chicpeaFileUpload(request, url):
    ''' Index the uploaded BED files and return the created tracks as JSON.

    Every file in the "files[]" upload list is checked to have five columns
    on its first line (or on its second line when the first starts with "#"),
    copied to a temporary file and indexed with the index_search management
    command. A _meta document (label, owner, upload time) is stored for the
    new index type and the model permissions are created for the user.
    Files that are empty or fail the column check are skipped.

    @param request: request object providing the uploaded FILES and the user.
    @param url: not used by this function.
    @return: HttpResponse with a JSON body {"userSNPTracks": [...]}.
    '''
    import subprocess

    user = request.user
    files = request.FILES.getlist("files[]")
    snpTracks = list()
    idx = ElasticSettings.idx('CP_STATS_UD')

    for f in files:
        # Read the upload once: a second readlines() call starts at the end
        # of the file and returns an empty list.
        lines = f.readlines()
        if not lines:
            logger.warning("WARNING: empty file: %s", f.name)
            continue
        line = lines[0].decode()
        if line.startswith("#"):
            if len(lines) < 2:
                logger.warning("WARNING: no data line found: %s", f.name)
                continue
            line = lines[1].decode()

        parts = re.split(r"\t", line)
        # NOTE(review): re.match only tests the start of the line, so the
        # whitespace split is used only for lines with leading whitespace -
        # confirm whether re.search was intended.
        if re.match(r"\s", line):
            parts = re.split(r"\s", line)

        if len(parts) != 5:
            logger.warning("WARNING: unexpected number of columns (" + str(len(parts)) + "): " + line)
            continue

        f.seek(0)
        bedFile = NamedTemporaryFile(delete=False)
        try:
            bedFile.write(f.read())
            bedFile.close()
            idx_type = os.path.basename(bedFile.name)
            type_url = ElasticSettings.url() + "/" + idx + "/" + idx_type
            snpTracks.append({"value": "ud-"+idx_type, "text":  f.name})
            subprocess.call(["curl", "-XDELETE", type_url])
            call_command("index_search", indexName=idx, indexType=idx_type, indexBED=bedFile.name)
            logger.debug("index_search --indexName "+idx+" --indexType "+idx_type+" --indexBED "+bedFile.name)
            # The uploaded file name is user input: build the JSON with
            # json.dumps and pass curl an argument list so that no shell
            # interprets it.
            meta_doc = json.dumps({"label": f.name, "owner": user.username,
                                   "uploaded": str(timezone.now())})
            subprocess.call(["curl", "-XPUT", type_url + "/_meta", "-d", meta_doc])
        finally:
            # delete=False keeps the file on disk after close(), so it has to
            # be removed explicitly once indexing is done
            bedFile.close()
            if os.path.exists(bedFile.name):
                os.remove(bedFile.name)
        elastic_factory.create_idx_type_model_permissions(user, indexKey='CP_STATS_UD',
                                                          indexTypeKey='UD-'+idx_type.upper())

    context = dict()
    context['userSNPTracks'] = snpTracks

    return HttpResponse(json.dumps(context), content_type="application/json")
コード例 #20
0
ファイル: views.py プロジェクト: D-I-L/django-chicp
def chicpeaFileUpload(request, url):
    ''' Index the uploaded BED files and return the created tracks as JSON.

    Every file in the "files[]" upload list is checked to have five columns
    on its first line (or on its second line when the first starts with "#"),
    copied to a temporary file and indexed with the index_search management
    command. A _meta document (label, owner, upload time) is stored for the
    new index type and the model permissions are created for the user.
    Files that are empty or fail the column check are skipped.

    @param request: request object providing the uploaded FILES and the user.
    @param url: not used by this function.
    @return: HttpResponse with a JSON body {"userSNPTracks": [...]}.
    '''
    import subprocess

    user = request.user
    files = request.FILES.getlist("files[]")
    snpTracks = list()
    idx = ElasticSettings.idx('CP_STATS_UD')

    for f in files:
        # Read the upload once: a second readlines() call starts at the end
        # of the file and returns an empty list.
        lines = f.readlines()
        if not lines:
            logger.warning("WARNING: empty file: %s", f.name)
            continue
        line = lines[0].decode()
        if line.startswith("#"):
            if len(lines) < 2:
                logger.warning("WARNING: no data line found: %s", f.name)
                continue
            line = lines[1].decode()

        parts = re.split(r"\t", line)
        # NOTE(review): re.match only tests the start of the line, so the
        # whitespace split is used only for lines with leading whitespace -
        # confirm whether re.search was intended.
        if re.match(r"\s", line):
            parts = re.split(r"\s", line)

        if len(parts) != 5:
            logger.warning("WARNING: unexpected number of columns (" + str(len(parts)) + "): " + line)
            continue

        f.seek(0)
        bedFile = NamedTemporaryFile(delete=False)
        try:
            bedFile.write(f.read())
            bedFile.close()
            idx_type = os.path.basename(bedFile.name)
            type_url = ElasticSettings.url() + "/" + idx + "/" + idx_type
            snpTracks.append({"value": "ud-"+idx_type, "text":  f.name})
            subprocess.call(["curl", "-XDELETE", type_url])
            call_command("index_search", indexName=idx, indexType=idx_type, indexBED=bedFile.name)
            logger.debug("index_search --indexName "+idx+" --indexType "+idx_type+" --indexBED "+bedFile.name)
            # The uploaded file name is user input: build the JSON with
            # json.dumps and pass curl an argument list so that no shell
            # interprets it.
            meta_doc = json.dumps({"label": f.name, "owner": user.username,
                                   "uploaded": str(timezone.now())})
            subprocess.call(["curl", "-XPUT", type_url + "/_meta", "-d", meta_doc])
        finally:
            # delete=False keeps the file on disk after close(), so it has to
            # be removed explicitly once indexing is done
            bedFile.close()
            if os.path.exists(bedFile.name):
                os.remove(bedFile.name)
        elastic_factory.create_idx_type_model_permissions(user, indexKey='CP_STATS_UD',
                                                          indexTypeKey='UD-'+idx_type.upper())

    context = dict()
    context['userSNPTracks'] = snpTracks

    return HttpResponse(json.dumps(context), content_type="application/json")
コード例 #21
0
    def update_doc(cls, doc, part_doc, elastic_url=None):
        ''' Send a partial document to the _update end point of a document
        and return the parsed JSON response.
        @param doc: document whose _meta gives the index and id and whose
        type() gives the index type.
        @param part_doc: partial document, serialised with json.dumps.
        @type  elastic_url: string
        @keyword elastic_url: Elastic URL (default: ElasticSettings.url()).
        '''
        if elastic_url is None:
            elastic_url = ElasticSettings.url()
        url = '/'.join([doc._meta['_index'], doc.type(), doc._meta['_id'], '_update'])
        payload = json.dumps(part_doc)
        response = Search.elastic_request(elastic_url, url, data=payload)

        logger.debug("curl -XPOST '" + elastic_url + url + "' -d '" + payload + "'")
        if response.status_code != 200:
            logger.warning("Error: elastic response 200:" + url)
            logger.warning(response.json())
        return response.json()
コード例 #22
0
    def test_mapping_parent_child(self):
        ''' Test creating mapping with parent child relationship. '''
        gene_mapping = MappingProperties("gene")
        gene_mapping.add_property("symbol", "string", analyzer="full_name")
        inta_mapping = MappingProperties("publication", "gene")
        load = Loader()
        idx = "test__mapping__"+SEARCH_SUFFIX
        options = {"indexName": idx, "shards": 1}
        idx_url = ElasticSettings.url() + '/' + idx
        requests.delete(idx_url)
        # remove the test index even when an assertion below fails
        self.addCleanup(requests.delete, idx_url)

        # add child mappings first
        status = load.mapping(inta_mapping, "publication", analyzer=Loader.KEYWORD_ANALYZER, **options)
        self.assertTrue(status, "mapping inteactions")
        status = load.mapping(gene_mapping, "gene", analyzer=Loader.KEYWORD_ANALYZER, **options)
        self.assertTrue(status, "mapping genes")

        # load docs and test has parent query
        json_data = '{"index": {"_index": "%s", "_type": "gene", "_id" : "1"}}\n' % idx
        json_data += json.dumps({"symbol": "PAX1"}) + '\n'
        json_data += '{"index": {"_index": "%s", "_type": "publication", "_id" : "2", "parent": "1"}}\n' % idx
        json_data += json.dumps({"pubmed": 1234}) + '\n'
        Bulk.load(idx, '', json_data)
        Search.index_refresh(idx)
        query = ElasticQuery.has_parent('gene', Query.match('symbol', 'PAX1'))
        elastic = Search(query, idx=idx, idx_type='publication', size=500)
        docs = elastic.search().docs
        # assertEqual replaces the deprecated assertEquals alias
        self.assertEqual(len(docs), 1)
        self.assertEqual(getattr(docs[0], 'pubmed'), 1234)
        self.assertEqual(docs[0].parent(), '1')
        self.assertRaises(QueryError, ElasticQuery.has_parent, 'gene', 'xxxxx')

        # test has child query
        query = ElasticQuery.has_child('publication', Query.match('pubmed', 1234))
        elastic = Search(query, idx=idx, idx_type='gene', size=500)
        docs = elastic.search().docs
        self.assertEqual(len(docs), 1)
        self.assertEqual(getattr(docs[0], 'symbol'), 'PAX1')
        self.assertIsNone(docs[0].parent())
コード例 #23
0
 def show(cls, repo, snapshots, all_repos):
     ''' Print the snapshot information returned by elastic for the named
     repository/snapshots, or for an empty repository and snapshot name
     when all_repos is set. Return True on status 200, otherwise log the
     error and return False. '''
     if all_repos:
         repo, snapshots = '', ''
     url = ElasticSettings.url() + '/_snapshot/' + repo + '/' + snapshots
     resp = requests.get(url)
     if resp.status_code == 200:
         print(json.dumps(resp.json(), indent=4))
         return True

     logger.error("Returned status (for " + url + "): " +
                  str(resp.status_code))
     logger.error(resp.json()["error"])
     return False
コード例 #24
0
 def get_mapping(self, mapping_type=None):
     ''' Return the mappings for an index (host:port/{index}/_mapping/{type}).
     The type is mapping_type when given, otherwise the idx_type of the
     search. On a non 200 status a JSON string holding the status, the
     response text and the URL is logged and returned instead. '''
     self.mapping_url = self.idx + '/_mapping'
     type_name = mapping_type if mapping_type is not None else self.idx_type
     if type_name is not None:
         self.mapping_url += '/' + type_name

     response = Search.elastic_request(ElasticSettings.url(), self.mapping_url, is_post=False)
     if response.status_code == 200:
         return response.json()

     json_err = json.dumps({"error": response.status_code,
                            "response": response.content.decode("utf-8"),
                            "url": self.mapping_url})
     logger.warning(json_err)
     return json_err
コード例 #25
0
ファイル: test_utils.py プロジェクト: D-I-L/django-criteria
    def get_criteria_index_types(cls, idx_key):
        ''' Return the list of index types found in the mappings of the index
        configured for idx_key, or None if the mappings request returns an
        error.
        @type  idx_key: string
        @param idx_key: key passed to ElasticSettings.idx.
        '''
        idx = ElasticSettings.idx(idx_key)
        elastic_url = ElasticSettings.url()
        url = idx + '/_mappings'
        response = Search.elastic_request(elastic_url, url, is_post=False)

        # parse the response once and use it for both the error check and the types
        elastic_mapping = response.json()
        if "error" in elastic_mapping:
            logger.warning(elastic_mapping)
            return None

        # get idx_types from _mapping
        return list(elastic_mapping[idx]['mappings'].keys())
コード例 #26
0
    def suggest(cls, term, idx, elastic_url=ElasticSettings.url(),
                name='data', field='suggest', context=None, size=5):
        ''' Request completion suggestions for a term from the _suggest end
        point of an index and return the parsed JSON response.
        @param term: text to complete.
        @param idx: index to query.
        @keyword elastic_url: Elastic URL; None selects ElasticSettings.url().
        @keyword name: name of the suggestion in the request (default: 'data').
        @keyword field: completion field (default: 'suggest').
        @keyword context: dictionary merged into the completion section (default: None).
        @keyword size: number of suggestions requested (default: 5).
        '''
        if elastic_url is None:
            elastic_url = ElasticSettings.url()

        completion = {"field": field, "size": size}
        if context is not None:
            completion.update(context)
        suggest = {name: {"text": term, "completion": completion}}

        url = idx + '//_suggest'
        payload = json.dumps(suggest)
        response = Search.elastic_request(elastic_url, url, data=payload)
        logger.debug("curl -XPOST '" + elastic_url + '/' + url + "' -d '" + payload + "'")
        if response.status_code != 200:
            logger.warning("Suggeter Error: elastic response 200:" + url)
            logger.warning(response.json())
        return response.json()
コード例 #27
0
    def create_repository(self, repo, location):
        ''' Create a file system ("fs") snapshot repository.
        @type  repo: string
        @param repo: repository name.
        @type  location: string
        @param location: location setting of the repository.
        @return: True if the repository was created; False if it already
        exists or the create request does not answer with status 200.
        '''
        url = ElasticSettings.url() + '/_snapshot/' + repo
        if Snapshot.exists(repo, ''):
            logger.error("Repository " + repo + " already exists!")
            return False
        parent = os.path.abspath(os.path.join(location, ".."))

        if not os.path.isdir(parent):
            logger.warning("Check directory exists: " + parent)

        data = {"type": "fs", "settings": {"location": location}}
        resp = requests.put(url, data=json.dumps(data))
        if resp.status_code != 200:
            logger.error("Status (" + url + "): " + str(resp.status_code) +
                         " :: " + str(resp.json()["error"]))
            # report the failure to the caller instead of claiming success
            return False
        return True
コード例 #28
0
ファイル: test_utils.py プロジェクト: D-I-L/django-criteria
 def test_criteria_mappings(self, idx, idx_types):
     ''' Assert that the mapping of every given index type has the score,
     disease_tags and qid properties and one property for each disease
     returned by CriteriaManager.get_available_diseases. '''
     (main_codes, other_codes) = CriteriaManager.get_available_diseases()
     site_enabled_diseases = main_codes + other_codes
     required_keys = ('score', 'disease_tags', 'qid')
     elastic_url = ElasticSettings.url()

     for idx_type in idx_types:
         mapping_url = idx + '/' + idx_type + '/_mapping'
         response = Search.elastic_request(elastic_url, mapping_url, is_post=False)
         type_mapping = json.loads(response.content.decode("utf-8"))
         property_keys = list(type_mapping[idx]['mappings'][idx_type]['properties'].keys())

         # the fixed criteria properties first ...
         for required in required_keys:
             self.assertIn(required, property_keys)
         # ... then one property for every enabled disease
         for disease in site_enabled_diseases:
             self.assertIn(disease, property_keys)
コード例 #29
0
    def get_models_to_delete(self):
        ''' Get models to delete.

        Returns the index types of the CP_STATS_UD index that either hold no
        documents besides their _meta document or whose _meta 'uploaded' date
        is more than 7 days old. Each index type is listed at most once.
        @return: list of index type names, or None if the mapping request
        returns an error.
        '''
        idx_key = 'CP_STATS_UD'
        idx = ElasticSettings.idx(idx_key)
        elastic_url = ElasticSettings.url()
        response = Search.elastic_request(elastic_url, idx + '/_mapping', is_post=False)

        elastic_mapping = response.json()
        if "error" in elastic_mapping:
            logger.warning(elastic_mapping)
            return None

        # get idx_types from _mapping; with aliasing the index name in the
        # response can differ from the requested one
        idx = list(elastic_mapping.keys())[0]
        idx_types = list(elastic_mapping[idx]['mappings'].keys())

        models2go = []
        expire_days = 7  # 1 week
        expiry_date = datetime.date.today() - datetime.timedelta(days=expire_days)

        for idx_type in idx_types:
            # add idx_types that have no docs
            # NOTE(review): the condition used to be ndocs > 0, which selects
            # every type that HAS docs; changed to match the intent stated
            # above, presumably allowing for the _meta document - confirm.
            ndocs = Search(idx=idx, idx_type=idx_type).get_count()['count']
            if ndocs <= 1:
                models2go.append(idx_type)
                continue

            # add idx_types that were not accessed for a given time period
            url = idx + '/' + idx_type + '/_meta'
            response = Search.elastic_request(elastic_url, url, is_post=False)
            elastic_meta = json.loads(response.content.decode("utf-8"))
            if '_source' in elastic_meta:
                # Format: 2015-11-03 14:43:54.099645+00:00
                yymmdd_str = elastic_meta['_source']['uploaded'].split()[0]
                uploaded_date = datetime.datetime.strptime(yymmdd_str, '%Y-%m-%d').date()
                if uploaded_date < expiry_date:
                    models2go.append(idx_type)

        return models2go
コード例 #30
0
    def get_models_to_delete(self):
        """Get models to delete.

        Returns the index types of the CP_STATS_UD index that either have a
        document count of at most one or whose _meta 'uploaded' date is more
        than 7 days old. Each index type is listed at most once.
        @return: list of index type names, or None if the mapping request
        returns an error.
        """
        idx_key = "CP_STATS_UD"
        idx = ElasticSettings.idx(idx_key)
        elastic_url = ElasticSettings.url()
        response = Search.elastic_request(elastic_url, idx + "/_mapping", is_post=False)

        elastic_mapping = response.json()
        if "error" in elastic_mapping:
            logger.warning(elastic_mapping)
            return None

        # get idx_types from _mapping; if the configured name is not a key of
        # the response (e.g. it is an alias) use the name elastic reports
        if idx not in elastic_mapping:
            idx = list(elastic_mapping.keys())[0]
        idx_types = list(elastic_mapping[idx]["mappings"].keys())

        models2go = []
        expire_days = 7  # 1 week
        expiry_date = datetime.date.today() - datetime.timedelta(days=expire_days)

        for idx_type in idx_types:
            # add idx_types that have no docs
            ndocs = Search(idx=idx, idx_type=idx_type).get_count()["count"]
            if ndocs <= 1:
                models2go.append(idx_type)
                # already selected: do not add it a second time below
                continue

            # add idx_types that were not accessed for a given time period
            url = idx + "/" + idx_type + "/_meta"
            response = Search.elastic_request(elastic_url, url, is_post=False)
            elastic_meta = json.loads(response.content.decode("utf-8"))
            if "_source" in elastic_meta:
                # Format: 2015-11-03 14:43:54.099645+00:00
                yymmdd_str = elastic_meta["_source"]["uploaded"].split()[0]
                uploaded_date = datetime.datetime.strptime(yymmdd_str, "%Y-%m-%d").date()
                if uploaded_date < expiry_date:
                    models2go.append(idx_type)

        return models2go
コード例 #31
0
ファイル: settings_idx.py プロジェクト: tottlefields/pydgin
 def setupIdx(cls, idx_name_arr):
     ''' Create and load the test indices for the given array of key names
     (e.g. ['GENE', 'DISEASE', ...]), then refresh each of them. '''
     full_name_analyzer = {"filter": ["standard", "lowercase"], "tokenizer": "keyword"}
     idx_settings = {
         "settings": {
             "analysis": {"analyzer": {"full_name": full_name_analyzer}},
             "number_of_shards": 1
         }
     }
     settings_json = json.dumps(idx_settings)
     test_idx = PydginTestSettings.IDX

     for name in idx_name_arr:
         idx_props = test_idx[name]
         requests.put(ElasticSettings.url() + '/' + idx_props['indexName'], data=settings_json)
         call_command('index_search', **idx_props)

     # wait for the elastic load to finish
     for name in idx_name_arr:
         Search.index_refresh(test_idx[name]['indexName'])
コード例 #32
0
def add_disease_locus(seqid, locus_id, regionName, disease, tier, species, weight, doc_ids):
    ''' Index a single disease locus document under the 'disease_locus' type.

    The document id is locus_id and the region name stored is
    "<disease> <regionName>". The outcome is reported on stdout: a
    "Loaded ..." line on HTTP 201, otherwise the response body followed by a
    "Problem loading ..." line.

    NOTE(review): the target index is read from a name ``idx`` that is not a
    parameter of this function - presumably a module-level variable; confirm.
    '''
    data = {
        "region_name": disease+" "+regionName,
        "disease": disease,
        "tier": tier,
        "species": species,
        "tags": {"weight": weight},
        "locus_id": locus_id,
        "seqid": seqid,
        "hits": doc_ids
    }
    #    "suggest": {"input": [disease+" "+regionName, regionName], "weight": weight}
    resp = requests.put(ElasticSettings.url()+'/' + idx+'/disease_locus/'+locus_id, data=json.dumps(data))
    if resp.status_code != 201:
        print(str(resp.content))
        # report with the disease argument; the previous code read an undefined
        # ``doc`` here and raised NameError instead of printing the problem
        print("Problem loading "+disease+" "+regionName)
    else:
        print("Loaded "+locus_id+" - "+regionName)
コード例 #33
0
    def create_snapshot(cls, repo, snapshot, indices):
        ''' Create a snapshot for the specified indices or all if
        indices is None.

        Returns False (after logging an error) when a snapshot of that name
        already exists in the repository or when the create request does not
        answer with HTTP 200; returns True once the snapshot was created.
        '''
        url = ElasticSettings.url() + '/_snapshot/' + repo + '/' + snapshot + '?wait_for_completion=true'
        resp = requests.get(url)
        if resp.status_code == 200:
            logger.error("Snapshot " + snapshot + " already exists!")
            return False

        data = {}
        if indices is not None:
            data = {"indices": indices}
        resp = requests.put(url, data=json.dumps(data))
        if resp.status_code != 200:
            # the error body is not guaranteed to be JSON with an "error" key;
            # fall back to the raw text so the original failure is still logged
            try:
                reason = resp.json()["error"]
            except (ValueError, KeyError, TypeError):
                reason = resp.text
            logger.error("Snapshot " + snapshot + " create error! :: " + str(reason))
            # a failed create must not be reported as success
            return False
        return True
コード例 #34
0
    def get_context_models_to_delete(self, *args, **options):
        ''' Delete the content types that belong to one user-data idx_type.

        options['content_type'] names the idx_type. When that idx_type is
        present in the mapping of the CP_STATS_UD index and holds at least one
        document, every ContentType whose name ends with
        "<content_type>_idx_type" is deleted.

        A JSON object is written to self.stdout: {"acknowledged": 1} on
        completion, or {"acknowledged": 0, "errorMsg": ...} when the mapping
        request returns an error.

        NOTE(review): the content types are removed when the idx_type still
        HAS documents (ndocs > 0) - confirm this is the intended condition.
        '''
        ct = options['content_type']
        retDict = dict()
        retDict['acknowledged'] = 0
        logger.debug(ct)
        idx_key = 'CP_STATS_UD'
        idx = ElasticSettings.idx(idx_key)

        # read the idx_types of the index from its _mapping
        elastic_url = ElasticSettings.url()
        url = idx + '/_mapping'
        response = Search.elastic_request(elastic_url, url, is_post=False)
        elastic_mapping = response.json()
        if "error" in elastic_mapping:
            logger.warning(elastic_mapping)
            retDict['errorMsg'] = elastic_mapping
            self.stdout.write(json.dumps(retDict))
            # stop here: the error payload is not a mapping, and the caller
            # must receive a single JSON object only
            return

        ## fix needed if we deploy aliasing for indices
        idx = list(elastic_mapping.keys())[0]
        idx_types = list(elastic_mapping[idx]['mappings'].keys())
        logger.debug(idx_types)

        # only the idx_type named by content_type is of interest
        if ct in idx_types:
            logger.debug("Found " + ct + "  equal to " + ct)
            ndocs = Search(idx=idx, idx_type=ct).get_count()['count']
            logger.debug("WE have " + str(ndocs))
            if ndocs > 0:
                for cnt in ContentType.objects.filter():
                    if str(cnt.name).endswith(ct + '_idx_type'):
                        logger.debug(
                            'Matched, finding permissions for %s  %s' %
                            (str(cnt.name), str(cnt.id)))
                        logger.debug("deleting %s" % ct)
                        cnt.delete()
        retDict['acknowledged'] = 1
        self.stdout.write(json.dumps(retDict))
コード例 #35
0
 def add_arguments(self, parser):
     ''' Declare the command line: the positional snapshot name plus the
     optional --url, --repo and --indices flags. '''
     parser.add_argument('snapshot', help='Snapshot to restore.', type=str)
     optional_flags = (
         ('--url', {
             'dest': 'url',
             'default': ElasticSettings.url(),
             'metavar': "ELASTIC_URL",
             'help': 'Elastic URL to restore to.',
         }),
         ('--repo', {
             'dest': 'repo',
             # the configured repository is both the default and the usage placeholder
             'default': ElasticSettings.getattr('REPOSITORY'),
             'metavar': ElasticSettings.getattr('REPOSITORY'),
             'help': 'Repository name',
         }),
         ('--indices', {
             'dest': 'indices',
             'default': None,
             'metavar': "idx1,idx2",
             'help': 'Indices (comma separated) to be restored from a snapshot (default all).',
         }),
     )
     for flag, settings in optional_flags:
         parser.add_argument(flag, **settings)
コード例 #36
0
 def test_server(self):
     ''' Check that the cluster health page answers with HTTP 200 and that
     the reported status is not red. '''
     try:
         health_url = ElasticSettings.url() + '/_cluster/health/'
         resp = requests.get(health_url)
         self.assertEqual(resp.status_code, 200, "Health page status code")
         # a red cluster gets up to three one-second pauses to recover
         retries_left = 3
         while retries_left > 0 and resp.json()['status'] == 'red':
             time.sleep(1)
             resp = requests.get(health_url)
             retries_left -= 1
         self.assertFalse(resp.json()['status'] == 'red', 'Health report - red')
     # most specific request failures first; RequestException is the catch-all
     except requests.exceptions.Timeout:
         self.assertTrue(False, 'timeout exception')
     except requests.exceptions.TooManyRedirects:
         self.assertTrue(False, 'too many redirects exception')
     except requests.exceptions.ConnectionError:
         self.assertTrue(False, 'request connection exception')
     except requests.exceptions.RequestException:
         self.assertTrue(False, 'request exception')
コード例 #37
0
    def get_elastic_settings_with_user_uploads(cls, elastic_dict=None):
        ''' Get the elastic settings updated with the user uploaded idx_types.

        Every idx_type found in the mapping of the CP_STATS_UD index is added
        under the key 'UD-<IDX_TYPE upper-cased>' with its 'type' and a
        'label'. The label is read from the idx_type's _meta document and
        falls back to 'UD-<idx_type>' when that document cannot be used.

        elastic_dict is the IDX settings dictionary to update; when None, the
        one returned by ElasticSettings.attrs().get('IDX') is used. The
        dictionary is modified in place and returned. None is returned (and a
        warning logged) when the mapping request reports an error.
        '''
        idx_key = 'CP_STATS_UD'
        idx = ElasticSettings.idx(idx_key)

        # fetch the mapping once and reuse the parsed body
        elastic_url = ElasticSettings.url()
        url = idx + '/_mapping'
        response = Search.elastic_request(elastic_url, url, is_post=False)
        elastic_mapping = json.loads(response.content.decode("utf-8"))

        if "error" in elastic_mapping:
            logger.warning(elastic_mapping)
            return None

        # get idx_types from _mapping
        idx_types = list(elastic_mapping[idx]['mappings'].keys())

        if elastic_dict is None:
            elastic_dict = ElasticSettings.attrs().get('IDX')

        idx_type_dict = {}

        for idx_type in idx_types:

            meta_url = idx + '/' + idx_type + '/_meta/_source'
            meta_response = Search.elastic_request(elastic_url, meta_url, is_post=False)

            # best effort: any problem reading the label selects the default,
            # but interpreter exits / Ctrl-C are no longer swallowed
            try:
                elastic_meta = json.loads(meta_response.content.decode("utf-8"))
                label = elastic_meta['label']
            except Exception:
                label = "UD-" + idx_type

            idx_type_dict['UD-' + idx_type.upper()] = {'label': label, 'type': idx_type}

        elastic_dict[idx_key]['idx_type'] = idx_type_dict
        return elastic_dict
コード例 #38
0
    def test_create_restore_delete_snapshot(self):
        ''' Snapshot the MARKER index, delete the index, restore it from the
        snapshot and finally remove the snapshot again. '''
        self.wait_for_running_snapshot()
        repo = SnapshotTest.TEST_REPO
        snapshot = 'test_' + ElasticSettings.getattr('TEST')
        marker_idx = IDX['MARKER']['indexName']

        # create a snapshot
        call_command('snapshot', snapshot, indices=marker_idx, repo=repo)
        Snapshot.wait_for_snapshot(repo, snapshot)
        self.assertTrue(Snapshot.exists(repo, snapshot), "Created snapshot " + snapshot)
        # a second create for the same name is refused
        created_again = Snapshot.create_snapshot(repo, snapshot, marker_idx)
        self.assertFalse(created_again)

        # delete index
        requests.delete(ElasticSettings.url() + '/' + marker_idx)
        self.assertFalse(Search.index_exists(marker_idx), "Removed index")

        # restore from snapshot
        call_command('restore_snapshot', snapshot, repo=repo, indices=marker_idx)
        Search.index_refresh(marker_idx)
        self.assertTrue(Search.index_exists(marker_idx), "Restored index exists")

        # remove snapshot
        call_command('snapshot', snapshot, delete=True, repo=repo)
        Snapshot.wait_for_snapshot(repo, snapshot, delete=True, count=10)
        self.assertFalse(Snapshot.exists(repo, snapshot), "Deleted snapshot " + snapshot)
コード例 #39
0
ファイル: disease.py プロジェクト: D-I-L/django-data-pipeline
 def idx(cls, disease_f, idx, idx_type):
     ''' Parse and load disease data.

     disease_f is an iterable of tab-delimited lines with the columns
     name, description, code, colour and tier. Blank lines and lines starting
     with '#' are skipped. One document per line is PUT to <idx>/<idx_type>
     using the lower-cased code as document id; the outcome of each request
     is logged.
     '''
     for line in disease_f:
         line = line.strip()
         # blank lines carry no columns and would otherwise raise IndexError
         if not line or line.startswith("#"):
             continue
         parts = line.split('\t')
         name = parts[0]
         code = parts[2].lower()
         data = {
             "name": name,
             "code": code,
             "description": parts[1],
             "colour": parts[3],
             "tier": int(parts[4]),
             # completion suggester entry: matches on either the code or the name
             "suggest": {"input": [code, name], "weight": 250}
         }
         resp = requests.put(ElasticSettings.url()+'/' +
                             idx+'/'+idx_type+'/'+code,
                             data=json.dumps(data))
         if resp.status_code == 201:
             logger.debug("Loaded "+name)
         else:
             logger.error("Problem loading "+name)
コード例 #40
0
def tearDownModule():
    ''' Delete every index listed in IDX, then remove the test snapshot repository. '''
    for name in IDX:
        index_url = '/'.join((ElasticSettings.url(), IDX[name]['indexName']))
        requests.delete(index_url)
    call_command('repository', SnapshotTest.TEST_REPO, delete=True)
コード例 #41
0
def tearDownModule():
    ''' Delete the MARKER test index. '''
    marker_url = '/'.join((ElasticSettings.url(), IDX['MARKER']['indexName']))
    requests.delete(marker_url)
コード例 #42
0
ファイル: views.py プロジェクト: premanand17/django-chicp
def chicpeaDeleteUD(request, url):
    ''' Delete one user-data idx_type from the CHiCP user data index.

    The idx_type is taken from the POST parameter "userDataIdx". The raw
    response of the Elastic DELETE request is returned as application/json.
    A missing or unsafe idx_type is answered with HTTP 400 and nothing is
    deleted.

    NOTE(review): no ownership/permission check on the idx_type is visible in
    this view - confirm it is enforced elsewhere.
    '''
    from urllib.parse import quote

    queryDict = request.POST
    idx_type = queryDict.get("userDataIdx")

    # The value comes straight from the client. Refuse anything that would
    # widen the delete beyond a single type: empty / dot segments address the
    # whole index, '_' names address Elastic endpoints, and '*' ',' '/' select
    # several targets or another path.
    if (not idx_type or idx_type in ('.', '..') or idx_type.startswith('_') or
            any(char in idx_type for char in '*,/')):
        return HttpResponse('{"error": "invalid userDataIdx"}',
                            content_type="application/json", status=400)

    idx = getattr(chicp_settings, 'CHICP_IDX').get('userdata').get('INDEX')
    target = ElasticSettings.url() + "/" + idx + "/" + quote(idx_type, safe='')
    # argument list without a shell: the value can no longer inject shell commands
    output = subprocess.check_output(["curl", "-XDELETE", target])
    return HttpResponse(output, content_type="application/json")
コード例 #43
0
from elastic.elastic_settings import ElasticSettings

OVERRIDE_SETTINGS_CHICP = \
    {'default': {
        'ELASTIC_URL': ElasticSettings.url(),
        'IDX': {
            'CP_STATS_UD': {
                'name': 'cp:hg19_userdata_bed',
                'label': 'User Data',
                'idx_type': {},
                },
            'CP_STATS_IC': {
                'name': 'cp:hg19_immunochip_bed',
                'label': 'ImmunoChip',
                'idx_type': {
                    'IC-ATD_COOPER': {'label': "ATD - Cooper et al.", 'type': 'atd_cooper', 'auth_public': True},
                    'IC-CEL_TRYNKA': {'label': "CEL - Trynka et al.", 'type': 'cel_trynka', 'auth_public': True},
                    'IC-JIA_HINKS_UK': {'label': "JIA - Hinks et al. UK", 'type': 'jia_hinks_uk'},
                    'IC-MS_IMSGC': {'label': "MS - IMSGC et al.", 'type': 'ms_imsgc'},
                    'IC-NAR_FARACO': {'label': "NAR - Faraco et al.", 'type': 'nar_faraco'},
                    'IC-PBC_LIU': {'label': "PBC - Liu et al.", 'type': 'pbc_liu', 'auth_public': True},
                    'IC-RA_EYRE': {'label': "RA - Eyre et al.", 'type': 'ra_eyre', 'auth_public': True},
                    'IC-T1D_ONENGUT': {'label': 'T1D - Onengut et al.', 'type': 't1d_onengut', 'auth_public': True},
                },
                'auth_public': True,
            },
            'CP_STATS_GWAS': {
                'name': 'cp:hg19_gwas_bed',
                'label': 'GWAS Statistic',
                'idx_type': {
                    'GWAS-DUBOIS': {'label': 'CEL - Dubois et al.', 'type': 'cel_dubois',
コード例 #44
0
ファイル: tests_pydgin.py プロジェクト: D-I-L/pydgin
 def test_settings(self):
     ''' The configured elastic URL must answer a GET with HTTP 200. '''
     status = requests.get(ElasticSettings.url()).status_code
     self.assertEqual(status, 200)
コード例 #45
0
    def marker_is_gwas_significant_in_ic(cls, hit, section=None, config=None, result_container=None):
        """
        Record the marker of a statistics hit when its p-value is genome-wide
        significant (p < 5e-08).

        /hg38_gwas_statistics,hg38_ic_statistics/_search?pretty' -d '{"query":{"range":{"p_value":{"lt": 0.00000005}}}}'

        hit is one raw search hit (a dict with "_source", "_id", "_index" and
        "_type"). The disease and study id are read from the "_meta" section
        of the hit's type mapping. result_container is the dict the results
        are collected in; a new one is created when it is not given.

        The container is returned unchanged when the hit has no marker, no
        disease could be determined, the p-value is None or not significant;
        otherwise the result of cls.populate_container(...) is returned.
        """
        # a fresh dict per call: a {} default would be shared between calls
        if result_container is None:
            result_container = {}

        gw_sig_p = 0.00000005
        feature_doc = hit["_source"]
        feature_doc["_id"] = hit["_id"]

        idx = hit["_index"]
        idx_type = hit["_type"]

        # get meta data
        # study id and disease
        elastic_url = ElasticSettings.url()
        meta_url = idx + "/" + idx_type + "/_mapping"
        meta_response = Search.elastic_request(elastic_url, meta_url, is_post=False)

        # best effort: missing/invalid meta data means "no disease", which
        # ends the processing of this hit below
        try:
            elastic_meta = json.loads(meta_response.content.decode("utf-8"))
            meta_info = elastic_meta[idx]["mappings"][idx_type]["_meta"]
            disease = meta_info["disease"]
            dil_study_id = meta_info["study"]
        except Exception:
            disease = None
            dil_study_id = None

        marker = None
        if "marker" in feature_doc:
            marker = feature_doc["marker"]

        if marker is None or disease is None:
            return result_container

        p_val = feature_doc["p_value"]
        if p_val is None:
            return result_container
        # module-level tally of hits that reached the p-value comparison
        global counter
        counter = counter + 1

        p_val_to_compare = float(p_val)
        if p_val_to_compare < gw_sig_p:
            if dil_study_id is None or dil_study_id == "None":
                first_author = "NA"
                dil_study_id = "NA"
            else:
                # look the study up to label the link with its first author
                query = ElasticQuery(Query.ids([dil_study_id]))
                elastic = Search(search_query=query, idx=ElasticSettings.idx("STUDY", "STUDY"), size=1)
                study_doc = elastic.search().docs[0]
                author = getattr(study_doc, "authors")[0]
                first_author = author["name"] + " " + author["initials"]

            fnotes = {
                "linkdata": "pval",
                "linkvalue": p_val_to_compare,
                "linkid": dil_study_id,
                "linkname": first_author,
            }
            result_container_populated = cls.populate_container(
                dil_study_id,
                first_author,
                fnotes=fnotes,
                features=[marker],
                diseases=[disease],
                result_container=result_container,
            )
            return result_container_populated
        else:
            return result_container
コード例 #46
0
ファイル: region_resources.py プロジェクト: D-I-L/pydgin
    def filter_queryset(self, request, queryset, view):
        ''' Get disease regions.

        Builds the list of region dictionaries for one disease (request
        parameter 'disease', default 'T1D') and, when requested, appends gene
        and marker dictionaries to the same list.

        Only request parameters named in view.filter_fields are honoured:
        'disease', 'genes', 'markers', 'regions' and 'build'. The queryset
        argument is not used.

        Returns a single list: the regions (empty when regions == 'false'),
        followed by the gene objects and then the marker objects.
        Raises Http404 when a TypeError, ValueError, IndexError or
        ConnectionError occurs.
        '''
        try:
            filterable = getattr(view, 'filter_fields', [])
            filters = dict([(k, v) for k, v in request.GET.items() if k in filterable])
            dis = filters.get('disease', 'T1D')
            # NOTE(review): values coming from request.GET are presumably strings, so any
            # non-empty 'genes'/'markers' value (including 'false') is truthy below, whereas
            # 'regions' is compared with the string 'false' - confirm this is intended.
            show_genes = filters.get('genes', False)
            show_markers = filters.get('markers', False)
            show_regions = filters.get('regions', True)

            build = self._get_build(filters.get('build', settings.DEFAULT_BUILD))
            docs = DiseaseLocusDocument.get_disease_loci_docs(dis)
            # an empty result only adds a message; processing continues with no docs
            if len(docs) == 0:
                messages.error(request, 'No regions found for '+dis+'.')

            visible_hits = DiseaseLocusDocument.get_hits([h for r in docs for h in getattr(r, 'hits')])
            regions = []
            all_markers = []
            all_genes = []
            ens_all_cand_genes = []
            for r in docs:
                region = r.get_disease_region(visible_hits, build=build)
                if region is not None:
                    ens_all_cand_genes.extend(region['ens_cand_genes'])
                    all_markers.extend(region['markers'])
                    region['hits'] = [self._study_hit_obj(s, region) for s in
                                      StudyHitDocument.process_hits(r.hit_docs, region['all_diseases'])]

                    # genes are fetched for the region extended by 500000 on both sides and
                    # then split into in-region / upstream / downstream lists
                    (all_coding, all_non_coding) = views.get_genes_for_region(getattr(r, "seqid"),
                                                                              region['rstart']-500000,
                                                                              region['rstop']+500000)
                    (region_coding, coding_up, coding_down) = views._region_up_down(all_coding, region['rstart'],
                                                                                    region['rstop'])
                    (region_non_coding, non_coding_up, non_coding_down) = \
                        views._region_up_down(all_non_coding, region['rstart'], region['rstop'])
                    # only the document ids of the genes are kept on the region
                    region['genes'] = {
                        'upstream': {'coding': [g.doc_id() for g in coding_up],
                                     'non_coding': [g.doc_id() for g in non_coding_up]},
                        'region': {'coding': [g.doc_id() for g in region_coding],
                                   'non_coding': [g.doc_id() for g in region_non_coding]},
                        'downstream': {'coding': [g.doc_id() for g in coding_down],
                                       'non_coding': [g.doc_id() for g in non_coding_down]},
                    }
                    all_genes.extend(region['genes']['region']['coding'])
                    all_genes.extend(region['genes']['region']['non_coding'])
                    regions.append(region)

            # look for pleiotropy by looking for diseases for the markers in IC_STATS and other study hits
            stats_query = ElasticQuery.filtered(Query.terms("marker", all_markers),
                                                Filter(RangeQuery("p_value", lte=5E-08)))
            stats_docs = Search(stats_query, idx=ElasticSettings.idx("IC_STATS"), size=len(all_markers)).search().docs
            # the raw IC_STATS mapping response is handed to views._process_stats below
            meta_response = Search.elastic_request(ElasticSettings.url(), ElasticSettings.idx("IC_STATS") + '/_mapping',
                                                   is_post=False)
            # get ensembl to gene symbol mapping for all candidate genes
            extra_markers = []
            for region in regions:
                # add diseases from IC/GWAS stats
                (study_ids, region['marker_stats']) = views._process_stats(stats_docs, region['markers'], meta_response)
                region['all_diseases'].extend([getattr(mstat, 'disease') for mstat in region['marker_stats']])

                # study hits (tier <= 2) for the region's markers, excluding the studies
                # already returned by _process_stats
                other_hits_query = ElasticQuery(
                        BoolQuery(must_arr=[RangeQuery("tier", lte=2), Query.terms("marker", region['markers'])],
                                  must_not_arr=[Query.terms("dil_study_id", study_ids)]))
                other_hits = Search(other_hits_query, idx=ElasticSettings.idx('REGION', 'STUDY_HITS'),
                                    size=100).search()
                region['extra_markers'] = [self._study_hit_obj(s, region) for s in
                                           StudyHitDocument.process_hits(other_hits.docs, region['all_diseases'])]
                # de-duplicate the diseases once all sources were added
                region['all_diseases'] = list(set(region['all_diseases']))
                # NOTE(review): extra_markers is collected here but not read again in this method
                extra_markers.extend([m['marker_id'] for m in region['extra_markers']])

            # get markers
            marker_objs = []
            if show_markers:
                query = ElasticQuery(Query.terms("id", all_markers), sources=['id', 'start'])
                marker_docs = Search(search_query=query, idx=ElasticSettings.idx('MARKER', 'MARKER'),
                                     size=len(all_markers)).search().docs
                mids = {getattr(m, 'id'): getattr(m, 'start') for m in marker_docs}
                marker_objs = [h for r in regions for h in r['hits']]
                marker_objs.extend([h for r in regions for h in r['extra_markers']])
                # NOTE(review): mids is built from all_markers only; a marker_id that is missing
                # from it raises KeyError, which the except clause below does not catch
                for m in marker_objs:
                    m['start'] = mids[m['marker_id']]

            # get genes
            gene_objs = []
            if show_genes:
                all_genes.extend(ens_all_cand_genes)
                gene_docs = GeneDocument.get_genes(all_genes, sources=['start', 'stop', 'chromosome',
                                                                       'symbol', 'biotype'])
                for doc in Document.sorted_alphanum(gene_docs, 'chromosome'):
                    ensembl_id = doc.doc_id()
                    region_name = ''
                    candidate_gene = 0
                    # the first region that lists the gene provides its region name
                    for region in regions:
                        if ('genes' in region and
                            (ensembl_id in region['genes']['region']['coding'] or
                             ensembl_id in region['genes']['region']['non_coding'] or
                             ensembl_id in region['ens_cand_genes'])):
                            region_name = region['region_name']
                            candidate_gene = 1 if ensembl_id in region['ens_cand_genes'] else 0
                            break
                    gene_objs.append({
                        'ensembl_id': ensembl_id,
                        'seqid': 'chr'+getattr(doc, 'chromosome'),
                        'start': getattr(doc, 'start'),
                        'end': getattr(doc, 'stop'),
                        'symbol': getattr(doc, 'symbol'),
                        'biotype': getattr(doc, 'biotype'),
                        'region_name': region_name,
                        'candidate_gene': candidate_gene
                    })
            # regions are dropped only for the literal string 'false'
            if show_regions == 'false':
                regions = []
            regions.extend(gene_objs)
            regions.extend(marker_objs)
            return regions
        except (TypeError, ValueError, IndexError, ConnectionError) as e:
            print(e)
            raise Http404
コード例 #47
0
ファイル: views.py プロジェクト: D-I-L/pydgin
    def get_disease(cls, request, disease, context):
        ''' Fill the context for the disease page of one or more diseases.

        disease is a comma separated string of disease codes (matched
        lower-cased against the "code" field). For every disease found its
        regions (with candidate genes and the diseases sharing the region's
        markers) and its studies (with date, journal and author of the
        principal paper) are attached to the disease document.

        Sets context['features'] (the disease documents) and context['title']
        (their names, comma separated) and returns the context.
        Raises Http404 when no disease is given, none is found, or 9 or more
        are found.
        '''
        # check for None first: calling .lower() on None raised AttributeError
        # before this guard could ever run
        if disease is None:
            messages.error(request, 'No disease given.')
            raise Http404()
        disease = disease.lower()
        # NOTE(review): the split result is wrapped in another list, so a nested
        # list is handed to Query.terms - confirm this is what it expects
        query = ElasticQuery(Query.terms("code", [disease.split(',')]))
        elastic = Search(query, idx=ElasticSettings.idx('DISEASE', 'DISEASE'), size=5)
        res = elastic.search()
        if res.hits_total == 0:
            messages.error(request, 'Disease(s) '+disease+' not found.')
        elif res.hits_total < 9:
            disease_docs = res.docs
            names = ', '.join([getattr(doc, 'name') for doc in disease_docs])

            # the IC_STATS mapping carries the disease of each statistics type in _meta
            meta_response = Search.elastic_request(ElasticSettings.url(), ElasticSettings.idx("IC_STATS") + '/_mapping',
                                                   is_post=False)
            elastic_meta = json.loads(meta_response.content.decode("utf-8"))
            for dis in disease_docs:
                dis_code = getattr(dis, 'code').upper()
                docs = DiseaseLocusDocument.get_disease_loci_docs(dis_code)
                regions = []
                ens_all_cand_genes = []
                all_markers = []
                for r in docs:
                    region = r.get_disease_region()
                    if region is not None:
                        regions.append(region)
                        ens_all_cand_genes.extend(region['ens_cand_genes'])
                        all_markers.extend(region['markers'])

                # get ensembl to gene symbol mapping for all candidate genes
                all_cand_genes = gene.utils.get_gene_docs_by_ensembl_id(ens_all_cand_genes)
                for region in regions:
                    region['cand_genes'] = {cg: all_cand_genes[cg] for cg in region.pop("ens_cand_genes", None)}
                setattr(dis, 'regions', regions)

                # look for pleiotropy by looking for diseases for the markers in IC_STATS and other study hits
                stats_query = ElasticQuery.filtered(Query.terms("marker", all_markers),
                                                    Filter(RangeQuery("p_value", lte=5E-08)), sources=['marker'])
                stats_docs = Search(stats_query, idx=ElasticSettings.idx("IC_STATS"),
                                    size=len(all_markers)).search().docs

                other_hits_query = ElasticQuery(
                        BoolQuery(must_arr=[RangeQuery("tier", lte=2), Query.terms("marker", all_markers)]),
                        sources=['marker', 'disease'])
                other_hits = Search(other_hits_query, idx=ElasticSettings.idx('REGION', 'STUDY_HITS'),
                                    size=5000).search().docs

                for region in regions:
                    # start with the disease itself, then add every other disease
                    # that has a hit on one of the region's markers
                    diseases = [dis_code]
                    for doc in stats_docs:
                        if getattr(doc, 'marker') in region['markers']:
                            meta_info = elastic_meta[doc.index()]['mappings'][doc.type()]['_meta']
                            if meta_info['disease'] not in diseases:
                                diseases.append(meta_info['disease'])

                    for doc in other_hits:
                        if getattr(doc, 'marker') in region['markers']:
                            if doc.disease is not None and doc.disease not in diseases:
                                diseases.append(doc.disease)
                    region['diseases'] = diseases

                studies = StudyDocument.get_studies(disease_code=dis_code)
                for doc in studies:
                    # shorten the study id by dropping its 'GDXHsS00' part
                    setattr(doc, 'study_id', getattr(doc, 'study_id').replace('GDXHsS00', ''))
                    pmid = getattr(doc, 'principal_paper')
                    pubs = PublicationDocument.get_publications(pmid, sources=['date', 'authors.name', 'journal'])
                    if len(pubs) > 0:
                        authors = getattr(pubs[0], 'authors')
                        setattr(doc, 'date', getattr(pubs[0], 'date'))
                        setattr(doc, 'journal', getattr(pubs[0], 'journal'))
                        # last whitespace separated token of the first author's name
                        setattr(doc, 'author', authors[0]['name'].rsplit(None, 1)[-1] if authors else "")
                setattr(dis, 'studies',  studies)

            context['features'] = disease_docs
            context['title'] = names
            return context
        raise Http404()
コード例 #48
0
ファイル: settings_idx.py プロジェクト: tottlefields/pydgin
 def tearDownIdx(cls, idx_name_arr):
     ''' Delete the indices identified by their settings key names (e.g. ['GENE', 'DISEASE', ...]). '''
     for key in idx_name_arr:
         base_url = ElasticSettings.url()
         index_name = PydginTestSettings.IDX[key]['indexName']
         requests.delete(base_url + '/' + index_name)
コード例 #49
0
ファイル: views.py プロジェクト: D-I-L/pydgin
    def get_regions(cls, request, dis, context):
        ''' Fill the context with the regions of a disease.

        Each region carries its study hits, the genes in and around it
        (within 500000 of the region on either side), its candidate genes,
        marker statistics and additional study hits on its markers.

        Sets context['title'], ['regions'], ['disease_code'] and ['disease']
        and returns the context. Raises Http404 when the disease is unknown
        or has no regions.
        '''
        elastic_url = ElasticSettings.url()

        core, other = Disease.get_site_diseases(dis_list=dis.upper().split(','))
        if not (len(core) or len(other)):
            messages.error(request, 'Disease '+dis+' not found.')
            raise Http404()

        # a core disease takes precedence over the other diseases
        disease = other[0] if len(core) == 0 else core[0]
        context['title'] = getattr(disease, "name")+" Regions"

        docs = DiseaseLocusDocument.get_disease_loci_docs(dis)
        if len(docs) == 0:
            messages.error(request, 'No regions found for '+dis+'.')
            raise Http404()

        hit_ids = [hit for locus in docs for hit in getattr(locus, 'hits')]
        visible_hits = DiseaseLocusDocument.get_hits(hit_ids)
        stats_mapping_url = ElasticSettings.idx("IC_STATS") + '/_mapping'
        meta_response = Search.elastic_request(elastic_url, stats_mapping_url, is_post=False)

        regions = []
        ens_all_cand_genes = []
        all_markers = []
        for locus in docs:
            region = locus.get_disease_region(visible_hits)
            if region is None:
                continue
            ens_all_cand_genes.extend(region['ens_cand_genes'])
            all_markers.extend(region['markers'])
            region['hits'] = StudyHitDocument.process_hits(locus.hit_docs, region['all_diseases'])

            # genes of the extended region, split into in-region / upstream / downstream
            all_coding, all_non_coding = get_genes_for_region(getattr(locus, "seqid"),
                                                              region['rstart']-500000, region['rstop']+500000)
            region_coding, coding_up, coding_down = _region_up_down(
                all_coding, region['rstart'], region['rstop'])
            region_non_coding, non_coding_up, non_coding_down = _region_up_down(
                all_non_coding, region['rstart'], region['rstop'])
            region['genes'] = {
                'upstream': {'coding': coding_up, 'non_coding': non_coding_up},
                'region': {'coding': region_coding, 'non_coding': region_non_coding},
                'downstream': {'coding': coding_down, 'non_coding': non_coding_down},
            }
            regions.append(region)

        # look for pleiotropy by looking for diseases for the markers in IC_STATS and other study hits
        significant = Filter(RangeQuery("p_value", lte=5E-08))
        stats_query = ElasticQuery.filtered(Query.terms("marker", all_markers), significant)
        stats_search = Search(stats_query, idx=ElasticSettings.idx("IC_STATS"), size=len(all_markers))
        stats_docs = stats_search.search().docs

        # get ensembl to gene symbol mapping for all candidate genes
        all_cand_genes = gene.utils.get_gene_docs_by_ensembl_id(ens_all_cand_genes)
        for region in regions:
            cand_ids = region.pop("ens_cand_genes", None)
            region['cand_genes'] = {ens_id: all_cand_genes[ens_id] for ens_id in cand_ids}
            study_ids, marker_stats = _process_stats(stats_docs, region['markers'], meta_response)
            region['marker_stats'] = marker_stats

            # add diseases from IC/GWAS stats
            region['all_diseases'].extend([getattr(stat, 'disease') for stat in marker_stats])

            # study hits (tier <= 2) on the region's markers from studies not yet covered
            must = [RangeQuery("tier", lte=2), Query.terms("marker", region['markers'])]
            must_not = [Query.terms("dil_study_id", study_ids)]
            other_hits_query = ElasticQuery(BoolQuery(must_arr=must, must_not_arr=must_not))
            other_hits = Search(other_hits_query, idx=ElasticSettings.idx('REGION', 'STUDY_HITS'), size=100).search()
            region['extra_markers'] = StudyHitDocument.process_hits(other_hits.docs, region['all_diseases'])

        context['regions'] = regions
        context['disease_code'] = [dis]
        context['disease'] = getattr(disease, "name")
        return context
コード例 #50
0
    def get_elastic_settings_with_user_uploads(cls,
                                               elastic_dict=None,
                                               new_upload_file=None):
        ''' Get the elastic settings updated with the user uploaded idx_types.

        An idx_type of the CP_STATS_UD index is included only when a
        ContentType of the permission model app exists whose name ends with
        "<idx_type><PERMISSION_MODEL_TYPE_SUFFIX>". Its label is read from the
        idx_type's _meta document and falls back to 'UD-<idx_type>'.

        elastic_dict is the IDX settings dictionary to update; when None, the
        one returned by ElasticSettings.attrs().get('IDX') is used.
        new_upload_file, when given, is added as an idx_type regardless of
        mapping and content types, with the label 'UD-<new_upload_file>'.

        The dictionary is modified in place and returned. None is returned
        (and a warning logged) when the mapping request reports an error.
        '''
        idx_key = 'CP_STATS_UD'
        idx = ElasticSettings.idx(idx_key)

        # Fetch the index mapping (raw request rather than Search.get_mapping)
        # and parse the body once.
        elastic_url = ElasticSettings.url()
        url = idx + '/_mapping'
        response = Search.elastic_request(elastic_url, url, is_post=False)
        elastic_mapping = json.loads(response.content.decode("utf-8"))
        if "error" in elastic_mapping:
            logger.warning(elastic_mapping)
            return None

        # With aliases the mapping is keyed by the concrete index name, which
        # can differ from the configured one, so take the key from the response.
        idx = list(elastic_mapping.keys())[0]
        idx_types = list(elastic_mapping[idx]['mappings'].keys())

        if elastic_dict is None:
            elastic_dict = ElasticSettings.attrs().get('IDX')

        idx_type_dict = {}

        existing_ct = [
            ct.name for ct in ContentType.objects.filter(
                app_label=cls.PERMISSION_MODEL_APP_NAME)
        ]

        for idx_type in idx_types:

            idx_type_with_suffix = idx_type + cls.PERMISSION_MODEL_TYPE_SUFFIX

            # One lookup per idx_type: previously the _meta request was repeated
            # for every matching content type, with the same outcome.
            if not any(ct.endswith(idx_type_with_suffix) for ct in existing_ct):
                continue

            meta_url = idx + '/' + idx_type + '/_meta/_source'
            meta_response = Search.elastic_request(elastic_url,
                                                   meta_url,
                                                   is_post=False)

            # best effort: any problem reading the label selects the default,
            # but interpreter exits / Ctrl-C are no longer swallowed
            try:
                elastic_meta = json.loads(
                    meta_response.content.decode("utf-8"))
                label = elastic_meta['label']
            except Exception:
                label = "UD-" + idx_type

            idx_type_dict['UD-' + idx_type.upper()] = {
                'label': label,
                'type': idx_type
            }

        if new_upload_file is not None:
            idx_type = new_upload_file
            label = "UD-" + idx_type
            idx_type_dict['UD-' + idx_type.upper()] = {
                'label': label,
                'type': idx_type
            }

        elastic_dict[idx_key]['idx_type'] = idx_type_dict
        return elastic_dict
コード例 #51
0
            )
        elastic2 = Search(search_query=query2, idx=idx, idx_type='hits',
                          size=int(disease_bucket['doc_count']+1), qsort=Sort(buildSort))
        results = elastic2.search()
        minPos = 0
        maxPos = 0
        tier = 4
        weight = 0
        regionCount = 1
        regionName = ''
        species = ''
        doc_ids = []
        if len(results.docs) > 0:
            for doc in results.docs:
                # print(doc)
                os.system("curl -XPOST '"+ElasticSettings.url()+"/"+idx+"/hits/" + doc.doc_id() +
                          "/_update?pretty' -d '{\"doc\": {\"disease_locus\": \"TBC\"}}' > /dev/null 2>&1")
                build_info = None
                for b in getattr(doc, 'build_info'):
                    if b['build'] == build:
                        build_info = b
                if build_info is None:
                    print("ERROR - no build information found for b"+str(build))
                    continue

                # print(getattr(doc, "disease")+"\t"+getattr(doc, "marker")+"\t" + getattr(doc, "chr_band") + "\t" +
                #      build_info['seqid'] + "\t" + str(build_info['start']) + "\t" + str(build_info['end']))
                if minPos == 0 and maxPos == 0:
                    minPos = build_info['start']
                    maxPos = build_info['end']
コード例 #52
0
def tearDownModule():
    ''' Delete the MARKER, GFF_GENERIC and JSON_NESTED test indices. '''
    for idx_key in ('MARKER', 'GFF_GENERIC', 'JSON_NESTED'):
        # One DELETE per index, issued in the order listed above.
        index_url = ElasticSettings.url() + '/' + IDX[idx_key]['indexName']
        requests.delete(index_url)
コード例 #53
0
ファイル: criteria.py プロジェクト: D-I-L/django-criteria
    def process_criteria(cls, feature, section, config, sub_class, test=False):
        ''' Top level function that calls the right criteria implementation based on the subclass passed.

        Hits from the source index are handed to the inner function process_hits, which passes each hit to
        sub_class.tag_feature_to_disease and keeps the returned value in the module-level
        gl_result_container (reset to an empty dict at the start of every call). With test=False the hits
        are supplied by ScanAndScroll.scan_and_scroll; with test=True they are requested page by page via
        Search.elastic_request until the container holds at least one entry. At the end the container is
        handed to cls.map_and_load (per the original author's note this creates the mapping and loads the
        result into the elastic index - not visible from here).

        @type  feature: string
        @param feature: feature type, could be 'gene','region', 'marker' etc.,
        @type  section: string
        @param section: The section in the criteria.ini file; used as a key into config.
        @param config: The config object initialized from criteria.ini. When None it is loaded through
                       CriteriaManager from 'test_criteria.ini' (test=True) or 'criteria.ini' (test=False).
        @param sub_class: The inherited sub_class where the actual implementation is; its
                          tag_feature_to_disease is called for every hit.
        @type  test: bool
        @param test: When True, run the paged test-mode search instead of the scan and scroll.
        @return: None; results are passed to cls.map_and_load.
        '''
        global gl_result_container
        # Start every run from an empty container; process_hits below rebinds this module-level name.
        gl_result_container = {}
        test_mode = test
        if config is None:
            if test_mode:
                config = CriteriaManager().get_criteria_config(ini_file='test_criteria.ini')
            else:
                config = CriteriaManager().get_criteria_config(ini_file='criteria.ini')

        section_config = config[section]
        source_idx = section_config['source_idx']

        # 'source_idx' may hold several comma separated keys; each one is resolved through
        # ElasticSettings.idx and the results are joined with commas again.
        if ',' in source_idx:
            idxs = source_idx.split(',')
            idx_all = [ElasticSettings.idx(idx) for idx in idxs]
            source_idx = ','.join(idx_all)
        else:
            source_idx = ElasticSettings.idx(section_config['source_idx'])

        source_idx_type = None
        if 'source_idx_type' in section_config:
            source_idx_type = section_config['source_idx_type']

        if source_idx_type is not None:
            # NOTE(review): this replaces whatever was computed above, and passes the raw (possibly comma
            # separated) 'source_idx' value to ElasticSettings.idx - confirm that combination is supported.
            source_idx = ElasticSettings.idx(section_config['source_idx'], idx_type=section_config['source_idx_type'])
        else:
            # Empty string so the log line below can concatenate it.
            source_idx_type = ''

        logger.warning(source_idx + ' ' + source_idx_type)

        # Callback applied to each search response: feeds every hit to the sub_class and stores what it
        # returns back into gl_result_container.
        def process_hits(resp_json):
            global gl_result_container
            hits = resp_json['hits']['hits']
            # hit_counter is only incremented here, never reset in this method; presumably initialised at
            # module level - verify.
            global hit_counter
            for hit in hits:
                hit_counter = hit_counter + 1

                result_container = sub_class.tag_feature_to_disease(hit, section, config,
                                                                    result_container=gl_result_container)
                # The return value replaces the container, so a None return is carried into the next call.
                gl_result_container = result_container

                if test_mode:
                    # In test mode stop processing this response once more than 5 entries are collected.
                    if gl_result_container is not None and len(gl_result_container) > 5:
                        return

        query = cls.get_elastic_query(section, config)

        if test_mode:
            result_size = len(gl_result_container)
            from_ = 0
            size_ = 20
            # Keep requesting pages until the container has at least one entry.
            # NOTE(review): there is no other exit condition - if no hit ever produces an entry (or the
            # container becomes None) this loop does not terminate.
            while (result_size < 1):
                # NOTE(review): from_ is advanced before the request is built, so the first request already
                # uses from=20 and hits 0-19 are never fetched - confirm this is intended.
                from_ = from_ + size_
                url = ElasticSettings.url()
                if 'mhc' in section:
                    # No from/size paging for 'mhc' sections: the same search URL is used on every pass.
                    url_search = (source_idx + '/_search')
                else:
                    url_search = (source_idx + '/_search?from=' + str(from_) + '&size=' + str(size_))

                if query is None:
                    # No criteria specific query: send a match_all body, then reset query to None so the
                    # next iteration takes this branch again.
                    query = {
                              "query": {"match_all": {}},
                              "size":  20
                              }
                    response = Search.elastic_request(url, url_search, data=json.dumps(query))
                    query = None
                else:
                    # print(query)
                    # The request body is the query object's .query attribute, serialised to JSON.
                    response = Search.elastic_request(url, url_search, data=json.dumps(query.query))

                process_hits(response.json())
                if gl_result_container is not None:
                    result_size = len(gl_result_container)
        else:
            ScanAndScroll.scan_and_scroll(source_idx, call_fun=process_hits, query=query)

        cls.map_and_load(feature, section, config, gl_result_container)
コード例 #54
0
def tearDownModule():
    ''' Delete the MARKER test index. '''
    elastic_url = ElasticSettings.url()
    marker_index = IDX['MARKER']['indexName']
    requests.delete(elastic_url + '/' + marker_index)
コード例 #55
0
ファイル: views.py プロジェクト: D-I-L/django-chicp
def chicpeaDeleteUD(request, url):
    ''' Delete one user-data index type from the CP_STATS_UD index.

    The type name is read from the POST parameter "userDataIdx" and a DELETE is issued with curl against
    <elastic url>/<index>/<type>. The name comes straight from the client, so it is validated and curl is
    invoked with an argument list (no shell) to rule out command injection.

    @param request: request object whose POST data carries "userDataIdx".
    @param url: not used by this function.
    @return: HttpResponse with curl's output as application/json, or a 400 JSON error response when the
             type name is missing or contains characters that would change the target of the DELETE.
    @raise subprocess.CalledProcessError: if curl exits with a non-zero status.
    '''
    queryDict = request.POST
    idx_type = queryDict.get("userDataIdx")

    # Refuse names that would make the DELETE hit something other than a single type:
    #  - empty/missing, '.' or '..' -> the URL would resolve to the whole index (or above it)
    #  - leading '_'                -> Elasticsearch endpoints such as _all / _mapping
    #  - / \ ? #                    -> alter the URL path or add a query string / fragment
    #  - * ,                        -> Elasticsearch wildcards / multi-type lists
    #  - [ ] { }                    -> curl URL globbing
    if (not idx_type or idx_type in ('.', '..') or idx_type.startswith('_') or
            any(ch in '/\\?#*,[]{}' for ch in idx_type)):
        return HttpResponse('{"error": "invalid userDataIdx"}',
                            content_type="application/json", status=400)

    idx = ElasticSettings.idx('CP_STATS_UD')
    target = ElasticSettings.url() + "/" + idx + "/" + idx_type
    # Argument list instead of a shell string: idx_type is never interpreted by a shell.
    output = subprocess.check_output(["curl", "-XDELETE", target])
    return HttpResponse(output, content_type="application/json")