Example #1
    def test_region_attributes(self):
        ''' test region attributes '''
        idx = ElasticSettings.idx(RegionDataTest.IDX_KEY, 'REGION')
        (idx, idx_type) = idx.split('/')
        docs = ElasticUtils.get_rdm_docs(idx, idx_type, qbool=Query.match_all(), sources=[], size=1)
        newRegion = utils.Region.pad_region_doc(docs[0])

        if len(getattr(newRegion, "genes")) > 0:
            query = ElasticQuery(Query.ids(getattr(newRegion, "genes")))
            resultObject = Search(query, idx=ElasticSettings.idx('GENE', 'GENE'),
                                  size=len(getattr(newRegion, "genes"))).search()
            self.assertEqual(len(getattr(newRegion, "genes")), resultObject.hits_total,
                             "All genes on region found in GENE index")

        if len(getattr(newRegion, "studies")) > 0:
            query = ElasticQuery(Query.ids(getattr(newRegion, "studies")))
            resultObject = Search(query, idx=ElasticSettings.idx('STUDY', 'STUDY'),
                                  size=len(getattr(newRegion, "studies"))).search()
            self.assertEqual(len(getattr(newRegion, "studies")), resultObject.hits_total,
                             "All study ids for region found in STUDY index")

        if len(getattr(newRegion, "pmids")) > 0:
            query = ElasticQuery(Query.ids(getattr(newRegion, "pmids")))
            resultObject = Search(query, idx=ElasticSettings.idx('PUBLICATION', 'PUBLICATION'),
                                  size=len(getattr(newRegion, "pmids"))).search()
            self.assertEqual(len(getattr(newRegion, "pmids")), resultObject.hits_total,
                             "All PMIDs for region found in PUBLICATION index")
    def test_create_idx_type_model_permissions(self):
        elastic_settings_before = ElasticSettings.attrs().get('IDX')
        user_types_before = elastic_settings_before['CP_STATS_UD']['idx_type']
        self.assertEqual({}, user_types_before, 'CP_STATS_UD idx_type is empty')

        idx = "cp:hg19_userdata_bed"
        new_upload_file = "tmp_newly_uploaded_file"
        idx_type = new_upload_file

        os.system("curl -XPUT "+ElasticSettings.url()+"/"+idx+"/_mapping/"+idx_type+" -d '{\"" +
                  idx_type + "\":{ \"properties\" : {\"message\" : {\"type\" : \"string\", \"store\" : true } } }}'")

        os.system("curl -XPUT "+ElasticSettings.url()+"/"+idx+"/"+idx_type+"/_meta -d '{\"label\": \"" +
                  new_upload_file + "\", \"owner\": \""+self.user.username+"\", \"uploaded\": \"" +
                  str(timezone.now())+"\"}'")

        elastic_settings_after = elastic_factory.create_idx_type_model_permissions(self.user,
                                                                                   indexKey='CP_STATS_UD',
                                                                                   indexTypeKey='UD-'+new_upload_file.upper(),  # @IgnorePep8
                                                                                   new_upload_file="tmp_newly_uploaded_file")  # @IgnorePep8

        # elastic_settings_after = elastic_factory.get_elastic_settings_with_user_uploads(elastic_settings_before)
        user_types_after = elastic_settings_after['CP_STATS_UD']['idx_type']
        self.assertTrue(len(user_types_after) > 0, "Has user idx_types ")
        self.assertTrue('UD-TMP_NEWLY_UPLOADED_FILE' in user_types_after)
        self.assertEqual(user_types_after['UD-TMP_NEWLY_UPLOADED_FILE']['type'], 'tmp_newly_uploaded_file')
    def tearDown(self):
        ''' Remove loaded test indices and test repository. '''

        key = 'PRIVATE_REGIONS_GFF'
        if key in IDX.keys():
            print(ElasticSettings.url() + '/' + IDX[key]['indexName'])
            requests.delete(ElasticSettings.url() + '/' + IDX[key]['indexName'])
Example #4
def _find_snp_position(snp_track, name):
    if snp_track is None:
        query = ElasticQuery.query_match("id", name)
        elastic = Search(query, idx=ElasticSettings.idx('MARKER'))
        snpResult = elastic.get_json_response()
        if len(snpResult['hits']['hits']) > 0:
            snp = snpResult['hits']['hits'][0]['_source']
            chrom = snp['seqid'].replace('chr', "")
            position = snp['start']
            return {'chr': chrom, 'start': (position-1), 'end': position, 'name': name}
    else:
        mo = re.match(r"(.*)-(.*)", snp_track)
        (group, track) = mo.group(1, 2)
        try:
            snp_track_idx = ElasticSettings.idx('CP_STATS_'+group.upper(), snp_track.upper())
        except SettingsError:
            snp_track_idx = ElasticSettings.idx('CP_STATS_'+group.upper())+"/"+track

        query = ElasticQuery.query_match("name", name)
        elastic = Search(query, idx=snp_track_idx)
        snpResult = elastic.get_json_response()
        if len(snpResult['hits']['hits']) > 0:
            snp = snpResult['hits']['hits'][0]['_source']
            chrom = snp['seqid'].replace('chr', "")
            position = snp['start']
            return {'chr': chrom, 'start': (position-1), 'end': position, 'name': name}

    return {'error': 'Marker '+name+' does not exist in the currently selected dataset'}
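A minimal usage sketch for _find_snp_position based only on the two branches above; 'gwas-barrett' is a hypothetical group-track name used purely for illustration.

# Look the marker up in the default MARKER index (snp_track is None).
pos = _find_snp_position(None, "rs2476601")
if 'error' not in pos:
    print(pos['chr'], pos['start'], pos['end'])

# Look it up in a per-track statistics index; the "<group>-<track>" form is split
# by the regular expression in the else branch (hypothetical track name).
pos = _find_snp_position("gwas-barrett", "rs2476601")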
Example #5
def suggester(request):
    ''' Provide auto suggestions. Ajax request returning a JSON response. '''
    query_dict = request.GET
    idx_dict = ElasticSettings.search_props(query_dict.get("idx"), request.user)
    suggester = ','.join(ElasticSettings.idx(k) for k in idx_dict['suggester_keys'])
    resp = Suggest.suggest(query_dict.get("term"), suggester, name='suggest', size=8)['suggest']
    return JsonResponse({"data": [opts['text'] for opts in resp[0]['options']]})
Example #7
    def test_show(self):
        self.assertTrue(
            Snapshot.show(ElasticSettings.getattr('REPOSITORY'), '_all',
                          False))
        self.assertTrue(
            Snapshot.show(ElasticSettings.getattr('REPOSITORY'), '_all', True))
        self.assertFalse(Snapshot.show('xyzabc', '_all', False))
    def filter_queryset(self, request, queryset, view):
        ''' Override this method to request just the documents required from elastic. '''
        q_size = view.paginator.get_limit(request)
        q_from = view.paginator.get_offset(request)

        filterable = getattr(view, 'filter_fields', [])
        print(filterable)
        print(request)
        filters = dict([(k, v) for k, v in request.GET.items() if k in filterable])
        criteria_idx = self._get_index(filters.get('feature_type', 'GENE_CRITERIA'))

        idx = criteria_idx
        if type(criteria_idx) == list:
            idx = ','.join(ElasticSettings.idx(name) for name in criteria_idx)
        else:
            idx = ElasticSettings.idx(criteria_idx)

        q = ElasticQuery(Query.match_all())
        s = Search(search_query=q, idx=idx, size=q_size, search_from=q_from)
        json_results = s.get_json_response()
        results = []
        for result in json_results['hits']['hits']:
            new_obj = ElasticObject(initial=result['_source'])
            new_obj.uuid = result['_id']
            new_obj.criteria_type = result['_type']
            results.append(new_obj)
        view.es_count = json_results['hits']['total']
        return results
Example #9
def get_criteria(docs, doc_type, doc_attr, idx_type_key):
    """ Return a dictionary of gene name:criteria. """
    genes = [getattr(doc, doc_attr).lower() for doc in docs if doc.type() == doc_type]
    query = Query.terms("Name", genes)
    sources = {"exclude": ["Primary id", "Object class", "Total score"]}
    if ElasticSettings.idx("CRITERIA", idx_type_key) is None:
        return {}
    res = Search(
        ElasticQuery(query, sources=sources), idx=ElasticSettings.idx("CRITERIA", idx_type_key), size=len(genes)
    ).search()
    criteria = {}

    for doc in res.docs:
        od = collections.OrderedDict(sorted(doc.__dict__.items(), key=lambda t: t[0]))
        gene_name = getattr(doc, "Name")
        criteria[gene_name] = [
            {attr.replace("_Hs", ""): value.split(":")}
            for attr, value in od.items()
            if attr != "Name" and attr != "_meta" and attr != "OD_Hs" and not value.startswith("0")
        ]
        if hasattr(doc, "OD_Hs") and not getattr(doc, "OD_Hs").startswith("0"):
            if gene_name not in criteria:
                criteria[gene_name] = []
            criteria[gene_name].append({"OD": getattr(doc, "OD_Hs").split(":")})

    return criteria
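A short, hedged usage sketch for get_criteria; the call shape mirrors the marker_page view later in this listing, and the returned mapping is feature name to a list of single-entry criteria dictionaries.

# `marker_doc` is assumed to be a document whose type() is 'marker' and which has
# an 'id' attribute, as in the marker_page example further down.
criteria = get_criteria([marker_doc], 'marker', 'id', 'MARKER')
for name, crit_list in criteria.items():
    print(name, crit_list)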
Example #11
def _get_marker_build(idx_name):
    ''' Get the marker build as defined in the settings. '''
    try:
        idx_key = ElasticSettings.get_idx_key_by_name(idx_name)
        return ElasticSettings.get_label(idx_key, label='build')
    except (KeyError, SettingsError, TypeError):
        logger.error('Marker build not identified from ELASTIC settings.')
        return ''
    def test_top_hits_sub_agg(self):
        sub_agg = Agg('idx_top_hits', 'top_hits', {"size": 1})
        aggs = Aggs([Agg("idxs", "terms", {"field": "_index"}, sub_agg=sub_agg),
                     Agg("categories", "terms", {"field": "_type", "size": 0})])
        search = Search(aggs=aggs, idx=ElasticSettings.idx('DEFAULT'))
        buckets = search.search().aggs['idxs'].get_docs_in_buckets()
        self.assertEqual(buckets[ElasticSettings.idx('DEFAULT')]['doc_count'], 3)
        self.assertEqual(len(buckets[ElasticSettings.idx('DEFAULT')]['docs']), 1)
Example #13
    def test_sort_query(self):
        ''' Test sorting for a query. '''
        query = ElasticQuery(Query.match_all())
        elastic = Search(query, idx=ElasticSettings.idx('DEFAULT'), qsort=Sort('start:asc,_score'))
        self._check_sort_order(elastic.search().docs)
        qsort = Sort({"sort": [{"start": {"order": "asc", "mode": "avg"}}]})
        elastic = Search(query, idx=ElasticSettings.idx('DEFAULT'), qsort=qsort)
        self._check_sort_order(elastic.search().docs)
        self.assertRaises(QueryError, Sort, 1)
    def test_term_query(self):
        ''' Test building and running a match query. '''
        query = ElasticQuery(Query.term("id", "rs2476601"))
        elastic = Search(query, idx=ElasticSettings.idx('DEFAULT'))
        self.assertTrue(len(elastic.search().docs) == 1, "Elastic string query retrieved marker (rs2476601)")

        query = ElasticQuery(Query.term("seqid", "1", boost=3.0))
        elastic = Search(query, idx=ElasticSettings.idx('DEFAULT'))
        self.assertTrue(len(elastic.search().docs) > 1, "Elastic string query retrieved markers on chr1")
Example #16
    def __init__(self, search_query=None, aggs=None, search_from=0, size=20,
                 search_type=None, idx=ElasticSettings.idx('DEFAULT'), idx_type='',
                 qsort=None, elastic_url=None):
        ''' Set up parameters to use in the search. L{ElasticQuery} is used to
        define a search query.
        @type  search_query: L{ElasticQuery}
        @keyword search_query: The elastic query to search (default: None).
        @type  aggs: L{Aggs}
        @keyword aggs: Aggregations used in the search.
        @type  search_from: integer
        @keyword search_from: Offset used in paginations (default: 0).
        @type  size: integer
        @keyword size: maximum number of hits to return (default: 20).
        @type  search_type: string
        @keyword search_type: Set search_type=count for aggregations.
        @type  idx: string
        @keyword idx: index to search (default: default index defined in settings).
        @type  idx_type: string
        @keyword idx_type: index type (default: '').
        @type  qsort: Sort
        @keyword qsort: defines sorting for the query.
        @type  elastic_url: string
        @keyword elastic_url: Elastic URL (default: default cluster URL).
        '''
        if search_query is not None:
            if not isinstance(search_query, ElasticQuery):
                raise QueryError("not an ElasticQuery")
            self.query = search_query.query

        if aggs is not None:
            if hasattr(self, 'query'):
                self.query.update(aggs.aggs)
            else:
                self.query = aggs.aggs

        if qsort is not None:
            if not isinstance(qsort, Sort):
                raise QueryError("not a Sort")
            if hasattr(self, 'query'):
                self.query.update(qsort.qsort)
            else:
                logger.error("no query to sort")

        if elastic_url is None:
            elastic_url = ElasticSettings.url()

        self.size = size
        self.search_from = search_from
        self.search_type = search_type
        self.idx = idx
        self.idx_type = idx_type
        self.elastic_url = elastic_url
        if self.search_type is None:
            self.url = (self.idx + '/' + self.idx_type +
                        '/_search?size=' + str(self.size) + '&from='+str(self.search_from))
        else:
            self.url = (self.idx + '/' + self.idx_type + '/_search?search_type='+search_type)
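A hedged sketch of constructing a Search with the keywords documented above; the query, index key and sort are taken from other examples in this listing rather than from the constructor itself.

# Paginated, sorted search against the default index.
query = ElasticQuery(Query.match_all())
search = Search(search_query=query, idx=ElasticSettings.idx('DEFAULT'),
                qsort=Sort('start:asc,_score'), search_from=0, size=20)
docs = search.search().docs

# Aggregation-only request built with search_type='count'.
aggs = Aggs(Agg("categories", "terms", {"field": "_type"}))
agg_search = Search(aggs=aggs, idx=ElasticSettings.idx('DEFAULT'), search_type='count')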
Example #17
    def test_get_label(self):
        ''' Test method for getting the index or type label in the settings. '''
        self.assertRaises(SettingsError, ElasticSettings.get_label, 'ABC')
        self.assertTrue(
            isinstance(
                ElasticSettings.get_label('MARKER',
                                          idx_type='MARKER',
                                          label='description'), str))
        self.assertTrue(isinstance(ElasticSettings.get_label('MARKER'), str))
    def test_scan_and_scroll(self):
        ''' Test scan and scroll interface. '''
        def check_hits(resp_json):
            self.assertTrue('hits' in resp_json, 'scan and scroll hits')
            self.assertGreaterEqual(len(resp_json['hits']['hits']), 1)

        ScanAndScroll.scan_and_scroll(ElasticSettings.idx('DEFAULT'), call_fun=check_hits)
        ScanAndScroll.scan_and_scroll(ElasticSettings.idx('DEFAULT'), call_fun=check_hits,
                                      query=ElasticQuery.query_string("rs2476601", fields=["id"]))
    def get_elastic_settings_with_user_uploads(cls, elastic_dict=None, new_upload_file=None):
        '''Get the updated elastic settings with user uploaded idx_types'''

        idx_key = 'CP_STATS_UD'
        idx = ElasticSettings.idx(idx_key)

        ''' Check if an index type exists in elastic and later check there is a contenttype/model for the given elastic index type. '''  # @IgnorePep8
        elastic_url = ElasticSettings.url()
        url = idx + '/_mapping'
        response = Search.elastic_request(elastic_url, url, is_post=False)
        # Note: Search.get_mapping is not used here as it is an instance method rather than a class method.
        #logger.debug(response.json())
        if "error" in response.json():
            logger.warn(response.json())
            return None

        # get idx_types from _mapping
        elastic_mapping = json.loads(response.content.decode("utf-8"))
        # If index aliasing is used, the key returned in the mapping can differ from the
        # configured index name; this is effectively hardcoded here and should be fixed
        # to handle deployments where aliases are used.
        idx = list(elastic_mapping.keys())[0]
        idx_types = list(elastic_mapping[idx]['mappings'].keys())

        if elastic_dict is None:
            elastic_dict = ElasticSettings.attrs().get('IDX')

        idx_type_dict = {}

        existing_ct = [ct.name for ct in ContentType.objects.filter(app_label=cls.PERMISSION_MODEL_APP_NAME)]

        for idx_type in idx_types:

            idx_type_with_suffix = idx_type + cls.PERMISSION_MODEL_TYPE_SUFFIX

            for ct in existing_ct:
                if ct.endswith(idx_type_with_suffix):

                    meta_url = idx + '/' + idx_type + '/_meta/_source'
                    meta_response = Search.elastic_request(elastic_url, meta_url, is_post=False)

                    try:
                        elastic_meta = json.loads(meta_response.content.decode("utf-8"))
                        label = elastic_meta['label']
                    except:
                        label = "UD-" + idx_type

                    idx_type_dict['UD-' + idx_type.upper()] = {'label': label, 'type': idx_type}

        if new_upload_file is not None:
            idx_type = new_upload_file
            label = "UD-" + idx_type
            idx_type_dict['UD-' + idx_type.upper()] = {'label': label, 'type': idx_type}

        elastic_dict['CP_STATS_UD']['idx_type'] = idx_type_dict
        return elastic_dict
def tearDownModule():
    if os.path.exists(TEST_DATA_DIR + "/STAGE"):
        shutil.rmtree(TEST_DATA_DIR + "/STAGE")
    # remove index created
    INI_CONFIG = IniParser().read_ini(MY_INI_FILE)
    requests.delete(ElasticSettings.url() + "/" + INI_CONFIG["GENE_HISTORY"]["index"])
    requests.delete(ElasticSettings.url() + "/" + INI_CONFIG["DBSNP"]["index"])
    os.remove(MY_INI_FILE)
    ens_dir = os.path.join(TEST_DATA_DIR, "DOWNLOAD", "ENSMART_GENE")
    if os.path.exists(ens_dir):
        shutil.rmtree(ens_dir)
    def test_query_ids(self):
        ''' Test by query ids. '''
        query = ElasticQuery(Query.ids(['1', '2']))
        elastic = Search(query, idx=ElasticSettings.idx('DEFAULT'), size=5)
        docs = elastic.search().docs
        self.assertTrue(len(docs) == 2, "Elastic string query retrieved marker (rs*)")
        idx_type = docs[0].type()
        query = ElasticQuery(Query.ids('2', types=idx_type))
        elastic = Search(query, idx=ElasticSettings.idx('DEFAULT'), size=5)
        docs = elastic.search().docs
        self.assertTrue(len(docs) == 1, "Elastic string query retrieved marker (rs*)")
Example #22
    def factory(hit):
        ''' Factory method for creating specific document object based on
        index type of the hit.
        @type  hit: dict
        @param hit: Elasticsearch hit.
        '''
        (idx, idx_type) = ElasticSettings.get_idx_key_by_name(hit['_index'], idx_type_name=hit['_type'])
        if idx is None or idx_type is None:
            return PydginDocument(hit)

        doc_class = ElasticSettings.get_label(idx, idx_type, label='class')
        return doc_class(hit) if doc_class is not None else PydginDocument(hit)
Example #23
    def add_arguments(self, parser):
        parser.add_argument('--snapshot',
                            dest='snapshot',
                            default='_all',
                            help='Snapshot name')
        parser.add_argument('--repo',
                            dest='repo',
                            default=ElasticSettings.getattr('REPOSITORY'),
                            metavar=ElasticSettings.getattr('REPOSITORY'),
                            help='Repository name')
        parser.add_argument('--all',
                            dest='all',
                            action='store_true',
                            help='List all repositories')
Example #24
    def factory(hit):
        ''' Factory method for creating types of documents based on
        their elasticsearch index type.
        @type  hit: dict
        @param hit: elasticsearch hit.
        '''
        (idx, idx_type) = ElasticSettings.get_idx_key_by_name(hit['_index'], idx_type_name=hit['_type'])
        if idx is None or idx_type is None:
            return PydginDocument(hit)

        doc_class_str = ElasticSettings.get_label(idx, idx_type, label='class')
        doc_class = import_string(doc_class_str) if doc_class_str is not None else None

        return doc_class(hit) if doc_class is not None else PydginDocument(hit)
    def test_term(self):
        ''' Terms Aggregation '''
        agg_name = "test"
        agg = Agg(agg_name, "terms", {"field": "seqid", "size": 0})
        aggs = Aggs(agg)
        search = Search(aggs=aggs, idx=ElasticSettings.idx('DEFAULT'))
        r_aggs = search.search().aggs
        self.assertTrue(agg_name in r_aggs, "returned test aggregations")

        ''' Ids Query with Terms Aggregation'''
        query = ElasticQuery(Query.ids(['1', '2']))
        search = Search(search_query=query, aggs=aggs, idx=ElasticSettings.idx('DEFAULT'), size=5)
        r_aggs = search.search().aggs
        self.assertTrue(len(r_aggs[agg_name].get_buckets()) > 0, "returned test aggregation buckets")
        self.assertTrue(getattr(r_aggs[agg_name], 'buckets')[0]['doc_count'] >= 0, "bucket document count")
    def test_mapping(self):
        ''' Test retrieving the mapping for an index. '''
        elastic = Search(idx=ElasticSettings.idx('DEFAULT'))
        mapping = elastic.get_mapping()
        self.assertTrue(ElasticSettings.idx('DEFAULT') in mapping, "Database name in mapping result")
        if ElasticSettings.idx('DEFAULT') in mapping:
            self.assertTrue("mappings" in mapping[ElasticSettings.idx('DEFAULT')], "Mapping result found")

        # check using the index type
        mapping = elastic.get_mapping('marker')
        self.assertTrue(ElasticSettings.idx('DEFAULT') in mapping, "Database name in mapping result")

        # err check
        mapping = elastic.get_mapping('marker/xx')
        self.assertTrue('error' in mapping, "Error returned for unknown mapping type")
Example #27
    def get_criteria_index_types(cls, idx_key):

        idx = ElasticSettings.idx(idx_key)
        elastic_url = ElasticSettings.url()
        url = idx + '/_mappings'
        response = Search.elastic_request(elastic_url, url, is_post=False)

        if "error" in response.json():
            logger.warn(response.json())
            return None

        # get idx_types from _mapping
        elastic_mapping = json.loads(response.content.decode("utf-8"))
        idx_types = list(elastic_mapping[idx]['mappings'].keys())
        return idx_types
Example #28
    def setUp(self):
        # Every test needs access to the request factory.
        self.factory = RequestFactory()
        self.group, created = Group.objects.get_or_create(name='READ')  # @UnusedVariable
        self.user = User.objects.create_user(username='******', email='*****@*****.**', password='******')
        self.user.groups.add(self.group)
        (idx_keys_auth, idx_type_keys_auth) = get_authenticated_idx_and_idx_types(  # @UnusedVariable
            user=self.user, idx_keys=None, idx_type_keys=None)
        for target in getattr(chicp_settings, 'CP_TARGET'):
            if 'CP_TARGET_'+target not in idx_keys_auth:
                continue
            elasticJSON = Search(idx=ElasticSettings.idx('CP_TARGET_'+target)).get_mapping(mapping_type="gene_target")
            tissueList = list(elasticJSON[ElasticSettings.idx('CP_TARGET_'+target)]
                              ['mappings']['gene_target']['_meta']['tissue_type'].keys())
            utils.tissues['CP_TARGET_'+target] = tissueList
Example #29
def marker_page(request):
    ''' Renders a marker page. '''
    query_dict = request.GET
    marker = query_dict.get("m")
    if marker is None:
        messages.error(request, 'No marker name given.')
        raise Http404()

    fields = ['id', 'rscurrent'] if marker.startswith("rs") else ['name']
    sub_agg = Agg('top_hits', 'top_hits', {"size": 15})
    aggs = Aggs(Agg("types", "terms", {"field": "_type"}, sub_agg=sub_agg))
    query = ElasticQuery(Query.query_string(marker, fields=fields))
    elastic = Search(search_query=query, idx=ElasticSettings.idx('MARKER'), aggs=aggs, size=0)
    res = elastic.search()
    if res.hits_total >= 1:
        types = getattr(res.aggs['types'], 'buckets')
        marker_doc = None
        ic_docs = []
        history_docs = []
        for doc_type in types:
            hits = doc_type['top_hits']['hits']['hits']
            for hit in hits:
                doc = Document(hit)
                if 'marker' == doc_type['key']:
                    marker_doc = doc
                elif 'immunochip' == doc_type['key']:
                    ic_docs.append(doc)
                elif 'rs_merge' == doc_type['key']:
                    history_docs.append(doc)

        criteria = {}
        if marker_doc is not None:
            if ElasticSettings.idx('CRITERIA') is not None:
                criteria = views.get_criteria([marker_doc], 'marker', 'id', 'MARKER')
            marker_doc.marker_build = _get_marker_build(ElasticSettings.idx('MARKER'))

        context = {
            'marker': marker_doc,
            'old_dbsnp_docs': _get_old_dbsnps(marker),
            'ic': ic_docs,
            'history': history_docs,
            'criteria': criteria
        }
        return render(request, 'marker/marker.html', context,
                      content_type='text/html')
    elif res.hits_total == 0:
        messages.error(request, 'Marker '+marker+' not found.')
        raise Http404()
Example #30
    def scan_and_scroll(self, idx, call_fun=None, idx_type='', url=None,
                        time_to_keep_scoll=1, query=None):
        ''' Scan and scroll an index and optionally provide a function argument to
        process the hits. '''
        if url is None:
            url = ElasticSettings.url()

        url_search_scan = (idx + '/' + idx_type + '/_search?search_type=scan&scroll=' +
                           str(time_to_keep_scoll) + 'm')
        if query is None:
            query = {
                "query": {"match_all": {}},
                "size":  1000
            }
        else:
            if not isinstance(query, ElasticQuery):
                raise QueryError("not a Query")
            query = query.query

        response = Search.elastic_request(url, url_search_scan, data=json.dumps(query))
        _scroll_id = response.json()['_scroll_id']
        url_scan_scroll = '_search/scroll?scroll=' + str(time_to_keep_scoll) + 'm'

        count = 0
        while True:
            response = Search.elastic_request(url, url_scan_scroll, data=_scroll_id)
            _scroll_id = response.json()['_scroll_id']
            hits = response.json()['hits']['hits']
            nhits = len(hits)
            if nhits == 0:
                break
            count += nhits
            if call_fun is not None:
                call_fun(response.json())
        logger.debug("Scanned No. Docs ( "+idx+"/"+idx_type+" ) = "+str(count))
Example #31
    def get_hits_by_study_id(cls, study_id, sources=[]):
        ''' Get visible/authenticated hits. '''
        hits_query = ElasticQuery(BoolQuery(must_arr=Query.term('dil_study_id', study_id),
                                            b_filter=Filter(Query.missing_terms("field", "group_name"))),
                                  sources=sources)
        docs = Search(hits_query, idx=ElasticSettings.idx('REGION', 'STUDY_HITS'), size=1000).search().docs
        ens_ids = [gene for doc in docs if getattr(doc, 'genes') for gene in getattr(doc, 'genes')]
        gene_docs = utils.get_gene_docs_by_ensembl_id(ens_ids, ['symbol'])
        for doc in docs:
            if getattr(doc, 'genes'):
                genes = {}
                for ens_id in getattr(doc, 'genes'):
                    try:
                        genes[ens_id] = getattr(gene_docs[ens_id], 'symbol')
                    except KeyError:
                        genes = {ens_id: ens_id}
                setattr(doc, 'genes', genes)
            build_info = getattr(doc, 'build_info')
            for bi in build_info:
                if bi['build'] == settings.DEFAULT_BUILD:
                    setattr(doc, "loc", "chr" + bi['seqid'] + ":" +
                            str(locale.format("%d", bi['start'], grouping=True)) + "-" +
                            str(locale.format("%d", bi['end'], grouping=True)))
                    setattr(doc, "encoded_loc", "chr" + bi['seqid'] + "%3A" +
                            str(bi['start']) + ".." + str(bi['end']))
        return docs
Example #32
def _get_pub_docs_by_pmid(pmids, sources=None):
    """ Get the gene symbols for the corresponding array of ensembl IDs.
    A dictionary is returned with the key being the ensembl ID and the
    value the gene document. """
    query = ElasticQuery(Query.ids(pmids), sources=sources)
    elastic = Search(query, idx=ElasticSettings.idx("PUBLICATION"), size=len(pmids))
    return {doc.doc_id(): doc for doc in elastic.search().docs}
Example #33
    def filter_queryset(self, request, queryset, view):
        ''' Override this method to request just the documents required from Rserve. '''
        try:
            filterable = getattr(view, 'filter_fields', [])
            filters = dict([(k, v) for k, v in request.GET.items() if k in filterable])

            mid1 = filters.get('marker', 'rs2476601')
            dataset = filters.get('dataset', 'EUR').replace('-', '')
            query = ElasticQuery(BoolQuery(must_arr=[Query.term("id", mid1)]), sources=['seqid', 'start'])
            elastic = Search(search_query=query, idx=ElasticSettings.idx('MARKER', 'MARKER'), size=1)
            doc = elastic.search().docs[0]
            seqid = getattr(doc, 'seqid')

            rserve = getattr(settings, 'RSERVE')
            conn = pyRserve.connect(host=rserve.get('HOST'), port=rserve.get('PORT'))
            pop_str = conn.r.get_pop(dataset, seqid, mid1)

            pops = json.loads(str(pop_str))
            populations = []
            for pop in pops:
                pops[pop]['population'] = pop
                populations.append(pops[pop])
            conn.close()
            return [ElasticObject(initial={'populations': populations, 'marker': mid1})]
        except (TypeError, ValueError, IndexError, ConnectionError):
            return [ElasticObject(initial={'populations': None, 'marker': mid1})]
Example #34
    def get_diseases(self):
        ''' Overridden get diseases for feature. '''
        if super(RegionDocument, self).get_diseases():
            idx = ElasticSettings.idx('REGION_CRITERIA')
            diseases = [getattr(d, "code") for d in Criteria.get_disease_tags(getattr(self, "region_id"), idx=idx)]
            return diseases
        return []
    def test_gene_interactions(self):
        '''Fetch random genes from elastic and compare the same with the results fetched via ensembl restful query'''
        # elastic doc example:
        # "_source":{"interaction_source": "intact", "interactors": [
        # {"interactor": "ENSG00000206053", "pubmed": "16169070"},
        # {"interactor": "ENSG00000101474", "pubmed": "16169070"},
        # {"interactor": "ENSG00000065361", "pubmed": "16169070"},
        # {"interactor": "ENSG00000085465", "pubmed": "16169070"}]}

        idx_key = 'GENE'
        idx_type_key = 'INTERACTIONS'

        idx = ElasticSettings.idx(idx_key, idx_type_key)
        (idx, idx_type) = idx.split('/')

        # Test doc count
        doc_count = DataIntegrityUtils.get_docs_count(idx, idx_type)
        self.assertGreater(doc_count, 23000, 'Gene interaction doc count greater than 23000')

        # Get interaction doc - passing the interaction source and id . Also test with random id
        (child_doc_bioplex, parent_doc_bioplex) = self.get_interaction_doc("bioplex", "ENSG00000241186")
        self.check_bioplex_data(child_doc_bioplex, parent_doc_bioplex)

        (child_doc_bioplex, parent_doc_bioplex) = self.get_interaction_doc("bioplex")
        self.check_bioplex_data(child_doc_bioplex, parent_doc_bioplex)

        (child_doc_intact, parent_doc_intact) = self.get_interaction_doc("intact", parent_id="ENSG00000090776")
        self.check_intact_data(child_doc_intact, parent_doc_intact)

        (child_doc_intact, parent_doc_intact) = self.get_interaction_doc("intact")
        self.check_intact_data(child_doc_intact, parent_doc_intact)
Example #36
def chicpeaFileUpload(request, url):
    filesDict = request.FILES
    files = filesDict.getlist("files[]")
    print(files)
    snpTracks = list()
    idx = getattr(chicp_settings, 'CHICP_IDX').get('userdata').get('INDEX')

    for f in files:
        lines = f.readlines()
        line = lines[0].decode()
        if line.startswith("#"):
            line = lines[1].decode()

        parts = re.split(r"\t", line)
        if re.match(r"\s", line):
            parts = re.split(r"\s", line)

        if len(parts) != 5:
            logger.warn("WARNING: unexpected number of columns (" + str(len(parts)) + "): " + line)
            continue

        f.seek(0)
        bedFile = NamedTemporaryFile(delete=False)
        bedFile.write(f.read())
        bedFile.close()
        idx_type = os.path.basename(bedFile.name)
        snpTracks.append({"value": idx_type, "text":  f.name})
        os.system("curl -XDELETE '"+ElasticSettings.url()+"/"+idx+"/"+idx_type+"'")
        call_command("index_search", indexName=idx, indexType=idx_type, indexBED=bedFile.name)
        logger.debug("--indexName "+idx+" --indexType "+idx_type+" --indexBED "+bedFile.name)
        os.remove(bedFile.name)  # clean up the temporary BED file

    context = dict()
    context['userSNPTracks'] = snpTracks
    return HttpResponse(json.dumps(context), content_type="application/json")
Example #37
    def get_diseases(self):
        ''' Overridden get diseases for feature. '''
        if super(StudyDocument, self).get_diseases():
            diseases = [getattr(d, "code") for d in
                        Criteria.get_disease_tags(self.get_name(), idx=ElasticSettings.idx('STUDY_CRITERIA'))]
            return diseases
        return []
    def test_filter(self):
        ''' Filter Aggregation '''
        agg = [Agg('test_filter', 'filter', RangeQuery('start', gt='25000')),
               Agg('avg_start', 'avg', {"field": 'start'}),
               Agg('min_start', 'min', {"field": 'start'}),
               Agg('sum_start', 'sum', {"field": 'start'}),
               Agg('stats_start', 'stats', {"field": 'start'}),
               Agg('count_start', 'value_count', {"field": 'start'}),
               Agg('ext_stats_start', 'extended_stats', {"field": 'start'})]
        aggs = Aggs(agg)
        search = Search(aggs=aggs, idx=ElasticSettings.idx('DEFAULT'))

        r_aggs = search.search().aggs
        self.assertTrue('avg_start' in r_aggs, "returned avg aggregation")
        self.assertTrue('min_start' in r_aggs, "returned min aggregation")

        stats_keys = ["min", "max", "sum", "count", "avg"]
        self.assertTrue(all(hasattr(r_aggs['stats_start'], k)
                            for k in stats_keys),
                        "returned min aggregation")

        stats_keys.extend(["sum_of_squares", "variance", "std_deviation", "std_deviation_bounds"])
        self.assertTrue(all(hasattr(r_aggs['ext_stats_start'], k)
                            for k in stats_keys),
                        "returned min aggregation")
Example #39
    def get_gene_docs_by_ensembl_id(cls, ens_ids, sources=None):
        ''' Get the gene symbols for the corresponding array of ensembl IDs.
        A dictionary is returned with the key being the ensembl ID and the
        value the gene document. '''
        query = ElasticQuery(Query.ids(ens_ids), sources=sources)
        elastic = Search(query, idx=ElasticSettings.idx('GENE', idx_type='GENE'), size=len(ens_ids))
        return {doc.doc_id(): doc for doc in elastic.search().docs}
    def test_string_query(self):
        ''' Test building and running a string query. '''
        query = ElasticQuery.query_string("rs2476601", fields=["id"])
        elastic = Search(query, idx=ElasticSettings.idx('DEFAULT'))
        docs = elastic.search()
        self.assertTrue(len(docs.docs) == 1, "Elastic string query retrieved marker (rs2476601)")
        self.assertRaises(QueryError, ElasticQuery.query_string, "rs2476601", fieldssss=["id"])
    def test_missing_terms_filtered_query(self):
        ''' Test filtered query with a missing terms filter. '''
        terms_filter = TermsFilter.get_missing_terms_filter("field", "group_name")
        query = ElasticQuery.filtered(Query.match_all(), terms_filter)
        elastic = Search(query, idx=ElasticSettings.idx('DEFAULT'))
        docs = elastic.search().docs
        self.assertTrue(len(docs) == 3, "Elastic string query retrieved all public docs")
Example #42
    def _get_random_marker(self):
        ''' Get a random marker from the dbSNP elastic index. '''
        (idx, idx_type) = ElasticSettings.idx('MARKER', 'MARKER').split('/')
        seqid = random.randint(1, 10)
        qbool = BoolQuery(must_arr=[Query.term("seqid", seqid), RangeQuery("tags.weight", gte=80)])
        doc = ElasticUtils.get_rdm_docs(idx, idx_type, qbool=qbool, sources=['id', 'start'], size=1)[0]
        return getattr(doc, 'id')
Example #43
    def gene_in_region(cls, hit, section=None, config=None, result_container={}):

        try:
            padded_region_doc = utils.Region.pad_region_doc(Document(hit))
        except:
            logger.warn('Region padding error ')
            return result_container

        # 'build_info': {'end': 22411939, 'seqid': '1', 'build': 38, 'start': 22326008}, 'region_id': '1p36.12_008'}
        region_id = getattr(padded_region_doc, "region_id")
        region_name = getattr(padded_region_doc, "region_name")
        build_info = getattr(padded_region_doc, "build_info")
        diseases = getattr(padded_region_doc, "tags")['disease']
        seqid = build_info['seqid']
        start = build_info['start']
        end = build_info['end']

        gene_index = ElasticSettings.idx('GENE', idx_type='GENE')
        elastic = Search.range_overlap_query(seqid=seqid, start_range=start, end_range=end,
                                             idx=gene_index, field_list=['start', 'stop', '_id'],
                                             seqid_param="chromosome",
                                             end_param="stop", size=10000)
        result_docs = elastic.search().docs

        genes = set()
        for doc in result_docs:
            genes.add(doc.doc_id())

        result_container_populated = cls.populate_container(region_id,
                                                            region_name,
                                                            fnotes=None, features=genes,
                                                            diseases=diseases,
                                                            result_container=result_container)
        return result_container_populated
Example #44
    def test_get_criteria_details(self):
        config = IniParser().read_ini(MY_INI_FILE)
        idx = ElasticSettings.idx('MARKER_CRITERIA')
        available_criterias = MarkerCriteria.get_available_criterias(config=config)['marker']
        idx_type = ','.join(available_criterias)

        doc_by_idx_type = ElasticUtils.get_rdm_docs(idx, idx_type, size=1)
        self.assertTrue(len(doc_by_idx_type) == 1)
        feature_id = getattr(doc_by_idx_type[0], 'qid')

        criteria_details = MarkerCriteria.get_criteria_details(feature_id, config=config)

        hits = criteria_details['hits']
        first_hit = hits[0]
        _type = first_hit['_type']
        _index = first_hit['_index']
        _id = first_hit['_id']
        _source = first_hit['_source']

        disease_tag = _source['disease_tags'][0]
        self.assertEqual(feature_id, _id)
        self.assertIn(_type, idx_type)
        self.assertEqual(idx, _index)
        self.assertIn(disease_tag, list(_source.keys()))

        fdetails = _source[disease_tag][0]
        self.assertIn('fid', fdetails.keys())
        self.assertIn('fname', fdetails.keys())
Example #45
def association_stats(request, sources=None):
    ''' Get association statistics for a given marker ID. '''
    seqid = request.GET.get('chr').replace('chr', '')
    idx_type = request.GET.get('idx_type').upper()
    start = request.GET.get('start')
    end = request.GET.get('end')
    data = []

    def get_stats(resp_json):
        hits = resp_json['hits']['hits']
        for hit in hits:
            d = Document(hit)
            data.append({
                "CHROM": getattr(d, 'seqid'),
                "POS": getattr(d, 'position'),
                "PVALUE": getattr(d, 'p_value'),
                "DBSNP_ID": getattr(d, 'marker')
            })

    query = ElasticQuery(Query.query_string(seqid, fields=["seqid"]), sources=sources)
    if start is not None and end is not None:
        query = ElasticQuery(BoolQuery(must_arr=[Query.query_string(seqid, fields=["seqid"]),
                                                 RangeQuery("position", gte=start, lte=end)]), 
                             sources=sources)
    ScanAndScroll.scan_and_scroll(ElasticSettings.idx('IC_STATS', idx_type), call_fun=get_stats, query=query)

    json = {"variants": data}
    return JsonResponse(json)
    def get_interaction_doc(self, interaction_source='intact', parent_id=None):

        idx_key = 'GENE'
        idx_type_key = 'INTERACTIONS'
        parent_idx_key = 'GENE'

        idx = ElasticSettings.idx(idx_key, idx_type_key)
        (idx, idx_type) = idx.split('/')

        if parent_id:
            qbool_intact = BoolQuery().must([Query.term("interaction_source", interaction_source),
                                            Query.term("_parent", parent_id)])
        else:
            qbool_intact = BoolQuery().should([Query.term("interaction_source", interaction_source)])

        # Get random doc or specific if id is passed in query
        docs_by_geneid = DataIntegrityUtils.get_rdm_docs(idx, idx_type, qbool=qbool_intact, sources=[], size=1)
        doc = docs_by_geneid[0]

        # Get parent doc
        parent_id = doc.parent()
        parent_docs = DataIntegrityUtils.fetch_from_elastic(idx_key, parent_idx_key, [parent_id])

        if parent_docs:
            self.assertTrue(len(parent_docs) >= 1, "Found 1 parent")
            parent_doc = parent_docs[0]
            return doc, parent_doc
        else:
            return self.get_interaction_doc("intact", parent_id)
    def test_gene_criteria_types(self):
        """Test if the indexes have records"""
        idx_key = "GENE_CRITERIA"
        feature_type = "gene"
        idx = ElasticSettings.idx(idx_key)

        idx_types = CriteriaDataIntegrityUtils.get_criteria_index_types(idx_key)
        gene_criterias = Criteria.get_available_criterias(feature_type)

        CriteriaDataIntegrityTestUtils().test_criteria_types(idx, idx_types, gene_criterias["gene"])
        CriteriaDataIntegrityTestUtils().test_criteria_mappings(idx, idx_types)

        # get random doc for each type ['gene_in_region', 'cand_gene_in_region', 'cand_gene_in_study', 'is_gene_in_mhc']
        idx_type = "gene_in_region"
        doc_by_idx_type = ElasticUtils.get_rdm_docs(idx, idx_type, size=1)
        self.assertTrue(len(doc_by_idx_type) == 1, "got back one document")
        gene_in_region_doc = doc_by_idx_type[0]

        #         {'score': 10, 'CRO': [{'fname': '4p11', 'fid': '4p11_005'}],
        #          '_meta': {'_type': 'gene_in_region', '_score': 0.9997835,
        #                    '_index': 'pydgin_imb_criteria_gene', '_id': 'ENSG00000250753'},
        #          'disease_tags': ['CRO'], 'qid': 'ENSG00000250753'}

        qid = getattr(gene_in_region_doc, "qid")
        print(qid)
        disease_tags = getattr(gene_in_region_doc, "disease_tags")
        #         ENSG00000248482
        #         ['IBD', 'UC']
        #         [{'fid': '5q31.1_013', 'fname': '5q31.1'}]
        #         [{'fid': '5q31.1_013', 'fname': '5q31.1'}]
        fnotes = getattr(gene_in_region_doc, disease_tags[0])
        region_id = fnotes[0]["fid"]
        print(region_id)
    def test_significant_terms(self):
        ''' Significant Terms Aggregation '''
        agg = Agg("test_significant_terms", "significant_terms", {"field": "start"})
        aggs = Aggs(agg)
        search = Search(aggs=aggs, idx=ElasticSettings.idx('DEFAULT'))
        r_aggs = search.search().aggs
        self.assertTrue('test_significant_terms' in r_aggs, "returned aggregations")
Example #49
    def test_indices_str(self):
        idx_str = ElasticSettings.indices_str()
        self.assertTrue(idx_str in [
            IDX['GFF_GENERIC']['indexName'] + ',' + IDX['MARKER']['indexName'],
            IDX['MARKER']['indexName'] + ',' + IDX['GFF_GENERIC']['indexName']
        ])
Example #50
def show_es_gene_section(gene_symbol=None,
                         seqid=None,
                         start_pos=None,
                         end_pos=None):
    ''' Template inclusion tag to render a gene section given a
    chado gene feature. '''
    seqid = str(seqid).replace('chr', '')
    if gene_symbol is not None:
        ''' gene symbol query'''
        query = ElasticQuery.query_match("symbol", gene_symbol)
    elif end_pos is None:
        ''' start and end are same, range query for snp'''
        query_bool = BoolQuery(must_arr=[
            Query.match("chromosome", seqid),
            RangeQuery("start", lte=start_pos),
            RangeQuery("stop", gte=start_pos)
        ])
        query = ElasticQuery.bool(query_bool)
    else:
        ''' start and end differ, range query for a region'''
        query_bool = BoolQuery(must_arr=[
            Query.match("chromosome", seqid),
            RangeQuery("start", gte=start_pos),
            RangeQuery("stop", lte=end_pos)
        ])
        query = ElasticQuery.bool(query_bool)

    elastic = Search(query, idx=ElasticSettings.idx(name='GENE'))
    return {'es_genes': elastic.search().docs}
Example #51
def _get_old_dbsnps(marker):
    ''' Get markers from old versions of DBSNP. Assumes the index key is
    prefixed by 'MARKER_'. '''
    old_dbsnps_names = sorted([ElasticSettings.idx(k) for k in ElasticSettings.getattr('IDX').keys()
                               if 'MARKER_' in k], reverse=True)
    old_dbsnp_docs = []
    if len(old_dbsnps_names) > 0:
        search_query = ElasticQuery(Query.query_string(marker, fields=['id', 'rscurrent']))
        for idx_name in old_dbsnps_names:
            elastic2 = Search(search_query=search_query, idx=idx_name, idx_type='marker')
            docs = elastic2.search().docs
            if len(docs) > 0:
                old_doc = docs[0]
                old_doc.marker_build = _get_marker_build(idx_name)
                old_dbsnp_docs.append(old_doc)
    return old_dbsnp_docs
Example #52
def tearDownModule():
    # remove index created
    INI_CONFIG = IniParser().read_ini(MY_PUB_INI_FILE)
    requests.delete(ElasticSettings.url() + '/' + INI_CONFIG['DISEASE']['index'])
    os.remove(MY_PUB_INI_FILE)
    if os.path.exists(TEST_DATA_DIR + '/STAGE'):
        shutil.rmtree(TEST_DATA_DIR + '/STAGE')
Example #53
def show_es_gene_section(gene_symbol=None,
                         seqid=None,
                         start_pos=None,
                         end_pos=None):
    ''' Template inclusion tag to render a gene section given a
    chado gene feature. '''
    if not (isinstance(seqid, str) and seqid.startswith("chr")):
        seqid = 'chr' + str(seqid)
    if gene_symbol is not None:
        ''' gene symbol query'''
        query = ElasticQuery.query_match("gene_symbol", gene_symbol)
    elif end_pos is None:
        ''' start and end are same, range query for snp'''
        query_bool = BoolQuery(must_arr=[
            Query.match("seqid", seqid),
            RangeQuery("featureloc.start", lte=start_pos),
            RangeQuery("featureloc.end", gte=start_pos)
        ])
        query = ElasticQuery.bool(query_bool)
    else:
        ''' start and end differ, range query for a region'''
        query_bool = BoolQuery(must_arr=[
            Query.match("seqid", seqid),
            RangeQuery("featureloc.start", gte=start_pos),
            RangeQuery("featureloc.end", lte=end_pos)
        ])
        query = ElasticQuery.bool(query_bool)

    elastic = Search(query, idx=ElasticSettings.idx(name='GENE'))
    return {'es_genes': elastic.search().docs}
Example #54
    def get_models_to_delete(self):
        '''Get models to delete'''
        idx_key = 'CP_STATS_UD'
        idx = ElasticSettings.idx(idx_key)
        ''' Check if an index type exists in elastic and later check there is a contenttype/model for the given elastic index type. '''  # @IgnorePep8
        elastic_url = ElasticSettings.url()
        url = idx + '/_mapping'
        response = Search.elastic_request(elastic_url, url, is_post=False)

        if "error" in response.json():
            logger.warn(response.json())
            return None

        # get idx_types from _mapping
        elastic_mapping = json.loads(response.content.decode("utf-8"))
        # FIXME: if index aliases are deployed, the mapping key can differ from the configured index name
        idx = list(elastic_mapping.keys())[0]
        idx_types = list(elastic_mapping[idx]['mappings'].keys())

        models2go = []
        expire_days = 7  # 1 week

        # add idx_types that have no docs
        for idx_type in idx_types:
            ndocs = Search(idx=idx, idx_type=idx_type).get_count()['count']

            if ndocs == 0:
                models2go.append(idx_type)

            # add idx_types that were not accessed for a given time period
            url = idx + '/' + idx_type + '/_meta'
            response = Search.elastic_request(elastic_url, url, is_post=False)
            elastic_meta = json.loads(response.content.decode("utf-8"))
            if '_source' in elastic_meta:
                uploaded_str_date = elastic_meta['_source']['uploaded']
                yymmdd_str = uploaded_str_date.split()[0]
                # Format: 2015-11-03 14:43:54.099645+00:00
                from datetime import datetime as dt
                dt = dt.strptime(yymmdd_str, '%Y-%m-%d')
                uploaded_date = dt.date()

                d1 = datetime.date.today()
                d2 = d1 - datetime.timedelta(days=expire_days)
                if uploaded_date < d2:
                    models2go.append(idx_type)

        return models2go
Example #55
    def test_search_props(self):

        if 'pydgin_auth' in settings.INSTALLED_APPS:
            from pydgin_auth.elastic_model_factory import ElasticPermissionModelFactory
            from django.contrib.contenttypes.models import ContentType
            from django.contrib.auth.models import Group, User, Permission
            from django.shortcuts import get_object_or_404

            ElasticPermissionModelFactory.create_dynamic_models()
            search_props = ElasticSettings.search_props("ALL")

            idx = search_props['idx']
            idx_keys = search_props['idx_keys']
            idx_type = search_props['idx_type']

            self.assertIn('publications', idx, 'publications found in idx')
            self.assertIn('MARKER', idx_keys, 'MARKER found in idx_keys')
            self.assertIn('rs_merge', idx_type, 'rs_merge found in idx_type')

            # CREATE DIL group and add test_dil user to that group
            dil_group, created = Group.objects.get_or_create(name='DILX')
            self.assertTrue(created)
            dil_user = User.objects.create_user(
                username='******', email='*****@*****.**', password='******')
            dil_user.groups.add(dil_group)
            self.assertTrue(dil_user.groups.filter(name='DILX').exists())

            # create permission for MARKER and IC
            test_model_name = 'marker-ic_idx_type'
            # create permissions on models and retest again to check if the idx type could be seen
            content_type, created = ContentType.objects.get_or_create(
                model=test_model_name, app_label="elastic",
            )

            # get the permission ... already created
            can_read_permission = Permission.objects.get(content_type=content_type)
            self.assertEqual('can_read_marker-ic_idx_type', can_read_permission.codename, "idx type permission correct")
            # as soon as the permission is set for an index, the index becomes a restricted resource
            idx_types_visible = ElasticSettings.search_props("ALL")["idx_type"]
            self.assertFalse('immunochip' in idx_types_visible,  'immunochip idx type not visible')

            # now grant permission to dil_user and check if idx type is visible
            dil_group.permissions.add(can_read_permission)
            dil_user = get_object_or_404(User, pk=dil_user.id)
            idx_types_visible = ElasticSettings.search_props("ALL", dil_user)["idx_type"]
            self.assertTrue('immunochip' in idx_types_visible,  'immunochip idx type visible now')
Example #56
    def exists(cls, repo, snapshot):
        ''' Test if the repository/snapshot exists. '''
        url = ElasticSettings.url() + '/_snapshot/' + repo + '/' + snapshot
        resp = requests.get(url)
        if resp.status_code != 200:
            return False
        else:
            return True
Example #57
    def delete_repository(cls, repo):
        url = ElasticSettings.url() + '/_snapshot/' + repo
        resp = requests.delete(url)
        if resp.status_code != 200:
            logger.error("Status (" + url + "): " + str(resp.status_code) +
                         " :: " + str(resp.json()["error"]))
            return False
        return True
    def test_top_hits(self):
        ''' Top Hits Aggregation '''
        agg = [Agg('test_filter', 'filter', RangeQuery('start', gt='2000')),
               Agg('test_top_hits', 'top_hits', {"size": 1})]
        aggs = Aggs(agg)
        search = Search(aggs=aggs, idx=ElasticSettings.idx('DEFAULT'))
        hits = search.search().aggs['test_top_hits'].get_hits()
        self.assertTrue(len(hits) == 1, "returned the top hit")
    def test_missing(self):
        ''' Missing Aggregation '''
        agg = Agg("test_missing", "missing", {"field": "seqid"})
        aggs = Aggs(agg)
        search = Search(aggs=aggs, idx=ElasticSettings.idx('DEFAULT'))
        r_aggs = search.search().aggs
        self.assertTrue(getattr(r_aggs['test_missing'], 'doc_count') == 0,
                        "no missing seqid fields")
Example #60
    def get_count(self):
        ''' Return the elastic count for a query result '''
        url = self.idx + '/' + self.idx_type + '/_count?'
        data = {}
        if hasattr(self, 'query'):
            data = json.dumps(self.query)
        response = Search.elastic_request(ElasticSettings.url(), url, data=data)
        return response.json()
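For reference, a hedged sketch of how get_count is used elsewhere in this listing (see get_models_to_delete above); the response JSON carries the total under 'count'. The 'marker' index type is illustrative.

n = Search(idx=ElasticSettings.idx('DEFAULT'), idx_type='marker').get_count()['count']
print(n)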