def test_pubs_disease_tags(self):
        ''' Check the number of disease publications against the number of tags.disease
        and report the differences. '''
        counts_match = True
        msg = ''
        for disease in DiseasePublicationTest.DISEASES:
            pmids = self._get_pmids(disease)
            disease_code = disease.lower()
            elastic = Search(search_query=ElasticQuery(BoolQuery(
                         b_filter=Filter(Query.term('tags.disease', disease_code))), sources=['pmid']),
                         idx=ElasticSettings.idx('PUBLICATION'), size=len(pmids)*2)
            res = elastic.get_count()
            msg += disease_code+'\tINDEX: '+str(res['count'])+'\tNCBI: '+str(len(pmids))
            if res['count'] != len(pmids):
                counts_match = False
                docs = elastic.search().docs
                pmids_in_idx = [getattr(doc, 'pmid') for doc in docs]
                pmids_diff1 = [pmid for pmid in pmids_in_idx if pmid not in pmids]
                pmids_diff2 = [pmid for pmid in pmids if pmid not in pmids_in_idx]
                if len(pmids_diff1) > 0:
                    msg += '\textra PMIDs: '+str(pmids_diff1)
                if len(pmids_diff2) > 0:
                    msg += '\tmissing PMIDs: '+str(pmids_diff2)
            msg += '\n'

        print(msg)
        self.assertTrue(counts_match, 'Publication index counts match NCBI for all disease tags')
def show_es_gene_section(gene_symbol=None,
                         seqid=None,
                         start_pos=None,
                         end_pos=None):
    ''' Template inclusion tag to render a gene section given a
    chado gene feature. '''
    if not (isinstance(seqid, str) and seqid.startswith("chr")):
        seqid = 'chr' + str(seqid)
    if gene_symbol is not None:
        ''' gene symbol query'''
        query = ElasticQuery.query_match("gene_symbol", gene_symbol)
    elif end_pos is None:
        ''' no end position given: find features overlapping the start position (e.g. a SNP) '''
        query_bool = BoolQuery(must_arr=[
            Query.match("seqid", seqid),
            RangeQuery("featureloc.start", lte=start_pos),
            RangeQuery("featureloc.end", gte=start_pos)
        ])
        query = ElasticQuery.bool(query_bool)
    else:
        ''' start and end given: find features contained within the range '''
        query_bool = BoolQuery(must_arr=[
            Query.match("seqid", seqid),
            RangeQuery("featureloc.start", gte=start_pos),
            RangeQuery("featureloc.end", lte=end_pos)
        ])
        query = ElasticQuery.bool(query_bool)

    elastic = Search(query, idx=ElasticSettings.idx(name='GENE'))
    return {'es_genes': elastic.search().docs}
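A minimal usage sketch for the tag above; the gene symbol is illustrative and the attribute read back assumes the 'gene_symbol' field used in the query:
# Hypothetical call: build the template context for a single gene by symbol.
context = show_es_gene_section(gene_symbol='PTPN22')
for doc in context['es_genes']:
    print(getattr(doc, 'gene_symbol', None))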
Example #3
def show_es_gene_section(gene_symbol=None,
                         seqid=None,
                         start_pos=None,
                         end_pos=None):
    ''' Template inclusion tag to render a gene section given a
    chado gene feature. '''
    seqid = str(seqid).replace('chr', '')
    if gene_symbol is not None:
        ''' gene symbol query'''
        query = ElasticQuery.query_match("symbol", gene_symbol)
    elif end_pos is None:
        ''' no end position given: find features overlapping the start position (e.g. a SNP) '''
        query_bool = BoolQuery(must_arr=[
            Query.match("chromosome", seqid),
            RangeQuery("start", lte=start_pos),
            RangeQuery("stop", gte=start_pos)
        ])
        query = ElasticQuery.bool(query_bool)
    else:
        ''' start and end given: find features contained within the range '''
        query_bool = BoolQuery(must_arr=[
            Query.match("chromosome", seqid),
            RangeQuery("start", gte=start_pos),
            RangeQuery("stop", lte=end_pos)
        ])
        query = ElasticQuery.bool(query_bool)

    elastic = Search(query, idx=ElasticSettings.idx(name='GENE'))
    return {'es_genes': elastic.search().docs}
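The same tag driven by position instead of symbol; a sketch in which the chromosome and coordinate are illustrative (this variant maps onto the chromosome/start/stop fields):
# Hypothetical call: genes overlapping a single position on chromosome 1.
context = show_es_gene_section(seqid=1, start_pos=114377568)
print([getattr(doc, 'symbol', None) for doc in context['es_genes']])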
Example #4
def _get_pub_docs_by_pmid(pmids, sources=None):
    """ Get the gene symbols for the corresponding array of ensembl IDs.
    A dictionary is returned with the key being the ensembl ID and the
    value the gene document. """
    query = ElasticQuery(Query.ids(pmids), sources=sources)
    elastic = Search(query, idx=ElasticSettings.idx("PUBLICATION"), size=len(pmids))
    return {doc.doc_id(): doc for doc in elastic.search().docs}
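A usage sketch for the helper above; the PMIDs and source field are illustrative:
# Hypothetical call: fetch two publication documents keyed by PMID and read a field.
pub_docs = _get_pub_docs_by_pmid(['19923576', '20190752'], sources=['title'])
for pmid, doc in pub_docs.items():
    print(pmid, getattr(doc, 'title', None))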
 def test_significant_terms(self):
     ''' Significant Terms Aggregation '''
     agg = Agg("test_significant_terms", "significant_terms", {"field": "start"})
     aggs = Aggs(agg)
     search = Search(aggs=aggs, idx=ElasticSettings.idx('DEFAULT'))
     r_aggs = search.search().aggs
     self.assertTrue('test_significant_terms' in r_aggs, "returned aggregations")
 def test_missing_terms_filtered_query(self):
     ''' Test filtered query with a missing terms filter. '''
     terms_filter = TermsFilter.get_missing_terms_filter("field", "group_name")
     query = ElasticQuery.filtered(Query.match_all(), terms_filter)
     elastic = Search(query, idx=ElasticSettings.idx('DEFAULT'))
     docs = elastic.search().docs
      self.assertTrue(len(docs) == 3, "Elastic missing terms filtered query retrieved all docs")
 def test_string_query(self):
     ''' Test building and running a string query. '''
     query = ElasticQuery.query_string("rs2476601", fields=["id"])
     elastic = Search(query, idx=ElasticSettings.idx('DEFAULT'))
     docs = elastic.search()
     self.assertTrue(len(docs.docs) == 1, "Elastic string query retrieved marker (rs2476601)")
     self.assertRaises(QueryError, ElasticQuery.query_string, "rs2476601", fieldssss=["id"])
def setUpModule():
    ''' Change ini config (MY_INI_FILE) to use the test suffix when
    creating pipeline indices. '''
    ini_file = os.path.join(os.path.dirname(__file__), 'test_download.ini')
    if os.path.isfile(MY_INI_FILE):
        return

    with open(MY_INI_FILE, 'w') as new_file:
        with open(ini_file) as old_file:
            for line in old_file:
                new_file.write(line.replace('auto_tests', IDX_SUFFIX))

    ''' Load GENE_HISTORY, the ensembl gene GTF and GENE2ENSEMBL. '''
    INI_CONFIG = IniParser().read_ini(MY_INI_FILE)
    idx = INI_CONFIG['ENSEMBL_GENE_GTF']['index']

    call_command('pipeline', '--steps', 'load', sections='GENE_HISTORY',
                 dir=TEST_DATA_DIR, ini=MY_INI_FILE)
    call_command('pipeline', '--steps', 'stage', 'load', sections='ENSEMBL_GENE_GTF',
                 dir=TEST_DATA_DIR, ini=MY_INI_FILE)
    Search.index_refresh(idx)
    call_command('pipeline', '--steps', 'load', sections='GENE2ENSEMBL',
                 dir=TEST_DATA_DIR, ini=MY_INI_FILE)

    Search.index_refresh(idx)
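A matching tearDownModule is commonly paired with this setup to remove the generated ini file; a minimal sketch (clean-up of the test indices themselves is omitted, as it depends on the deployment):
def tearDownModule():
    ''' Remove the ini file created by setUpModule. '''
    if os.path.isfile(MY_INI_FILE):
        os.remove(MY_INI_FILE)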
Example #9
    def get_disease_tags(cls, feature_id, idx=None, idx_type=None):
        ''' Get the aggregated list of disease_tags for a given feature id, aggregated
            from all criteria_types for a feature type.
        @type  feature_id: string
        @param feature_id: id of the feature (gene => gene_id, region => region_id)
        @type  idx: string
        @param idx: name of the index
        @type  idx_type: string
        @param idx_type: name of the idx type, each criteria is an index type
        '''
        query = ElasticQuery(Query.term("qid", feature_id))
        agg = Agg("criteria_disease_tags", "terms", {"field": "disease_tags", "size": 0})
        aggs = Aggs(agg)

        if idx_type:
            search = Search(query, aggs=aggs, idx=idx, idx_type=idx_type)
        else:
            search = Search(query, aggs=aggs, idx=idx)

        disease_tags = []
        try:
            r_aggs = search.search().aggs
            buckets = r_aggs['criteria_disease_tags'].get_buckets()
            disease_tags = [dis_dict['key'].lower() for dis_dict in buckets]
        except Exception:
            return []

        # get disease docs
        if len(disease_tags) > 0:
            (core, other) = Disease.get_site_diseases(dis_list=disease_tags)
            diseases = list(core)
            diseases.extend(other)
            return diseases
        else:
            return None
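An illustrative call; the enclosing class name, feature id and criteria index key below are assumptions:
# Hypothetical usage: aggregated disease tags for a gene feature.
diseases = CriteriaManager.get_disease_tags('ENSG00000134242',
                                            idx=ElasticSettings.idx('GENE_CRITERIA'))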
Example #10
def _find_snp_position(snp_track, name):
    if snp_track is None:
        query = ElasticQuery.query_match("id", name)
        elastic = Search(query, idx=ElasticSettings.idx('MARKER'))
        snpResult = elastic.get_json_response()
        if len(snpResult['hits']['hits']) > 0:
            snp = snpResult['hits']['hits'][0]['_source']
            chrom = snp['seqid'].replace('chr', "")
            position = snp['start']
            return {'chr': chrom, 'start': (position-1), 'end': position, 'name': name}
    else:
        mo = re.match(r"(.*)-(.*)", snp_track)
        (group, track) = mo.group(1, 2)
        try:
            snp_track_idx = ElasticSettings.idx('CP_STATS_'+group.upper(), snp_track.upper())
        except SettingsError:
            snp_track_idx = ElasticSettings.idx('CP_STATS_'+group.upper())+"/"+track

        query = ElasticQuery.query_match("name", name)
        elastic = Search(query, idx=snp_track_idx)
        snpResult = elastic.get_json_response()
        if len(snpResult['hits']['hits']) > 0:
            snp = snpResult['hits']['hits'][0]['_source']
            chrom = snp['seqid'].replace('chr', "")
            position = snp['start']
            return {'chr': chrom, 'start': (position-1), 'end': position, 'name': name}

    return {'error': 'Marker '+name+' does not exist in the currently selected dataset'}
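A usage sketch: the first call resolves a marker against the default MARKER index, the second against a per-track statistics index (the track name is illustrative):
loc = _find_snp_position(None, 'rs2476601')
track_loc = _find_snp_position('ud-t1d_study', 'rs2476601')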
Example #11
    def filter_queryset(self, request, queryset, view):
        ''' Override this method to request just the documents required from elastic. '''
        q_size = view.paginator.get_limit(request)
        q_from = view.paginator.get_offset(request)

        filterable = getattr(view, 'filter_fields', [])
        filters = dict([(k, v) for k, v in request.GET.items()
                        if k in filterable])
        search_filters = self._build_filters(filters=filters)
        if search_filters is not None:
            q = ElasticQuery.filtered(Query.match_all(), search_filters)
        else:
            q = ElasticQuery(Query.match_all())
        s = Search(search_query=q,
                   idx=getattr(view, 'idx'),
                   size=q_size,
                   search_from=q_from)
        json_results = s.get_json_response()
        results = []
        for result in json_results['hits']['hits']:
            new_obj = ElasticObject(initial=result['_source'])
            new_obj.uuid = result['_id']
            results.append(new_obj)
        view.es_count = json_results['hits']['total']
        return results
 def get_gene_docs_by_ensembl_id(cls, ens_ids, sources=None):
     ''' Get the gene symbols for the corresponding array of ensembl IDs.
     A dictionary is returned with the key being the ensembl ID and the
     value the gene document. '''
     query = ElasticQuery(Query.ids(ens_ids), sources=sources)
     elastic = Search(query, idx=ElasticSettings.idx('GENE', idx_type='GENE'), size=len(ens_ids))
     return {doc.doc_id(): doc for doc in elastic.search().docs}
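This is the helper used by the study-hits post() handler further down (called there as utils.get_gene_docs_by_ensembl_id); a sketch with an illustrative Ensembl ID:
docs = utils.get_gene_docs_by_ensembl_id(['ENSG00000134242'], sources=['symbol'])
symbols = {ens_id: getattr(doc, 'symbol', None) for ens_id, doc in docs.items()}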
Example #13
    def filter_queryset(self, request, queryset, view):
        ''' Override this method to request just the documents required from Rserve. '''
        try:
            filterable = getattr(view, 'filter_fields', [])
            filters = dict([(k, v) for k, v in request.GET.items() if k in filterable])

            mid1 = filters.get('marker', 'rs2476601')
            dataset = filters.get('dataset', 'EUR').replace('-', '')
            query = ElasticQuery(BoolQuery(must_arr=[Query.term("id", mid1)]), sources=['seqid', 'start'])
            elastic = Search(search_query=query, idx=ElasticSettings.idx('MARKER', 'MARKER'), size=1)
            doc = elastic.search().docs[0]
            seqid = getattr(doc, 'seqid')

            rserve = getattr(settings, 'RSERVE')
            conn = pyRserve.connect(host=rserve.get('HOST'), port=rserve.get('PORT'))
            pop_str = conn.r.get_pop(dataset, seqid, mid1)

            pops = json.loads(str(pop_str))
            populations = []
            for pop in pops:
                pops[pop]['population'] = pop
                populations.append(pops[pop])
            conn.close()
            return [ElasticObject(initial={'populations': populations, 'marker': mid1})]
        except (TypeError, ValueError, IndexError, ConnectionError):
            return [ElasticObject(initial={'populations': None, 'marker': mid1})]
    def test_filter(self):
        ''' Filter Aggregation '''
        agg = [Agg('test_filter', 'filter', RangeQuery('start', gt='25000')),
               Agg('avg_start', 'avg', {"field": 'start'}),
               Agg('min_start', 'min', {"field": 'start'}),
               Agg('sum_start', 'sum', {"field": 'start'}),
               Agg('stats_start', 'stats', {"field": 'start'}),
               Agg('count_start', 'value_count', {"field": 'start'}),
               Agg('ext_stats_start', 'extended_stats', {"field": 'start'})]
        aggs = Aggs(agg)
        search = Search(aggs=aggs, idx=ElasticSettings.idx('DEFAULT'))

        r_aggs = search.search().aggs
        self.assertTrue('avg_start' in r_aggs, "returned avg aggregation")
        self.assertTrue('min_start' in r_aggs, "returned min aggregation")

        stats_keys = ["min", "max", "sum", "count", "avg"]
        self.assertTrue(all(hasattr(r_aggs['stats_start'], k)
                            for k in stats_keys),
                        "returned min aggregation")

        stats_keys.extend(["sum_of_squares", "variance", "std_deviation", "std_deviation_bounds"])
        self.assertTrue(all(hasattr(r_aggs['ext_stats_start'], k)
                            for k in stats_keys),
                        "returned min aggregation")
    def filter_queryset(self, request, queryset, view):
        ''' Override this method to request just the documents required from elastic. '''
        q_size = view.paginator.get_limit(request)
        q_from = view.paginator.get_offset(request)

        filterable = getattr(view, 'filter_fields', [])
        filters = dict([(k, v) for k, v in request.GET.items() if k in filterable])
        criteria_idx = self._get_index(filters.get('feature_type', 'GENE_CRITERIA'))

        if isinstance(criteria_idx, list):
            idx = ','.join(ElasticSettings.idx(name) for name in criteria_idx)
        else:
            idx = ElasticSettings.idx(criteria_idx)

        q = ElasticQuery(Query.match_all())
        s = Search(search_query=q, idx=idx, size=q_size, search_from=q_from)
        json_results = s.get_json_response()
        results = []
        for result in json_results['hits']['hits']:
            new_obj = ElasticObject(initial=result['_source'])
            new_obj.uuid = result['_id']
            new_obj.criteria_type = result['_type']
            results.append(new_obj)
        view.es_count = json_results['hits']['total']
        return results
 def test_top_hits(self):
     ''' Top Hits Aggregation '''
     agg = [Agg('test_filter', 'filter', RangeQuery('start', gt='2000')),
            Agg('test_top_hits', 'top_hits', {"size": 1})]
     aggs = Aggs(agg)
     search = Search(aggs=aggs, idx=ElasticSettings.idx('DEFAULT'))
     hits = search.search().aggs['test_top_hits'].get_hits()
     self.assertTrue(len(hits) == 1, "returned the top hit")
 def test_top_hits_sub_agg(self):
     sub_agg = Agg('idx_top_hits', 'top_hits', {"size": 1})
     aggs = Aggs([Agg("idxs", "terms", {"field": "_index"}, sub_agg=sub_agg),
                  Agg("categories", "terms", {"field": "_type", "size": 0})])
     search = Search(aggs=aggs, idx=ElasticSettings.idx('DEFAULT'))
     buckets = search.search().aggs['idxs'].get_docs_in_buckets()
     self.assertEqual(buckets[ElasticSettings.idx('DEFAULT')]['doc_count'], 3)
     self.assertEqual(len(buckets[ElasticSettings.idx('DEFAULT')]['docs']), 1)
 def test_missing(self):
     ''' Missing Aggregation '''
     agg = Agg("test_missing", "missing", {"field": "seqid"})
     aggs = Aggs(agg)
     search = Search(aggs=aggs, idx=ElasticSettings.idx('DEFAULT'))
     r_aggs = search.search().aggs
     self.assertTrue(getattr(r_aggs['test_missing'], 'doc_count') == 0,
                     "no missing seqid fields")
Example #19
    def fetch_overlapping_features(cls, build, seqid, start, end, idx=None, idx_type=None, disease_id=None):
        ''' Fetch the features overlapping a given stretch of a region. The build info is
            stored as a nested document, so a nested query is built.
        @type  build: string
        @param build: build info eg: 'GRCh38'
        @type  seqid: string
        @param seqid: chromosome number
        @type  start:  string
        @param start: region start
        @type  end:  string
        @param end: region end
        @type  idx: string
        @param idx: name of the index
        @type  idx_type: string
        @param idx_type: name of the idx type, each criteria is an index type
        @type  disease_id:  string
        @param disease_id: disease code
        '''
        nbuild = build
        start_range = start
        end_range = end

        bool_range = BoolQuery()
        bool_range.must(RangeQuery("build_info.start", lte=start_range)) \
                  .must(RangeQuery("build_info.end", gte=end_range))

        or_filter = OrFilter(RangeQuery("build_info.start", gte=start_range, lte=end_range))

        or_filter.extend(RangeQuery("build_info.end", gte=start_range, lte=end_range)) \
                 .extend(bool_range)

        bool_query = BoolQuery()

        if disease_id:
            qnested_buildinfo = Query.nested('build_info', bool_query)
            bool_query = BoolQuery()
            bool_query.must(Query.term("disease", disease_id.lower())).must(qnested_buildinfo)
            qnested = ElasticQuery(bool_query, sources=['build_info.*',
                                                        'disease_locus',
                                                        'disease',
                                                        'chr_band',
                                                        'species'])

        else:
            bool_query.must(Query.term("build_info.build", nbuild)) \
                  .must(Query.term("build_info.seqid", seqid)) \
                  .filter(or_filter)

            qnested = ElasticQuery(Query.nested('build_info', bool_query), sources=['build_info.*',
                                                                                    'disease_locus',
                                                                                    'disease',
                                                                                    'chr_band',
                                                                                    'species'])

        elastic = Search(qnested, idx=idx, idx_type=idx_type)
        res = elastic.search()
        return res.docs
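An illustrative call; the class name, index and coordinates are assumptions:
# Hypothetical usage: regions overlapping a window on chromosome 1 for GRCh38.
docs = RegionUtils.fetch_overlapping_features('GRCh38', '1', 113813811, 114437023,
                                              idx=ElasticSettings.idx('REGION'))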
 def test_bool_filtered_query(self):
     ''' Test building and running a filtered boolean query. '''
     query_bool = BoolQuery(must_not_arr=[Query.term("seqid", 2)],
                            should_arr=[RangeQuery("start", gte=10050)])
     query_bool.must([Query.term("id", "rs768019142")]) \
               .should(RangeQuery("start", gte=10054))
     query = ElasticQuery.filtered_bool(Query.match_all(), query_bool, sources=["id", "seqid"])
     elastic = Search(query, idx=ElasticSettings.idx('DEFAULT'))
     self.assertTrue(elastic.search().hits_total == 1, "Elastic filtered query retrieved marker (rs768019142)")
Example #21
def region_page(request, region):
    ''' Renders a region page. '''
    query = ElasticQuery.query_match("attr.region_id", region)
    elastic = Search(query, idx=ElasticSettings.idx(name='REGION'))
    context = elastic.get_result()
    context['title'] = "Region"
    return render(request, 'region/region.html', context,
                  content_type='text/html')
 def test_and_filtered_query(self):
     ''' Test building and running a filtered query. '''
     query_bool = BoolQuery(must_arr=[RangeQuery("start", gte=1)])
     and_filter = AndFilter(query_bool)
     and_filter.extend(RangeQuery("start", gte=1)) \
               .extend(Query.term("seqid", 1))
     query = ElasticQuery.filtered(Query.term("seqid", 1), and_filter)
     elastic = Search(query, idx=ElasticSettings.idx('DEFAULT'))
     self.assertTrue(elastic.search().hits_total >= 1, "Elastic filtered query retrieved marker(s)")
 def test_terms_avg_order(self):
     ''' Test average and order. '''
     agg_name = "test"
     sub_agg = Agg('avg_start', 'avg', {"field": "start"})
     agg = Agg(agg_name, "terms", {"field": "seqid", "size": 0, "order": {"avg_start": "desc"}}, sub_agg=sub_agg)
     search = Search(aggs=Aggs(agg), idx=ElasticSettings.idx('DEFAULT'))
     r_aggs = search.search().aggs
     self.assertTrue(agg_name in r_aggs, "returned test aggregations")
     self.assertGreater(r_aggs['test'].get_buckets()[0]['doc_count'], 1)
Example #24
 def test_url_rotate(self):
     ''' Test the url rotates from http://xxx:9200 to correct url. '''
     query = ElasticQuery.filtered(Query.term("seqid", 1),
                                   Filter(Query.term("id", "rs768019142")))
     elastic = Search(query, idx=ElasticSettings.idx('DEFAULT'))
     self.assertTrue(elastic.search().hits_total == 1,
                     "Elastic filtered query retrieved marker")
     Search.index_exists('test', 'test2')
     ElasticUrl.URL_INDEX = 0  # reset
    def get_elastic_settings_with_user_uploads(cls, elastic_dict=None, new_upload_file=None):
        '''Get the updated elastic settings with user uploaded idx_types'''

        idx_key = 'CP_STATS_UD'
        idx = ElasticSettings.idx(idx_key)

        # Check if an index type exists in elastic and later check there is a
        # contenttype/model for the given elastic index type.
        elastic_url = ElasticSettings.url()
        url = idx + '/_mapping'
        response = Search.elastic_request(elastic_url, url, is_post=False)
        # NOTE: Search.get_mapping is not used here (it does not appear to be a class method).
        #logger.debug(response.json())
        if "error" in response.json():
            logger.warning(response.json())
            return None

        # get idx_types from _mapping
        elastic_mapping = json.loads(response.content.decode("utf-8"))
        # If aliases are deployed, the index name returned in the mapping can differ
        # from the configured (effectively hardcoded) name, so use the name that
        # elastic returns.
        idx = list(elastic_mapping.keys())[0]
        idx_types = list(elastic_mapping[idx]['mappings'].keys())

        if elastic_dict is None:
            elastic_dict = ElasticSettings.attrs().get('IDX')

        idx_type_dict = {}

        existing_ct = [ct.name for ct in ContentType.objects.filter(app_label=cls.PERMISSION_MODEL_APP_NAME)]

        for idx_type in idx_types:

            idx_type_with_suffix = idx_type + cls.PERMISSION_MODEL_TYPE_SUFFIX

            for ct in existing_ct:
                if ct.endswith(idx_type_with_suffix):

                    meta_url = idx + '/' + idx_type + '/_meta/_source'
                    meta_response = Search.elastic_request(elastic_url, meta_url, is_post=False)

                    try:
                        elastic_meta = json.loads(meta_response.content.decode("utf-8"))
                        label = elastic_meta['label']
                    except Exception:
                        label = "UD-" + idx_type

                    idx_type_dict['UD-' + idx_type.upper()] = {'label': label, 'type': idx_type}

        if new_upload_file is not None:
            idx_type = new_upload_file
            label = "UD-" + idx_type
            idx_type_dict['UD-' + idx_type.upper()] = {'label': label, 'type': idx_type}

        elastic_dict[idx_key]['idx_type'] = idx_type_dict
        return elastic_dict
 def test_bool_filtered_query2(self):
     ''' Test building and running a filtered boolean query. '''
     query_bool = BoolQuery()
     query_bool.should(RangeQuery("start", lte=20000)) \
               .should(Query.term("seqid", 2)) \
               .must(Query.term("seqid", 1))
     query_string = Query.query_string("rs768019142", fields=["id", "seqid"])
     query = ElasticQuery.filtered_bool(query_string, query_bool, sources=["id", "seqid", "start"])
     elastic = Search(query, idx=ElasticSettings.idx('DEFAULT'))
     self.assertTrue(elastic.search().hits_total == 1, "Elastic filtered query retrieved marker (rs768019142)")
 def test_filters(self):
     ''' Filters Aggregation '''
     filters = {'filters': {'start_gt': RangeQuery('start', gt='1000'),
                            'start_lt': RangeQuery('start', lt='100000')}}
     agg = Agg('test_filters', 'filters', filters)
     aggs = Aggs(agg)
     search = Search(aggs=aggs, idx=ElasticSettings.idx('DEFAULT'))
     r_aggs = search.search().aggs
     self.assertTrue('start_lt' in r_aggs['test_filters'].get_buckets(),
                     "returned avg aggregation")
Example #28
def ajax_range_overlap_search(request, src, start, stop, search_idx, ajax):
    ''' Return count or paginated range elastic result as a JSON '''
    if ajax == 'count':
        elastic = Search.range_overlap_query(src, start, stop, idx=search_idx)
        return JsonResponse(elastic.get_count())
    search_from = request.POST.get("from")
    size = request.POST.get("size")
    elastic = Search.range_overlap_query(src, start, stop, search_from=search_from,
                                         size=size, idx=search_idx)
    return JsonResponse(elastic.get_json_response())
 def test_terms_query(self):
      ''' Test building and running a terms query with highlighting. '''
     highlight = Highlight(["id"])
     query = ElasticQuery(Query.terms("id", ["rs2476601", "rs768019142"]), highlight=highlight)
     elastic = Search(query, idx=ElasticSettings.idx('DEFAULT'))
     docs = elastic.search().docs
     self.assertTrue(len(docs) == 2,
                     "Elastic string query retrieved markers (rs2476601, rs768019142)")
     self.assertTrue(getattr(docs[0], 'seqid'), "Hit attribute found")
     self.assertTrue(docs[0].highlight() is not None, "highlighting found")
Example #30
def ajax_search(request, query, search_idx, ajax):
    ''' Return count or paginated elastic result as a JSON '''
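    # NOTE: 'fields' is assumed to be a module-level list of field names to search over.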
    if ajax == 'count':
        elastic = Search.field_search_query(query, fields=fields, idx=search_idx)
        return JsonResponse(elastic.get_count())
    search_from = request.POST.get("from")
    size = request.POST.get("size")
    elastic = Search.field_search_query(query, fields=fields, search_from=search_from,
                                        size=size, idx=search_idx)
    return JsonResponse(elastic.get_json_response())
Example #31
def _build_frags_query(frags_idx, chrom, segmin, segmax):

    query = ElasticQuery.filtered(Query.terms("seqid", [chrom, str("chr"+chrom)]),
                                  Filter(RangeQuery("end", gte=segmin, lte=segmax)),
                                  utils.bedFields)
    fragsQuery = Search(search_query=query, search_from=0, size=2000000, idx=frags_idx)

    fragsResult = fragsQuery.get_result()
    frags = fragsResult['data']
    frags = utils.makeRelative(int(segmin), int(segmax), ['start', 'end'], frags)
    return frags
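An illustrative call; the fragments index name and window are assumptions:
frags = _build_frags_query('hic_frags_grch38', '1', 113813811, 114437023)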
 def test_or_filtered_query(self):
     ''' Test building and running a filtered query. '''
     highlight = Highlight(["id", "seqid"])
     query_bool = BoolQuery(must_arr=[RangeQuery("start", lte=1),
                                      RangeQuery("end", gte=100000)])
     or_filter = OrFilter(RangeQuery("start", gte=1, lte=100000))
     or_filter.extend(query_bool) \
              .extend(Query.query_string("rs*", fields=["id", "seqid"]).query_wrap())
     query = ElasticQuery.filtered(Query.term("seqid", 1), or_filter, highlight=highlight)
     elastic = Search(query, idx=ElasticSettings.idx('DEFAULT'))
     self.assertTrue(elastic.search().hits_total >= 1, "Elastic filtered query retrieved marker(s)")
Example #33
def region_page(request, region):
    ''' Renders a region page. '''
    query = ElasticQuery.query_match("attr.region_id", region)
    elastic = Search(query, idx=ElasticSettings.idx(name='REGION'))
    context = elastic.get_result()
    context['title'] = "Region"
    return render(request,
                  'region/region.html',
                  context,
                  content_type='text/html')
 def test_range(self):
     ''' Range Aggregation '''
     agg = Agg("test_range_agg", "range",
               {"field": "start",
                "ranges": [{"to": 10000},
                           {"from": 10000, "to": 15000}]})
     aggs = Aggs(agg)
     search = Search(aggs=aggs, idx=ElasticSettings.idx('DEFAULT'))
     r_aggs = search.search().aggs
     self.assertTrue(len(r_aggs['test_range_agg'].get_buckets()) == 2,
                     "returned two buckets in range aggregations")
    def get_rdm_docs(cls, idx, idx_type, qbool=Query.match_all(), sources=[], size=1):
        ''' Get a random doc from the indices. '''
        score_function1 = ScoreFunction.create_score_function('random_score', seed=random.randint(0, 1000000))

        search_query = ElasticQuery(FunctionScoreQuery(qbool, [score_function1], boost_mode='replace'),
                                    sources=sources)
        elastic = Search(search_query=search_query, size=size, idx=idx, idx_type=idx_type)
        try:
            return elastic.search().docs
        except IndexError:
            return cls.get_rdm_docs(idx, idx_type, qbool, sources, size)
    def test_region_idx_loader(self):
        ''' Test loader has created and populated indices.  '''

        key = 'PRIVATE_REGIONS_GFF'

        if key in IDX.keys():
            idx = IDX[key]['indexName']
            Search.index_refresh(idx)
            self.assertTrue(Search.index_exists(idx=idx), 'Index exists: '+idx)
            ndocs = Search(idx=idx).get_count()['count']
            self.assertTrue(ndocs > 0, "Elastic count documents in " + idx + ": " + str(ndocs))
    def test_bool_nested_filter(self):
        ''' Test combined Bool filter '''
        query_bool_nest = BoolQuery()
        query_bool_nest.must(Query.match("id", "rs768019142").query_wrap()) \
                       .must(Query.term("seqid", 1))

        query_bool = BoolQuery()
        query_bool.should(query_bool_nest) \
                  .should(Query.term("seqid", 2))
        query = ElasticQuery.filtered_bool(Query.match_all(), query_bool, sources=["id", "seqid", "start"])
        elastic = Search(query, idx=ElasticSettings.idx('DEFAULT'))
        self.assertTrue(elastic.search().hits_total >= 1, "Nested bool filter query")
 def test_bool_query(self):
     ''' Test a bool query. '''
     query_bool = BoolQuery()
     highlight = Highlight(["id", "seqid"])
     query_bool.must(Query.term("id", "rs768019142")) \
               .must(RangeQuery("start", gt=1000)) \
               .must_not(Query.match("seqid", "2")) \
               .should(Query.match("seqid", "3")) \
               .should(Query.match("seqid", "1"))
     query = ElasticQuery.bool(query_bool, highlight=highlight)
     elastic = Search(query, idx=ElasticSettings.idx('DEFAULT'))
     self.assertTrue(len(elastic.search().docs) == 1, "Elastic string query retrieved marker (rs768019142)")
Example #39
 def test_idx_loader(self):
     ''' Test loader has created and populated indices.  '''
     for key in IDX:
         idx = IDX[key]['indexName']
         # check the index has documents, allow for the indexing to complete if necessary
         # Search.index_refresh(idx)
         self.assertTrue(Search.index_exists(idx=idx),
                         'Index exists: ' + idx)
         ndocs = Search(idx=idx).get_count()['count']
         self.assertTrue(
             ndocs > 0,
             "Elastic count documents in " + idx + ": " + str(ndocs))
 def test_pub_ini_file2(self):
     ''' Test publication pipeline with a list of PMIDs. '''
     out = StringIO()
     call_command('publications', '--dir', TEST_DATA_DIR, '--steps', 'load',
                  sections='DISEASE::TEST', ini=MY_PUB_INI_FILE, stdout=out)
     INI_CONFIG = IniParser().read_ini(MY_PUB_INI_FILE)
     idx = INI_CONFIG['DISEASE']['index']
     Search.index_refresh(idx)
     query = ElasticQuery.query_string("test", fields=["tags.disease"])
     elastic = Search(query, idx=idx)
     docs = elastic.search().docs
     self.assertGreater(len(docs), 1)
    def test_bool_filtered_query4(self):
        ''' Test building and running a filtered boolean query.
        Note: ElasticQuery used to wrap match in a query object. '''
        query_bool = BoolQuery()
        query_bool.should(RangeQuery("start", lte=20000)) \
                  .should(Query.term("seqid", 2)) \
                  .must(Query.match("id", "rs768019142").query_wrap()) \
                  .must(Query.term("seqid", 1))

        query = ElasticQuery.filtered_bool(Query.match_all(), query_bool, sources=["id", "seqid", "start"])
        elastic = Search(query, idx=ElasticSettings.idx('DEFAULT'))
        self.assertTrue(elastic.search().hits_total == 1, "Elastic filtered query retrieved marker (rs768019142)")
 def test_update_doc(self):
     ''' Update with a partial document. '''
     idx = IDX['MARKER']['indexName']
     docs = Search(ElasticQuery(Query.term("id", "rs2476601"), sources=['id']), idx=idx).search().docs
      self.assertEqual(len(docs), 1, "rs2476601 document")
     update_field = {"doc": {"start": 100, "end": 200}}
     Update.update_doc(docs[0], update_field)
     Search.index_refresh(IDX['MARKER']['indexName'])
     docs = Search(ElasticQuery(Query.term("id", "rs2476601")), idx=idx).search().docs
      self.assertEqual(len(docs), 1, "rs2476601 document")
      self.assertEqual(getattr(docs[0], 'start'), 100, "rs2476601 start")
      self.assertEqual(getattr(docs[0], 'end'), 200, "rs2476601 end")
Example #43
    def get_object(self):
        q = ElasticQuery(Query.ids(self.kwargs[self.lookup_field]))
        s = Search(search_query=q, idx=getattr(self, 'idx'))
        try:
            result = s.get_json_response()['hits']['hits'][0]
            obj = ElasticObject(initial=result['_source'])
            obj.uuid = result['_id']

            # May raise a permission denied
            self.check_object_permissions(self.request, obj)
            return obj
        except (TypeError, ValueError, IndexError):
            raise Http404
    def filter_queryset(self, request, queryset, view):
        ''' Override this method to request feature locations. '''
        try:
            filterable = getattr(view, 'filter_fields', [])
            filters = dict([(k, v) for k, v in request.GET.items() if k in filterable])
            query_str = filters.get('feature', 'PTPN22')
            build = self._get_build(filters.get('build', settings.DEFAULT_BUILD))
            if query_str is None or query_str == '':
                return [ElasticObject(initial={'error': 'No feature name provided.'})]

            search_fields = ['id',
                             'symbol', 'dbxrefs.ensembl',
                             'region_name']
            sources = ['start', 'stop', 'seqid', 'chromosome',
                       'disease_loci']
            idxs = ElasticSettings.getattr('IDX')
            MARKER_IDX = ''

            if build == ElasticSettings.get_label('MARKER', label='build'):
                MARKER_IDX = 'MARKER'
            if MARKER_IDX == '':
                for idx in idxs:
                    if 'MARKER' in idx:
                        if build == ElasticSettings.get_label(idx, label='build'):
                            MARKER_IDX = idx

            (idx, idx_type) = ElasticSettings.idx_names(MARKER_IDX, 'MARKER')
            (idx_r, idx_type_r) = ElasticSettings.idx_names('REGION', 'REGION')
            (idx_g, idx_type_g) = ElasticSettings.idx_names('GENE', 'GENE')
            idx += ',' + idx_r + ',' + idx_g
            idx_type += ',' + idx_type_r + ',' + idx_type_g

            equery = BoolQuery(must_arr=Query.query_string(query_str, fields=search_fields))
            elastic = Search(search_query=ElasticQuery(equery, sources), size=10, idx=idx, idx_type=idx_type)
            docs = elastic.search().docs
            locs = []
            for doc in docs:
                if isinstance(doc, RegionDocument):
                    doc = Region.pad_region_doc(doc)

                loc = doc.get_position(build=build).split(':')
                pos = loc[1].replace(',', '').split('-')
                locs.append(ElasticObject(
                    {'feature': query_str,
                     'chr': loc[0],
                     'start': int(pos[0]),
                     'end': int(pos[1]) if len(pos) > 1 else int(pos[0]),
                     'locusString': query_str+" ("+str(loc[1])+")"}))
            return locs
        except (TypeError, ValueError, IndexError, ConnectionError):
            raise Http404
Example #45
def ajax_range_overlap_search(request, src, start, stop, search_idx, ajax):
    ''' Return count or paginated range elastic result as a JSON '''
    if ajax == 'count':
        elastic = Search.range_overlap_query(src, start, stop, idx=search_idx)
        return JsonResponse(elastic.get_count())
    search_from = request.POST.get("from")
    size = request.POST.get("size")
    elastic = Search.range_overlap_query(src,
                                         start,
                                         stop,
                                         search_from=search_from,
                                         size=size,
                                         idx=search_idx)
    return JsonResponse(elastic.get_json_response())
    def get_object(self):
        q = ElasticQuery(Query.ids(self.kwargs[self.lookup_field]))
        s = Search(search_query=q, idx=getattr(self, 'idx'))
        try:
            result = s.get_json_response()['hits']['hits'][0]
            obj = ElasticObject(initial=result['_source'])
            obj.uuid = result['_id']
            obj.criteria_type = result['_type']

            # May raise a permission denied
            self.check_object_permissions(self.request, obj)
            return obj
        except (TypeError, ValueError, IndexError):
            raise Http404
Example #47
    def test_region_idx_loader(self):
        ''' Test loader has created and populated indices.  '''

        key = 'PRIVATE_REGIONS_GFF'

        if key in IDX.keys():
            idx = IDX[key]['indexName']
            Search.index_refresh(idx)
            self.assertTrue(Search.index_exists(idx=idx),
                            'Index exists: ' + idx)
            ndocs = Search(idx=idx).get_count()['count']
            self.assertTrue(
                ndocs > 0,
                "Elastic count documents in " + idx + ": " + str(ndocs))
Example #48
    def post(self, request, *args, **kwargs):
        ens_id = self.request.POST.get('ens_id')
        marker = self.request.POST.get('marker')
        markers = self.request.POST.getlist('markers[]')

        if ens_id:
            sfilter = Filter(Query.query_string(ens_id, fields=["genes"]).query_wrap())
        elif marker:
            sfilter = Filter(Query.query_string(marker, fields=["marker"]).query_wrap())
        elif markers:
            sfilter = Filter(Query.query_string(' '.join(markers), fields=["marker"]).query_wrap())

        query = ElasticQuery.filtered(Query.match_all(), sfilter)
        elastic = Search(query, idx=ElasticSettings.idx('REGION', 'STUDY_HITS'), size=500)
        study_hits = elastic.get_json_response()['hits']

        ens_ids = []
        pmids = []
        for hit in study_hits['hits']:
            if 'pmid' in hit['_source']:
                pmids.append(hit['_source']['pmid'])
            if 'genes' in hit['_source']:
                for ens_id in hit['_source']['genes']:
                    ens_ids.append(ens_id)
        docs = utils.get_gene_docs_by_ensembl_id(ens_ids, ['symbol'])
        pub_docs = PublicationDocument.get_pub_docs_by_pmid(pmids, sources=['authors.name', 'journal'])

        for hit in study_hits['hits']:
            genes = {}
            if 'genes' in hit['_source']:
                for ens_id in hit['_source']['genes']:
                    try:
                        genes[ens_id] = getattr(docs[ens_id], 'symbol')
                    except KeyError:
                        genes[ens_id] = ens_id
            hit['_source']['genes'] = genes
            if 'pmid' in hit['_source']:
                pmid = hit['_source']['pmid']
                try:
                    authors = getattr(pub_docs[pmid], 'authors')
                    journal = getattr(pub_docs[pmid], 'journal')
                    hit['_source']['pmid'] = \
                        {'pmid': pmid,
                         'author': authors[0]['name'].rsplit(None, 1)[-1] if authors else "",
                         'journal': journal}
                except KeyError:
                    hit['_source']['pmid'] = {'pmid': pmid}

        return JsonResponse(study_hits)
    def test_mapping(self):
        ''' Test retrieving the mapping for an index. '''
        elastic = Search(idx=ElasticSettings.idx('DEFAULT'))
        mapping = elastic.get_mapping()
        self.assertTrue(ElasticSettings.idx('DEFAULT') in mapping, "Database name in mapping result")
        if ElasticSettings.idx('DEFAULT') in mapping:
            self.assertTrue("mappings" in mapping[ElasticSettings.idx('DEFAULT')], "Mapping result found")

        # check using the index type
        mapping = elastic.get_mapping('marker')
        self.assertTrue(ElasticSettings.idx('DEFAULT') in mapping, "Database name in mapping result")

        # err check
        mapping = elastic.get_mapping('marker/xx')
        self.assertTrue('error' in mapping, "Error returned for an unknown mapping type")
Example #50
def _build_exon_query(chrom, segmin, segmax, genes):
    # get exonic structure for genes in this section
    geneExons = dict()
    query_bool = BoolQuery()
    query_bool.must([Query.term("seqid", chrom)])
    if len(genes) > 0:
        for g in genes:
            query = ElasticQuery.filtered_bool(Query.query_string(g["gene_id"], fields=["name"]),
                                               query_bool, sources=utils.snpFields)
            elastic = Search(query, idx=getattr(chicp_settings, 'CP_GENE_IDX')+'/exons/', search_from=0, size=2000)
            result = elastic.get_result()
            exons = result['data']
            exons = utils.makeRelative(int(segmin), int(segmax), ['start', 'end'], exons)
            geneExons[g["gene_id"]] = sorted(exons, key=operator.itemgetter("start"))
    return geneExons
Example #51
def study_page(request, study):
    ''' Renders a study page. '''
    if study is None:
        messages.error(request, 'No study id given.')
        raise Http404()
    query = ElasticQuery(Query.ids(study.split(',')))
    elastic = Search(query, idx=ElasticSettings.idx('STUDY', 'STUDY'), size=5)
    res = elastic.search(obj_document=StudyDocument)
    if res.hits_total == 0:
        messages.error(request, 'Study(s) '+study+' not found.')
    elif res.hits_total < 9:
        names = ', '.join([getattr(doc, 'study_name') for doc in res.docs])
        context = {'features': res.docs, 'title': names}
        return render(request, 'study/study.html', context, content_type='text/html')
    raise Http404()
Example #52
def _build_frags_query(frags_idx, chrom, segmin, segmax):

    query = ElasticQuery.filtered(Query.terms("seqid", [chrom, str("chr"+chrom)]),
                                  Filter(RangeQuery("end", gte=segmin, lte=segmax)),
                                  utils.bedFields)
    fragsQuery = Search(search_query=query, search_from=0, size=10000, idx=frags_idx)

    # fragsResult = fragsQuery.get_result()
    # frags = fragsResult['data']
    fragsResult = fragsQuery.get_json_response()
    frags = []
    for hit in fragsResult['hits']['hits']:
        frags.append(hit['_source'])
    frags = utils.makeRelative(int(segmin), int(segmax), ['start', 'end'], frags)
    return frags
Example #53
def _build_hic_query(query, targetIdx, segmin=0, segmax=0):

    hic = []

    hicElastic = Search(query, idx=targetIdx, search_from=0, size=2000)
    hicResult = hicElastic.get_result()
    if len(hicResult['data']) > 0:
        hic = hicResult['data']
        if segmin == 0 or segmax == 0:
            (segmin, segmax) = utils.segCoords(hic)
            extension = int(0.05*(segmax-segmin))
            segmin = segmin - extension
            segmax = segmax + extension
        hic = utils.makeRelative(int(segmin), int(segmax), ['baitStart', 'baitEnd', 'oeStart', 'oeEnd'], hic)
    return hic, segmin, segmax
    def filter_queryset(self, request, queryset, view):
        """ Override this method to request just the documents required from Rserve. """
        try:
            filterable = getattr(view, "filter_fields", [])
            filters = dict([(k, v) for k, v in request.GET.items() if k in filterable])

            mid1 = filters.get("m1")
            if mid1 is None or mid1 == "":
                return [ElasticObject(initial={"error": "No marker ID provided."})]

            dataset = filters.get("dataset", "EUR").replace("-", "")
            mid2 = filters.get("m2")
            window_size = int(filters.get("window_size", 1000000))
            dprime = filters.get("dprime", 0.0)
            rsq = filters.get("rsq", 0.8)
            maf = filters.get("maf", False)
            if maf:
                maf = True
            build_version = filters.get("build", "GRCh38").lower()
            pos = filters.get("pos", False)
            if pos:
                pos = True

            query = ElasticQuery(BoolQuery(must_arr=[Query.term("id", mid1)]), sources=["seqid", "start"])
            elastic = Search(search_query=query, idx=ElasticSettings.idx("MARKER", "MARKER"), size=1)
            doc = elastic.search().docs[0]
            seqid = getattr(doc, "seqid")

            rserve = getattr(settings, "RSERVE")
            conn = pyRserve.connect(host=rserve.get("HOST"), port=rserve.get("PORT"))
            ld_str = conn.r.ld_run(
                dataset,
                seqid,
                mid1,
                marker2=mid2,
                window_size=window_size,
                dprime=dprime,
                rsq=rsq,
                maf=maf,
                position=pos,
                build_version=build_version,
            )
            ld_str = ld_str.replace("D.prime", "dprime").replace("R.squared", "rsquared")
            conn.close()

            return [ElasticObject(initial=json.loads(str(ld_str)))]
        except (TypeError, ValueError, IndexError, ConnectionError):
            raise Http404
Example #55
def marker_page(request):
    ''' Renders a marker page. '''
    query_dict = request.GET
    marker = query_dict.get("m")
    if marker is None:
        messages.error(request, 'No marker name given.')
        raise Http404()

    fields = ['id', 'rscurrent'] if marker.startswith("rs") else ['name']
    sub_agg = Agg('top_hits', 'top_hits', {"size": 15})
    aggs = Aggs(Agg("types", "terms", {"field": "_type"}, sub_agg=sub_agg))
    query = ElasticQuery(Query.query_string(marker, fields=fields))
    elastic = Search(search_query=query, idx=ElasticSettings.idx('MARKER'), aggs=aggs, size=0)
    res = elastic.search()
    if res.hits_total >= 1:
        types = getattr(res.aggs['types'], 'buckets')
        marker_doc = None
        ic_docs = []
        history_docs = []
        for doc_type in types:
            hits = doc_type['top_hits']['hits']['hits']
            for hit in hits:
                doc = Document(hit)
                if 'marker' == doc_type['key']:
                    marker_doc = doc
                elif 'immunochip' == doc_type['key']:
                    ic_docs.append(doc)
                elif 'rs_merge' == doc_type['key']:
                    history_docs.append(doc)

        criteria = {}
        if marker_doc is not None:
            if ElasticSettings.idx('CRITERIA') is not None:
                criteria = views.get_criteria([marker_doc], 'marker', 'id', 'MARKER')
            marker_doc.marker_build = _get_marker_build(ElasticSettings.idx('MARKER'))

        context = {
            'marker': marker_doc,
            'old_dbsnp_docs': _get_old_dbsnps(marker),
            'ic': ic_docs,
            'history': history_docs,
            'criteria': criteria
        }
        return render(request, 'marker/marker.html', context,
                      content_type='text/html')
    elif res.hits_total == 0:
        messages.error(request, 'Marker '+marker+' not found.')
        raise Http404()
    def test_gene_history_loader(self):
        """ Test the gene history loading. """
        call_command("pipeline", "--steps", "load", sections="GENE_HISTORY", dir=TEST_DATA_DIR, ini=MY_INI_FILE)

        INI_CONFIG = IniParser().read_ini(MY_INI_FILE)
        idx = INI_CONFIG["GENE_HISTORY"]["index"]
        idx_type = INI_CONFIG["GENE_HISTORY"]["index_type"]
        elastic = Search(idx=idx, idx_type=idx_type)
        Search.index_refresh(idx)

        self.assertTrue(elastic.get_count()["count"] > 1, "Count documents in the index")
        map1_props = Gene.gene_history_mapping(idx, idx_type, test_mode=True).mapping_properties
        map2_props = elastic.get_mapping()
        if idx not in map2_props:
            logger.error("MAPPING ERROR: " + json.dumps(map2_props))
        self._cmpMappings(map2_props[idx]["mappings"], map1_props, idx_type)
Example #57
def region_page(request, region):
    ''' Renders a region page. '''
    if region is None:
        messages.error(request, 'No region given.')
        raise Http404()
    query = ElasticQuery(Query.ids(region.split(',')))
    elastic = Search(query, idx=ElasticSettings.idx('REGION', 'REGION'), size=5)
    res = elastic.search()
    if res.hits_total == 0:
        messages.error(request, 'Region(s) '+region+' not found.')
    elif res.hits_total < 9:
        names = ', '.join([getattr(doc, 'region_name') for doc in res.docs])
        REGIONS = [Region.pad_region_doc(doc) for doc in res.docs]
        context = {'features': REGIONS, 'title': names}
        return render(request, 'region/index.html', context, content_type='text/html')
    raise Http404()
Example #58
def disease_page(request, disease):
    ''' Renders a disease page. '''
    if disease is None:
        messages.error(request, 'No disease given.')
        raise Http404()
    disease = disease.lower()
    query = ElasticQuery(Query.terms("code", disease.split(',')))
    elastic = Search(query, idx=ElasticSettings.idx('DISEASE', 'DISEASE'), size=5)
    res = elastic.search()
    if res.hits_total == 0:
        messages.error(request, 'Disease(s) '+disease+' not found.')
    elif res.hits_total < 9:
        names = ', '.join([getattr(doc, 'name') for doc in res.docs])
        context = {'features': res.docs, 'title': names}
        return render(request, 'disease/index.html', context, content_type='text/html')
    raise Http404()
Example #59
def _get_old_dbsnps(marker):
    ''' Get markers from old versions of DBSNP. Assumes the index key is
    prefixed by 'MARKER_'. '''
    old_dbsnps_names = sorted([ElasticSettings.idx(k) for k in ElasticSettings.getattr('IDX').keys()
                               if 'MARKER_' in k], reverse=True)
    old_dbsnp_docs = []
    if len(old_dbsnps_names) > 0:
        search_query = ElasticQuery(Query.query_string(marker, fields=['id', 'rscurrent']))
        for idx_name in old_dbsnps_names:
            elastic2 = Search(search_query=search_query, idx=idx_name, idx_type='marker')
            docs = elastic2.search().docs
            if len(docs) > 0:
                old_doc = docs[0]
                old_doc.marker_build = _get_marker_build(idx_name)
                old_dbsnp_docs.append(old_doc)
    return old_dbsnp_docs
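A usage sketch for the helper above (the marker id is illustrative):
old_docs = _get_old_dbsnps('rs2476601')
for doc in old_docs:
    print(getattr(doc, 'id', None), doc.marker_build)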