Exemple #1
0
def get_criteria(docs, doc_type, doc_attr, idx_type_key):
    """ Return a dictionary of feature name:criteria.

    The lower-cased ``doc_attr`` value of every document in ``docs`` whose
    ``type()`` equals ``doc_type`` is looked up (terms query on ``Name``) in
    the CRITERIA index type identified by ``idx_type_key``.

    Each returned value is a list of single-entry dictionaries, one per
    attribute of the criteria document (sorted by attribute name, ``_Hs``
    removed from the key), holding the attribute value split on ``:``.
    Attributes whose value starts with ``"0"`` are left out and the ``OD_Hs``
    attribute, when kept, is always appended last under the key ``OD``.

    An empty dictionary is returned when there is nothing to look up or when
    the criteria index type is not configured.
    """
    genes = [getattr(doc, doc_attr).lower() for doc in docs if doc.type() == doc_type]
    if not genes:
        # nothing to search for - avoid a pointless size=0 request
        return {}

    idx = ElasticSettings.idx("CRITERIA", idx_type_key)
    if idx is None:
        return {}

    query = Query.terms("Name", genes)
    sources = {"exclude": ["Primary id", "Object class", "Total score"]}
    res = Search(ElasticQuery(query, sources=sources), idx=idx, size=len(genes)).search()

    # attributes that are never reported as ordinary criteria
    skipped = ("Name", "_meta", "OD_Hs")
    criteria = {}
    for doc in res.docs:
        gene_name = getattr(doc, "Name")
        entries = [
            {attr.replace("_Hs", ""): value.split(":")}
            for attr, value in sorted(doc.__dict__.items(), key=lambda t: t[0])
            if attr not in skipped and not value.startswith("0")
        ]
        # OD_Hs is handled separately so that it ends up last in the list
        od_value = getattr(doc, "OD_Hs", None)
        if od_value is not None and not od_value.startswith("0"):
            entries.append({"OD": od_value.split(":")})
        criteria[gene_name] = entries

    return criteria
    def test_region_attributes(self):
        ''' Check that the gene, study and publication ids listed on one
        randomly fetched (padded) region document are all present in their
        respective indices. '''
        idx = ElasticSettings.idx(RegionDataTest.IDX_KEY, 'REGION')
        (idx, idx_type) = idx.split('/')
        docs = ElasticUtils.get_rdm_docs(idx, idx_type, qbool=Query.match_all(), sources=[], size=1)
        region = utils.Region.pad_region_doc(docs[0])

        # (region attribute, index key, index type key, assertion message)
        checks = (
            ("genes", 'GENE', 'GENE', "All genes on region found in GENE index"),
            ("studies", 'STUDY', 'STUDY', "All study ids for region found in STUDY index"),
            ("pmids", 'PUBLICATION', 'PUBLICATION', "All PMIDs for region found in PUBLICATION index"),
        )
        for attr, idx_key, idx_type_key, msg in checks:
            ids = getattr(region, attr)
            if len(ids) > 0:
                ids_query = ElasticQuery(Query.ids(ids))
                found = Search(ids_query, idx=ElasticSettings.idx(idx_key, idx_type_key),
                               size=len(ids)).search()
                self.assertEqual(len(ids), found.hits_total, msg)
Exemple #3
0
def _find_snp_position(snp_track, name):
    if snp_track is None:
        query = ElasticQuery.query_match("id", name)
        elastic = Search(query, idx=ElasticSettings.idx('MARKER'))
        snpResult = elastic.get_json_response()
        if(len(snpResult['hits']['hits'])) > 0:
            snp = snpResult['hits']['hits'][0]['_source']
            chrom = snp['seqid'].replace('chr', "")
            position = snp['start']
            return {'chr': chrom, 'start': (position-1), 'end': position, 'name': name}
    else:
        mo = re.match(r"(.*)-(.*)", snp_track)
        (group, track) = mo.group(1, 2)
        try:
            snp_track_idx = ElasticSettings.idx('CP_STATS_'+group.upper(), snp_track.upper())
        except SettingsError:
            snp_track_idx = ElasticSettings.idx('CP_STATS_'+group.upper())+"/"+track

        query = ElasticQuery.query_match("name", name)
        elastic = Search(query, idx=snp_track_idx)
        snpResult = elastic.get_json_response()
        if(len(snpResult['hits']['hits'])) > 0:
            snp = snpResult['hits']['hits'][0]['_source']
            chrom = snp['seqid'].replace('chr', "")
            position = snp['start']
            return {'chr': chrom, 'start': (position-1), 'end': position, 'name': name}

    return {'error': 'Marker '+name+' does not exist in the currently selected dataset'}
Exemple #4
0
def _find_snp_position(snp_track, name):
    if snp_track is None:
        query = ElasticQuery.query_match("id", name)
        elastic = Search(query, idx=ElasticSettings.idx('MARKER'))
        snpResult = elastic.get_json_response()
        if(len(snpResult['hits']['hits'])) > 0:
            snp = snpResult['hits']['hits'][0]['_source']
            chrom = snp['seqid'].replace('chr', "")
            position = snp['start']
            return {'chr': chrom, 'start': (position-1), 'end': position, 'name': name}
    else:
        mo = re.match(r"(.*)-(.*)", snp_track)
        (group, track) = mo.group(1, 2)
        try:
            snp_track_idx = ElasticSettings.idx('CP_STATS_'+group.upper(), snp_track.upper())
        except SettingsError:
            snp_track_idx = ElasticSettings.idx('CP_STATS_'+group.upper())+"/"+track

        query = ElasticQuery.query_match("name", name)
        elastic = Search(query, idx=snp_track_idx)
        snpResult = elastic.get_json_response()
        if(len(snpResult['hits']['hits'])) > 0:
            snp = snpResult['hits']['hits'][0]['_source']
            chrom = snp['seqid'].replace('chr', "")
            position = snp['start']
            return {'chr': chrom, 'start': (position-1), 'end': position, 'name': name}

    return {'error': 'Marker '+name+' does not exist in the currently selected dataset'}
    def filter_queryset(self, request, queryset, view):
        ''' Override this method to request just the documents required from elastic.

        The page size and offset come from the view's paginator. Only request
        parameters named in the view's ``filter_fields`` are considered; the
        ``feature_type`` filter (default ``GENE_CRITERIA``) selects the index
        key(s) via ``self._get_index``.

        Returns a list of ElasticObject instances, one per hit, with ``uuid``
        and ``criteria_type`` set from the hit's ``_id`` and ``_type``. The
        total hit count is stored on ``view.es_count``. '''
        q_size = view.paginator.get_limit(request)
        q_from = view.paginator.get_offset(request)

        filterable = getattr(view, 'filter_fields', [])
        filters = {k: v for k, v in request.GET.items() if k in filterable}
        criteria_idx = self._get_index(filters.get('feature_type', 'GENE_CRITERIA'))

        # several index keys are searched together as a comma separated list
        if isinstance(criteria_idx, list):
            idx = ','.join(ElasticSettings.idx(name) for name in criteria_idx)
        else:
            idx = ElasticSettings.idx(criteria_idx)

        q = ElasticQuery(Query.match_all())
        s = Search(search_query=q, idx=idx, size=q_size, search_from=q_from)
        json_results = s.get_json_response()
        results = []
        for result in json_results['hits']['hits']:
            new_obj = ElasticObject(initial=result['_source'])
            new_obj.uuid = result['_id']
            new_obj.criteria_type = result['_type']
            results.append(new_obj)
        view.es_count = json_results['hits']['total']
        return results
 def test_top_hits_sub_agg(self):
     ''' Terms aggregation on _index with a top_hits sub-aggregation. '''
     default_idx = ElasticSettings.idx('DEFAULT')
     top_hits = Agg('idx_top_hits', 'top_hits', {"size": 1})
     by_index = Agg("idxs", "terms", {"field": "_index"}, sub_agg=top_hits)
     by_type = Agg("categories", "terms", {"field": "_type", "size": 0})
     response = Search(aggs=Aggs([by_index, by_type]), idx=default_idx).search()
     buckets = response.aggs['idxs'].get_docs_in_buckets()
     self.assertEqual(buckets[default_idx]['doc_count'], 3)
     self.assertEqual(len(buckets[default_idx]['docs']), 1)
    def test_scan_and_scroll(self):
        ''' Test the scan and scroll interface, with and without a query. '''
        def _assert_has_hits(resp_json):
            ''' Callback run on each scroll response: it must contain hits. '''
            self.assertTrue('hits' in resp_json, 'scan and scroll hits')
            self.assertGreaterEqual(len(resp_json['hits']['hits']), 1)

        default_idx = ElasticSettings.idx('DEFAULT')
        ScanAndScroll.scan_and_scroll(default_idx, call_fun=_assert_has_hits)

        marker_query = ElasticQuery.query_string("rs2476601", fields=["id"])
        ScanAndScroll.scan_and_scroll(default_idx, call_fun=_assert_has_hits, query=marker_query)
 def test_sort_query(self):
     ''' Test sorting for a query, given as a string and as a dictionary. '''
     match_all = ElasticQuery(Query.match_all())
     sort_specs = (
         'start:asc,_score',
         {"sort": [{"start": {"order": "asc", "mode": "avg"}}]},
     )
     for spec in sort_specs:
         sorted_search = Search(match_all, idx=ElasticSettings.idx('DEFAULT'), qsort=Sort(spec))
         self._check_sort_order(sorted_search.search().docs)
     # anything other than a string or dictionary is rejected
     self.assertRaises(QueryError, Sort, 1)
    def test_term_query(self):
        ''' Test building and running term queries. '''
        default_idx = ElasticSettings.idx('DEFAULT')

        by_id = Search(ElasticQuery(Query.term("id", "rs2476601")), idx=default_idx)
        n_by_id = len(by_id.search().docs)
        self.assertTrue(n_by_id == 1, "Elastic string query retrieved marker (rs2476601)")

        # same kind of query with a boost applied to the term
        by_seqid = Search(ElasticQuery(Query.term("seqid", "1", boost=3.0)), idx=default_idx)
        n_by_seqid = len(by_seqid.search().docs)
        self.assertTrue(n_by_seqid > 1, "Elastic string query retrieved markers  on chr1")
 def test_query_ids(self):
     ''' Test querying by document ids, with and without a type restriction. '''
     default_idx = ElasticSettings.idx('DEFAULT')

     both = Search(ElasticQuery(Query.ids(['1', '2'])), idx=default_idx, size=5).search().docs
     self.assertTrue(len(both) == 2, "Elastic string query retrieved marker (rs*)")

     # restrict the second lookup to the type of the first document found
     doc_type = both[0].type()
     typed_query = ElasticQuery(Query.ids('2', types=doc_type))
     single = Search(typed_query, idx=default_idx, size=5).search().docs
     self.assertTrue(len(single) == 1, "Elastic string query retrieved marker (rs*)")
    def test_mapping(self):
        ''' Test retrieving the mapping for an index. '''
        default_idx = ElasticSettings.idx('DEFAULT')
        elastic = Search(idx=default_idx)

        # whole index
        index_mapping = elastic.get_mapping()
        self.assertTrue(default_idx in index_mapping, "Database name in mapping result")
        self.assertTrue("mappings" in index_mapping[default_idx], "Mapping result found")

        # check using the index type
        type_mapping = elastic.get_mapping('marker')
        self.assertTrue(default_idx in type_mapping, "Database name in mapping result")

        # err check
        bad_mapping = elastic.get_mapping('marker/xx')
        self.assertTrue('error' in bad_mapping, "Database name in mapping result")
    def test_term(self):
        ''' Terms Aggregation, on its own and combined with an ids query. '''
        agg_name = "test"
        default_idx = ElasticSettings.idx('DEFAULT')
        aggs = Aggs(Agg(agg_name, "terms", {"field": "seqid", "size": 0}))

        agg_only = Search(aggs=aggs, idx=default_idx).search().aggs
        self.assertTrue(agg_name in agg_only, "returned test aggregations")

        # Ids Query with Terms Aggregation
        ids_query = ElasticQuery(Query.ids(['1', '2']))
        with_query = Search(search_query=ids_query, aggs=aggs, idx=default_idx, size=5).search().aggs
        test_agg = with_query[agg_name]
        self.assertTrue(len(test_agg.get_buckets()) > 0, "returned test aggregation buckets")
        self.assertTrue(with_query[agg_name].buckets[0]['doc_count'] >= 0, "bucket document count")
Exemple #13
0
 def setUp(self):
     ''' Create a request factory and a user in the READ group, then store
     the tissue list of every CP_TARGET index that user may access. '''
     # Every test needs access to the request factory.
     self.factory = RequestFactory()
     self.group, _created = Group.objects.get_or_create(name='READ')
     self.user = User.objects.create_user(username='******', email='*****@*****.**', password='******')
     self.user.groups.add(self.group)
     idx_keys_auth, _idx_type_keys_auth = get_authenticated_idx_and_idx_types(
         user=self.user, idx_keys=None, idx_type_keys=None)

     for target in chicp_settings.CP_TARGET:
         target_key = 'CP_TARGET_'+target
         if target_key in idx_keys_auth:
             # tissue names are the keys of the mapping's _meta.tissue_type entry
             mapping = Search(idx=ElasticSettings.idx(target_key)).get_mapping(mapping_type="gene_target")
             gene_target = mapping[ElasticSettings.idx(target_key)]['mappings']['gene_target']
             utils.tissues[target_key] = list(gene_target['_meta']['tissue_type'].keys())
Exemple #14
0
def marker_page(request):
    ''' Renders a marker page.

    The marker name is read from the ``m`` request parameter. Names starting
    with ``rs`` are searched on the ``id`` and ``rscurrent`` fields, any other
    name on the ``name`` field. The hits are grouped by document type into the
    marker document, immunochip documents and rs merge history documents.

    Raises Http404 when no marker name is given or nothing is found. '''
    query_dict = request.GET
    marker = query_dict.get("m")
    if marker is None:
        messages.error(request, 'No marker name given.')
        raise Http404()

    fields = ['id', 'rscurrent'] if marker.startswith("rs") else ['name']
    sub_agg = Agg('top_hits', 'top_hits', {"size": 15})
    aggs = Aggs(Agg("types", "terms", {"field": "_type"}, sub_agg=sub_agg))
    query = ElasticQuery(Query.query_string(marker, fields=fields))
    elastic = Search(search_query=query, idx=ElasticSettings.idx('MARKER'), aggs=aggs, size=0)
    res = elastic.search()
    if res.hits_total < 1:
        # every non-rendering outcome ends in a 404, so the view never returns None
        messages.error(request, 'Marker '+marker+' not found.')
        raise Http404()

    marker_doc = None
    ic_docs = []
    history_docs = []
    for doc_type in getattr(res.aggs['types'], 'buckets'):
        type_key = doc_type['key']
        for hit in doc_type['top_hits']['hits']['hits']:
            doc = Document(hit)
            if type_key == 'marker':
                marker_doc = doc
            elif type_key == 'immunochip':
                ic_docs.append(doc)
            elif type_key == 'rs_merge':
                history_docs.append(doc)

    criteria = {}
    if marker_doc is not None:
        if ElasticSettings.idx('CRITERIA') is not None:
            criteria = views.get_criteria([marker_doc], 'marker', 'id', 'MARKER')
        marker_doc.marker_build = _get_marker_build(ElasticSettings.idx('MARKER'))

    context = {
        'marker': marker_doc,
        'old_dbsnp_docs': _get_old_dbsnps(marker),
        'ic': ic_docs,
        'history': history_docs,
        'criteria': criteria
    }
    return render(request, 'marker/marker.html', context,
                  content_type='text/html')
Exemple #15
0
 def get_diseases(self):
     ''' Overridden get diseases for feature: the codes of the disease tags
     found for this region in the REGION_CRITERIA index, or an empty list
     when the parent implementation returns nothing. '''
     if not super(RegionDocument, self).get_diseases():
         return []
     criteria_idx = ElasticSettings.idx('REGION_CRITERIA')
     tags = Criteria.get_disease_tags(getattr(self, "region_id"), idx=criteria_idx)
     return [getattr(tag, "code") for tag in tags]
    def test_pubs_disease_tags(self):
        ''' Check the number of disease publications against the number of
        tags.disease and report differences. '''
        counts_match = True
        report = []
        for disease in DiseasePublicationTest.DISEASES:
            pmids = self._get_pmids(disease)
            disease_code = disease.lower()
            tag_filter = Filter(Query.term('tags.disease', disease_code))
            tagged_query = ElasticQuery(BoolQuery(b_filter=tag_filter), sources=['pmid'])
            elastic = Search(search_query=tagged_query,
                             idx=ElasticSettings.idx('PUBLICATION'), size=len(pmids)*2)
            n_indexed = elastic.get_count()['count']
            line = disease_code+'\tINDEX: '+str(n_indexed)+'\tNCBI: '+str(len(pmids))
            if n_indexed != len(pmids):
                counts_match = False
                # list the PMIDs that are on one side only
                pmids_in_idx = [getattr(doc, 'pmid') for doc in elastic.search().docs]
                extra = [pmid for pmid in pmids_in_idx if pmid not in pmids]
                missing = [pmid for pmid in pmids if pmid not in pmids_in_idx]
                if len(extra) > 0:
                    line += '\textra PMIDs: '+str(extra)
                if len(missing) > 0:
                    line += '\tmissing PMIDs: '+str(missing)
            report.append(line+'\n')

        print(''.join(report))
        self.assertTrue(counts_match, 'Count for disease tags')
Exemple #17
0
 def get_diseases(self):
     ''' Overridden get diseases for feature: the codes of the disease tags
     found for this study in the STUDY_CRITERIA index, or an empty list when
     the parent implementation returns nothing. '''
     if not super(StudyDocument, self).get_diseases():
         return []
     tags = Criteria.get_disease_tags(self.get_name(), idx=ElasticSettings.idx('STUDY_CRITERIA'))
     return [getattr(tag, "code") for tag in tags]
 def get_gene_docs_by_ensembl_id(cls, ens_ids, sources=None):
     ''' Get the gene documents for the corresponding array of ensembl IDs.
     A dictionary is returned with the key being the document id (the ids
     query is run with the ensembl IDs) and the value the gene document.
     ``sources`` is passed on to the query to limit the returned fields.
     An empty ``ens_ids`` gives an empty dictionary without querying. '''
     if not ens_ids:
         # nothing to look up - skip the size=0 search
         return {}
     query = ElasticQuery(Query.ids(ens_ids), sources=sources)
     elastic = Search(query, idx=ElasticSettings.idx('GENE', idx_type='GENE'), size=len(ens_ids))
     return {doc.doc_id(): doc for doc in elastic.search().docs}
Exemple #19
0
 def get_hits_by_study_id(cls, study_id, sources=None):
     ''' Get visible/authenticated hits for a study.

     Searches the REGION/STUDY_HITS index type for documents whose
     ``dil_study_id`` is ``study_id`` and that have no ``group_name`` field
     (at most 1000 documents). On each returned document:

     * ``genes`` (when non-empty) is replaced by a dictionary mapping each
       ensembl id to its gene symbol, or to the id itself when no gene
       document is found for it;
     * ``loc`` and ``encoded_loc`` are set from the ``build_info`` entry
       matching ``settings.DEFAULT_BUILD``.

     ``sources`` limits the fields fetched; omitted, an empty list is used. '''
     if sources is None:
         # fresh list per call instead of a shared mutable default
         sources = []
     hits_query = ElasticQuery(BoolQuery(must_arr=Query.term('dil_study_id', study_id),
                                         b_filter=Filter(Query.missing_terms("field", "group_name"))),
                               sources=sources)
     docs = Search(hits_query, idx=ElasticSettings.idx('REGION', 'STUDY_HITS'), size=1000).search().docs
     ens_ids = [gene for doc in docs if getattr(doc, 'genes') for gene in getattr(doc, 'genes')]
     gene_docs = utils.get_gene_docs_by_ensembl_id(ens_ids, ['symbol'])
     for doc in docs:
         doc_genes = getattr(doc, 'genes')
         if doc_genes:
             genes = {}
             for ens_id in doc_genes:
                 try:
                     genes[ens_id] = getattr(gene_docs[ens_id], 'symbol')
                 except KeyError:
                     # unknown gene: fall back to the id, keeping the
                     # symbols already resolved for this document
                     genes[ens_id] = ens_id
             setattr(doc, 'genes', genes)
         build_info = getattr(doc, 'build_info')
         for bi in build_info:
             if bi['build'] == settings.DEFAULT_BUILD:
                 # human readable location with locale specific digit grouping
                 setattr(doc, "loc", "chr" + bi['seqid'] + ":" +
                         locale.format_string("%d", bi['start'], grouping=True) + "-" +
                         locale.format_string("%d", bi['end'], grouping=True))
                 setattr(doc, "encoded_loc", "chr" + bi['seqid'] + "%3A" +
                         str(bi['start']) + ".." + str(bi['end']))
     return docs
 def test_missing_terms_filtered_query(self):
     ''' Test filtered query with a missing terms filter. '''
     no_group_filter = TermsFilter.get_missing_terms_filter("field", "group_name")
     filtered_query = ElasticQuery.filtered(Query.match_all(), no_group_filter)
     public_docs = Search(filtered_query, idx=ElasticSettings.idx('DEFAULT')).search().docs
     self.assertTrue(len(public_docs) == 3, "Elastic string query retrieved all public docs")
 def test_string_query(self):
     ''' Test building and running a string query. '''
     marker_query = ElasticQuery.query_string("rs2476601", fields=["id"])
     result = Search(marker_query, idx=ElasticSettings.idx('DEFAULT')).search()
     self.assertTrue(len(result.docs) == 1, "Elastic string query retrieved marker (rs2476601)")
     # an unknown keyword argument is rejected
     self.assertRaises(QueryError, ElasticQuery.query_string, "rs2476601", fieldssss=["id"])
    def gene_in_region(cls, hit, section=None, config=None, result_container=None):
        ''' Collect the genes overlapping the (padded) region of a region hit.

        ``hit`` is wrapped in a Document and padded with
        ``utils.Region.pad_region_doc``. The GENE index is then searched for
        genes overlapping the region's ``build_info`` range and their document
        ids are handed to ``cls.populate_container`` together with the region
        id, name and disease tags.

        Returns the container returned by ``populate_container``, or
        ``result_container`` unchanged when the region could not be padded.
        When ``result_container`` is omitted a new dictionary is used. '''
        if result_container is None:
            # fresh container per call instead of a shared mutable default
            result_container = {}

        try:
            padded_region_doc = utils.Region.pad_region_doc(Document(hit))
        except Exception:
            logger.warning('Region padding error ')
            return result_container

        # 'build_info': {'end': 22411939, 'seqid': '1', 'build': 38, 'start': 22326008}, 'region_id': '1p36.12_008'}
        region_id = getattr(padded_region_doc, "region_id")
        region_name = getattr(padded_region_doc, "region_name")
        build_info = getattr(padded_region_doc, "build_info")
        diseases = getattr(padded_region_doc, "tags")['disease']
        seqid = build_info['seqid']
        start = build_info['start']
        end = build_info['end']

        gene_index = ElasticSettings.idx('GENE', idx_type='GENE')
        elastic = Search.range_overlap_query(seqid=seqid, start_range=start, end_range=end,
                                             idx=gene_index, field_list=['start', 'stop', '_id'],
                                             seqid_param="chromosome",
                                             end_param="stop", size=10000)
        genes = {doc.doc_id() for doc in elastic.search().docs}

        return cls.populate_container(region_id,
                                      region_name,
                                      fnotes=None, features=genes,
                                      diseases=diseases,
                                      result_container=result_container)
Exemple #23
0
def _categories(idx):
    ''' Map every index name of the comma separated ``idx`` string to a
    dictionary describing its search type (and, for markers and genes, its
    categories). Unrecognised indices are described as 'Other'. '''
    def _describe(this_idx):
        ''' Build the description for a single index name. '''
        if this_idx+'/marker' == ElasticSettings.idx('MARKER', 'MARKER'):
            return {'type': 'Marker',
                    'categories': ['synonymous', 'non-synonymous'],
                    'search': ['in LD of selected']}
        if this_idx == ElasticSettings.idx('REGION'):
            return {'type': 'Region'}
        if this_idx == ElasticSettings.idx('GENE'):
            return {'type': 'Gene', 'categories': ['protein coding', 'non-coding', 'pseudogene']}
        return {'type': 'Other'}

    return {name: _describe(name) for name in idx.split(",")}
Exemple #24
0
    def filter_queryset(self, request, queryset, view):
        ''' Override this method to request just the documents required from Rserve.

        Only request parameters named in the view's ``filter_fields`` are
        used: ``marker`` (default ``rs2476601``) and ``dataset`` (default
        ``EUR``, hyphens removed). The marker's ``seqid`` is read from the
        MARKER index and passed with the dataset and marker to the Rserve
        ``get_pop`` function, whose JSON result is turned into a list of
        population dictionaries.

        Returns a one element list holding an ElasticObject with the keys
        ``populations`` and ``marker``; ``populations`` is None when the
        lookup fails with one of the handled errors. '''
        # resolved before the try block so the error handler can always use mid1
        filterable = getattr(view, 'filter_fields', [])
        filters = {k: v for k, v in request.GET.items() if k in filterable}
        mid1 = filters.get('marker', 'rs2476601')

        try:
            dataset = filters.get('dataset', 'EUR').replace('-', '')
            query = ElasticQuery(BoolQuery(must_arr=[Query.term("id", mid1)]), sources=['seqid', 'start'])
            elastic = Search(search_query=query, idx=ElasticSettings.idx('MARKER', 'MARKER'), size=1)
            doc = elastic.search().docs[0]
            seqid = getattr(doc, 'seqid')

            rserve = getattr(settings, 'RSERVE')
            conn = pyRserve.connect(host=rserve.get('HOST'), port=rserve.get('PORT'))
            try:
                pop_str = conn.r.get_pop(dataset, seqid, mid1)

                pops = json.loads(str(pop_str))
                populations = []
                for pop in pops:
                    pops[pop]['population'] = pop
                    populations.append(pops[pop])
            finally:
                # release the connection even when the call or parsing fails
                conn.close()
            return [ElasticObject(initial={'populations': populations, 'marker': mid1})]
        except (TypeError, ValueError, IndexError, ConnectionError):
            return [ElasticObject(initial={'populations': None, 'marker': mid1})]
Exemple #25
0
def _get_pub_docs_by_pmid(pmids, sources=None):
    """ Get the gene symbols for the corresponding array of ensembl IDs.
    A dictionary is returned with the key being the ensembl ID and the
    value the gene document. """
    query = ElasticQuery(Query.ids(pmids), sources=sources)
    elastic = Search(query, idx=ElasticSettings.idx("PUBLICATION"), size=len(pmids))
    return {doc.doc_id(): doc for doc in elastic.search().docs}
 def test_significant_terms(self):
     ''' Significant Terms Aggregation '''
     agg_name = "test_significant_terms"
     aggs = Aggs(Agg(agg_name, "significant_terms", {"field": "start"}))
     returned = Search(aggs=aggs, idx=ElasticSettings.idx('DEFAULT')).search().aggs
     self.assertTrue('test_significant_terms' in returned, "returned aggregations")
 def _get_random_marker(self):
     ''' Get the id of a random marker (seqid drawn from 1 to 10 and
     tags.weight of at least 80) from the dbSNP elastic index. '''
     idx, idx_type = ElasticSettings.idx('MARKER', 'MARKER').split('/')
     chrom = random.randint(1, 10)
     conditions = [Query.term("seqid", chrom), RangeQuery("tags.weight", gte=80)]
     markers = ElasticUtils.get_rdm_docs(idx, idx_type, qbool=BoolQuery(must_arr=conditions),
                                         sources=['id', 'start'], size=1)
     return getattr(markers[0], 'id')
    def test_get_criteria_details(self):
        ''' Fetch a random marker criteria document and check that
        MarkerCriteria.get_criteria_details returns a matching first hit
        (id, type, index, disease tag and feature details). '''
        config = IniParser().read_ini(MY_INI_FILE)
        idx = ElasticSettings.idx('MARKER_CRITERIA')
        available_criterias = MarkerCriteria.get_available_criterias(config=config)['marker']
        idx_type = ','.join(available_criterias)

        doc_by_idx_type = ElasticUtils.get_rdm_docs(idx, idx_type, size=1)
        self.assertTrue(len(doc_by_idx_type) == 1)
        feature_id = getattr(doc_by_idx_type[0], 'qid')

        criteria_details = MarkerCriteria.get_criteria_details(feature_id, config=config)

        hits = criteria_details['hits']
        first_hit = hits[0]
        _type = first_hit['_type']
        _index = first_hit['_index']
        _id = first_hit['_id']
        _source = first_hit['_source']

        disease_tag = _source['disease_tags'][0]
        # assertTrue(a, b) only checks that a is truthy (b is the failure
        # message), so the id and index are compared with assertEqual
        self.assertEqual(feature_id, _id)
        self.assertIn(_type, idx_type)
        self.assertEqual(idx, _index)
        self.assertIn(disease_tag, list(_source.keys()))

        fdetails = _source[disease_tag][0]
        self.assertIn('fid', fdetails.keys())
        self.assertIn('fname', fdetails.keys())
Exemple #29
0
def show_es_gene_section(gene_symbol=None,
                         seqid=None,
                         start_pos=None,
                         end_pos=None):
    ''' Template inclusion tag to render a gene section.

    Genes are searched by ``gene_symbol`` when it is given; otherwise by
    position on chromosome ``seqid`` (any 'chr' is removed): genes spanning
    ``start_pos`` when ``end_pos`` is None, else genes lying between
    ``start_pos`` and ``end_pos``. Returns the documents under the key
    ``es_genes``. '''
    seqid = str(seqid).replace('chr', '')
    if gene_symbol is not None:
        # gene symbol query
        query = ElasticQuery.query_match("symbol", gene_symbol)
    else:
        on_chromosome = Query.match("chromosome", seqid)
        if end_pos is None:
            # single position (snp): gene must span start_pos
            from_start = RangeQuery("start", lte=start_pos)
            to_stop = RangeQuery("stop", gte=start_pos)
        else:
            # range: gene must lie within start_pos..end_pos
            from_start = RangeQuery("start", gte=start_pos)
            to_stop = RangeQuery("stop", lte=end_pos)
        query = ElasticQuery.bool(BoolQuery(must_arr=[on_chromosome, from_start, to_stop]))

    gene_search = Search(query, idx=ElasticSettings.idx(name='GENE'))
    return {'es_genes': gene_search.search().docs}
    def test_gene_interactions(self):
        ''' Check the interaction document count and validate bioplex and
        intact interaction documents, each for a fixed parent gene id and for
        a randomly selected document. '''
        # elastic doc example:
        # "_source":{"interaction_source": "intact", "interactors": [
        # {"interactor": "ENSG00000206053", "pubmed": "16169070"},
        # {"interactor": "ENSG00000101474", "pubmed": "16169070"},
        # {"interactor": "ENSG00000065361", "pubmed": "16169070"},
        # {"interactor": "ENSG00000085465", "pubmed": "16169070"}]}

        idx_key = 'GENE'
        idx_type_key = 'INTERACTIONS'

        idx = ElasticSettings.idx(idx_key, idx_type_key)
        (idx, idx_type) = idx.split('/')

        # Test doc count (the message states the threshold actually checked)
        doc_count = DataIntegrityUtils.get_docs_count(idx, idx_type)
        self.assertGreater(doc_count, 23000, 'Gene interaction doc count greater than 23000')

        # Get interaction doc - passing the interaction source and id . Also test with random id
        (child_doc_bioplex, parent_doc_bioplex) = self.get_interaction_doc("bioplex", "ENSG00000241186")
        self.check_bioplex_data(child_doc_bioplex, parent_doc_bioplex)

        (child_doc_bioplex, parent_doc_bioplex) = self.get_interaction_doc("bioplex")
        self.check_bioplex_data(child_doc_bioplex, parent_doc_bioplex)

        (child_doc_intact, parent_doc_intact) = self.get_interaction_doc("intact", parent_id="ENSG00000090776")
        self.check_intact_data(child_doc_intact, parent_doc_intact)

        (child_doc_intact, parent_doc_intact) = self.get_interaction_doc("intact")
        self.check_intact_data(child_doc_intact, parent_doc_intact)
    def test_filter(self):
        ''' Filter Aggregation together with metric aggregations on start. '''
        start_field = {"field": 'start'}
        aggs = Aggs([
            Agg('test_filter', 'filter', RangeQuery('start', gt='25000')),
            Agg('avg_start', 'avg', start_field),
            Agg('min_start', 'min', start_field),
            Agg('sum_start', 'sum', start_field),
            Agg('stats_start', 'stats', start_field),
            Agg('count_start', 'value_count', start_field),
            Agg('ext_stats_start', 'extended_stats', start_field),
        ])
        r_aggs = Search(aggs=aggs, idx=ElasticSettings.idx('DEFAULT')).search().aggs

        for name, msg in (('avg_start', "returned avg aggregation"),
                          ('min_start', "returned min aggregation")):
            self.assertTrue(name in r_aggs, msg)

        # attributes expected on a stats result
        stats_keys = ["min", "max", "sum", "count", "avg"]
        has_stats = all(hasattr(r_aggs['stats_start'], key) for key in stats_keys)
        self.assertTrue(has_stats, "returned min aggregation")

        # extended stats carry the stats attributes plus these
        ext_keys = stats_keys + ["sum_of_squares", "variance", "std_deviation", "std_deviation_bounds"]
        has_ext_stats = all(hasattr(r_aggs['ext_stats_start'], key) for key in ext_keys)
        self.assertTrue(has_ext_stats, "returned min aggregation")
    def get_interaction_doc(self, interaction_source='intact', parent_id=None):
        ''' Fetch an interaction document and its parent gene document.

        With ``parent_id`` the interaction document is restricted to that
        parent and ``interaction_source``; without it a random document of
        ``interaction_source`` is fetched.

        Returns the tuple (interaction document, parent document). When the
        parent document cannot be fetched, a random selection is retried with
        the same ``interaction_source``; a requested ``parent_id`` that cannot
        be fetched fails the test. '''

        idx_key = 'GENE'
        idx_type_key = 'INTERACTIONS'
        parent_idx_key = 'GENE'

        idx = ElasticSettings.idx(idx_key, idx_type_key)
        (idx, idx_type) = idx.split('/')

        if parent_id:
            qbool_intact = BoolQuery().must([Query.term("interaction_source", interaction_source),
                                            Query.term("_parent", parent_id)])
        else:
            qbool_intact = BoolQuery().should([Query.term("interaction_source", interaction_source)])

        # Get random doc or specific if id is passed in query
        docs_by_geneid = DataIntegrityUtils.get_rdm_docs(idx, idx_type, qbool=qbool_intact, sources=[], size=1)
        doc = docs_by_geneid[0]

        # Get parent doc (kept apart from the requested parent_id)
        doc_parent_id = doc.parent()
        parent_docs = DataIntegrityUtils.fetch_from_elastic(idx_key, parent_idx_key, [doc_parent_id])

        if parent_docs:
            self.assertTrue(len(parent_docs) >= 1, "Found 1 parent")
            return doc, parent_docs[0]

        if parent_id:
            # retrying with the same parent would select the same kind of
            # document again and never terminate
            self.fail("Parent gene document " + str(doc_parent_id) + " not found")
        # random selection: try another document of the same source
        return self.get_interaction_doc(interaction_source)
Exemple #33
0
def suggester(request):
    ''' Provide auto suggestions. Ajax request returning a JSON response
    whose ``data`` entry lists the text of the suggested options. '''
    params = request.GET
    idx_dict = ElasticSettings.search_props(params.get("idx"), request.user)
    # the suggester indices are searched together as a comma separated list
    suggest_idx = ','.join(ElasticSettings.idx(key) for key in idx_dict['suggester_keys'])
    suggestions = Suggest.suggest(params.get("term"), suggest_idx, name='suggest', size=8)['suggest']
    options = suggestions[0]['options']
    return JsonResponse({"data": [option['text'] for option in options]})
def show_es_gene_section(gene_symbol=None,
                         seqid=None,
                         start_pos=None,
                         end_pos=None):
    ''' Template inclusion tag to render a gene section.

    Genes are looked up by gene symbol when one is given, otherwise by
    position on the sequence. Returns the context {'es_genes': documents}.
    '''
    # Normalise the sequence id so that it always carries the "chr" prefix.
    if not (isinstance(seqid, str) and seqid.startswith("chr")):
        seqid = 'chr' + str(seqid)

    if gene_symbol is not None:
        # gene symbol query
        query = ElasticQuery.query_match("gene_symbol", gene_symbol)
    else:
        seqid_clause = Query.match("seqid", seqid)
        if end_pos is None:
            # single position (e.g. a SNP): features spanning start_pos
            bounds = [RangeQuery("featureloc.start", lte=start_pos),
                      RangeQuery("featureloc.end", gte=start_pos)]
        else:
            # start/end interval: features lying within start_pos..end_pos
            bounds = [RangeQuery("featureloc.start", gte=start_pos),
                      RangeQuery("featureloc.end", lte=end_pos)]
        query = ElasticQuery.bool(BoolQuery(must_arr=[seqid_clause] + bounds))

    gene_search = Search(query, idx=ElasticSettings.idx(name='GENE'))
    return {'es_genes': gene_search.search().docs}
    def test_gene_criteria_types(self):
        """Check the gene criteria index types and mappings and inspect one sample document."""
        idx_key = "GENE_CRITERIA"
        idx = ElasticSettings.idx(idx_key)

        idx_types = CriteriaDataIntegrityUtils.get_criteria_index_types(idx_key)
        available = Criteria.get_available_criterias("gene")

        CriteriaDataIntegrityTestUtils().test_criteria_types(idx, idx_types, available["gene"])
        CriteriaDataIntegrityTestUtils().test_criteria_mappings(idx, idx_types)

        # One document of type 'gene_in_region'; other criteria types are
        # 'cand_gene_in_region', 'cand_gene_in_study' and 'is_gene_in_mhc'.
        sample_docs = ElasticUtils.get_rdm_docs(idx, "gene_in_region", size=1)
        self.assertTrue(len(sample_docs) == 1, "got back one document")
        sample = sample_docs[0]

        # Example document:
        #   {'score': 10, 'CRO': [{'fname': '4p11', 'fid': '4p11_005'}],
        #    '_meta': {'_type': 'gene_in_region', '_score': 0.9997835,
        #              '_index': 'pydgin_imb_criteria_gene', '_id': 'ENSG00000250753'},
        #    'disease_tags': ['CRO'], 'qid': 'ENSG00000250753'}
        qid = getattr(sample, "qid")
        print(qid)

        # The document holds one attribute per disease tag; each lists the
        # regions as dictionaries with 'fid' and 'fname' keys.
        first_tag = getattr(sample, "disease_tags")[0]
        region_id = getattr(sample, first_tag)[0]["fid"]
        print(region_id)
Exemple #36
0
def association_stats(request, sources=None):
    ''' Return association statistics as JSON ({"variants": [...]}).

    Reads the 'chr', 'idx_type', 'start' and 'end' GET parameters; when both
    start and end are given the hits are restricted to that position range.
    '''
    params = request.GET
    seqid = params.get('chr').replace('chr', '')
    idx_type = params.get('idx_type').upper()
    start = params.get('start')
    end = params.get('end')
    data = []

    def get_stats(resp_json):
        ''' Callback collecting the variants of one batch of hits. '''
        for hit in resp_json['hits']['hits']:
            doc = Document(hit)
            data.append({
                "CHROM": getattr(doc, 'seqid'),
                "POS": getattr(doc, 'position'),
                "PVALUE": getattr(doc, 'p_value'),
                "DBSNP_ID": getattr(doc, 'marker')
            })

    if start is not None and end is not None:
        # sequence and position range
        criteria = BoolQuery(must_arr=[Query.query_string(seqid, fields=["seqid"]),
                                       RangeQuery("position", gte=start, lte=end)])
    else:
        # whole sequence
        criteria = Query.query_string(seqid, fields=["seqid"])
    query = ElasticQuery(criteria, sources=sources)

    ScanAndScroll.scan_and_scroll(ElasticSettings.idx('IC_STATS', idx_type), call_fun=get_stats, query=query)
    return JsonResponse({"variants": data})
 def test_missing(self):
     ''' Missing Aggregation: expect no documents without a seqid field. '''
     missing_agg = Agg("test_missing", "missing", {"field": "seqid"})
     result = Search(aggs=Aggs(missing_agg), idx=ElasticSettings.idx('DEFAULT')).search()
     doc_count = getattr(result.aggs['test_missing'], 'doc_count')
     self.assertTrue(doc_count == 0, "no missing seqid fields")
 def test_top_hits(self):
     ''' Top Hits Aggregation: a top_hits of size 1 returns exactly one hit. '''
     aggs = Aggs([Agg('test_filter', 'filter', RangeQuery('start', gt='2000')),
                  Agg('test_top_hits', 'top_hits', {"size": 1})])
     result = Search(aggs=aggs, idx=ElasticSettings.idx('DEFAULT')).search()
     hits = result.aggs['test_top_hits'].get_hits()
     self.assertTrue(len(hits) == 1, "returned the top hit")
Exemple #39
0
 def get_publications(cls, pmids, sources=[]):
     ''' Get publications from the list of PMIDs.

     @type  pmids: list
     @param pmids: publication ids to fetch.
     @type  sources: list
     @keyword sources: passed to the ElasticQuery as its sources argument
     (the shared default list is never modified here).
     @return: the matching documents, or None when no ids are given.
     '''
     if not pmids:
         return None
     from elastic.search import Search, ElasticQuery

     # Request one hit per id. The previous fixed size of 2 silently dropped
     # publications when more than two PMIDs were passed in; 2 is kept as the
     # lower bound and as the fallback for arguments that are not a collection.
     size = max(2, len(pmids)) if isinstance(pmids, (list, tuple, set)) else 2
     query = ElasticQuery(Query.ids(pmids), sources=sources)
     return Search(query, idx=ElasticSettings.idx('PUBLICATION', 'PUBLICATION'),
                   size=size).search().docs
Exemple #40
0
    def get_marker(cls, request, marker, context):
        ''' Fill the context for the page of the given marker.

        Searches the MARKER index for the marker name and sorts the hits,
        by document type, into the marker document, immunochip documents and
        rs merge history documents.

        @return: the updated context when at least one hit was found.
        @raise Http404: if no marker name is given or nothing is found.
        '''
        if marker is None:
            messages.error(request, 'No marker name given.')
            raise Http404()

        # rs identifiers are matched on the id fields, anything else on the name
        if marker.startswith("rs"):
            fields = ['id', 'rscurrent']
        else:
            fields = ['name']

        top_hits_agg = Agg('top_hits', 'top_hits', {"size": 15})
        type_aggs = Aggs(Agg("types", "terms", {"field": "_type"}, sub_agg=top_hits_agg))
        marker_query = ElasticQuery(Query.query_string(marker, fields=fields))
        res = Search(search_query=marker_query, idx=ElasticSettings.idx('MARKER'),
                     aggs=type_aggs, size=0).search()

        if res.hits_total >= 1:
            title = ''
            marker_doc = None
            ic_docs = []
            history_docs = []
            for bucket in getattr(res.aggs['types'], 'buckets'):
                for hit in bucket['top_hits']['hits']['hits']:
                    doc = PydginDocument.factory(hit)
                    if doc.get_name() is not None:
                        title = doc.get_name()

                    type_key = bucket['key']
                    if type_key == 'marker':
                        marker_doc = doc
                    elif type_key == 'immunochip':
                        ic_docs.append(doc)
                    elif type_key == 'rs_merge':
                        history_docs.append(doc)

            if marker_doc is not None:
                marker_doc.marker_build = _get_marker_build(ElasticSettings.idx('MARKER'))

            context['criteria'] = MarkerView.criteria_disease_tags(request, [marker])
            context['features'] = [marker_doc]
            context['old_dbsnp_docs'] = _get_old_dbsnps(marker)
            context['ic'] = ic_docs
            context['history'] = history_docs
            context['title'] = title
            context['jbrowse_tracks'] = "PydginRegions%2Cdbsnp146%2CEnsemblGenes"
            return context

        if res.hits_total == 0:
            messages.error(request, 'Marker ' + marker + ' not found.')
            raise Http404()
Exemple #41
0
def _build_score_functions(idx_dict):
    ''' Build the list of ScoreFunction instances used to boost query results.

    @type  idx_dict: dict
    @param idx_dict: search properties; its 'idx' entry is checked for the
    MARKER and PUBLICATION index names.
    '''
    # documents are always scored by their tag weight (1.0 when missing)
    score_fns = [ScoreFunction.create_score_function('field_value_factor', field='tags.weight', missing=1.0)]

    # double the score of markers when the marker index is searched
    marker_idx = ElasticSettings.idx('MARKER')
    if marker_idx is not None and marker_idx in idx_dict['idx']:
        marker_type = ElasticSettings.get_idx_types('MARKER')['MARKER']['type']
        type_filter = Filter(Query({"type": {"value": marker_type}}))
        score_fns.append(ScoreFunction.create_score_function('weight', 2, function_filter=type_filter.filter))
        logger.debug("Add marker type score function.")

    # double the score of publications that are tagged with a disease
    publication_idx = ElasticSettings.idx('PUBLICATION')
    if publication_idx is not None and publication_idx in idx_dict['idx']:
        disease_filter = ExistsFilter('tags.disease').filter
        score_fns.append(ScoreFunction.create_score_function('weight', 2, function_filter=disease_filter))
        logger.debug("Add publication disease tag score function.")

    return score_fns
    def test_hit_attributes(self):
        ''' Fetch one random document for every region index type.

        NOTE(review): the fetched documents are not inspected in the code
        visible here - confirm whether further checks were intended.
        '''
        for type_key in RegionDataTest.IDX_TYPE_KEYS:
            idx_name, idx_type = ElasticSettings.idx(RegionDataTest.IDX_KEY, type_key).split('/')

            docs = ElasticUtils.get_rdm_docs(idx_name, idx_type, qbool=Query.match_all(), sources=[], size=1)
        def check_hits(resp_json):
            ''' Compare the positions of the documents in a search response with
            the positions of the MARKER documents that have the same id, and log
            an error for every pair that differs by more than one base.

            @type  resp_json: dict
            @param resp_json: search response; the hits are read from ['hits']['hits'].
            '''
            # index the incoming documents by their "id" attribute
            rsids = {}
            docs = [Document(hit) for hit in resp_json['hits']['hits']]
            for doc in docs:
                rsid = getattr(doc, "id")
                if rsid is not None:
                    rsids[rsid] = doc
            rsids_keys = list(rsids.keys())
            # fetch the MARKER documents for all collected ids in one search
            terms_filter = TermsFilter.get_terms_filter("id", rsids_keys)
            query = ElasticQuery.filtered(Query.match_all(), terms_filter)
            elastic = Search(query, idx=ElasticSettings.idx('MARKER', 'MARKER'), size=len(rsids_keys))
            docs_by_rsid = elastic.search().docs
            for doc in docs_by_rsid:
                # only markers whose info contains 'VC=SNV' are compared
                info = getattr(doc, "info")
                if 'VC=SNV' not in info:
                    continue
                rsid = getattr(doc, "id")
                ic_doc = rsids[rsid]  # presumably the immunochip document - TODO confirm
                pos1 = getattr(doc, "start")
                pos2 = self._get_highest_build(ic_doc)['position']
                if abs(int(pos1) - int(pos2)) > 1:
                    is_par = getattr(ic_doc, 'is_par')
                    allele_a = getattr(ic_doc, 'allele_a')
                    # documents with is_par set or with allele_a 'D'/'I' are not reported
                    if is_par is None and not (allele_a == 'D' or allele_a == 'I'):
                        msg = ("CHECK IC/DBSNP POSITIONS:: "+getattr(ic_doc, 'name') +
                               ' '+str(pos2)+" "+rsid+' '+str(pos1))
#                                ' ('+ic_doc.doc_id()+' '+json.dumps(getattr(ic_doc, 'build_info'))+')'

                        # add any markers located at the other position on the same sequence
                        query = ElasticQuery.filtered(Query.term("seqid", getattr(doc, 'seqid')),
                                                      Filter(Query.term("start", pos2)))
                        elastic = Search(query, idx=ElasticSettings.idx('MARKER', 'MARKER'))
                        docs_by_pos = elastic.search().docs
                        if len(docs_by_pos) > 0:
                            for d in docs_by_pos:
                                msg += " ("+getattr(d, "id")+":"+str(getattr(d, "start"))+")"

                        # add the HISTORY entries whose "rslow" is this id
                        query = ElasticQuery.filtered(Query.match_all(), Filter(Query.term("rslow", rsid)))
                        elastic = Search(query, idx=ElasticSettings.idx('MARKER', 'HISTORY'))
                        docs_by_pos = elastic.search().docs
                        if len(docs_by_pos) > 0:
                            for d in docs_by_pos:
                                msg += " (rshigh:"+str(getattr(d, "rshigh")) + \
                                       " build_id:"+str(getattr(d, "build_id"))+")"

                        logger.error(msg)
 def test_bool_filtered_query(self):
     ''' Build and run a filtered boolean query that matches a single marker. '''
     bool_filter = BoolQuery(must_not_arr=[Query.term("seqid", 2)],
                             should_arr=[RangeQuery("start", gte=10050)])
     # clauses can also be added after construction
     (bool_filter.must([Query.term("id", "rs768019142")])
                 .should(RangeQuery("start", gte=10054)))
     query = ElasticQuery.filtered_bool(Query.match_all(), bool_filter, sources=["id", "seqid"])
     hits_total = Search(query, idx=ElasticSettings.idx('DEFAULT')).search().hits_total
     self.assertTrue(hits_total == 1, "Elastic filtered query retrieved marker (rs768019142)")
Exemple #45
0
 def range_overlap_query(cls, seqid, start_range, end_range,
                         search_from=0, size=20, idx=ElasticSettings.idx('DEFAULT'),
                         field_list=None, seqid_param="seqid", start_param="start", end_param="end"):
     ''' Create an instance of this class whose search query is the range
     overlap query built by ElasticUtils.range_overlap_query. '''
     from elastic import utils
     field_options = {'field_list': field_list, 'seqid_param': seqid_param,
                      'start_param': start_param, 'end_param': end_param}
     overlap_query = utils.ElasticUtils.range_overlap_query(seqid, start_range, end_range, **field_options)
     return cls(search_query=overlap_query, search_from=search_from, size=size, idx=idx)
 def test_and_filtered_query(self):
     ''' Build and run a query filtered by an AND filter. '''
     and_filter = AndFilter(BoolQuery(must_arr=[RangeQuery("start", gte=1)]))
     (and_filter.extend(RangeQuery("start", gte=1))
                .extend(Query.term("seqid", 1)))
     query = ElasticQuery.filtered(Query.term("seqid", 1), and_filter)
     hits_total = Search(query, idx=ElasticSettings.idx('DEFAULT')).search().hits_total
     self.assertTrue(hits_total >= 1, "Elastic filtered query retrieved marker(s)")
Exemple #47
0
 def test_url_rotate(self):
     ''' Test the url rotates from http://xxx:9200 to correct url. '''
     seqid_query = Query.term("seqid", 1)
     id_filter = Filter(Query.term("id", "rs768019142"))
     query = ElasticQuery.filtered(seqid_query, id_filter)
     hits_total = Search(query, idx=ElasticSettings.idx('DEFAULT')).search().hits_total
     self.assertTrue(hits_total == 1,
                     "Elastic filtered query retrieved marker")
     Search.index_exists('test', 'test2')
     # reset the url index so that later tests start from the first url again
     ElasticUrl.URL_INDEX = 0
    def test_fetch_overlapping_features(self):
        ''' Features overlapping a fixed interval on sequence 1 are found in the study hits. '''
        region_idx, region_idx_type = ElasticSettings.idx('REGION', idx_type='STUDY_HITS').split('/')

        # fixed interval on sequence '1'; '38' is passed as the first (build) argument
        seqid, start, stop = '1', 206767602, 206772494
        overlapping = Criteria.fetch_overlapping_features('38', seqid, start, stop, region_idx, region_idx_type)
        self.assertTrue(len(overlapping) > 0, 'Got some overlapping features')
Exemple #49
0
def region_page(request, region):
    ''' Render the region page for the region with the given region id. '''
    region_query = ElasticQuery.query_match("attr.region_id", region)
    context = Search(region_query, idx=ElasticSettings.idx(name='REGION')).get_result()
    context['title'] = "Region"
    print(context)
    return render(request, 'region/region.html', context, content_type='text/html')
 def test_terms_avg_order(self):
     ''' Terms aggregation on seqid ordered by the average start (descending). '''
     agg_name = "test"
     terms_body = {"field": "seqid", "size": 0, "order": {"avg_start": "desc"}}
     avg_start = Agg('avg_start', 'avg', {"field": "start"})
     terms_agg = Agg(agg_name, "terms", terms_body, sub_agg=avg_start)
     r_aggs = Search(aggs=Aggs(terms_agg), idx=ElasticSettings.idx('DEFAULT')).search().aggs
     self.assertTrue(agg_name in r_aggs, "returned test aggregations")
     first_bucket = r_aggs[agg_name].get_buckets()[0]
     self.assertGreater(first_bucket['doc_count'], 1)
Exemple #51
0
    def __init__(self, search_query=None, aggs=None, search_from=0, size=20,
                 search_type=None, idx=ElasticSettings.idx('DEFAULT'), idx_type='',
                 qsort=None, elastic_url=None):
        ''' Set up the parameters of a search. The query is taken from an
        L{ElasticQuery} and/or from aggregations; C{self.query} is left
        unset when neither is given.
        @type  search_query: L{ElasticQuery}
        @keyword search_query: The elastic query to search (default: None).
        @type  aggs: L{Aggs}
        @keyword aggs: Aggregations used in the search; merged into the query.
        @type  search_from: integer
        @keyword search_from: Offset used in paginations (default: 0).
        @type  size: integer
        @keyword size: maximum number of hits to return (default: 20).
        @type  search_type: string
        @keyword search_type: search type appended to the search URL, e.g.
        count for aggregations; size and offset are then left out of the URL.
        @type  idx: string
        @keyword idx: index to search (default: default index defined in settings).
        @type  idx_type: string
        @keyword idx_type: index type (default: '').
        @type  qsort: Sort
        @keyword qsort: defines sorting for the query.
        @type  elastic_url: string
        @keyword elastic_url: Elastic URL (default: ElasticSettings.url()).
        @raise QueryError: if search_query is not an L{ElasticQuery} or qsort
        is not a Sort.
        '''
        if search_query is not None:
            if not isinstance(search_query, ElasticQuery):
                raise QueryError("not an ElasticQuery")
            self.query = search_query.query

        if aggs is not None:
            if not hasattr(self, 'query'):
                # aggregations only
                self.query = aggs.aggs
            else:
                self.query.update(aggs.aggs)

        if qsort is not None:
            if not isinstance(qsort, Sort):
                raise QueryError("not a Sort")
            if not hasattr(self, 'query'):
                logger.error("no query to sort")
            else:
                self.query.update(qsort.qsort)

        if elastic_url is None:
            elastic_url = ElasticSettings.url()

        self.size = size
        self.search_from = search_from
        self.search_type = search_type
        self.idx = idx
        self.idx_type = idx_type
        self.elastic_url = elastic_url

        # relative search URL: <idx>/<idx_type>/_search?<parameters>
        search_path = self.idx + '/' + self.idx_type + '/_search?'
        if self.search_type is None:
            self.url = search_path + 'size=' + str(self.size) + '&from=' + str(self.search_from)
        else:
            self.url = search_path + 'search_type=' + search_type
 def get_random_feature_id(self):
     ''' Return the qid of a document fetched from the gene criteria index,
     searching across all available gene criteria types. '''
     config = IniParser().read_ini(MY_INI_FILE)
     criteria_idx = ElasticSettings.idx('GENE_CRITERIA')
     gene_criterias = GeneCriteria.get_available_criterias(config=config)['gene']
     # the criteria names double as index types; search all of them at once
     docs = ElasticUtils.get_rdm_docs(criteria_idx, ','.join(gene_criterias), size=1)
     self.assertTrue(len(docs) > 0)
     return getattr(docs[0], 'qid')
 def test_terms_query(self):
     ''' Build and run a terms query with highlighting on the id field. '''
     marker_ids = ["rs2476601", "rs768019142"]
     query = ElasticQuery(Query.terms("id", marker_ids), highlight=Highlight(["id"]))
     docs = Search(query, idx=ElasticSettings.idx('DEFAULT')).search().docs
     self.assertTrue(len(docs) == 2,
                     "Elastic string query retrieved markers (rs2476601, rs768019142)")
     first_doc = docs[0]
     self.assertTrue(getattr(first_doc, 'seqid'), "Hit attribute found")
     self.assertTrue(first_doc.highlight() is not None, "highlighting found")
 def test_bool_filtered_query2(self):
     ''' Build and run a query string search filtered by a boolean query. '''
     bool_filter = BoolQuery()
     (bool_filter.should(RangeQuery("start", lte=20000))
                 .should(Query.term("seqid", 2))
                 .must(Query.term("seqid", 1)))
     marker_query = Query.query_string("rs768019142", fields=["id", "seqid"])
     query = ElasticQuery.filtered_bool(marker_query, bool_filter, sources=["id", "seqid", "start"])
     hits_total = Search(query, idx=ElasticSettings.idx('DEFAULT')).search().hits_total
     self.assertTrue(hits_total == 1, "Elastic filtered query retrieved marker (rs768019142)")
 def test_filters(self):
     ''' Filters Aggregation: every named filter comes back as a bucket. '''
     named_filters = {'start_gt': RangeQuery('start', gt='1000'),
                      'start_lt': RangeQuery('start', lt='100000')}
     filters_agg = Agg('test_filters', 'filters', {'filters': named_filters})
     result = Search(aggs=Aggs(filters_agg), idx=ElasticSettings.idx('DEFAULT')).search()
     buckets = result.aggs['test_filters'].get_buckets()
     self.assertTrue('start_lt' in buckets,
                     "returned avg aggregation")
 def test_or_filtered_query(self):
     ''' Build and run a highlighted query filtered by an OR filter. '''
     highlight = Highlight(["id", "seqid"])
     spanning = BoolQuery(must_arr=[RangeQuery("start", lte=1),
                                    RangeQuery("end", gte=100000)])
     or_filter = OrFilter(RangeQuery("start", gte=1, lte=100000))
     (or_filter.extend(spanning)
               .extend(Query.query_string("rs*", fields=["id", "seqid"]).query_wrap()))
     query = ElasticQuery.filtered(Query.term("seqid", 1), or_filter, highlight=highlight)
     hits_total = Search(query, idx=ElasticSettings.idx('DEFAULT')).search().hits_total
     self.assertTrue(hits_total >= 1, "Elastic filtered query retrieved marker(s)")
 def test_range(self):
     ''' Range Aggregation: one bucket is returned per requested range. '''
     ranges = [{"to": 10000},
               {"from": 10000, "to": 15000}]
     range_agg = Agg("test_range_agg", "range", {"field": "start", "ranges": ranges})
     result = Search(aggs=Aggs(range_agg), idx=ElasticSettings.idx('DEFAULT')).search()
     buckets = result.aggs['test_range_agg'].get_buckets()
     self.assertTrue(len(buckets) == 2,
                     "returned two buckets in range aggregations")
Exemple #58
0
def region_page(request, region):
    ''' Look up the region by its region id and render the region template. '''
    region_search = Search(ElasticQuery.query_match("attr.region_id", region),
                           idx=ElasticSettings.idx(name='REGION'))
    context = region_search.get_result()
    context['title'] = "Region"
    print(context)
    return render(request, 'region/region.html', context, content_type='text/html')
 def test_bool_query(self):
     ''' Build and run a highlighted bool query with must, must_not and should clauses. '''
     bool_query = BoolQuery()
     highlight = Highlight(["id", "seqid"])
     (bool_query.must(Query.term("id", "rs768019142"))
                .must(RangeQuery("start", gt=1000))
                .must_not(Query.match("seqid", "2"))
                .should(Query.match("seqid", "3"))
                .should(Query.match("seqid", "1")))
     query = ElasticQuery.bool(bool_query, highlight=highlight)
     docs = Search(query, idx=ElasticSettings.idx('DEFAULT')).search().docs
     self.assertTrue(len(docs) == 1, "Elastic string query retrieved marker (rs768019142)")
 def test_function_score_query(self):
     ''' Test a function score query with a query (using the start position as the score). '''
     reciprocal_start = ScoreFunction.create_score_function('field_value_factor', field='start',
                                                            modifier='reciprocal')
     marker_query = Query.query_string("rs*", fields=["id", "seqid"])
     query = ElasticQuery(FunctionScoreQuery(marker_query, [reciprocal_start], boost_mode='replace'))
     docs = Search(query, idx=ElasticSettings.idx('DEFAULT')).search().docs
     self.assertGreater(len(docs), 1, str(len(docs)))

     # the hits have to come back with strictly increasing start positions
     previous_start = 0
     for doc in docs:
         current_start = getattr(doc, 'start')
         self.assertLess(previous_start, current_start)
         previous_start = current_start