Ejemplo n.º 1
0
    def filter_queryset(self, request, queryset, view):
        ''' Look up the locations of the feature named in the request.

        Only GET parameters listed in the view's 'filter_fields' are honoured:
        'feature' (defaults to 'PTPN22') is the search term and 'build'
        (defaults to settings.DEFAULT_BUILD, passed through self._get_build)
        selects the marker index and the coordinates that are reported.
        The queryset argument is not used.

        @return: list of ElasticObject, one per hit, holding 'feature', 'chr',
        'start', 'end' and 'locusString'; or a single ElasticObject carrying an
        'error' entry when the feature name is empty.
        @raise Http404: on TypeError, ValueError, IndexError or ConnectionError.
        '''
        try:
            allowed = getattr(view, 'filter_fields', [])
            params = {name: value for name, value in request.GET.items() if name in allowed}
            query_str = params.get('feature', 'PTPN22')
            build = self._get_build(params.get('build', settings.DEFAULT_BUILD))
            if query_str is None or query_str == '':
                return [ElasticObject(initial={'error': 'No feature name provided.'})]

            search_fields = ['id', 'symbol', 'dbxrefs.ensembl', 'region_name']
            sources = ['start', 'stop', 'seqid', 'chromosome', 'disease_loci']
            idx_settings = ElasticSettings.getattr('IDX')

            # Pick the marker index whose build label matches the requested build.
            # The default 'MARKER' key is tried first; otherwise every key that
            # contains 'MARKER' is checked and the last match is kept.
            marker_key = ''
            if build == ElasticSettings.get_label('MARKER', label='build'):
                marker_key = 'MARKER'
            else:
                for name in idx_settings:
                    if 'MARKER' in name and build == ElasticSettings.get_label(name, label='build'):
                        marker_key = name

            # Search markers, regions and genes together in one request.
            (marker_idx, marker_type) = ElasticSettings.idx_names(marker_key, 'MARKER')
            (region_idx, region_type) = ElasticSettings.idx_names('REGION', 'REGION')
            (gene_idx, gene_type) = ElasticSettings.idx_names('GENE', 'GENE')
            all_idx = marker_idx + ',' + region_idx + ',' + gene_idx
            all_types = marker_type + ',' + region_type + ',' + gene_type

            bool_query = BoolQuery(must_arr=Query.query_string(query_str, fields=search_fields))
            search = Search(search_query=ElasticQuery(bool_query, sources), size=10,
                            idx=all_idx, idx_type=all_types)

            locations = []
            for hit in search.search().docs:
                if isinstance(hit, RegionDocument):
                    hit = Region.pad_region_doc(hit)

                # The position string is split on ':' into chromosome and range;
                # the range may contain ',' separators and an optional '-end' part.
                position = hit.get_position(build=build).split(':')
                bounds = position[1].replace(',', '').split('-')
                start = int(bounds[0])
                end = int(bounds[1]) if len(bounds) > 1 else start
                locations.append(ElasticObject(
                    {'feature': query_str,
                     'chr': position[0],
                     'start': start,
                     'end': end,
                     'locusString': '{} ({})'.format(query_str, position[1])}))
            return locations
        except (TypeError, ValueError, IndexError, ConnectionError):
            raise Http404
Ejemplo n.º 2
0
    def do_identifier_search(cls, identifiers, user=None):
        ''' Search the region, gene, study and marker indices for a list of identifiers.

        The identifiers are joined with spaces into a single query string. Each
        hit is filed under its first highlighted term (the text inside the first
        <em>...</em> of the document's highlight) and under the feature type
        given by the document's '_type'. Hits without a highlight, or with a
        '_type' other than 'studies', 'gene', 'marker' or 'region', are ignored.

        @type  identifiers: list of str
        @param identifiers: identifiers to search for.
        @param user: not used by this method.
        @return: dict with keys 'gene', 'marker', 'region' and 'study', each a
        dict mapping a highlighted term to the list of matching feature ids
        (in hit order, without duplicates), plus 'missing': a list of the
        lower-cased identifiers that never appeared as a highlighted term.
        '''
        source_filter = [
            'symbol', 'synonyms', "dbxrefs.*",          # gene
            'id', 'rscurrent', 'rshigh',                # marker
            'study_id', 'study_name',                   # study
            'region_name', 'marker', "region_id"]       # regions

        highlight = Highlight(["symbol", "dbxrefs.*", "region", "region_name", "region_id",
                               "study_id", "study_name", "id", "rscurrent", "rshigh", "marker"])

        search_query = ElasticQuery(Query.query_string(" ".join(identifiers), fields=source_filter),
                                    highlight=highlight, sources=source_filter)

        search_idx_keys = ['REGION', 'GENE', 'STUDY', 'MARKER']
        search_idx_type_keys = ['REGION', 'GENE', 'STUDY', 'MARKER']

        # idx_names() returns an (index, type) pair for each settings key.
        idx_dict = dict(ElasticSettings.idx_names(idx_key, idx_type=type_key)
                        for idx_key, type_key in zip(search_idx_keys, search_idx_type_keys))

        elastic = Search(search_query=search_query,
                         idx=','.join(idx_dict.keys()),
                         idx_type=','.join(idx_dict.values()))

        gene_dict = {}
        region_dict = {}
        marker_dict = {}
        study_dict = {}

        # Document '_type' -> (result dict, attribute holding the feature id).
        # None means the feature id is the document id itself.
        targets = {
            "studies": (study_dict, "study_id"),
            "gene": (gene_dict, None),
            "marker": (marker_dict, "id"),
            "region": (region_dict, "region_id"),
        }
        first_hit = re.compile(r".*?<em>(.*?)</em>.*")

        for doc in elastic.search().docs:
            idx_type = getattr(doc, '_meta')['_type']

            doc_highlight = doc.highlight()
            if doc_highlight is None:
                continue
            match = first_hit.match(str(doc_highlight))
            if match is None or idx_type not in targets:
                continue

            highlight_hit = match.group(1)
            result_dict, id_attr = targets[idx_type]
            feature_id = doc.doc_id() if id_attr is None else getattr(doc, id_attr)

            # The list for a term is created once and then extended, so several
            # features matching the same term are all kept and none is listed twice.
            feature_ids = result_dict.setdefault(highlight_hit, [])
            if feature_id not in feature_ids:
                feature_ids.append(feature_id)

        all_result_dict = {}
        all_result_dict['gene'] = gene_dict
        all_result_dict['marker'] = marker_dict
        all_result_dict['region'] = region_dict
        all_result_dict['study'] = study_dict

        # Identifiers are compared case-insensitively with the highlighted terms.
        searched = {_id.lower() for _id in identifiers}
        found = {term.lower()
                 for results in (study_dict, gene_dict, marker_dict, region_dict)
                 for term in results}
        all_result_dict['missing'] = list(searched - found)
        return all_result_dict