Ejemplo n.º 1
0
    def get_cleaned_options(self, kwargs):
        ''' Separate URL keyword arguments into their functional category.

        Incoming kwargs are separated into one of 4 categories, depending on how the argument controls the
        pipeline:

        * ``control_kwargs`` - These are arguments that control the pipeline execution (**raw**, **rawquery**, etc)
        * ``es_kwargs`` - These are arguments that get passed directly to the Elasticsearch client during query
        * ``esqb_kwargs`` - These are arguments that go to the Elasticsearch query builder (**fields**, **size**, etc)
        * ``transform_kwargs`` - These are arguments that go to the Elasticsearch result transformer (**jsonld**, **dotfield**, etc)

        The option definitions are read from the attribute of the same name on
        ``self`` (a missing attribute is treated as an empty mapping).  An option
        is copied into its category when the request supplies a truthy value for
        it, or when its settings declare a ``default`` that is not ``None``.

        :param kwargs: mapping of incoming keyword arguments; it is only read.
        :return: a ``dotdict`` with one (possibly empty) ``dotdict`` per category.
        '''
        options = dotdict()

        for kwarg_category in ("control_kwargs", "es_kwargs", "esqb_kwargs",
                               "transform_kwargs"):
            options.setdefault(kwarg_category, dotdict())
            for option, settings in getattr(self, kwarg_category, {}).items():
                # Look the default up with .get(): an option that is supplied by
                # the request but declares no 'default' must not raise KeyError.
                default = settings.get('default')
                if kwargs.get(option) or default is not None:
                    options.get(kwarg_category).setdefault(
                        option, kwargs.get(option, default))
                # Options whose *name* matches the userquery pattern are also
                # collected under esqb_kwargs.userquery_kwargs, keyed by the
                # transformed option name.
                if kwarg_category == "esqb_kwargs" and re.match(
                        self.web_settings.USERQUERY_KWARG_REGEX, option):
                    options.esqb_kwargs.setdefault('userquery_kwargs',
                                                   dotdict())
                    transformed = self.web_settings.USERQUERY_KWARG_TRANSFORM(
                        option)
                    options.esqb_kwargs.userquery_kwargs[
                        transformed] = kwargs.get(option)
        return options
Ejemplo n.º 2
0
    async def execute_pipeline(self, *args, **kwargs):
        """
        Run the graph query carried in ``self.args_json`` and finish the request.

        The graph query is converted to an ES query; when the graph query
        reports it can be reversed, the reversed form is OR-ed into the same
        ES query.  The pipeline result is transformed and passed to
        ``self.finish``.

        Raises BadRequest for a GraphObjectError and HTTPError for any other
        exception raised while building, executing or transforming the query.
        """
        try:
            graph = GraphQuery.from_dict(self.args_json)
            combined = self._to_es_query(graph)

            if graph.can_reverse():
                graph.reverse()  # reverses the same object in place
                combined = combined | self._to_es_query(graph)

            # it's sent in one query so that parameters like size is still meaningful
            raw = await self.pipeline.execute(
                AsyncSearch().query(combined), dotdict())
            res = self.pipeline.transform(raw, dotdict())

            # TODO additional transformation, like double reversal in result.

        except GraphObjectError as exc:
            raise BadRequest(reason=str(exc))

        except Exception as exc:
            raise HTTPError(str(exc))

        else:
            # errors raised by finish() are intentionally not translated
            self.finish(res)
Ejemplo n.º 3
0
 def _get_cleaned_metadata_options(self, kwargs):
     """Process options for a /metadata query.

     Removes every entry from ``kwargs`` in place and returns an empty
     ``dotdict``.  Override this to add arguments to the metadata endpoint.
     """
     cleaned = dotdict()
     # snapshot the keys first so the mapping is not resized while iterating
     for name in list(kwargs.keys()):
         del kwargs[name]
     return cleaned
Ejemplo n.º 4
0
    def _get_cleaned_common_options(self, kwargs):
        '''Process the options shared by every query type (/query or annotation).

        Known control options are popped out of ``kwargs`` into a ``dotdict``,
        which is then passed through ``self._get_options``.  Afterwards the
        mapping returned by ``parse_sort_option`` is stripped of every key that
        is not listed in ``self._allowed_options``.  Returns the options.
        '''
        options = dotdict()
        options.raw = kwargs.pop('raw', False)
        options.rawquery = kwargs.pop('rawquery', False)
        options.fetch_all = kwargs.pop('fetch_all', False)
        options.host = kwargs.pop('host', biothing_settings.ga_tracker_url)
        options.jsonld = kwargs.pop('jsonld', False)
        dotfield_arg = kwargs.pop('dotfield', False)
        options.dotfield = dotfield_arg not in [False, 'false']

        # When dotfield was not requested and no requested field contains a
        # dot, dotfield is switched on anyway, i.e., no need to parse dotfield.
        if not options.dotfield:
            requested = kwargs.get('fields')
            has_dotted_field = bool(requested) and any(
                name.find('.') != -1 for name in requested)
            if not has_dotted_field:
                options.dotfield = True

        options = self._get_options(options, kwargs)

        requested_scopes = kwargs.pop('scopes', None)
        if requested_scopes:
            options.scopes = self._cleaned_scopes(requested_scopes)

        kwargs = parse_sort_option(kwargs)
        for key in set(kwargs).difference(set(self._allowed_options)):
            logging.debug("removing param '%s' from query" % key)
            del kwargs[key]
        return options
Ejemplo n.º 5
0
    def query_dataset(self, chrom, start, ref, alt, assembly, dataset):
        """
        Look up one variant in one dataset and report whether it exists.

        Returns ``{'datasetId': dataset, 'exists': False}`` unchanged when the
        dataset is not in ``self.pos_dbs + self.assembly_dbs``, when chrom,
        start or alt is falsy, or when the assembly is not a key of
        ``self.assembly_keys``.  Otherwise the search result is transformed
        and, if it reports a positive 'total', handed to ``self.format_output``.
        """
        out = {'datasetId': dataset, 'exists': False}

        # guard clauses: nothing to query without a known dataset/assembly
        if dataset not in self.pos_dbs + self.assembly_dbs:
            return out
        if not (chrom and start and alt and assembly in self.assembly_keys):
            return out

        assembly = self.assembly_keys[assembly]  # get hg assembly notation

        # the first three characters of alt select the query type,
        # e.g. "alternateBases": "DEL85689" or "DUP85689"
        alt_prefix = alt[:3]
        if alt_prefix == 'DEL':
            q_type, ref = 'del', ''
        elif alt_prefix == 'DUP':
            q_type, ref = 'dup', ''
        elif not ref:
            q_type, ref = 'ins', ''
        else:
            q_type = 'snp'

        q = self.format_query_string(q_type, chrom, start, ref, alt, assembly, dataset)

        # the index is picked from ES_INDICES by assembly; only the dataset
        # field is requested as _source
        query_body = {"query": {"query_string": {"query": q}}}
        res = self.web_settings.es_client.search(
            index=self.web_settings.ES_INDICES[assembly],
            body=query_body,
            _source=[dataset]
        )
        res = self.result_transform.transform(res, dotdict(dotfield=True))

        if res and res.get('total') > 0:
            return self.format_output(res, out, q_type)
        return out
Ejemplo n.º 6
0
    def query_dataset(self, chrom, start, ref, alt, assembly, dataset):
        """
        Look up one variant in one dataset and report whether it exists.

        The default answer ``{'datasetId': dataset, 'exists': False}`` is
        returned as-is unless the dataset is listed in
        ``self.pos_dbs + self.assembly_dbs``, chrom/start/alt are all truthy
        and the assembly is a key of ``self.assembly_keys``.  When the cleaned
        search response has a positive 'total', the answer comes from
        ``self.format_output`` instead.
        """
        out = {'datasetId': dataset, 'exists': False}

        known_dataset = dataset in self.pos_dbs + self.assembly_dbs
        if not known_dataset:
            return out
        if not (chrom and start and alt and assembly in self.assembly_keys):
            return out

        assembly = self.assembly_keys[assembly]  # get hg assembly notation

        # syntax: "alternateBases": "DEL85689" / "DUP85689"
        kind = alt[:3]
        if kind == 'DEL':
            q_type = 'del'
            ref = ''
        elif kind == 'DUP':
            q_type = 'dup'
            ref = ''
        elif not ref:
            q_type = 'ins'
            ref = ''
        else:
            q_type = 'snp'

        q = self.format_query_string(q_type, chrom, start, ref, alt, assembly, dataset)

        # for now always search against hg19 index...
        settings = self.web_settings
        res = settings.es_client.search(
            index='_'.join([settings.ES_INDEX_BASE, 'hg19']),
            doc_type=settings.ES_DOC_TYPE,
            body={"query": {"query_string": {"query": q}}},
            _source=[dataset])
        cleaner = ESResultTransformer(options=dotdict({'dotfield': True}),
                                      host=self.request.host)
        res = cleaner.clean_query_GET_response(res)

        if res and res.get('total') > 0:
            return self.format_output(res, out, q_type)
        return out
Ejemplo n.º 7
0
 def initialize(self, web_settings):
     """Initialize the handler and cache the hg19 index mapping.

     After the parent initialization, the mapping of the '<ES_INDEX_BASE>_hg19'
     index is fetched.  ``self.m`` keeps the 'properties' of the configured
     doc type from the first index in the response; ``self.meta`` keeps the
     mapping as cleaned by ``ESResultTransformer.clean_metadata_response``.
     """
     super(BeaconInfoHandler, self).initialize(web_settings)
     mapping = self.web_settings.es_client.indices.get_mapping(
         index='_'.join([self.web_settings.ES_INDEX_BASE, 'hg19']),
         doc_type=self.web_settings.ES_DOC_TYPE)
     first_index = list(mapping.keys())[0]
     doc_mapping = mapping[first_index]['mappings'][self.web_settings.ES_DOC_TYPE]
     self.m = doc_mapping['properties']
     cleaner = ESResultTransformer(options=dotdict(), host=self.request.host)
     self.meta = cleaner.clean_metadata_response(mapping)
Ejemplo n.º 8
0
 def _get_cleaned_metadata_options(self, kwargs):
     """Process options for a metadata query.

     Pops 'assembly' (lower-cased; replaced by the configured default when it
     is not one of the supported assemblies) and 'chromosome' (default False)
     from ``kwargs``, then empties ``kwargs`` in place.

     :param kwargs: mutable mapping of request arguments; empty on return.
     :return: ``dotdict`` with ``assembly`` and ``chromosome`` set.
     """
     options = dotdict()
     this_assembly = kwargs.pop('assembly', myvariant_settings.default_assembly).lower()
     if this_assembly in myvariant_settings.supported_assemblies:
         options.assembly = this_assembly
     else:
         options.assembly = myvariant_settings.default_assembly
     options.chromosome = kwargs.pop('chromosome', False)
     # Empty the caller's mapping in place.  Rebinding the local name to a new
     # dict would not be visible to the caller, so no rebind is done here.
     kwargs.clear()
     return options
Ejemplo n.º 9
0
 def initialize(self, web_settings):
     """Set up the handler and load the mapping of the hg19 index.

     Stores on ``self.m`` the 'properties' section of the configured doc type
     (taken from the first index returned by ``get_mapping``) and on
     ``self.meta`` the result of ``clean_metadata_response`` for the whole
     mapping response.
     """
     super(BeaconInfoHandler, self).initialize(web_settings)

     doc_type_mapping_args = dict(
         index='_'.join([self.web_settings.ES_INDEX_BASE, 'hg19']),
         doc_type=self.web_settings.ES_DOC_TYPE)
     raw_meta = self.web_settings.es_client.indices.get_mapping(
         **doc_type_mapping_args)

     # only the first index of the response is inspected
     index_name = list(raw_meta.keys())[0]
     type_mappings = raw_meta[index_name]['mappings']
     self.m = type_mappings[self.web_settings.ES_DOC_TYPE]['properties']

     self.meta = ESResultTransformer(
         options=dotdict(),
         host=self.request.host).clean_metadata_response(raw_meta)
Ejemplo n.º 10
0
    def transform(self, response, options):
        """
        Transform the query response to a user-friendly structure.
        Mainly deconstruct the elasticsearch response structure and
        hand over to transform_doc to apply the options below.

        Options:
            dotfield: flatten a dictionary using dotfield notation
            _sorted: sort keys alphabetically in ascending order
            always_list: ensure the fields specified are lists or wrapped in a list
            allow_null: ensure the fields specified are present in the result,
                        the fields may be provided as type None or [].
            biothing_type: result document type to apply customized transformation.
                        for example, add license field basing on document type's metadata.
            # only related to multiqueries
            template: base dict for every result, for example: {"success": true}
            templates: a different base for every result, replaces the setting above
            template_hit: a dict to update every positive hit result, default: {"found": true}
            template_miss: a dict to update every query with no hit, default: {"found": false}

        Returns:
            list: for a list of responses, one dict per hit and one dict per
                query without hits (empty dicts are dropped).
            dict: for a single response, the same dict modified in place.
            {}: for any other response type.

        Raises:
            KeyError: if a dict response lacks '_shards' or 'timed_out'.
        """
        if not isinstance(options, dotdict):
            options = dotdict(options)
        if isinstance(response, list):
            responses_ = []
            template = options.pop('template', {})
            templates = options.pop('templates', [template] * len(response))
            template_hit = options.pop('template_hit', dict(found=True))
            template_miss = options.pop('template_miss', dict(found=False))
            responses = [self.transform(res, options) for res in response]
            for res_, res in zip(templates, responses):
                if not res.get('hits'):
                    # Work on a copy: with the default `[template] * n` every
                    # entry is the same dict, so updating it in place would
                    # leak template_miss keys into later hits and make all
                    # misses share one object.
                    miss_ = dict(res_)
                    miss_.update(template_miss)
                    responses_.append(miss_)
                else:
                    for hit in res['hits']:
                        hit_ = dict(res_)
                        hit_.update(template_hit)
                        hit_.update(hit)
                        responses_.append(hit_)
            return list(filter(None, responses_))
        if isinstance(response, dict):
            response.update(response.pop('hits', {}))  # collapse one level
            response.pop('_shards')
            response.pop('timed_out')
            if 'hits' in response:
                for hit in response['hits']:
                    hit.update(hit.pop('_source', {}))  # collapse one level
                    self.transform_doc(hit, options)
            if 'aggregations' in response:
                self.transform_aggs(response['aggregations'])
                response['facets'] = response.pop('aggregations')
                response['hits'] = response.pop('hits')  # order
            return response
        return {}
Ejemplo n.º 11
0
    def _to_es_query(self, graph_query):
        """
        Convert a GraphQuery object into an ES query.

        The graph query's dict form is passed through the result
        transformer's ``option_dotfield`` and then unrolled into two parallel
        lists, one of values and one of the keys they came from (a list value
        contributes one entry per element).  Both lists go to the query
        builder's ``default_match_query``; the ``_proxied`` query of its
        result is returned.
        """
        assert isinstance(graph_query, GraphQuery)
        flat = graph_query.to_dict()
        self.pipeline.result_transform.option_dotfield(flat, dotdict())

        terms = []
        scopes = []
        for scope, value in flat.items():
            values = value if isinstance(value, list) else [value]
            terms.extend(values)
            scopes.extend([scope] * len(values))

        matched = self.pipeline.query_builder.default_match_query(
            terms, scopes, dotdict())
        return matched.query._proxied
Ejemplo n.º 12
0
    def build_graph_query(self, q, reverse=False, **options):
        """
        Build a Search object for graph query ``q``.

        When ``reverse`` is truthy and ``q`` reports itself reversible, a
        reversed deep copy of ``q`` is built as well and OR-ed with the
        forward query.  A falsy query yields a bare ``Search()``.  The search
        is returned after ``self.apply_extras`` processed it with the
        remaining options.
        """
        query = self._build_graph_query(q)

        if reverse and q.reversible():
            mirrored = deepcopy(q)  # leave the caller's object untouched
            mirrored.reverse()
            query = query | self._build_graph_query(mirrored)

        if query:
            search = Search().query(query)
        else:
            search = Search()

        return self.apply_extras(search, dotdict(options))
Ejemplo n.º 13
0
    def build(self, q=None, **options):
        """
        Build a query according to q and options.
        This is the public method called by API handlers.

        Regarding scopes:
            scopes: [str] nonempty, match query.
            scopes: NoneType, or [], no scope, so query string query.

        Additionally support these options:
            explain: include es scoring information
            userquery: customized function to interpret q

        * additional keywords are passed through as es keywords
            for example: 'explain', 'version' ...

        * multi-search is supported when q is a list. all queries
            are built individually and then sent in one request.

        Returns an ESScrollID when a scroll_id option is given, otherwise the
        built search.  Raises ValueError when building hits an
        IllegalOperation, and RawQueryInterrupt (carrying the query dict)
        when the rawquery option is set.
        """
        opts = dotdict(options)

        # a scroll id bypasses all query building stages
        if opts.scroll_id:
            return ESScrollID(opts.scroll_id)

        # fetch_all conflicts with sorting and sizing, drop both
        if opts.fetch_all:
            for conflicting in ('sort', 'size'):
                opts.pop(conflicting, None)

        try:
            if not isinstance(q, list):  # str, int ...
                search = self._build_one(q, opts)
            else:
                search = MultiSearch()
                for single_q in q:
                    search = search.add(self._build_one(single_q, opts))

        except IllegalOperation as exc:
            raise ValueError(str(exc))  # ex. sorting by -_score

        if opts.get('rawquery'):
            raise RawQueryInterrupt(search.to_dict())

        return search
Ejemplo n.º 14
0
 def transform(self, response, options):
     """
     Transform the query result.

     A list of responses is transformed element by element and flattened
     into one list: one dict per hit (template + template_hit + hit) and one
     dict per response without hits (template + template_miss); empty dicts
     are dropped.  A single dict response is modified in place: its 'hits'
     section is collapsed one level, '_shards' and 'timed_out' are removed,
     each hit's '_source' is collapsed and the hit is processed according to
     the options; 'aggregations' are transformed and renamed to 'facets'.
     Any other response type yields {}.

     Options read here: template, templates, template_hit, template_miss
     (list responses only), allow_null, always_list, _sorted, dotfield.

     Raises KeyError if a dict response lacks '_shards' or 'timed_out'.
     """
     if not isinstance(options, dotdict):
         options = dotdict(options)
     if isinstance(response, list):
         responses_ = []
         template = options.pop('template', {})
         templates = options.pop('templates', [template] * len(response))
         template_hit = options.pop('template_hit', dict(found=True))
         template_miss = options.pop('template_miss', dict(found=False))
         responses = [self.transform(res, options) for res in response]
         for res_, res in zip(templates, responses):
             if not res.get('hits'):
                 # Copy before updating: the default `[template] * n` repeats
                 # one dict object, so an in-place update would leak
                 # template_miss keys into later hits and alias every miss.
                 miss_ = dict(res_)
                 miss_.update(template_miss)
                 responses_.append(miss_)
             else:
                 for hit in res['hits']:
                     hit_ = dict(res_)
                     hit_.update(template_hit)
                     hit_.update(hit)
                     responses_.append(hit_)
         return list(filter(None, responses_))
     if isinstance(response, dict):
         response.update(response.pop('hits', {}))  # collapse one level
         response.pop('_shards')
         response.pop('timed_out')
         if 'hits' in response:
             for hit in response['hits']:
                 hit.update(hit.pop('_source', {}))  # collapse one level
                 for path, obj in self.traverse(hit):
                     self.transform_hit(path, obj, options)
                     if options.allow_null:
                         self.option_allow_null(path, obj,
                                                options.allow_null)
                     if options.always_list:
                         self.option_always_list(path, obj,
                                                 options.always_list)
                     if options._sorted:
                         self.option_sorted(path, obj)
                 # dotfield is applied once per hit, after the traversal
                 if options.dotfield:
                     self.option_dotfield(hit, options)
         if 'aggregations' in response:
             self.transform_aggregations(response['aggregations'])
             response['facets'] = response.pop('aggregations')
             response['hits'] = response.pop('hits')  # order
         return response
     return {}
Ejemplo n.º 15
0
    async def include_children(self, res, options):  # modify in-place
        """
        Make additional queries to get the children field content.

        ``res`` is modified in place: every hit under res['hits']['hits']
        gets a 'children' entry holding the result of executing the lineage
        query built for the hit's '_id'.  A list (msearch result) is handled
        element by element.  Returns None.

        NOTE(review): a KeyError raised anywhere in the single-query loop,
        including inside the awaited execute call, silently ends processing.
        """
        if not isinstance(res, list):
            # single query
            try:
                for hit in res['hits']['hits']:
                    lineage_query = MytaxonQueryBuilder.build_lineage_query(
                        hit['_id'], options)
                    hit['children'] = await super().execute(
                        lineage_query, dotdict())
            except KeyError:
                pass
            return

        # msearch result
        for search in res:
            await self.include_children(search, options)
Ejemplo n.º 16
0
    def query_dataset(self, chrom, start, ref, alt, assembly, dataset):
        """
        Look up one variant in one dataset and report whether it exists.

        ``{'datasetId': dataset, 'exists': False}`` is returned untouched
        unless the dataset is in ``self.pos_dbs + self.assembly_dbs``, chrom,
        start and alt are truthy and the assembly is a key of
        ``self.assembly_keys``.  The search runs against the
        '<ES_INDEX_BASE>_<assembly>' index; when the cleaned response has a
        positive 'total', the result of ``self.format_output`` is returned.
        """
        out = {'datasetId': dataset, 'exists': False}

        if dataset not in self.pos_dbs + self.assembly_dbs:
            return out
        if not (chrom and start and alt and assembly in self.assembly_keys):
            return out

        assembly = self.assembly_keys[assembly]  # get hg assembly notation

        # "alternateBases" such as "DEL85689" or "DUP85689" select the query
        # type by their first three characters
        marker = alt[:3]
        if marker == 'DEL':
            q_type, ref = 'del', ''
        elif marker == 'DUP':
            q_type, ref = 'dup', ''
        elif not ref:
            q_type, ref = 'ins', ''
        else:
            q_type = 'snp'

        q = self.format_query_string(q_type, chrom, start, ref, alt,
                                     assembly, dataset)

        # perform query and format result
        settings = self.web_settings
        res = settings.es_client.search(
            index='_'.join([settings.ES_INDEX_BASE, assembly]),
            doc_type=settings.ES_DOC_TYPE,
            body={"query": {"query_string": {"query": q}}},
            _source=[dataset])
        transformer_options = dotdict({'dotfield': True, 'assembly': assembly})
        cleaner = ESResultTransformer(
            options=transformer_options,
            host=self.request.host,
            source_metadata=self.web_settings.source_metadata())
        res = cleaner.clean_query_GET_response(res)

        if res and res.get('total') > 0:
            return self.format_output(res, out, q_type)
        return out
Ejemplo n.º 17
0
 def _get_cleaned_common_options(self, kwargs):
     """Process the options shared by every query type (/query or annotation).

     Control options are popped from ``kwargs`` into a ``dotdict`` that is
     then passed through ``self._get_options`` (override that to add more
     options).  Finally, every key not listed in ``self._allowed_options`` is
     deleted from the mapping returned by ``parse_sort_option``.
     """
     options = dotdict()
     for flag in ("raw", "rawquery", "fetch_all"):
         setattr(options, flag, kwargs.pop(flag, False))
     options.host = kwargs.pop("host", biothing_settings.ga_tracker_url)
     for flag in ("jsonld", "dotfield"):
         setattr(options, flag, kwargs.pop(flag, False))

     # override to add more options
     options = self._get_options(options, kwargs)

     requested_scopes = kwargs.pop("scopes", None)
     if requested_scopes:
         options.scopes = self._cleaned_scopes(requested_scopes)

     kwargs = parse_sort_option(kwargs)
     for key in set(kwargs).difference(set(self._allowed_options)):
         logging.debug("removing param '%s' from query" % key)
         del kwargs[key]
     return options
Ejemplo n.º 18
0
 def _get_cleaned_query_options(self, kwargs):
     """Common helper for processing fields, kwargs and other options passed to ESQueryBuilder.

     Pops the control options and 'scopes' out of ``kwargs``, replaces
     'fields' with a cleaned '_source' entry (when cleaning leaves anything),
     runs the sort option parser and deletes every key not listed in
     ``self._allowed_options``.  The remaining mapping is attached to the
     returned options as ``options.kwargs``.
     """
     options = dotdict()
     for flag in ('raw', 'rawquery', 'fetch_all'):
         setattr(options, flag, kwargs.pop(flag, False))
     options.host = kwargs.pop('host', self._settings.ga_tracker_url)
     options = self._get_options(options, kwargs)

     requested_scopes = kwargs.pop('scopes', None)
     if requested_scopes:
         options.scopes = self._cleaned_scopes(requested_scopes)

     requested_fields = kwargs.pop('fields', None)
     source_fields = (self._cleaned_fields(requested_fields)
                      if requested_fields else None)
     if source_fields:
         kwargs["_source"] = source_fields

     kwargs = self._parse_sort_option(kwargs)
     for stale in set(kwargs).difference(set(self._allowed_options)):
         del kwargs[stale]

     options.kwargs = kwargs
     return options
Ejemplo n.º 19
0
    def _build_graph_query(self, graph_query):
        """
        Convert a GraphQuery object into an ES Query object.

        The pairs produced by ``traverse`` over the graph query's dict form
        are unrolled into two parallel lists, values and the keys they came
        from (a list value contributes one entry per element), which are
        handed to ``self._build_match_query``.
        """
        assert isinstance(graph_query, GraphQuery)
        as_dict = graph_query.to_dict()

        terms = []
        scopes = []
        for scope, value in traverse(as_dict, True):
            values = value if isinstance(value, list) else [value]
            terms.extend(values)
            scopes.extend([scope] * len(values))

        # query proxy object does not support OR operator, thus using _proxied
        match = self._build_match_query(terms, scopes, dotdict())
        return match.query._proxied
Ejemplo n.º 20
0
    def parse(self, method, reqargs):
        """
        Parse a HTTP request, represented by its method and args,
        with this OptionSet and return an attribute dictionary.

        Options are taken from ``self.optset`` for the method, falling back
        to the "*" entry.  Values that parse to None are skipped.  A value is
        stored under each group named by the option's 'group' setting (a
        string or an iterable of strings), or at the top level when the
        option has no group.  Every group in ``self.groups`` is present in
        the result, empty if necessary.

        An OptionError is re-raised after the keyword and alias of the
        failing option were added to its info.
        """
        options = self.optset.get(method, self.optset["*"])
        result = defaultdict(dict)  # to accommodate groups

        for keyword, option in options.items():
            try:
                val = option.parse(reqargs)
            except OptionError as err:
                err.info.setdefault("keyword", keyword)
                err.info["alias"] = option.get("alias")
                err.simplify()  # remove empty fields
                raise err  # with helpful info

            if val is None:
                continue

            # TODO: build a new ds for returned result
            if 'group' not in option:  # top level keywords
                result[keyword] = val
                continue

            group = option['group']
            group_names = [group] if isinstance(group, str) else group
            for group_name in group_names:
                result[group_name][keyword] = val

        # make sure all named groups exist
        for group in self.groups:
            result.setdefault(group, {})

        return dotdict(result)
Ejemplo n.º 21
0
    def parse(self, method, args, path_args, path_kwargs):
        """
        Parse request inputs into an attribute dictionary of option values.

        The settings registered under '*' apply when ``self._methods`` is
        empty or contains the method; settings registered for the specific
        method take precedence over them.  Each setting is parsed with an
        OptionArg; None results are discarded.  A value is stored under each
        group named by the setting's 'group' (a string or an iterable of
        strings), or at the top level without one.  Every group in
        ``self._groups`` is present in the result, empty if necessary.
        """
        result = defaultdict(dict)

        # method precedence: specific > *
        options = {}
        if not self._methods or method in self._methods:
            options.update(self._options['*'].items())
        options.update(self._options[method].items())

        # setting + inputs -> arg value
        for keyword, setting in options.items():
            val = OptionArg(keyword, setting).parse(args, path_args, path_kwargs)
            if val is None:  # discard no default value
                continue

            if 'group' not in setting:  # top level keywords
                result[keyword] = val
                continue

            group = setting['group']
            group_names = [group] if isinstance(group, str) else group
            for group_name in group_names:
                result[group_name][keyword] = val

        # make sure all named groups exist
        for group in self._groups:
            result.setdefault(group, {})

        return dotdict(result)
Ejemplo n.º 22
0
 def __init__(self, client, options=None):
     """Store the client and the options on the instance.

     :param client: client object, kept as ``self.client``.
     :param options: options object, kept as ``self.options``; when omitted
         (or None) each instance gets its own fresh ``dotdict``.
     """
     self.client = client
     # A `dotdict()` default in the signature is created once at definition
     # time and would be shared (and mutated) across all instances.
     self.options = dotdict() if options is None else options
Ejemplo n.º 23
0
    def transform(self, response, **options):
        """
        Transform the query response to a user-friendly structure.
        Mainly deconstruct the elasticsearch response structure and
        hand over to transform_doc to apply the options below.

        Options:
            # generic transformations for dictionaries
            # ------------------------------------------
            dotfield: flatten a dictionary using dotfield notation
            _sorted: sort keys alphabetically in ascending order
            always_list: ensure the fields specified are lists or wrapped in a list
            allow_null: ensure the fields specified are present in the result,
                        the fields may be provided as type None or [].

            # additional multisearch result transformations
            # ------------------------------------------------
            template: base dict for every result, for example: {"success": true}
            templates: a different base for every result, replaces the setting above
            template_hit: a dict to update every positive hit result, default: {"found": true}
            template_miss: a dict to update every query with no hit, default: {"found": false}

            # document format and content management
            # ---------------------------------------
            biothing_type: result document type to apply customized transformation.
                        for example, add license field basing on document type's metadata.
            one: return the individual document if there's only one hit. ignore this setting
                if there are multiple hits. return None if there is no hit. this option is
                not effective when aggregation results are also returned in the same query.
            native: bool, if the returned result is in python primitive types.
            version: bool, if _version field is kept.
            score: bool, if _score field is kept.

        Raises:
            TypeError: if response is neither a list nor a dict.
        """
        options = dotdict(options)
        if isinstance(response, list):
            responses_ = []
            options.pop('one', None)  # ignore
            template = options.pop('template', {})
            templates = options.pop('templates', [template] * len(response))
            template_hit = options.pop('template_hit', dict(found=True))
            template_miss = options.pop('template_miss', dict(found=False))
            responses = [self.transform(res, **options) for res in response]
            for tpl, res in zip(templates, responses):
                for _res in res if isinstance(res, list) else [res]:
                    assert isinstance(_res, dict)
                    # a non-empty dict without 'hits' is itself a document
                    if _res and 'hits' not in _res:
                        hit_ = dict(tpl)
                        hit_.update(template_hit)
                        hit_.update(_res)
                        responses_.append(hit_)
                        continue
                    if not _res or not _res['hits']:
                        # Copy before updating: the default `[template] * n`
                        # repeats one dict object, so an in-place update would
                        # leak template_miss keys into later hits and make
                        # every miss share the same dict.
                        miss_ = dict(tpl)
                        miss_.update(template_miss)
                        responses_.append(miss_)
                        continue
                    for hit in _res['hits']:
                        hit_ = dict(tpl)
                        hit_.update(template_hit)
                        hit_.update(hit)
                        responses_.append(hit_)
            return list(filter(None, responses_))

        if isinstance(response, dict):
            response = self._Hits(response)
            response.collapse('hits')
            response.exclude(('_shards', '_node', 'timed_out'))
            response.wrap('hits', self._Doc)

            for hit in response['hits']:
                hit.collapse('_source')
                # 'sort' is introduced when sorting
                hit.exclude(('_index', '_type', 'sort'))
                self._transform_hit(hit, options)

            if options.get('native', True):
                response['hits'] = [hit.data for hit in response['hits']]
                response = response.data

            if 'aggregations' in response:
                self.transform_aggs(response['aggregations'])
                response['facets'] = response.pop('aggregations')
                hits = response.pop('hits')  # move key order
                if hits:  # hide "hits" field when size=0
                    response['hits'] = hits

            elif options.get('one'):
                # prefer one-level presentation
                # or structures as simple as possible
                if len(response['hits']) == 1:
                    response = response['hits'][0]
                elif len(response['hits']) == 0:
                    response = None
                else:  # show a list of docs
                    response = response['hits']

            return response

        raise TypeError()