Beispiel #1
0
    def run_query(self, es_query, es_type=None):
        """
        Run a more advanced POST based ES query and keep only this domain's hits.

        es_query is the request body (a dict); when it asks for specific
        'fields' or 'script_fields', 'domain' is added to its 'fields' list so
        the per-hit domain check below has something to compare against.
        es_type is passed through to the search call.

        Returns the raw ES response with response['hits']['hits'] reduced to
        the hits whose domain equals self.domain.

        Raises ESUserError when the search fails but the same request with the
        query string replaced by match_all succeeds, and ESError for any other
        ElasticsearchException.
        """

        logger.info(
            "ESlog: [%s.%s] ESquery: %s",
            self.__class__.__name__, self.domain, json.dumps(es_query))
        if 'fields' in es_query or 'script_fields' in es_query:
            # nasty hack to add domain field to query that does specific fields.
            # do nothing if there's no field query because we get everything
            fields = es_query.get('fields', [])
            # guard against piling up duplicates when a query dict is re-run
            if 'domain' not in fields:
                fields.append('domain')
            es_query['fields'] = fields

        try:
            es_results = self.es_interface.search(self.index,
                                                  es_type,
                                                  body=es_query)
            report_and_fail_on_shard_failures(es_results)
        except ElasticsearchException as e:
            inner_query = es_query.get('query', {}).get('filtered', {}).get(
                'query', {})
            if 'query_string' in inner_query:
                # the error may have been caused by a bad query string
                # re-run with no query string to check
                import copy  # local import: only needed on this error path

                querystring = inner_query['query_string']['query']
                # work on a copy so the caller's query (and the error message
                # below) still shows the request that actually failed
                new_query = copy.deepcopy(es_query)
                new_query['query']['filtered']['query'] = {"match_all": {}}
                # a failing retry raises ESError from the nested call
                new_results = self.run_query(new_query, es_type)
                if new_results:
                    # the request succeeded without that query string
                    raise ESUserError("Error with elasticsearch query: %s" %
                                      querystring)

            msg = "Error in elasticsearch query [%s]: %s\nquery: %s" % (
                self.index, str(e), es_query)
            raise ESError(msg)

        hits = []
        for res in es_results['hits']['hits']:
            # reset per hit: a hit with neither '_source' nor 'fields' must not
            # inherit the previous hit's domain and slip past the check
            res_domain = None
            if '_source' in res:
                res_domain = res['_source'].get('domain', None)
            elif 'fields' in res:
                res['fields'] = flatten_field_dict(res)
                res_domain = res['fields'].get('domain', None)

            # security check
            if res_domain == self.domain:
                hits.append(res)
            else:
                logger.info(
                    "Requester domain %s does not match result domain %s",
                    self.domain, res_domain)
        es_results['hits']['hits'] = hits
        return es_results
Beispiel #2
0
 def normalize_result(query, result):
     """Pick the document representation for one hit of the query response.

     Yields the hit's '_id' when the query excludes the source, the output of
     flatten_field_dict over '_source' for legacy-fields queries, and the raw
     '_source' otherwise.
     """
     if query._exclude_source:
         doc = result['_id']
     elif query._legacy_fields:
         doc = flatten_field_dict(result, fields_property='_source')
     else:
         doc = result['_source']
     return doc
Beispiel #3
0
 def hits(self):
     """Return the docs from the response.

     No field restriction (``_fields`` is None) gives each raw hit's
     '_source'; an empty field list gives ``self.ids``; any other field
     selection gives flatten_field_dict applied to each raw hit.
     """
     requested = self.query._fields
     if requested is None:
         return [hit['_source'] for hit in self.raw_hits]
     if requested == []:
         return self.ids
     return [flatten_field_dict(hit) for hit in self.raw_hits]
Beispiel #4
0
 def normalize_result(query, result):
     """Reduce one item of the query response to the doc handed to callers.

     Source-excluding queries get the hit's '_id'; legacy-fields queries get
     flatten_field_dict run over '_source'; everything else gets '_source'
     unchanged.
     """
     if query._exclude_source:
         normalized = result['_id']
     elif query._legacy_fields:
         normalized = flatten_field_dict(result, fields_property='_source')
     else:
         normalized = result['_source']
     return normalized
Beispiel #5
0
    def run_query(self, es_query, es_type=None):
        """
        Run a more advanced POST based ES query and keep only this domain's hits.

        es_query is the request body (a dict); when it asks for specific
        'fields' or 'script_fields', 'domain' is added to its 'fields' list so
        the per-hit domain check below has something to compare against.
        es_type is passed through to the search call.

        Returns the raw ES response with response['hits']['hits'] reduced to
        the hits whose domain equals self.domain.

        Raises ESUserError when the search fails but the same request with the
        query string replaced by match_all succeeds, and ESError for any other
        ElasticsearchException.
        """

        logger.info("ESlog: [%s.%s] ESquery: %s",
                    self.__class__.__name__, self.domain, json.dumps(es_query))
        if 'fields' in es_query or 'script_fields' in es_query:
            # nasty hack to add domain field to query that does specific fields.
            # do nothing if there's no field query because we get everything
            fields = es_query.get('fields', [])
            # guard against piling up duplicates when a query dict is re-run
            if 'domain' not in fields:
                fields.append('domain')
            es_query['fields'] = fields

        try:
            es_results = self.es.search(self.index, es_type, body=es_query)
            report_shard_failures(es_results)
        except ElasticsearchException as e:
            inner_query = es_query.get('query', {}).get('filtered', {}).get('query', {})
            if 'query_string' in inner_query:
                # the error may have been caused by a bad query string
                # re-run with no query string to check
                import copy  # local import: only needed on this error path

                querystring = inner_query['query_string']['query']
                # work on a copy so the caller's query (and the error message
                # below) still shows the request that actually failed
                new_query = copy.deepcopy(es_query)
                new_query['query']['filtered']['query'] = {"match_all": {}}
                # a failing retry raises ESError from the nested call
                new_results = self.run_query(new_query, es_type)
                if new_results:
                    # the request succeeded without that query string
                    raise ESUserError("Error with elasticsearch query: %s" %
                        querystring)

            msg = "Error in elasticsearch query [%s]: %s\nquery: %s" % (self.index, str(e), es_query)
            raise ESError(msg)

        hits = []
        for res in es_results['hits']['hits']:
            # reset per hit: a hit with neither '_source' nor 'fields' must not
            # inherit the previous hit's domain and slip past the check
            res_domain = None
            if '_source' in res:
                res_domain = res['_source'].get('domain', None)
            elif 'fields' in res:
                res['fields'] = flatten_field_dict(res)
                res_domain = res['fields'].get('domain', None)

            # security check
            if res_domain == self.domain:
                hits.append(res)
            else:
                logger.info("Requester domain %s does not match result domain %s",
                            self.domain, res_domain)
        es_results['hits']['hits'] = hits
        return es_results
Beispiel #6
0
def es_query(params=None,
             facets=None,
             terms=None,
             q=None,
             es_index=None,
             start_at=None,
             size=None,
             dict_only=False,
             fields=None,
             facet_size=None):
    """Assemble an ES request body from simple arguments and optionally run it.

    params     -- mapping of field name to a value or list of values; each
                  entry becomes a "terms" filter unless its name is in `terms`.
                  'T'/True are sent as 1 and 'F'/False as 0.
    facets     -- list of field names (one terms facet each, sized by
                  `facet_size` or SIZE_LIMIT) or a dict merged into the
                  body's "facets".
    terms      -- names in `params` to leave out of the filter.
    q          -- base request body; it is deep-copied, never modified.
    es_index   -- key into ES_META, 'domains' when falsy.
    start_at   -- value for "from", 0 when falsy.
    size       -- value for "size"; falls back to q's size, then SIZE_LIMIT.
    dict_only  -- return the assembled body instead of searching.
    fields     -- extra entries for the body's "fields"; when given,
                  flatten_field_dict is called on every returned hit.

    Returns the request body (dict_only) or the search result.
    Raises ESError when the search raises ElasticsearchException.
    """
    skipped = [] if terms is None else terms
    params = {} if params is None else params
    body = {} if q is None else copy.deepcopy(q)

    if size is None:
        size = body.get("size", SIZE_LIMIT)
    body["size"] = size
    body["from"] = start_at if start_at else 0

    # Collect any pre-existing filter into a single list of and-ed clauses:
    # first the clauses of an existing "and", then whatever else remains.
    and_clauses = body.get("filter", {}).pop("and", [])
    leftover = body.pop("filter", None)
    if leftover:
        and_clauses.append(leftover)

    def to_es_value(value):
        #todo: find a better way to handle bools, something that won't break fields that may be 'T' or 'F' but not bool
        if value == 'T' or value is True:
            return 1
        if value == 'F' or value is False:
            return 0
        return value

    for name in params:
        if name in skipped:
            continue
        raw = params[name]
        candidates = raw if isinstance(raw, list) else [raw]
        and_clauses.append(
            {"terms": {name: [to_es_value(c) for c in candidates]}})

    if facets:
        facet_spec = body.setdefault("facets", {})
        if isinstance(facets, list):
            limit = facet_size or SIZE_LIMIT
            for field_name in facets:
                facet_spec[field_name] = {
                    "terms": {"field": field_name, "size": limit},
                }
        elif isinstance(facets, dict):
            facet_spec.update(facets)

    if and_clauses:
        # wrap the original query (or match_all) together with the filter
        inner = body.pop("query", {}) or {"match_all": {}}
        body["query"] = {
            "filtered": {
                "filter": {"and": and_clauses},
                "query": inner,
            }
        }

    if fields is not None:
        body.setdefault("fields", []).extend(fields)

    if dict_only:
        return body

    es = get_es_new()
    meta = ES_META[es_index or 'domains']

    try:
        result = es.search(meta.index, meta.type, body=body)
    except ElasticsearchException as e:
        raise ESError(e)

    if fields is not None:
        for hit in result['hits']['hits']:
            flatten_field_dict(hit)

    return result
Beispiel #7
0
def es_query(params=None, facets=None, terms=None, q=None, es_index=None, start_at=None, size=None, dict_only=False,
             fields=None, facet_size=None):
    """Assemble an ES request body from simple arguments and optionally run it.

    params     -- mapping of field name to a value or list of values; each
                  entry becomes a "terms" filter unless its name is in `terms`.
                  'T'/True are sent as 1 and 'F'/False as 0.
    facets     -- list of field names (one terms facet each, sized by
                  `facet_size` or SIZE_LIMIT) or a dict merged into the
                  body's "facets".
    terms      -- names in `params` to leave out of the filter.
    q          -- base request body; it is deep-copied, never modified.
    es_index   -- key into ES_META, 'domains' when falsy.
    start_at   -- value for "from", 0 when falsy.
    size       -- value for "size"; falls back to q's size, then SIZE_LIMIT.
    dict_only  -- return the assembled body instead of searching.
    fields     -- extra entries for the body's "fields"; when given,
                  flatten_field_dict is called on every returned hit.

    Returns the request body (dict_only) or the search result.
    Raises ESError when the search raises ElasticsearchException.
    """
    skipped = [] if terms is None else terms
    params = {} if params is None else params
    body = {} if q is None else copy.deepcopy(q)

    if size is None:
        size = body.get("size", SIZE_LIMIT)
    body["size"] = size
    body["from"] = start_at if start_at else 0

    # Collect any pre-existing filter into a single list of and-ed clauses:
    # first the clauses of an existing "and", then whatever else remains.
    and_clauses = body.get("filter", {}).pop("and", [])
    leftover = body.pop("filter", None)
    if leftover:
        and_clauses.append(leftover)

    def to_es_value(value):
        #todo: find a better way to handle bools, something that won't break fields that may be 'T' or 'F' but not bool
        if value == 'T' or value is True:
            return 1
        if value == 'F' or value is False:
            return 0
        return value

    for name in params:
        if name in skipped:
            continue
        raw = params[name]
        candidates = raw if isinstance(raw, list) else [raw]
        and_clauses.append({"terms": {name: [to_es_value(c) for c in candidates]}})

    if facets:
        facet_spec = body.setdefault("facets", {})
        if isinstance(facets, list):
            limit = facet_size or SIZE_LIMIT
            for field_name in facets:
                facet_spec[field_name] = {"terms": {"field": field_name, "size": limit}}
        elif isinstance(facets, dict):
            facet_spec.update(facets)

    if and_clauses:
        # wrap the original query (or match_all) together with the filter
        inner = body.pop("query", {}) or {"match_all": {}}
        body["query"] = {
            "filtered": {
                "filter": {"and": and_clauses},
                "query": inner,
            }
        }

    if fields is not None:
        body.setdefault("fields", []).extend(fields)

    if dict_only:
        return body

    es = get_es_new()
    meta = ES_META[es_index or 'domains']

    try:
        result = es.search(meta.index, meta.type, body=body)
    except ElasticsearchException as e:
        raise ESError(e)

    if fields is not None:
        for hit in result['hits']['hits']:
            flatten_field_dict(hit)

    return result