Ejemplo n.º 1
0
def cernopendata_search_factory(self, search):
    """Build the search for a REST request with the CERN Open Data parser.

    The ``q`` request value is parsed by ``cernopendata_query_parser`` and
    applied to ``search``; afterwards the default facets and sorter
    factories are applied for the first index of the search.

    :param self: REST view
    :param search: Elastic search DSL search instance

    :return: Tuple with search instance and URL arguments
    :raises InvalidQueryRESTError: if parsing the query raises SyntaxError
    """
    raw_query = request.values.get("q")

    try:
        parsed_query = cernopendata_query_parser(raw_query)
        search = search.query(parsed_query)
    except SyntaxError:
        message = "Failed parsing query: {0}".format(
            request.values.get("q", ""))
        current_app.logger.debug(message, exc_info=True)
        raise InvalidQueryRESTError()

    index_name = search._index[0]
    search, url_kwargs = default_facets_factory(search, index_name)
    search, sort_kwargs = default_sorter_factory(search, index_name)

    # Merge the sorter's URL arguments into the facets' URL arguments.
    for sort_key, sort_value in sort_kwargs.items():
        url_kwargs.add(sort_key, sort_value)
    url_kwargs.add("q", raw_query)

    return search, url_kwargs
Ejemplo n.º 2
0
def search_factory(self, search, query_parser=None):
    """Build the search from the current request with elasticsearch DSL.

    Default facets and sorting are applied first, then the default
    ``identity`` grouping filter (when no ``group_by`` request value is
    given) and finally the ``q`` request value as a ``query_string`` query.

    :param self: REST view.
    :param search: Elastic search DSL search instance.
    :param query_parser: Not used by this factory.
    :returns: Tuple with search instance and URL arguments.
    :raises InvalidQueryRESTError: if building the query raises SyntaxError.
    """
    from invenio_records_rest.facets import default_facets_factory
    from invenio_records_rest.sorter import default_sorter_factory

    index_name = search._index[0]
    search, urlkwargs = default_facets_factory(search, index_name)
    search, sortkwargs = default_sorter_factory(search, index_name)
    for sort_key, sort_value in sortkwargs.items():
        urlkwargs.add(sort_key, sort_value)

    # Without an explicit 'group_by', fall back to the 'identity' grouping.
    if 'group_by' not in request.values:
        identity_grouping = Q('term', Grouping='identity')
        search = search.filter(identity_grouping)
        urlkwargs['group_by'] = 'identity'

    try:
        user_query = request.values.get('q')
        if user_query:
            full_text = Q('query_string',
                          query=user_query,
                          default_field='_search_all')
            search = search.query(full_text)
            urlkwargs['q'] = user_query
    except SyntaxError:
        raise InvalidQueryRESTError()

    # Leave out the identifiers the search was made by (large aggregate).
    excluded_fields = ['*.SearchIdentifier']
    search = search.source(exclude=excluded_fields)
    return search, urlkwargs
Ejemplo n.º 3
0
def cap_search_factory(self, search, query_parser=None):
    """Build the search for a REST request using CAP facets.

    :param self: REST view.
    :param search: Elastic search DSL search instance.
    :param query_parser: Optional callable turning the ``q`` request value
        into a query; a ``query_string`` based parser is used when omitted.
    :returns: Tuple with search instance and URL arguments.
    :raises InvalidQueryRESTError: if the parser raises SyntaxError.
    """
    def _fallback_parser(text=None):
        """Wrap ``text`` in a ``query_string`` query, or return ``Q()``."""
        return Q('query_string', query=text) if text else Q()

    raw_query = request.values.get('q')
    parse = query_parser if query_parser else _fallback_parser

    try:
        parsed_query = parse(raw_query)
        search = search.query(parsed_query)
    except SyntaxError:
        message = "Failed parsing query: {0}".format(
            request.values.get('q', ''))
        current_app.logger.debug(message, exc_info=True)
        raise InvalidQueryRESTError()

    index_name = search._index[0]
    search, urlkwargs = cap_facets_factory(search, index_name)
    search, sortkwargs = default_sorter_factory(search, index_name)

    # Merge the sorter's URL arguments into the facets' URL arguments.
    for sort_key, sort_value in sortkwargs.items():
        urlkwargs.add(sort_key, sort_value)
    urlkwargs.add('q', raw_query)

    return search, urlkwargs
Ejemplo n.º 4
0
def inspire_search_factory(self, search):
    """Build the search for a REST request using the ``IQ`` query builder.

    :param self: REST view.
    :param search: Elastic search DSL search instance.
    :returns: Tuple with search instance and URL arguments.
    :raises InvalidQueryRESTError: if building the query raises SyntaxError.
    """
    from invenio_records_rest.facets import default_facets_factory
    from invenio_records_rest.sorter import default_sorter_factory

    raw_query = request.values.get('q', '')

    try:
        parsed_query = IQ(raw_query, search)
        search = search.query(parsed_query)
    except SyntaxError:
        message = "Failed parsing query: {0}".format(
            request.values.get('q', ''))
        current_app.logger.debug(message, exc_info=True)
        raise InvalidQueryRESTError()
    finally:
        # In debug mode the search body is dumped whether or not parsing
        # succeeded.
        if current_app.debug:
            search_dump = json.dumps(search.to_dict(), indent=4)
            current_app.logger.debug(search_dump)

    index_name = search._index[0]
    search, urlkwargs = default_facets_factory(search, index_name)
    search, sortkwargs = default_sorter_factory(search, index_name)

    # Merge the sorter's URL arguments into the facets' URL arguments.
    for sort_key, sort_value in sortkwargs.items():
        urlkwargs.add(sort_key, sort_value)
    urlkwargs.add('q', raw_query)

    return search, urlkwargs
Ejemplo n.º 5
0
def inspire_search_factory(self, search):
    """Parse query using Inspire-Query-Parser.

    The ``q`` request value is applied through ``search.query_from_iq``;
    the Inspire filter factory, the default sorter factory and
    ``select_source`` are then applied to the search.

    :param self: REST view.
    :param search: Elastic search DSL search instance.
    :returns: Tuple with search instance and URL arguments.
    :raises InvalidQueryRESTError: if parsing the query raises SyntaxError.
    """
    import logging

    query_string = request.values.get('q', '')
    urlkwargs = MultiDict()

    try:
        search = search.query_from_iq(query_string)
    except SyntaxError:
        current_app.logger.debug("Failed parsing query: {0}".format(
            request.values.get('q', '')),
                                 exc_info=True)
        raise InvalidQueryRESTError()

    search_index = search._index[0]
    search, urlkwargs = inspire_filter_factory(search, urlkwargs, search_index)
    search, sortkwargs = default_sorter_factory(search, search_index)
    # Keep the sort arguments in the returned URL arguments; previously
    # they were computed and then dropped.
    for key, value in sortkwargs.items():
        urlkwargs.add(key, value)
    search = select_source(search)

    urlkwargs.add('q', query_string)

    # Serialising the whole search body is only worth doing when the debug
    # message will actually be emitted.
    if current_app.logger.isEnabledFor(logging.DEBUG):
        current_app.logger.debug(json.dumps(search.to_dict(), indent=4))

    return search, urlkwargs
Ejemplo n.º 6
0
def meta_search_factory(self, search, query_parser=None):
    """Parse query using elasticsearch DSL query.

    Facets and sorting are applied for the ``"metadata"`` index name, hits
    are disabled (``size=0``) and the search is restricted to ``Cites``
    relationships. Results are exposed through a ``Target`` terms
    aggregation that is paginated with a ``bucket_sort`` pipeline driven by
    the ``page`` and ``size`` request values.

    :param self: REST view.
    :param search: Elastic search DSL search instance.
    :param query_parser: Not used by this factory.
    :returns: Tuple with search instance and URL arguments.
    :raises InvalidQueryRESTError: if building the query raises SyntaxError.
    :raises ValueError: if ``page`` or ``size`` is not an integer.
    """
    from invenio_records_rest.facets import default_facets_factory
    from invenio_records_rest.sorter import default_sorter_factory

    search, urlkwargs = default_facets_factory(search, "metadata")
    search, sortkwargs = default_sorter_factory(search, "metadata")
    for key, value in sortkwargs.items():
        urlkwargs.add(key, value)

    # No hits are requested; only the aggregations below are of interest.
    search = search.extra(size=0)

    search = search.filter(Q('term', RelationshipType='Cites'))
    # Apply 'identity' grouping by default
    if 'group_by' not in request.values:
        search = search.filter(Q('term', Grouping='identity'))
        urlkwargs['group_by'] = 'identity'

    try:
        query_string = request.values.get('q')
        if query_string:
            search = search.query(
                Q('query_string',
                  query=query_string,
                  default_field='_search_all'))
            urlkwargs['q'] = query_string
    except SyntaxError:
        raise InvalidQueryRESTError()

    # Request values are strings: convert once so that both the offset and
    # the bucket_sort size are integers (the size used to be forwarded as a
    # string whenever it came from the request).
    size = int(request.values.get('size', 10))
    page = int(request.values.get('page', 1))
    start = (page - 1) * size

    search.aggs.bucket(
        'Target', 'terms', field='Target.ID', size=1000
    ).metric(
        "first",
        "top_hits",
        _source=dict(include=["Target.Identifier.*",
                              "Target.Creator.Name",
                              "Target.Title"]),
        size=1)

    # Paginate the 'Target' buckets, most frequent targets first.
    pagination = {
        'from': start,
        'size': size,
        'sort': [{
            '_count': {
                'order': 'desc'
            }
        }]
    }
    search.aggs['Target'].bucket('pagination', 'bucket_sort', **pagination)
    return search, urlkwargs
Ejemplo n.º 7
0
def inspire_search_factory(self, search):
    """Apply the ``q`` request value to ``search`` via ``query_from_iq``.

    :param self: Not used by this function.
    :param search: Search instance exposing ``query_from_iq``.
    :returns: Tuple of the query string and the updated search instance
        (in that order).
    :raises InvalidQueryRESTError: if ``query_from_iq`` raises SyntaxError.
    """
    raw_query = request.values.get("q", "")

    try:
        updated_search = search.query_from_iq(raw_query)
    except SyntaxError as parse_error:
        LOGGER.warning("Failed parsing query",
                       query=request.values.get("q", ""))
        raise InvalidQueryRESTError() from parse_error

    return raw_query, updated_search
Ejemplo n.º 8
0
def circulation_search_factory(self, search, query_parser=None):
    """Parse query using elasticsearch DSL query.

    Users without back-office permission get an empty query replaced by
    ``patron_pid:<their id>``, and are rejected with HTTP 403 when their
    query starts with a ``patron_pid`` different from their own id.

    :param self: REST view.
    :param search: Elastic search DSL search instance.
    :param query_parser: Not used by this factory.
    :returns: Tuple with search instance and URL arguments.
    :raises InvalidQueryRESTError: if applying the query raises SyntaxError.
    """
    from invenio_records_rest.facets import default_facets_factory
    from invenio_records_rest.sorter import default_sorter_factory

    def _default_parser(qstr=None):
        """Return default parser that uses the Q() from elasticsearch_dsl."""
        if qstr:
            return Q('query_string', query=qstr)
        return Q()

    query_string = request.values.get('q', '')
    query = _default_parser(qstr=query_string)

    # if the logged in user is not librarian or admin, validate the query
    if not backoffice_permission().allows(g.identity):
        # patron can find only his loans
        if not query_string:
            # force query to be patron_pid:<logged in user>
            only_patron_loans = 'patron_pid:{}'.format(g.identity.id)
            query = _default_parser(qstr=only_patron_loans)
        else:
            # Capture the whole pid: with a single `\d` only the first digit
            # was compared, which rejected multi-digit patrons and accepted
            # other patrons sharing that first digit.
            # NOTE(review): only a `patron_pid:` at the very start of the
            # query is checked, and queries without it are not restricted
            # here - confirm this is enforced elsewhere.
            match = re.match(r"patron_pid:(?P<pid>\d+)", query_string)
            if match and match.group('pid') != str(g.identity.id):
                current_app.logger.debug(
                    "Search for `{0}` not allowed by `patron_pid:{1}`".format(
                        query_string, str(g.identity.id))
                )
                abort(403)

    try:
        search = search.query(query)
    except SyntaxError:
        current_app.logger.debug(
            "Failed parsing query: {0}".format(query_string), exc_info=True)
        raise InvalidQueryRESTError()

    search_index = search._index[0]
    search, urlkwargs = default_facets_factory(search, search_index)
    search, sortkwargs = default_sorter_factory(search, search_index)
    for key, value in sortkwargs.items():
        urlkwargs.add(key, value)

    # The URL arguments carry the query as sent by the client, not the
    # forced patron query.
    urlkwargs.add('q', query_string)
    return search, urlkwargs
def cap_search_factory(self, search, query_parser=None):
    """Build the search for a REST request, resolving keyword arguments.

    Request values whose names are keys of ``KEYWORD_TO_QUERY`` are handed
    to the query parser as keyword arguments next to the ``q`` value.

    :param self: REST view.
    :param search: Elastic search DSL search instance.
    :param query_parser: Optional callable taking the query string and the
        keyword arguments; a ``query_string`` based parser is used when
        omitted.
    :returns: Tuple with search instance and URL arguments.
    :raises InvalidQueryRESTError: if the parser raises SyntaxError.
    """
    def _keyword_aware_parser(qstr=None, **keywords):
        """Build the Q() for ``qstr`` combined with the keyword queries."""
        if qstr:
            # Characters found in ESCAPE_CHAR_MAP are replaced one by one.
            escaped = ''.join(ESCAPE_CHAR_MAP.get(ch, ch) for ch in qstr)
            query = Q('query_string',
                      query=escaped,
                      analyzer="lowercase_whitespace_analyzer",
                      analyze_wildcard=True,
                      default_operator='AND')
        else:
            query = Q()

        # 'True' adds the keyword's query, 'False' adds its negation and
        # any other value leaves the query untouched.
        for keyword, flag in keywords.items():
            if keyword not in KEYWORD_TO_QUERY:
                continue
            if flag == 'True':
                query = query & KEYWORD_TO_QUERY[keyword]()
            elif flag == 'False':
                query = query & ~KEYWORD_TO_QUERY[keyword]()

        return query

    raw_query = request.values.get('q')

    # Collect the known keywords present in the request values.
    query_keywords = {}
    for keyword in KEYWORD_TO_QUERY.keys():
        if keyword in request.values:
            query_keywords[keyword] = request.values[keyword]

    parse = query_parser if query_parser else _keyword_aware_parser

    try:
        parsed_query = parse(raw_query, **query_keywords)
        search = search.query(parsed_query)
    except SyntaxError:
        message = "Failed parsing query: {0}".format(
            request.values.get('q', ''))
        current_app.logger.debug(message, exc_info=True)
        raise InvalidQueryRESTError()

    index_name = search._index[0]
    search, urlkwargs = cap_facets_factory(search, index_name)
    search, sortkwargs = default_sorter_factory(search, index_name)
    for sort_key, sort_value in sortkwargs.items():
        urlkwargs.add(sort_key, sort_value)
    urlkwargs.add('q', raw_query)

    return search, urlkwargs
Ejemplo n.º 10
0
def perform_query(query_string, page, size):
    """Build an ``InspireQuery`` for ``query_string`` sliced to one page.

    :param query_string: Query text handed to ``InspireQuery``.
    :param page: Page number; the slice starts at ``(page - 1) * size``.
    :param size: Number of results per page.
    :return: Tuple of the sliced query and the URL arguments
        (``{'q': query_string}``).
    :raises InvalidQueryRESTError: if building the query raises SyntaxError.
    """
    try:
        query = InspireQuery(query_string)[(page - 1) * size:page * size]
    except SyntaxError:
        # Log the string that was actually parsed rather than re-reading
        # the request, so the message matches the failure and the handler
        # does not depend on a request being available.
        current_app.logger.debug(
            "Failed parsing query: {0}".format(query_string),
            exc_info=True)
        raise InvalidQueryRESTError()
    return query, {'q': query_string}
Ejemplo n.º 11
0
def and_search_factory(self, search, query_parser=None):
    """Build the search with ``AND`` as the default query operator.

    :param self: REST view.
    :param search: Elastic search DSL search instance.
    :param query_parser: Optional callable turning the ``q`` request value
        into a query; a ``query_string`` parser with ``AND`` as default
        operator is used when omitted.
    :returns: Tuple with search instance and URL arguments.
    :raises InvalidQueryRESTError: if the parser raises SyntaxError.
    """
    from invenio_records_rest.facets import default_facets_factory
    from invenio_records_rest.sorter import default_sorter_factory

    def _and_parser(text=None):
        """Wrap ``text`` in an AND ``query_string`` query, or ``Q()``."""
        if not text:
            return Q()
        return Q('query_string', query=text, default_operator='AND')

    raw_query = request.values.get('q')
    parse = query_parser if query_parser else _and_parser

    try:
        parsed_query = parse(raw_query)
        search = search.query(parsed_query)
    except SyntaxError:
        current_app.logger.debug(
            f'Failed parsing query: {request.values.get("q", "")}',
            exc_info=True,
        )
        raise InvalidQueryRESTError()

    index_name = search._index[0]
    search, urlkwargs = default_facets_factory(search, index_name)
    search, sortkwargs = default_sorter_factory(search, index_name)
    for sort_key, sort_value in sortkwargs.items():
        urlkwargs.add(sort_key, sort_value)
    urlkwargs.add('q', raw_query)

    # Records having a 'deleted' field are filtered out unless the
    # 'deleted' request argument is set.
    if not request.args.get('deleted'):
        has_deleted_field = Q('exists', field='deleted')
        search = search.filter('bool', must_not=[has_deleted_field])

    return search, urlkwargs
Ejemplo n.º 12
0
def cernopendata_search_factory(self, search, query_parser=None):
    """Build the search for a REST request, hiding on-demand results.

    Unless the ``ondemand`` request value is set, results whose
    ``availability.keyword`` matches ``ondemand`` are excluded by the
    default parser.

    :param self: REST view.
    :param search: Elastic search DSL search instance.
    :param query_parser: Optional callable taking the query string and the
        ``ondemand`` value; the default parser is used when omitted.
    :returns: Tuple with search instance and URL arguments.
    :raises InvalidQueryRESTError: if the parser raises SyntaxError.
    """
    def _hide_ondemand_parser(qstr=None, ondemand=None):
        """Build the Q() for ``qstr``, excluding on-demand by default."""
        if qstr:
            base_query = Q('query_string', query=qstr)
        else:
            base_query = Q()

        if ondemand:
            return base_query

        # by default hide ondemand ones
        not_ondemand = ~Q('match', **{'availability.keyword': 'ondemand'})
        return base_query & not_ondemand

    raw_query = request.values.get('q')
    show_ondemand = request.values.get('ondemand', False)  # workaround
    parse = query_parser if query_parser else _hide_ondemand_parser

    try:
        parsed_query = parse(raw_query, show_ondemand)
        search = search.query(parsed_query)
    except SyntaxError:
        message = "Failed parsing query: {0}".format(
            request.values.get('q', ''))
        current_app.logger.debug(message, exc_info=True)
        raise InvalidQueryRESTError()

    index_name = search._index[0]
    search, urlkwargs = cernopendata_facets_factory(search, index_name)
    search, sortkwargs = default_sorter_factory(search, index_name)
    for sort_key, sort_value in sortkwargs.items():
        urlkwargs.add(sort_key, sort_value)
    urlkwargs.add('q', raw_query)

    return search, urlkwargs
Ejemplo n.º 13
0
def deposit_search_factory(self, search):
    """Build the search for a REST request using the deposit facets.

    Same flow as the default search factory, except that
    ``deposit_facets_factory`` provides the facets.

    :param self: REST view.
    :param search: Elastic search DSL search instance.
    :returns: Tuple with search instance and URL arguments.
    :raises InvalidQueryRESTError: if applying the query raises SyntaxError.
    """
    from invenio_records_rest.sorter import default_sorter_factory

    raw_query = request.values.get('q', '')
    if raw_query:
        parsed_query = Q('query_string', query=raw_query)
    else:
        parsed_query = Q()

    try:
        search = search.query(parsed_query)
    except SyntaxError:
        message = "Failed parsing query: {0}".format(
            request.values.get('q', ''))
        current_app.logger.debug(message, exc_info=True)
        raise InvalidQueryRESTError()

    index_name = search._index[0]
    search, urlkwargs = deposit_facets_factory(search, index_name)
    search, sortkwargs = default_sorter_factory(search, index_name)
    for sort_key, sort_value in sortkwargs.items():
        urlkwargs.add(sort_key, sort_value)
    urlkwargs.add('q', raw_query)

    return search, urlkwargs
Ejemplo n.º 14
0
def item_search_factory(self,
                        search,
                        start_date,
                        end_date,
                        list_index_id=None):
    """Build the opensearch query for items published in a date range.

    :param self: Not used by this function.
    :param search: Record Search's instance
    :param start_date: Start date for search
    :param end_date: End date for search
    :param list_index_id: index tree list or None
    :return: Tuple with the updated search instance and an empty
        ``MultiDict`` of URL arguments.
    :raises InvalidQueryRESTError: if updating the search raises
        SyntaxError.
    """
    def _build_body(start_term, end_term, indexes):
        """Return the request body for the date range and index paths."""
        doc_type = current_app.config["INDEXER_DEFAULT_DOC_TYPE"]
        query_string = (
            "_type:{} AND "
            "relation_version_is_last:true AND "
            "publish_status:0 AND "
            "publish_date:[{} TO {}]"
        ).format(doc_type, start_term, end_term)

        # One wildcard clause per index; none when no indexes are given.
        path_clauses = [
            {"wildcard": {"path": "*{}*".format(index)}}
            for index in (indexes or [])
        ]

        text_clause = {"query_string": {"query": query_string}}
        path_clause = {"bool": {"should": path_clauses}}
        return {
            "size": 10000,
            "query": {"bool": {"must": [text_clause, path_clause]}},
            "sort": [{"publish_date": {"order": "desc"}}],
        }

    query_q = _build_body(start_date, end_date, list_index_id)
    urlkwargs = MultiDict()

    try:
        # Re-apply the extra parameters that were set before the search
        # was updated from the dictionary.
        previous_extra = search._extra.copy()
        search.update_from_dict(query_q)
        search._extra.update(previous_extra)
    except SyntaxError:
        message = "Failed parsing query: {0}".format(query_q)
        current_app.logger.debug(message, exc_info=True)
        raise InvalidQueryRESTError()

    return search, urlkwargs
Ejemplo n.º 15
0
def item_path_search_factory(search, index_id="0"):
    """Parse query using Weko-Query-Parser.

    Builds an index (path) search body for ``index_id`` and loads it into
    ``search`` in place, preserving the extra parameters already set on it.

    :param search: Elastic search DSL search instance.
    :param index_id: Index Identifier contains item's path
    :returns: The updated search instance.
    :raises InvalidQueryRESTError: if updating the search raises
        SyntaxError.
    """
    def _get_index_search_query():
        """Get index search query."""
        query_q = {
            "from": 0,
            "size": 10000,
            "_source": {
                "excludes": ["content", "_item_metadata"]
            },
            "query": {
                "bool": {
                    "must": [{
                        "match": {
                            "path.tree": "@index"
                        }
                    }, {
                        "match": {
                            "relation_version_is_last": "true"
                        }
                    }]
                }
            },
            "post_filter": {
                "bool": {
                    "must": [{
                        "match": {
                            "publish_status": "0"
                        }
                    }, {
                        "range": {
                            "publish_date": {
                                "lte": "now/d"
                            }
                        }
                    }]
                }
            }
        }

        q = str(index_id)
        is_root = q == str(current_app.config.get("WEKO_ROOT_INDEX",
                                                  WEKO_ROOT_INDEX))

        # Both the root and the non-root case restrict the post filter to
        # the paths returned for this index.
        list_path = Indexes.get_list_path_publish(index_id)
        query_q['post_filter']['bool']['must'].append(
            {"terms": {
                "path": list_path
            }})

        if not is_root:
            # Substitute the "@index" placeholder with the index's own
            # path. This stays best-effort (the placeholder is kept on
            # failure), but only ordinary exceptions are swallowed so that
            # KeyboardInterrupt/SystemExit still propagate.
            try:
                fp = Indexes.get_self_path(q)
                query_q = json.loads(
                    json.dumps(query_q).replace("@index", fp.path))
            except Exception:
                current_app.logger.debug(
                    "Failed resolving index path for: {0}".format(q),
                    exc_info=True)
        else:
            child_list = Indexes.get_child_list(q)
            if child_list:
                # For the root index, match any of the child index paths
                # instead of the placeholder.
                wild_card = [{"wildcard": {"path.tree": item.cid}}
                             for item in child_list]
                query_q['query']['bool']['must'] = [{
                    "bool": {
                        "should": wild_card
                    }
                }, {
                    "match": {
                        "relation_version_is_last": "true"
                    }
                }]
        return query_q

    # create a index search query
    query_q = _get_index_search_query()
    try:
        # Re-apply the extra parameters that were set before the search
        # was updated from the dictionary.
        extr = search._extra.copy()
        search.update_from_dict(query_q)
        search._extra.update(extr)
    except SyntaxError:
        current_app.logger.debug("Failed parsing query: {0}".format(query_q),
                                 exc_info=True)
        raise InvalidQueryRESTError()

    return search
Ejemplo n.º 16
0
def item_path_search_factory(self, search, index_id=None):
    """Parse query using Weko-Query-Parser.

    Builds an index (path) search body with per-path aggregations, loads
    it into ``search`` in place and applies the default sorter. The index
    is taken from ``index_id`` or, when that is ``None``, from the ``q``
    request value.

    :param self: REST view.
    :param search: Elastic search DSL search instance.
    :param index_id: Index identifier; the ``q`` request value is used when
        ``None``.
    :returns: Tuple with search instance and URL arguments.
    :raises InvalidQueryRESTError: if updating the search raises
        SyntaxError.
    """
    def _get_index_search_query():
        """Build the search body for the requested index."""
        query_q = {
            "_source": {
                "exclude": ['content']
            },
            "query": {
                "match": {
                    "path.tree": "@index"
                }
            },
            "aggs": {
                "path": {
                    "terms": {
                        "field": "path.tree",
                        "include": "@index|@index/[^/]+"
                    },
                    "aggs": {
                        "date_range": {
                            "filter": {
                                "match": {
                                    "publish_status": "0"
                                }
                            },
                            "aggs": {
                                "available": {
                                    "range": {
                                        "field":
                                        "publish_date",
                                        "ranges": [{
                                            "from": "now+1d/d"
                                        }, {
                                            "to": "now+1d/d"
                                        }]
                                    },
                                }
                            }
                        },
                        "no_available": {
                            "filter": {
                                "bool": {
                                    "must_not": [{
                                        "match": {
                                            "publish_status": "0"
                                        }
                                    }]
                                }
                            }
                        }
                    }
                }
            },
            "post_filter": {
                "term": {
                    "path": "@index"
                }
            }
        }

        # add item type aggs
        query_q['aggs']['path']['aggs']. \
            update(get_item_type_aggs(search._index[0]))

        # Combine the path term of the post filter with the permission
        # filter, when there is one.
        mut = get_permission_filter()
        if mut:
            mut = [permission.to_dict() for permission in mut]
            post_filter = query_q['post_filter']
            if mut[0].get('bool'):
                post_filter['bool'] = {
                    'must': [{
                        'term': post_filter.pop('term')
                    }, mut[0]['bool']['must'][0]],
                    'should':
                    mut[0]['bool']['should']
                }
            else:
                mut.append({'term': post_filter.pop('term')})
                post_filter['bool'] = {'must': mut}

        # create search query
        q = request.values.get('q') if index_id is None else index_id
        if q:
            # Substitute the "@index" placeholder with the index's own
            # path. This stays best-effort (the placeholder is kept on
            # failure), but only ordinary exceptions are swallowed so that
            # KeyboardInterrupt/SystemExit still propagate.
            try:
                fp = Indexes.get_self_path(q)
                if fp:
                    query_q = json.loads(
                        json.dumps(query_q).replace("@index", fp.path))
            except Exception:
                current_app.logger.debug(
                    "Failed resolving index path for: {0}".format(q),
                    exc_info=True)
        return query_q

    # create a index search query
    query_q = _get_index_search_query()

    urlkwargs = MultiDict()
    try:
        # Re-apply the extra parameters that were set before the search
        # was updated from the dictionary.
        extr = search._extra.copy()
        search.update_from_dict(query_q)
        search._extra.update(extr)
    except SyntaxError:
        q = request.values.get('q', '') if index_id is None else index_id
        current_app.logger.debug("Failed parsing query: {0}".format(q),
                                 exc_info=True)
        raise InvalidQueryRESTError()

    from invenio_records_rest.sorter import default_sorter_factory
    search_index = search._index[0]
    search, sortkwargs = default_sorter_factory(search, search_index)
    for key, value in sortkwargs.items():
        urlkwargs.add(key, value)

    # NOTE(review): the whole query dictionary, not the query string, is
    # stored under 'q' - kept as is; confirm this is what callers expect.
    urlkwargs.add('q', query_q)
    return search, urlkwargs
Ejemplo n.º 17
0
def search_factory(self, search, query_parser=None):
    """Parse query using elasticsearch DSL query.

    Terms defined by: RERO_ILS_QUERY_BOOSTING will be boosted
    at the query level.

    :param self: REST view.
    :param search: Elastic search DSL search instance.
    :param query_parser: a specific query parser; it is called with the
        query string and the list of boosted fields.
    :return: Tuple with search instance and URL arguments.
    :raises InvalidQueryRESTError: if the parser raises SyntaxError.
    """
    def _default_parser(qstr=None, query_boosting=None):
        """Default parser that uses the Q() from elasticsearch_dsl."""
        query_type = 'query_string'
        default_operator = 'OR'
        # The 'simple' request argument switches to simple_query_string
        # with AND as the default operator.
        if request.args.get('simple'):
            query_type = 'simple_query_string'
            default_operator = 'AND'

        if not qstr:
            return Q()

        # TODO: remove this bad hack
        qstr = _PUNCTUATION_REGEX.sub(' ', qstr)
        # Raw string: '\s' in a plain literal is an invalid escape sequence.
        qstr = re.sub(r'\s+', ' ', qstr).rstrip()

        plain_query = Q(query_type,
                        query=qstr,
                        default_operator=default_operator)
        if not query_boosting:
            return plain_query

        # Either the boosted-fields query or the plain query may match.
        boosted_query = Q(query_type,
                          query=qstr,
                          boost=2,
                          fields=query_boosting,
                          default_operator=default_operator)
        return Q('bool', should=[boosted_query, plain_query])

    def _boosting_parser(query_boosting, search_index):
        """Elasticsearch boosting fields parser."""
        if search_index not in query_boosting:
            return []
        return [
            '{}^{}'.format(field, boost)
            for field, boost in query_boosting[search_index].items()
        ]

    from invenio_records_rest.facets import default_facets_factory
    from invenio_records_rest.sorter import default_sorter_factory

    query_string = request.values.get('q')
    display_score = request.values.get('display_score')
    if display_score:
        search = search.extra(explain=True)
    query_parser = query_parser or _default_parser

    search_index = search._index[0]
    query_boosting = _boosting_parser(
        current_app.config['RERO_ILS_QUERY_BOOSTING'], search_index)

    try:
        search = search.query(query_parser(query_string, query_boosting))
    except SyntaxError:
        current_app.logger.debug(
            'Failed parsing query: {0}'.format(request.values.get('q', '')),
            exc_info=True,
        )
        raise InvalidQueryRESTError()

    search, urlkwargs = default_facets_factory(search, search_index)
    # i18n translated facets
    search = i18n_facets_factory(search, search_index)
    search, sortkwargs = default_sorter_factory(search, search_index)
    for key, value in sortkwargs.items():
        urlkwargs.add(key, value)
    urlkwargs.add('q', query_string)
    return search, urlkwargs
Ejemplo n.º 18
0
Archivo: query.py Proyecto: mhaya/weko
def default_search_factory(self, search, query_parser=None, search_type=None):
    """Parse query using Weko-Query-Parser. MetaData Search.

    The search type decides which query is built: a value containing
    ``'0'`` runs the full-text parser (``query_parser`` or the built-in
    default), anything else runs the simple keyword search. The resulting
    query is applied to ``search`` as a filter, the ``content`` field is
    excluded from ``_source``, and facets and sorting are added.

    :param self: REST view.
    :param search: Elastic search DSL search instance.
    :param query_parser: Query parser. (Default: ``None``) It is called as
        ``query_parser(qs)``, or as ``query_parser(community_id, qs)`` when
        the request carries a ``community`` value.
    :param search_type: Search type. Read from the request value
        ``search_type`` when ``None``.
    :returns: Tuple with search instance and URL arguments.
    :raises InvalidQueryRESTError: If applying the query raises
        ``SyntaxError``.
    """
    def _match_and(field, text):
        """Return a ``match`` query on ``field`` with the ``and`` operator."""
        return Q('match', **{field: dict(operator="and", query=text)})

    def _get_search_qs_query(qs=None):
        """Qs of search bar keywords for detail simple search.

        :param qs: Query string.
        :return: ``query_string`` query, or ``None`` when ``qs`` is empty.
        """
        if not qs:
            return None
        return Q('query_string',
                 query=qs,
                 default_operator='and',
                 fields=['search_*', 'search_*.ja'])

    def _get_detail_keywords_query():
        """Build the detail-search queries configured for this request.

        The configuration ``WEKO_SEARCH_KEYWORDS_DICT`` is read for its
        ``string``, ``nested`` and ``date`` sections; each entry is matched
        against the request values of the same name.

        :return: List of queries (possibly empty). Any exception raised
            while building is logged and the queries collected so far are
            returned.
        """
        def _get_keywords_query(k, v):
            """Build the query for request value ``k`` from its mapping ``v``.

            ``v`` may be a field name (str), a list of fields, a dict of
            ``field -> list of allowed values`` or a tuple of
            ``(field, converter, ...)``.
            """
            kv = request.values.get(k)
            if not kv:
                return None

            if isinstance(v, str):
                return _match_and(v, kv)

            if isinstance(v, list):
                return Q('multi_match',
                         query=kv,
                         type='most_fields',
                         minimum_should_match='75%',
                         operator='and',
                         fields=v)

            if isinstance(v, dict):
                tokens = kv.split(',')
                for key, vlst in v.items():
                    if not isinstance(vlst, list):
                        continue
                    # Decimal tokens are indexes into the allowed values...
                    picked = [
                        vlst[int(x)] for x in tokens
                        if x.isdecimal() and int(x) < len(vlst)
                    ]
                    # ...any other token must itself be an allowed value.
                    picked.extend(
                        x for x in tokens
                        if not x.isdecimal() and x in vlst)
                    shud = [_match_and(key, j) for j in picked]
                    if shud:
                        # Only the first field yielding a match is used.
                        return Q('bool', should=shud)
                return None

            if isinstance(v, tuple) and len(v) >= 2:
                # v[1] converts each comma separated token to the term value.
                shud = [
                    Q('term', **{v[0]: v[1](x)}) for x in kv.split(',')
                ]
                return Q('bool', should=shud) if shud else None

            return None

        def _get_nested_query(k, v):
            """Build the nested query for text value ``k``.

            ``v`` is expected to be a tuple ``(path, attrs)`` where ``attrs``
            maps request keys to attribute dictionaries; any other shape
            yields ``None``.
            """
            # text value
            kv = request.values.get(k)
            if not kv:
                return None
            if not (isinstance(v, tuple) and len(v) > 1
                    and isinstance(v[1], dict)):
                return None

            shuld = []
            # attr keyword in request url
            for attr_key, attr_obj in v[1].items():
                attr_val_str = request.values.get(attr_key)
                if not (isinstance(attr_obj, dict) and attr_val_str):
                    continue

                if isinstance(v[0], str) and not len(v[0]):
                    # For ID search
                    for key in attr_val_str.split(','):
                        attr = attr_obj.get(key)
                        if isinstance(attr, tuple):
                            attr = [attr]
                        if not isinstance(attr, list):
                            continue

                        for alst in attr:
                            if not isinstance(alst, tuple):
                                continue
                            val_attr_lst = alst[1].split('=')
                            mut = [_match_and(alst[0] + ".value", kv)]
                            # "<attr>=*" means any attribute value, so no
                            # term restriction is added for it.
                            if '=*' not in alst[1]:
                                name = alst[0] + "." + val_attr_lst[0]
                                mut.append(
                                    Q('term', **{name: val_attr_lst[1]}))
                            shuld.append(
                                Q('nested',
                                  path=alst[0],
                                  query=Q('bool', must=mut)))
                else:
                    attr_key_hit = [
                        x for x in attr_obj.keys() if v[0] + "." in x
                    ]
                    if not attr_key_hit:
                        continue
                    vlst = attr_obj.get(attr_key_hit[0])
                    if not isinstance(vlst, list):
                        continue
                    attr_val = [
                        x for x in attr_val_str.split(',')
                        if x.isdecimal() and int(x) < len(vlst)
                    ]
                    if not attr_val:
                        continue

                    # The text must match the nested value; the selected
                    # attribute values are alternatives. The match query is
                    # kept in its own variable so the term queries below
                    # cannot overwrite it.
                    qm = _match_and(v[0] + ".value", kv)
                    shud = [
                        Q('term', **{attr_key_hit[0]: vlst[int(n)]})
                        for n in attr_val
                    ]
                    shuld.append(
                        Q('nested',
                          path=v[0],
                          query=Q('bool', should=shud, must=[qm])))

            return Q('bool', should=shuld) if shuld else None

        def _get_date_query(k, v):
            """Build the range query for the date pair belonging to ``k``.

            ``v`` is expected to be a list ``[(from_suffix, to_suffix),
            target]``; the request values ``<k>_<from_suffix>`` and
            ``<k>_<to_suffix>`` must both be present in ``%Y%m%d`` format.
            ``target`` is either a field name or a tuple ``(path, attrs)``
            for a nested date.
            """
            if not (isinstance(v, list) and len(v) >= 2):
                return None

            date_from = request.values.get(k + "_" + v[0][0])
            date_to = request.values.get(k + "_" + v[0][1])
            if not date_from or not date_to:
                return None

            date_from = datetime.strptime(date_from,
                                          '%Y%m%d').strftime('%Y-%m-%d')
            date_to = datetime.strptime(date_to,
                                        '%Y%m%d').strftime('%Y-%m-%d')
            qv = dict(gte=date_from, lte=date_to)

            qry = None
            if isinstance(v[1], str):
                qry = Q('range', **{v[1]: qv})
            elif isinstance(v[1], tuple) and len(v[1]) >= 2:
                path = v[1][0]
                dt = v[1][1]
                if isinstance(dt, dict):
                    for attr_key, attr_obj in dt.items():
                        attr_val_str = request.values.get(attr_key)
                        if not (isinstance(attr_obj, dict) and attr_val_str):
                            continue
                        attr_key_hit = [
                            x for x in attr_obj.keys() if path + "." in x
                        ]
                        if not attr_key_hit:
                            continue
                        vlst = attr_obj.get(attr_key_hit[0])
                        if not isinstance(vlst, list):
                            continue
                        # An invalid index raises here; the caller's
                        # try/except logs it.
                        shud = [
                            Q('term', **{attr_key_hit[0]: vlst[int(n)]})
                            for n in attr_val_str.split(',')
                        ]
                        qry = Q('nested',
                                path=path,
                                query=Q('bool',
                                        should=shud,
                                        must=[
                                            Q('range',
                                              **{path + ".value": qv})
                                        ]))
            return qry

        kwd = current_app.config['WEKO_SEARCH_KEYWORDS_DICT']
        ks = kwd.get('string')
        kd = kwd.get('date')
        kn = kwd.get('nested')

        mut = []
        try:
            for k, v in ks.items():
                qy = _get_keywords_query(k, v)
                if qy:
                    mut.append(qy)

            for k, v in kn.items():
                qy = _get_nested_query(k, v)
                if qy:
                    mut.append(qy)

            for k, v in kd.items():
                qy = _get_date_query(k, v)
                if qy:
                    mut.append(qy)
        except Exception as e:
            # Best effort: keep whatever was built before the failure.
            current_app.logger.exception(
                'Detail search query parser failed. err:{0}'.format(e))
        return mut

    def _build_simple_query(mt, qs):
        """Combine permission filters ``mt`` with the simple search parts."""
        q = _get_search_qs_query(qs)
        if q:
            mt.append(q)
        mt.extend(_get_detail_keywords_query())
        return Q('bool', must=mt) if mt else Q()

    def _build_default_query(mt, qstr):
        """Combine permission filters ``mt`` with detail or full-text parts.

        When detail keywords are present the query string is applied as a
        ``query_string`` query; otherwise it is applied as a full-text
        ``multi_match``.
        """
        # multi keywords search filter
        kmt = _get_detail_keywords_query()
        if kmt:
            # detail search
            mt.extend(kmt)
            q = _get_search_qs_query(qstr)
            if q:
                mt.append(q)
        elif qstr:
            # Full Text Search
            mt.append(
                Q('multi_match',
                  query=qstr,
                  operator='and',
                  fields=[
                      'content.file.content^1.5',
                      'content.file.content.ja^1.2', '_all',
                      'search_string'
                  ],
                  type='most_fields',
                  minimum_should_match='75%'))
        return Q('bool', must=mt) if mt else Q()

    def _get_simple_search_query(qs=None):
        """Query parser for simple search.

        :param qs: Query string.
        :return: Query parser.
        """
        # add  Permission filter by publish date and status
        return _build_simple_query(get_permission_filter(), qs)

    def _get_simple_search_community_query(community_id, qs=None):
        """Query parser for simple search inside a community.

        :param community_id: Id of the community whose root node restricts
            the permission filter.
        :param qs: Query string.
        :return: Query parser.
        """
        # add  Permission filter by publish date and status
        comm = Community.get(community_id)
        return _build_simple_query(
            get_permission_filter(comm.root_node_id), qs)

    def _default_parser(qstr=None):
        """Default parser that uses the Q() from elasticsearch_dsl.

           Full text Search.
           Detail Search.

        :param qstr: Query string.
        :returns: Query parser.
        """
        # add  Permission filter by publish date and status
        return _build_default_query(get_permission_filter(), qstr)

    def _default_parser_community(community_id, qstr=None):
        """Default parser for a community, using Q() from elasticsearch_dsl.

           Full text Search.
           Detail Search.

        :param community_id: Id of the community whose root node restricts
            the permission filter.
        :param qstr: Query string.
        :returns: Query parser.
        """
        # add  Permission filter by publish date and status
        comm = Community.get(community_id)
        return _build_default_query(
            get_permission_filter(comm.root_node_id), qstr)

    from invenio_records_rest.facets import default_facets_factory
    from invenio_records_rest.sorter import default_sorter_factory

    # community used by curation (provisional) requests
    comm_ide = request.values.get('provisional_communities')
    # community used by simple search
    comm_id_simple = request.values.get('community')
    if comm_id_simple is not None:
        query_parser = query_parser or _default_parser_community
    else:
        query_parser = query_parser or _default_parser

    if search_type is None:
        search_type = request.values.get('search_type')

    qs = request.values.get('q')

    if search_type and '0' in search_type:
        # full text search
        if comm_id_simple is not None:
            query_q = query_parser(comm_id_simple, qs)
        else:
            query_q = query_parser(qs)
    else:
        # simple search
        if comm_ide is not None:
            query_q = _get_simple_search_community_query(comm_ide, qs)
        elif comm_id_simple is not None:
            query_q = _get_simple_search_community_query(comm_id_simple, qs)
        else:
            query_q = _get_simple_search_query(qs)

    # Do not return the (large) file content with the hits.
    src = {'_source': {'excludes': ['content']}}
    search._extra.update(src)

    try:
        search = search.filter(query_q)
    except SyntaxError:
        current_app.logger.debug("Failed parsing query: {0}".format(
            request.values.get('q', '')),
                                 exc_info=True)
        raise InvalidQueryRESTError()

    search_index = search._index[0]
    search, urlkwargs = default_facets_factory(search, search_index)
    search, sortkwargs = default_sorter_factory(search, search_index)

    for key, value in sortkwargs.items():
        urlkwargs.add(key, value)

    # default sort
    if not sortkwargs:
        sort_key, sort = SearchSetting.get_default_sort(
            current_app.config['WEKO_SEARCH_TYPE_KEYWORD'])
        sort_obj = dict()
        key_fileds = SearchSetting.get_sort_key(sort_key)
        if sort == 'desc':
            sort_obj[key_fileds] = dict(order='desc')
            sort_key = '-' + sort_key
        else:
            sort_obj[key_fileds] = dict(order='asc')
        search._sort.append(sort_obj)
        urlkwargs.add('sort', sort_key)

    # NOTE(review): the built query object, not the raw query string, is
    # stored under 'q' - confirm that consumers of urlkwargs expect this.
    urlkwargs.add('q', query_q)
    return search, urlkwargs
Ejemplo n.º 19
0
Archivo: query.py Proyecto: mhaya/weko
def item_path_search_factory(self, search, index_id=None):
    """Parse query using Weko-Query-Parser.

    Replaces the body of ``search`` with an index-tree query: items of the
    requested index that are the latest version, aggregated per child path,
    with the permission filter applied as ``post_filter``. Sorting comes
    from the request or, when absent, from the configured default.

    :param self: REST view.
    :param search: Elastic search DSL search instance.
    :param index_id: Index to search. The request value ``q`` is used when
        ``None``.
    :returns: Tuple with search instance and URL arguments.
    :raises InvalidQueryRESTError: If loading the query into ``search``
        raises ``SyntaxError``.
    """
    def _get_index_search_query():
        """Build the raw query dict for the index search."""
        # "@index" and "@count" are placeholders substituted below.
        query_q = {
            "_source": {
                "excludes": ['content']
            },
            "query": {
                "bool": {
                    "must": [{
                        "match": {
                            "path.tree": "@index"
                        }
                    }, {
                        "match": {
                            "relation_version_is_last": "true"
                        }
                    }]
                }
            },
            "aggs": {
                "path": {
                    "terms": {
                        "field": "path.tree",
                        "include": "@index|@index/[^/]+",
                        "size": "@count"
                    },
                    "aggs": {
                        "date_range": {
                            "filter": {
                                "match": {
                                    "publish_status": "0"
                                }
                            },
                            "aggs": {
                                "available": {
                                    "range": {
                                        "field":
                                        "publish_date",
                                        "ranges": [{
                                            "from": "now+1d/d"
                                        }, {
                                            "to": "now+1d/d"
                                        }]
                                    },
                                }
                            }
                        },
                        "no_available": {
                            "filter": {
                                "bool": {
                                    "must_not": [{
                                        "match": {
                                            "publish_status": "0"
                                        }
                                    }]
                                }
                            }
                        }
                    }
                }
            },
            "post_filter": {}
        }

        # add item type aggs
        query_q['aggs']['path']['aggs']. \
            update(get_item_type_aggs(search._index[0]))

        q = request.values.get('q') if index_id is None else index_id
        if q:
            mut = get_permission_filter(q)
        else:
            mut = get_permission_filter()
        if mut:
            mut = [x.to_dict() for x in mut]
            post_filter = query_q['post_filter']
            if mut[0].get('bool'):
                post_filter['bool'] = mut[0]['bool']
            else:
                post_filter['bool'] = {'must': mut}

        # create search query
        if q:
            try:
                fp = Indexes.get_self_path(q)
                if fp:
                    # Rebind only after a successful round trip so a failed
                    # substitution cannot leave query_q as a string.
                    query_q = json.loads(
                        json.dumps(query_q).replace("@index", fp.path))
            except Exception:
                # Best effort: keep the unsubstituted query, but leave a
                # trace instead of failing silently.
                current_app.logger.debug(
                    "Failed resolving index path: {0}".format(q),
                    exc_info=True)

        query_q = json.dumps(query_q).replace("@count",
                                              str(Indexes.get_index_count()))
        query_q = json.loads(query_q)

        return query_q

    # create a index search query
    query_q = _get_index_search_query()
    urlkwargs = MultiDict()
    try:
        # Aggregations. update_from_dict is followed by re-applying the
        # extra parameters that were set on the search beforehand.
        extr = search._extra.copy()
        search.update_from_dict(query_q)
        search._extra.update(extr)
    except SyntaxError:
        q = request.values.get('q', '') if index_id is None else index_id
        current_app.logger.debug("Failed parsing query: {0}".format(q),
                                 exc_info=True)
        raise InvalidQueryRESTError()

    from invenio_records_rest.sorter import default_sorter_factory
    search_index = search._index[0]
    search, sortkwargs = default_sorter_factory(search, search_index)

    for key, value in sortkwargs.items():
        # set custom sort option
        if 'custom_sort' in value:
            ind_id = request.values.get('q', '')
            # Exactly 'custom_sort' means ascending; any other value
            # containing it is treated as descending.
            order = 'asc' if value == 'custom_sort' else 'desc'
            script_str, default_sort = SearchSetting.get_custom_sort(
                ind_id, order)

            search._sort = []
            search._sort.append(script_str)
            search._sort.append(default_sort)

        # set selectbox
        urlkwargs.add(key, value)

    # default sort
    if not sortkwargs:
        sort_key, sort = SearchSetting.get_default_sort(
            current_app.config['WEKO_SEARCH_TYPE_INDEX'])
        sort_obj = dict()
        key_fileds = SearchSetting.get_sort_key(sort_key)
        if 'custom_sort' not in sort_key:
            if sort == 'desc':
                sort_obj[key_fileds] = dict(order='desc')
                sort_key = '-' + sort_key
            else:
                sort_obj[key_fileds] = dict(order='asc')
            search._sort.append(sort_obj)
        else:
            # Resolve the index id for both directions; it was previously
            # assigned only in the descending branch, which left it unbound
            # for ascending custom sort.
            ind_id = request.values.get('q', '')
            if sort == 'desc':
                script_str, default_sort = SearchSetting.get_custom_sort(
                    ind_id, 'desc')
                sort_key = '-' + sort_key
            else:
                script_str, default_sort = SearchSetting.get_custom_sort(
                    ind_id, 'asc')

            search._sort = []
            search._sort.append(script_str)
            search._sort.append(default_sort)

        urlkwargs.add('sort', sort_key)

    # NOTE(review): the query dict, not the raw query string, is stored
    # under 'q' - confirm that consumers of urlkwargs expect this.
    urlkwargs.add('q', query_q)

    return search, urlkwargs
Ejemplo n.º 20
0
def feedback_email_search_factory(self, search):
    """Factory for search feedback email list.

    Loads into ``search`` a hit-less (``size`` 0) query for latest-version
    documents of the default doc type that have a
    ``feedback_mail_list.email`` value, with a nested terms aggregation
    over those addresses.

    :param self: REST view (unused).
    :param search: Elastic search DSL search instance; updated in place.
    :return: The same ``search`` instance.
    :raises InvalidQueryRESTError: If loading the query raises
        ``SyntaxError``.
    """
    doc_type = current_app.config['INDEXER_DEFAULT_DOC_TYPE']
    record_filter = \
        "_type:{} AND relation_version_is_last:true ".format(doc_type)

    # Documents must carry at least one feedback mail address.
    has_email = {
        "nested": {
            "path": "feedback_mail_list",
            "query": {
                "bool": {
                    "must": [{
                        "exists": {
                            "field": "feedback_mail_list.email"
                        }
                    }]
                }
            }
        }
    }

    # One bucket per address, each with its top hits.
    email_buckets = {
        "terms": {
            "field": "feedback_mail_list.email",
            "size": config.WEKO_SEARCH_MAX_FEEDBACK_MAIL
        },
        "aggs": {
            "top_tag_hits": {
                "top_hits": {}
            }
        }
    }

    body = {
        "size": 0,
        "query": {
            "bool": {
                "must": [
                    has_email,
                    {
                        "query_string": {
                            "query": record_filter
                        }
                    },
                ]
            }
        },
        "aggs": {
            "feedback_mail_list": {
                "nested": {
                    "path": "feedback_mail_list"
                },
                "aggs": {
                    "email_list": email_buckets
                }
            }
        }
    }

    try:
        # Re-apply the extra parameters that were set before the update.
        saved_extra = search._extra.copy()
        search.update_from_dict(body)
        search._extra.update(saved_extra)
    except SyntaxError:
        current_app.logger.debug("Failed parsing query: {0}".format(body),
                                 exc_info=True)
        raise InvalidQueryRESTError()

    return search