Esempio n. 1
0
def test_default_facets_factory(app):
    """Exercise ``default_facets_factory`` with and without facet config.

    A facet definition is registered for the ``testidx`` index only. For
    that index the search body must carry the configured aggregations, a
    ``post_filter`` entry and a ``subtype`` terms filter. For an index
    with no definition none of these may be present.
    """
    facet_defs = {
        'aggs': {
            'type': {'terms': {'field': 'upload_type'}},
            'subtype': {'terms': {'field': 'subtype'}},
        },
        'filters': {'subtype': terms_filter('subtype')},
        'post_filters': {'type': terms_filter('type')},
    }
    app.config['RECORDS_REST_FACETS']['testidx'] = facet_defs

    with app.test_request_context('?type=a&subtype=b'):
        # Index that has a facet definition.
        base = Search().query(Q(query='value'))
        faceted, _ = default_facets_factory(base, 'testidx')
        body = faceted.to_dict()
        assert body['aggs'] == facet_defs['aggs']
        assert 'post_filter' in body
        assert body['query']['bool']['filter'][0]['terms']['subtype']

        # Index that has no facet definition.
        base = Search().query(Q(query='value'))
        plain, _ = default_facets_factory(base, 'anotheridx')
        body = plain.to_dict()
        assert 'aggs' not in body
        assert 'post_filter' not in body
        assert 'bool' not in body['query']
Esempio n. 2
0
def test_default_facets_factory(app, user_factory):
    """Exercise ``default_facets_factory`` on a ``Query`` object.

    With a facet definition registered for ``testidx`` the query body
    must contain the configured ``aggs``, a ``post_filter`` key and a
    ``filtered`` key inside ``query``. For an index without a definition
    none of those keys may appear.
    """
    facet_defs = {
        'aggs': {
            'type': {'terms': {'field': "upload_type"}},
            'subtype': {'terms': {'field': "subtype"}},
        },
        'filters': {'subtype': terms_filter('subtype')},
        'post_filters': {'type': terms_filter('type')},
    }
    app.config['RECORDS_REST_FACETS']['testidx'] = facet_defs

    with app.test_request_context("?type=a&subtype=b"):
        # Index that has a facet definition.
        faceted, _ = default_facets_factory(Query("value"), 'testidx')
        assert faceted.body['aggs'] == facet_defs['aggs']
        for expected, container in (('post_filter', faceted.body),
                                    ('filtered', faceted.body['query'])):
            assert expected in container

        # Index that has no facet definition.
        plain, _ = default_facets_factory(Query("value"), 'anotheridx')
        for unexpected in ('aggs', 'post_filter'):
            assert unexpected not in plain.body
        assert 'filtered' not in plain.body['query']
Esempio n. 3
0
def and_search_factory(self, search, query_parser=None):
    """Build a search from the ``q`` request value, AND-ing terms by default.

    :param self: REST view.
    :param search: Elastic search DSL search instance.
    :param query_parser: Optional callable turning the query string into
        a query object. When omitted, a ``query_string`` query with
        ``default_operator='AND'`` is used.
    :returns: Tuple with search instance and URL arguments.
    :raises InvalidQueryRESTError: If building the query raises
        ``SyntaxError``.
    """
    from invenio_records_rest.facets import default_facets_factory
    from invenio_records_rest.sorter import default_sorter_factory

    def _and_query(qstr=None):
        """Return an AND ``query_string`` query, or an empty ``Q()``."""
        if not qstr:
            return Q()
        return Q('query_string', query=qstr, default_operator='AND')

    query_string = request.values.get('q')
    build_query = query_parser if query_parser else _and_query

    try:
        parsed = build_query(query_string)
        search = search.query(parsed)
    except SyntaxError:
        failure = "Failed parsing query: {0}".format(
            request.values.get('q', ''))
        current_app.logger.debug(failure, exc_info=True)
        raise InvalidQueryRESTError()

    # Facets and sorting are both configured per index name.
    index_name = search._index[0]
    search, urlkwargs = default_facets_factory(search, index_name)
    search, sort_args = default_sorter_factory(search, index_name)
    for name, val in sort_args.items():
        urlkwargs.add(name, val)
    urlkwargs.add('q', query_string)

    return search, urlkwargs
Esempio n. 4
0
def inspire_search_factory(self, search):
    """Build the search for the ``q`` request value using ``IQ``.

    When the application is in debug mode, the search body is logged as
    indented JSON whether or not the query could be built.

    :param self: REST view.
    :param search: Elastic search DSL search instance.
    :returns: Tuple with search instance and URL arguments.
    :raises InvalidQueryRESTError: If building the query raises
        ``SyntaxError``.
    """
    from invenio_records_rest.facets import default_facets_factory
    from invenio_records_rest.sorter import default_sorter_factory

    query_string = request.values.get('q', '')

    try:
        parsed = IQ(query_string, search)
        search = search.query(parsed)
    except SyntaxError:
        failure = "Failed parsing query: {0}".format(
            request.values.get('q', ''))
        current_app.logger.debug(failure, exc_info=True)
        raise InvalidQueryRESTError()
    finally:
        # Runs on success and on failure alike.
        if current_app.debug:
            dumped = json.dumps(search.to_dict(), indent=4)
            current_app.logger.debug(dumped)

    index_name = search._index[0]
    search, urlkwargs = default_facets_factory(search, index_name)
    search, sort_args = default_sorter_factory(search, index_name)
    for name, val in sort_args.items():
        urlkwargs.add(name, val)
    urlkwargs.add('q', query_string)

    return search, urlkwargs
def _ils_search_factory(self, search, qs_validator):
    """Build a search from a validated ``q`` request value.

    The raw query string is passed through ``qs_validator`` before being
    turned into a ``query_string`` query. The URL arguments keep the raw
    (unvalidated) query string.

    :param self: REST view.
    :param search: Elastic search DSL search instance.
    :param qs_validator: Callable applied to the raw query string; its
        return value is what gets parsed.
    :returns: Tuple with search instance and URL arguments.
    :raises SearchQueryError: If applying the query raises
        ``SyntaxError``.
    """
    from invenio_records_rest.facets import default_facets_factory
    from invenio_records_rest.sorter import default_sorter_factory

    def _to_query(qstr=None):
        """Return a ``query_string`` query, or an empty ``Q()``."""
        if not qstr:
            return Q()
        return Q('query_string', query=qstr)

    query_string = request.values.get("q")
    validated = qs_validator(query_string)
    query = _to_query(validated)

    try:
        search = search.query(query)
    except SyntaxError:
        raise SearchQueryError(query_string)

    index_name = search._index[0]
    search, urlkwargs = default_facets_factory(search, index_name)
    search, sort_args = default_sorter_factory(search, index_name)
    for name, val in sort_args.items():
        urlkwargs.add(name, val)
    urlkwargs.add("q", query_string)

    return search, urlkwargs
Esempio n. 6
0
def inspire_search_factory(self, search):
    """Build the search for the ``q`` request value using ``IQ``.

    :param self: REST view.
    :param search: Elastic search DSL search instance.
    :returns: Tuple with search instance and URL arguments.
    :raises InvalidQueryRESTError: If building the query raises
        ``SyntaxError``.
    """
    from invenio_records_rest.facets import default_facets_factory
    from invenio_records_rest.sorter import default_sorter_factory

    query_string = request.values.get('q', '')

    try:
        parsed = IQ(query_string)
        search = search.query(parsed)
    except SyntaxError:
        failure = "Failed parsing query: {0}".format(
            request.values.get('q', ''))
        current_app.logger.debug(failure, exc_info=True)
        raise InvalidQueryRESTError()

    index_name = search._index[0]
    search, urlkwargs = default_facets_factory(search, index_name)
    search, sort_args = default_sorter_factory(search, index_name)
    for name, val in sort_args.items():
        urlkwargs.add(name, val)
    urlkwargs.add('q', query_string)

    return search, urlkwargs
Esempio n. 7
0
def cernopendata_search_factory(self, search):
    """Build the search for the ``q`` request value.

    The query string is parsed with ``cernopendata_query_parser``.

    :param self: REST view
    :param search: Elastic search DSL search instance

    :return: Tuple with search instance and URL arguments
    :raises InvalidQueryRESTError: If building the query raises
        ``SyntaxError``.
    """
    query_string = request.values.get("q")

    try:
        parsed = cernopendata_query_parser(query_string)
        search = search.query(parsed)
    except SyntaxError:
        failure = "Failed parsing query: {0}".format(
            request.values.get("q", ""))
        current_app.logger.debug(failure, exc_info=True)
        raise InvalidQueryRESTError()

    index_name = search._index[0]
    search, url_kwargs = default_facets_factory(search, index_name)
    search, sort_args = default_sorter_factory(search, index_name)
    for name, val in sort_args.items():
        url_kwargs.add(name, val)
    url_kwargs.add("q", query_string)

    return search, url_kwargs
Esempio n. 8
0
def search_factory(self, search, query_parser=None):
    """Build a faceted, sorted search with default ``identity`` grouping.

    :param self: REST view.
    :param search: Elastic search DSL search instance.
    :param query_parser: Not used by this function.
    :returns: Tuple with search instance and URL arguments.
    :raises InvalidQueryRESTError: If applying the ``q`` query raises
        ``SyntaxError``.
    """
    from invenio_records_rest.facets import default_facets_factory
    from invenio_records_rest.sorter import default_sorter_factory

    index_name = search._index[0]
    search, urlkwargs = default_facets_factory(search, index_name)
    search, sort_args = default_sorter_factory(search, index_name)
    for name, val in sort_args.items():
        urlkwargs.add(name, val)

    # Without an explicit 'group_by', restrict to 'identity' grouping.
    grouping_requested = 'group_by' in request.values
    if not grouping_requested:
        identity_only = Q('term', Grouping='identity')
        search = search.filter(identity_only)
        urlkwargs['group_by'] = 'identity'

    try:
        query_string = request.values.get('q')
        if query_string:
            full_text = Q(
                'query_string',
                query=query_string,
                default_field='_search_all',
            )
            search = search.query(full_text)
            urlkwargs['q'] = query_string
    except SyntaxError:
        raise InvalidQueryRESTError()

    # Leave out the identifiers the search was made by (large aggregate).
    trimmed = search.source(exclude=['*.SearchIdentifier'])
    return trimmed, urlkwargs
Esempio n. 9
0
def search_factory(self, search, query_parser=None):
    """Build a faceted, sorted search after checking required fields.

    The request must carry ``id``, ``scheme`` and ``relation``; the first
    one found missing (in that order) is reported.

    :param self: REST view.
    :param search: Elastic search DSL search instance.
    :param query_parser: Not used by this function.
    :returns: Tuple with search instance and URL arguments.
    :raises RESTValidationError: If a required request field is missing.
    """
    from invenio_records_rest.facets import default_facets_factory
    from invenio_records_rest.sorter import default_sorter_factory

    index_name = search._index[0]

    # TODO: make "scheme" optional?
    required = ('id', 'scheme', 'relation')
    missing = next(
        (name for name in required if name not in request.values), None)
    if missing is not None:
        raise RESTValidationError(
            errors=[FieldError(missing, 'Required field.')])

    search, urlkwargs = default_facets_factory(search, index_name)
    search, sort_args = default_sorter_factory(search, index_name)
    for name, val in sort_args.items():
        urlkwargs.add(name, val)

    # Without an explicit 'groupBy', restrict to 'identity' grouping.
    grouping_requested = 'groupBy' in request.values
    if not grouping_requested:
        identity_only = Q('term', Grouping='identity')
        search = search.filter(identity_only)
        urlkwargs['groupBy'] = 'identity'

    # Leave out the identifiers the search was made by (large aggregate).
    trimmed = search.source(exclude=['*.SearchIdentifier'])
    return trimmed, urlkwargs
Esempio n. 10
0
def meta_search_factory(self, search, query_parser=None):
    """Build an aggregation-only search over ``Cites`` relationships.

    Regular hits are suppressed (``size=0``). Results are returned as
    ``Target`` term buckets, each carrying one ``top_hits`` document, and
    paginated with a ``bucket_sort`` pipeline aggregation ordered by
    descending bucket count.

    Pagination is read from the ``page`` (1-based, default 1) and ``size``
    (default 10) request values. Values that cannot be converted to
    ``int`` fall back to their defaults, and a page below 1 is treated as
    the first page so the offset is never negative.

    :param self: REST view.
    :param search: Elastic search DSL search instance.
    :param query_parser: Not used by this function.
    :returns: Tuple with search instance and URL arguments.
    :raises InvalidQueryRESTError: If applying the ``q`` query raises
        ``SyntaxError``.
    """
    from invenio_records_rest.facets import default_facets_factory
    from invenio_records_rest.sorter import default_sorter_factory

    search, urlkwargs = default_facets_factory(search, "metadata")
    search, sortkwargs = default_sorter_factory(search, "metadata")
    for key, value in sortkwargs.items():
        urlkwargs.add(key, value)

    # Only the aggregation buckets are of interest, not the hits.
    search = search.extra(size=0)

    search = search.filter(Q('term', RelationshipType='Cites'))
    # Apply 'identity' grouping by default
    if 'group_by' not in request.values:
        search = search.filter(Q('term', Grouping='identity'))
        urlkwargs['group_by'] = 'identity'

    try:
        query_string = request.values.get('q')
        if query_string:
            search = search.query(
                Q('query_string',
                  query=query_string,
                  default_field='_search_all'))
            urlkwargs['q'] = query_string
    except SyntaxError:
        raise InvalidQueryRESTError()

    # ``type=int`` converts the raw string and returns the default when
    # the conversion fails, so ``size`` reaches the aggregation as an int.
    size = request.values.get('size', 10, type=int)
    page = request.values.get('page', 1, type=int)
    start = max(page - 1, 0) * size

    search.aggs.bucket(
        'Target', 'terms', field='Target.ID', size=1000,
    ).metric(
        "first",
        "top_hits",
        _source=dict(include=[
            "Target.Identifier.*",
            "Target.Creator.Name",
            "Target.Title",
        ]),
        size=1,
    )
    pagination = {
        'from': start,
        'size': size,
        'sort': [{'_count': {'order': 'desc'}}],
    }
    search.aggs['Target'].bucket('pagination', 'bucket_sort', **pagination)
    return search, urlkwargs
Esempio n. 11
0
def circulation_search_factory(self, search, query_parser=None):
    """Build a search, limiting non-backoffice users to their own records.

    Users without backoffice permission get an empty query replaced by
    ``patron_pid:<identity id>``. A query that starts with
    ``patron_pid:<digits>`` naming a different id is rejected with a 403.

    :param self: REST view.
    :param search: Elastic search DSL search instance.
    :param query_parser: Not used by this function.
    :returns: Tuple with search instance and URL arguments.
    :raises InvalidQueryRESTError: If applying the query raises
        ``SyntaxError``.
    """
    def _default_parser(qstr=None):
        """Return default parser that uses the Q() from elasticsearch_dsl."""
        if qstr:
            return Q('query_string', query=qstr)
        return Q()

    from invenio_records_rest.facets import default_facets_factory
    from invenio_records_rest.sorter import default_sorter_factory

    query_string = request.values.get('q', '')
    query = _default_parser(qstr=query_string)

    # if the logged in user in not librarian or admin, validate the query
    if not backoffice_permission().allows(g.identity):
        # patron can find only his loans
        if not query_string:
            # force query to be patron_pid:<logged in user>
            only_patron_loans = 'patron_pid:{}'.format(g.identity.id)
            query = _default_parser(qstr=only_patron_loans)
        else:
            # Capture the whole numeric id (``\d+``). A single ``\d`` only
            # compares the first digit, which mis-handles every id of two
            # or more digits.
            # NOTE(review): queries that do not start with ``patron_pid:``
            # are not restricted here -- confirm that this is intended.
            match = re.match(r"patron_pid:(?P<pid>\d+)", query_string)
            if match and match.group('pid') != str(g.identity.id):
                current_app.logger.debug(
                    "Search for `{0}` not allowed by `patron_pid:{1}`".format(
                        query_string, str(g.identity.id))
                )
                abort(403)

    try:
        search = search.query(query)
    except SyntaxError:
        current_app.logger.debug(
            "Failed parsing query: {0}".format(query_string), exc_info=True)
        raise InvalidQueryRESTError()

    search_index = search._index[0]
    search, urlkwargs = default_facets_factory(search, search_index)
    search, sortkwargs = default_sorter_factory(search, search_index)
    for key, value in sortkwargs.items():
        urlkwargs.add(key, value)

    urlkwargs.add('q', query_string)
    return search, urlkwargs
Esempio n. 12
0
def test_default_facets_factory(app, user_factory):
    """Exercise ``default_facets_factory`` on a ``Query`` object.

    For the configured ``testidx`` index the body must contain the
    configured ``aggs``, ``post_filter`` and a ``filtered`` query. For an
    unconfigured index none of these keys may be present.
    """
    aggregations = {
        'type': {'terms': {'field': "upload_type"}},
        'subtype': {'terms': {'field': "subtype"}},
    }
    facet_defs = {
        'aggs': aggregations,
        'filters': {'subtype': terms_filter('subtype')},
        'post_filters': {'type': terms_filter('type')},
    }
    app.config['RECORDS_REST_FACETS']['testidx'] = facet_defs

    with app.test_request_context("?type=a&subtype=b"):
        # Index that has a facet definition.
        faceted, _ = default_facets_factory(Query("value"), 'testidx')
        assert faceted.body['aggs'] == facet_defs['aggs']
        assert 'post_filter' in faceted.body
        assert 'filtered' in faceted.body['query']

        # Index that has no facet definition.
        plain, _ = default_facets_factory(Query("value"), 'anotheridx')
        assert 'aggs' not in plain.body
        assert 'post_filter' not in plain.body
        assert 'filtered' not in plain.body['query']
Esempio n. 13
0
def filter_by_patron_search_factory(self, search, query_parser=None):
    """Filter search queries to only show records for the logged in patron.

    If the logged in user has backoffice permissions do not filter by
    patron. Otherwise an empty query is replaced by
    ``patron_pid:<identity id>``, and a query that starts with
    ``patron_pid:<digits>`` naming a different id is rejected.

    :param self: REST view.
    :param search: Elastic search DSL search instance.
    :param query_parser: Optional callable invoked as
        ``query_parser(qstr=...)`` to build the query. Defaults to a
        plain ``query_string`` query.
    :returns: Tuple with search instance and URL arguments.
    :raises UnauthorizedSearchError: If the user is not authenticated, or
        the query names another patron's pid.
    :raises SearchQueryError: If applying the query raises
        ``SyntaxError``.
    """
    def _default_parser(qstr=None):
        """Return default parser that uses the Q() from elasticsearch_dsl."""
        if qstr:
            return Q('query_string', query=qstr)
        return Q()

    from invenio_records_rest.facets import default_facets_factory
    from invenio_records_rest.sorter import default_sorter_factory

    query_string = request.values.get('q', '')

    if not current_user.is_authenticated:
        raise UnauthorizedSearchError(query_string)

    parser = query_parser or _default_parser
    query = parser(qstr=query_string)

    # if the logged in user in not librarian or admin, validate the query
    if has_request_context() and not backoffice_permission().allows(g.identity):
        # patron can find only his document requests
        if not query_string:
            # force query to be patron_pid:<logged in user>
            patron_pid_filter = 'patron_pid:{}'.format(g.identity.id)
            query = _default_parser(qstr=patron_pid_filter)
        else:
            # Capture the whole numeric id (``\d+``). A single ``\d`` only
            # compares the first digit, which mis-handles every id of two
            # or more digits.
            # NOTE(review): queries that do not start with ``patron_pid:``
            # are not restricted here -- confirm that this is intended.
            match = re.match(r"patron_pid:(?P<pid>\d+)", query_string)
            if match and match.group('pid') != str(g.identity.id):
                raise UnauthorizedSearchError(query_string, g.identity.id)
    try:
        search = search.query(query)
    except SyntaxError:
        raise SearchQueryError(query_string)

    search_index = search._index[0]
    search, urlkwargs = default_facets_factory(search, search_index)
    search, sortkwargs = default_sorter_factory(search, search_index)
    for key, value in sortkwargs.items():
        urlkwargs.add(key, value)

    urlkwargs.add('q', query_string)
    return search, urlkwargs
Esempio n. 14
0
def test_default_facets_factory(app):
    """Exercise ``default_facets_factory`` with and without facet config.

    For the configured ``testidx`` index the search body must contain the
    configured ``aggs``, a ``post_filter`` and a ``subtype`` terms filter.
    For an unconfigured index the body must contain none of them.
    """
    aggregations = {
        'type': {'terms': {'field': 'upload_type'}},
        'subtype': {'terms': {'field': 'subtype'}},
    }
    facet_defs = {
        'aggs': aggregations,
        'filters': {'subtype': terms_filter('subtype')},
        'post_filters': {'type': terms_filter('type')},
    }
    app.config['RECORDS_REST_FACETS']['testidx'] = facet_defs

    with app.test_request_context('?type=a&subtype=b'):
        # Index that has a facet definition.
        faceted, _ = default_facets_factory(
            Search().query(Q(query='value')), 'testidx')
        body = faceted.to_dict()
        assert body['aggs'] == facet_defs['aggs']
        assert 'post_filter' in body
        first_filter = body['query']['bool']['filter'][0]
        assert first_filter['terms']['subtype']

        # Index that has no facet definition.
        plain, _ = default_facets_factory(
            Search().query(Q(query='value')), 'anotheridx')
        body = plain.to_dict()
        for unexpected in ('aggs', 'post_filter'):
            assert unexpected not in body
        assert 'bool' not in body['query']
Esempio n. 15
0
def default_search_factory(self, search, query_parser=None):
    """Build a faceted, sorted search from the ``q`` request value.

    When the ``debug`` request argument is set, hits are returned with an
    explanation (``explain=True``).

    :param search: Elastic search DSL search instance.
    :param query_parser: Custom query parser.
    :returns: Tuple with search instance and URL arguments.
    """
    from invenio_records_rest.facets import default_facets_factory
    from invenio_records_rest.sorter import default_sorter_factory

    def _fallback_parser(qstr=None):
        """Build the query with ``Q()`` from elasticsearch_dsl.

        The query type and default operator come from
        ``get_operator_and_query_type``.

        :param qstr: Query string.
        :returns: Query object.
        """
        if qstr:
            operator, query_type = get_operator_and_query_type(qstr)
            return Q(query_type, query=qstr, default_operator=operator)
        return Q()

    query_string = request.values.get('q')

    # Prefer the caller-supplied parser over the fallback.
    parse = query_parser if query_parser else _fallback_parser
    search = search.query(parse(query_string))

    # Index name used to look up the facet and sort configuration.
    index_name = getattr(search, '_original_index', search._index)[0]

    search, urlkwargs = default_facets_factory(search, index_name)
    search, sort_args = default_sorter_factory(search, index_name)
    for name, val in sort_args.items():
        urlkwargs.add(name, val)
    urlkwargs.add('q', query_string)

    # Optionally ask for an explanation of each hit.
    wants_explanation = request.args.get('debug')
    if wants_explanation:
        search = search.extra(explain=True)

    return search, urlkwargs
def ils_search_factory(self, search, validator=None):
    """Build a search from the ``q`` request value, optionally validated.

    :param self: REST view.
    :param search: Elastic search DSL search instance.
    :param validator: Optional callable invoked as
        ``validator(search, query_string)``; it returns the
        ``(search, query_string)`` pair to continue with.
    :returns: Tuple with search instance and URL arguments.
    :raises SearchQueryError: If applying the query raises
        ``SyntaxError``.
    """
    from invenio_records_rest.facets import default_facets_factory
    from invenio_records_rest.sorter import default_sorter_factory

    def _to_query(search, qstr=None):
        """Return a ``query_string`` query, or an empty ``Q()``."""
        if not qstr:
            return Q()
        params = {}
        boosted_fields = getattr(search, "boosted_fields", [])
        if boosted_fields:
            params["fields"] = boosted_fields + ["*"]
            # add lenient parameter in order to fix
            # parsing exception on data fields, see known issues
            # https://www.elastic.co/guide/en/elasticsearch/reference/current/release-notes-7.1.1.html  # noqa
            params["lenient"] = True
        return Q("query_string", query=qstr, **params)

    query_string = request.values.get("q")
    if validator:
        search, query_string = validator(search, query_string)
    query = _to_query(search, qstr=query_string)

    try:
        search = search.query(query)
    except SyntaxError:
        failure = "Failed parsing query: {0}".format(
            request.values.get("q", ""))
        current_app.logger.debug(failure, exc_info=True)
        raise SearchQueryError(query_string)

    index_name = getattr(search, "_original_index", search._index)[0]
    search, urlkwargs = default_facets_factory(search, index_name)
    search, sort_args = default_sorter_factory(search, index_name)
    for name, val in sort_args.items():
        urlkwargs.add(name, val)
    urlkwargs.add("q", query_string)

    return search, urlkwargs
Esempio n. 17
0
def and_search_factory(self, search, query_parser=None):
    """Build a search from the ``q`` request value, AND-ing terms by default.

    Unless the ``deleted`` request argument is set, records that have a
    ``deleted`` field are filtered out.

    :param self: REST view.
    :param search: Elastic search DSL search instance.
    :param query_parser: Optional callable turning the query string into
        a query object. When omitted, a ``query_string`` query with
        ``default_operator='AND'`` is used.
    :returns: Tuple with search instance and URL arguments.
    :raises InvalidQueryRESTError: If building the query raises
        ``SyntaxError``.
    """
    from invenio_records_rest.facets import default_facets_factory
    from invenio_records_rest.sorter import default_sorter_factory

    def _and_query(qstr=None):
        """Return an AND ``query_string`` query, or an empty ``Q()``."""
        if not qstr:
            return Q()
        return Q('query_string', query=qstr, default_operator='AND')

    query_string = request.values.get('q')
    build_query = query_parser if query_parser else _and_query

    try:
        parsed = build_query(query_string)
        search = search.query(parsed)
    except SyntaxError:
        failure = f'Failed parsing query: {request.values.get("q", "")}'
        current_app.logger.debug(failure, exc_info=True)
        raise InvalidQueryRESTError()

    index_name = search._index[0]
    search, urlkwargs = default_facets_factory(search, index_name)
    search, sort_args = default_sorter_factory(search, index_name)
    for name, val in sort_args.items():
        urlkwargs.add(name, val)
    urlkwargs.add('q', query_string)

    # Deleted records are included only on request.
    include_deleted = request.args.get('deleted')
    if not include_deleted:
        has_deleted_field = Q('exists', field='deleted')
        search = search.filter('bool', must_not=[has_deleted_field])

    return search, urlkwargs
def _ils_search_factory(self, search, validator):
    """Build a search from a validated ``q`` request value.

    :param self: REST view.
    :param search: Elastic search DSL search instance.
    :param validator: Callable invoked as
        ``validator(search, query_string)``; it returns the
        ``(search, query_string)`` pair to continue with.
    :returns: Tuple with search instance and URL arguments.
    :raises SearchQueryError: If applying the query raises
        ``SyntaxError``.
    """
    from invenio_records_rest.facets import default_facets_factory
    from invenio_records_rest.sorter import default_sorter_factory

    def _to_query(qstr=None):
        """Return a ``query_string`` query, or an empty ``Q()``."""
        if not qstr:
            return Q()
        return Q('query_string', query=qstr)

    raw_query = request.values.get("q")
    search, query_string = validator(search, raw_query)
    query = _to_query(query_string)

    try:
        search = search.query(query)
    except SyntaxError:
        failure = "Failed parsing query: {0}".format(
            request.values.get('q', ''))
        current_app.logger.debug(failure, exc_info=True)
        raise SearchQueryError(query_string)

    index_name = getattr(search, '_original_index', search._index)[0]
    search, urlkwargs = default_facets_factory(search, index_name)
    search, sort_args = default_sorter_factory(search, index_name)
    for name, val in sort_args.items():
        urlkwargs.add(name, val)
    urlkwargs.add("q", query_string)

    return search, urlkwargs
Esempio n. 19
0
File: query.py Progetto: mhaya/weko
def default_search_factory(self, search, query_parser=None, search_type=None):
    """Parse query using Weko-Query-Parser. MetaData Search.

    The query is assembled from the current request: ``q`` (free text),
    ``search_type``, ``community`` / ``provisional_communities`` and the
    keyword parameters configured in ``WEKO_SEARCH_KEYWORDS_DICT``. It is
    applied to ``search`` as a filter, the ``content`` field is excluded from
    the returned ``_source``, and facets and sorting are added on top.

    :param self: REST view.
    :param search: Elastic search DSL search instance.
    :param query_parser: Query parser used for the full text search. It is
        called as ``query_parser(qs)``, or as
        ``query_parser(community_id, qs)`` when the request carries a
        ``community`` value. (Default: ``None``)
    :param search_type: Search type; a value containing ``'0'`` selects the
        full text search, anything else the simple search. Read from the
        ``search_type`` request value when ``None``. (Default: ``None``)
    :returns: Tuple with search instance and URL arguments.
    :raises InvalidQueryRESTError: If applying the query raises
        ``SyntaxError``.
    """
    def _match_and(field, text):
        """Return a ``match`` query on ``field`` using the ``and`` operator.

        :param field: Field name.
        :param text: Text to match.
        :return: Query parser.
        """
        return Q('match', **{field: dict(operator="and", query=text)})

    def _get_search_qs_query(qs=None):
        """Qs of search bar keywords for detail simple search.

        :param qs: Query string.
        :return: Query parser, or ``None`` when ``qs`` is empty.
        """
        if not qs:
            return None
        return Q('query_string',
                 query=qs,
                 default_operator='and',
                 fields=['search_*', 'search_*.ja'])

    def _get_detail_keywords_query():
        """Get keywords query.

        Builds one query per configured keyword that is present in the
        request. Any exception raised while building is logged and the
        queries collected so far are returned.

        :return: List of query parsers (possibly empty).
        """
        def _get_keywords_query(k, v):
            """Build the query for one ``string`` keyword.

            :param k: Request parameter name.
            :param v: Keyword configuration: a field name (str), a list of
                fields, a dict mapping a field to its list of allowed values,
                or a ``(field, converter)`` tuple.
            :return: Query parser, or ``None`` when nothing applies.
            """
            qry = None
            kv = request.values.get(k)
            if not kv:
                return

            if isinstance(v, str):
                qry = _match_and(v, kv)
            elif isinstance(v, list):
                qry = Q('multi_match',
                        query=kv,
                        type='most_fields',
                        minimum_should_match='75%',
                        operator='and',
                        fields=v)
            elif isinstance(v, dict):
                tokens = kv.split(',')
                for key, vlst in v.items():
                    if not isinstance(vlst, list):
                        continue
                    # Numeric tokens are indexes into the configured list.
                    shud = [
                        _match_and(key, vlst[int(x)]) for x in tokens
                        if x.isdecimal() and int(x) < len(vlst)
                    ]
                    # Other tokens are used only if they are listed values.
                    shud.extend(
                        _match_and(key, x) for x in tokens
                        if not x.isdecimal() and x in vlst)
                    if shud:
                        # The first field that yields clauses wins.
                        return Q('bool', should=shud)
            elif isinstance(v, tuple) and len(v) >= 2:
                # v[1] converts each comma separated token before it is
                # used as a term value on field v[0].
                shud = [Q('term', **{v[0]: v[1](x)}) for x in kv.split(',')]
                if shud:
                    qry = Q('bool', should=shud)

            return qry

        def _get_nested_query(k, v):
            """Build the query for one ``nested`` keyword.

            :param k: Request parameter name carrying the text value.
            :param v: Keyword configuration; handled only when it is a tuple
                whose second item is a dict keyed by attribute request
                parameter names.
            :return: Query parser, or ``None`` when nothing applies.
            """
            # text value
            kv = request.values.get(k)
            if not kv:
                return

            shuld = []
            if isinstance(v, tuple) and len(v) > 1 and isinstance(v[1], dict):
                # attr keyword in request url
                for attr_key, attr_obj in v[1].items():
                    attr_val_str = request.values.get(attr_key)
                    if not (isinstance(attr_obj, dict) and attr_val_str):
                        continue

                    if isinstance(v[0], str) and not v[0]:
                        # For ID search
                        for key in attr_val_str.split(','):
                            attr = attr_obj.get(key)
                            if isinstance(attr, tuple):
                                attr = [attr]
                            if not isinstance(attr, list):
                                continue

                            for alst in attr:
                                if not isinstance(alst, tuple):
                                    continue
                                # alst is (nested path, "attr=value").
                                val_attr_lst = alst[1].split('=')
                                mut = [_match_and(alst[0] + ".value", kv)]
                                # "attr=*" means any attribute value, so
                                # no term restriction is added.
                                if '=*' not in alst[1]:
                                    name = alst[0] + "." + val_attr_lst[0]
                                    mut.append(
                                        Q('term', **{name: val_attr_lst[1]}))
                                shuld.append(
                                    Q('nested',
                                      path=alst[0],
                                      query=Q('bool', must=mut)))
                        continue

                    attr_key_hit = [
                        x for x in attr_obj.keys() if v[0] + "." in x
                    ]
                    if not attr_key_hit:
                        continue
                    vlst = attr_obj.get(attr_key_hit[0])
                    if not isinstance(vlst, list):
                        continue
                    # Keep only tokens that are valid indexes into vlst.
                    attr_val = [
                        x for x in attr_val_str.split(',')
                        if x.isdecimal() and int(x) < len(vlst)
                    ]
                    if not attr_val:
                        continue

                    # The text match is the mandatory clause. It must not be
                    # overwritten by the attribute term queries built below,
                    # otherwise the user's text would be ignored.
                    qm = _match_and(v[0] + ".value", kv)
                    shud = [
                        Q('term', **{attr_key_hit[0]: vlst[int(x)]})
                        for x in attr_val
                    ]
                    # NOTE(review): Elasticsearch treats ``should`` clauses
                    # as optional when a ``must`` clause is present, so the
                    # attribute terms may not restrict results - confirm
                    # whether ``minimum_should_match`` is wanted here.
                    shuld.append(
                        Q('nested',
                          path=v[0],
                          query=Q('bool', should=shud, must=[qm])))

            return Q('bool', should=shuld) if shuld else None

        def _get_date_query(k, v):
            """Build the range query for one ``date`` keyword.

            Both bounds are read from the request as ``%Y%m%d`` strings and
            sent to Elasticsearch as ``%Y-%m-%d``.

            :param k: Request parameter prefix.
            :param v: Keyword configuration: a list whose first item holds
                the two bound suffixes and whose second item is either a
                field name or a ``(nested path, attribute dict)`` tuple.
            :return: Query parser, or ``None`` when nothing applies.
            """
            qry = None
            if isinstance(v, list) and len(v) >= 2:
                date_from = request.values.get(k + "_" + v[0][0])
                date_to = request.values.get(k + "_" + v[0][1])
                # Both bounds are required.
                if not date_from or not date_to:
                    return

                date_from = datetime.strptime(date_from,
                                              '%Y%m%d').strftime('%Y-%m-%d')
                date_to = datetime.strptime(date_to,
                                            '%Y%m%d').strftime('%Y-%m-%d')
                qv = dict(gte=date_from, lte=date_to)

                if isinstance(v[1], str):
                    qry = Q('range', **{v[1]: qv})
                elif isinstance(v[1], tuple) and len(v[1]) >= 2:
                    path = v[1][0]
                    dt = v[1][1]
                    if isinstance(dt, dict):
                        for attr_key, attr_obj in dt.items():
                            attr_val_str = request.values.get(attr_key)
                            if not (isinstance(attr_obj, dict)
                                    and attr_val_str):
                                continue
                            attr_key_hit = [
                                x for x in attr_obj.keys() if path + "." in x
                            ]
                            if not attr_key_hit:
                                continue
                            vlst = attr_obj.get(attr_key_hit[0])
                            if not isinstance(vlst, list):
                                continue

                            # Tokens are not validated here: a bad index
                            # raises and is handled by the caller's
                            # ``except`` clause.
                            shud = [
                                Q('term', **{attr_key_hit[0]: vlst[int(x)]})
                                for x in attr_val_str.split(',')
                            ]
                            range_q = Q('range', **{path + ".value": qv})
                            # A later matching attribute replaces the
                            # query built for an earlier one.
                            qry = Q('nested',
                                    path=path,
                                    query=Q('bool',
                                            should=shud,
                                            must=[range_q]))
            return qry

        kwd = current_app.config['WEKO_SEARCH_KEYWORDS_DICT']
        ks = kwd.get('string')
        kd = kwd.get('date')
        kn = kwd.get('nested')

        mut = []
        try:
            for k, v in ks.items():
                qy = _get_keywords_query(k, v)
                if qy:
                    mut.append(qy)

            for k, v in kn.items():
                qy = _get_nested_query(k, v)
                if qy:
                    mut.append(qy)

            for k, v in kd.items():
                qy = _get_date_query(k, v)
                if qy:
                    mut.append(qy)
        except Exception as e:
            # Best effort: keep whatever was built before the failure.
            current_app.logger.exception(
                'Detail search query parser failed. err:{0}'.format(e))
        return mut

    def _build_simple_query(mt, qs):
        """Combine ``mt`` with the search bar and detail keyword queries.

        :param mt: List of permission filter queries; extended in place.
        :param qs: Query string.
        :return: Query parser.
        """
        q = _get_search_qs_query(qs)
        if q:
            mt.append(q)
        mt.extend(_get_detail_keywords_query())
        return Q('bool', must=mt) if mt else Q()

    def _build_full_text_query(mt, qstr):
        """Combine ``mt`` with the detail search or the full text query.

        When detail keywords are present they are used together with the
        search bar query; otherwise ``qstr`` is matched against the file
        content and search string fields.

        :param mt: List of permission filter queries; extended in place.
        :param qstr: Query string.
        :return: Query parser.
        """
        # multi keywords search filter
        kmt = _get_detail_keywords_query()
        if kmt:
            # detail search
            mt.extend(kmt)
            q = _get_search_qs_query(qstr)
            if q:
                mt.append(q)
        elif qstr:
            # Full Text Search
            mt.append(
                Q('multi_match',
                  query=qstr,
                  operator='and',
                  fields=[
                      'content.file.content^1.5',
                      'content.file.content.ja^1.2', '_all',
                      'search_string'
                  ],
                  type='most_fields',
                  minimum_should_match='75%'))
        return Q('bool', must=mt) if mt else Q()

    def _get_simple_search_query(qs=None):
        """Query parser for simple search.

        :param qs: Query string.
        :return: Query parser.
        """
        # add  Permission filter by publish date and status
        return _build_simple_query(get_permission_filter(), qs)

    def _get_simple_search_community_query(community_id, qs=None):
        """Query parser for simple search within a community.

        :param community_id: Community identifier.
        :param qs: Query string.
        :return: Query parser.
        """
        # add  Permission filter by publish date and status
        comm = Community.get(community_id)
        return _build_simple_query(
            get_permission_filter(comm.root_node_id), qs)

    def _default_parser(qstr=None):
        """Default parser that uses the Q() from elasticsearch_dsl.

           Full text Search.
           Detail Search.

        :param qstr: Query string.
        :returns: Query parser.
        """
        # add  Permission filter by publish date and status
        return _build_full_text_query(get_permission_filter(), qstr)

    def _default_parser_community(community_id, qstr=None):
        """Default parser that uses the Q() from elasticsearch_dsl.

           Full text Search.
           Detail Search.

        :param community_id: Community identifier.
        :param qstr: Query string.
        :returns: Query parser.
        """
        # add  Permission filter by publish date and status
        comm = Community.get(community_id)
        return _build_full_text_query(
            get_permission_filter(comm.root_node_id), qstr)

    from invenio_records_rest.facets import default_facets_factory
    from invenio_records_rest.sorter import default_sorter_factory

    # add by ryuu at 1004 start curate
    comm_ide = request.values.get('provisional_communities')
    # simple search
    comm_id_simple = request.values.get('community')
    # add by ryuu at 1004 end
    if comm_id_simple is not None:
        query_parser = query_parser or _default_parser_community
    else:
        query_parser = query_parser or _default_parser

    if search_type is None:
        search_type = request.values.get('search_type')

    qs = request.values.get('q')

    if search_type and '0' in search_type:
        # full text search
        if comm_id_simple is not None:
            query_q = query_parser(comm_id_simple, qs)
        else:
            query_q = query_parser(qs)
    else:
        # simple search; 'provisional_communities' takes precedence over
        # 'community'.
        if comm_ide is not None:
            query_q = _get_simple_search_community_query(comm_ide, qs)
        elif comm_id_simple is not None:
            query_q = _get_simple_search_community_query(comm_id_simple, qs)
        else:
            query_q = _get_simple_search_query(qs)

    # Do not return the 'content' field with the hits.
    src = {'_source': {'excludes': ['content']}}
    search._extra.update(src)

    try:
        search = search.filter(query_q)
    except SyntaxError:
        current_app.logger.debug("Failed parsing query: {0}".format(
            request.values.get('q', '')),
                                 exc_info=True)
        raise InvalidQueryRESTError()

    search_index = search._index[0]
    search, urlkwargs = default_facets_factory(search, search_index)
    search, sortkwargs = default_sorter_factory(search, search_index)

    for key, value in sortkwargs.items():
        urlkwargs.add(key, value)

    # default sort, used when the sorter factory returned no sort arguments
    if not sortkwargs:
        sort_key, sort = SearchSetting.get_default_sort(
            current_app.config['WEKO_SEARCH_TYPE_KEYWORD'])
        sort_obj = dict()
        key_fileds = SearchSetting.get_sort_key(sort_key)
        if sort == 'desc':
            sort_obj[key_fileds] = dict(order='desc')
            # A leading '-' marks descending order in the URL argument.
            sort_key = '-' + sort_key
        else:
            sort_obj[key_fileds] = dict(order='asc')
        search._sort.append(sort_obj)
        urlkwargs.add('sort', sort_key)

    # NOTE(review): this stores the built query object, not the raw ``q``
    # string, under 'q' - confirm that is what link generation expects.
    urlkwargs.add('q', query_q)
    return search, urlkwargs
Esempio n. 20
0
def search_factory(self, search, query_parser=None):
    """Parse query using elasticsearch DSL query.

    Terms defined by: RERO_ILS_QUERY_BOOSTING will be boosted
    at the query level.

    The query string is read from the ``q`` request value. When the
    ``display_score`` request value is set, ``explain=True`` is added to the
    search.

    :param self: REST view.
    :param search: Elastic search DSL search instance.
    :param query_parser: a specific query parser, called as
        ``query_parser(query_string, query_boosting)``
    :return: Tuple with search instance and URL arguments.
    :raises InvalidQueryRESTError: If building or applying the query raises
        ``SyntaxError``.
    """
    def _default_parser(qstr=None, query_boosting=None):
        """Default parser that uses the Q() from elasticsearch_dsl.

        :param qstr: Query string.
        :param query_boosting: List of boosted field expressions; when empty
            or ``None`` no boosted clause is added.
        :return: Query parser.
        """
        # The 'simple' request argument switches to the simple query string
        # syntax with AND as default operator.
        query_type = 'query_string'
        default_operator = 'OR'
        if request.args.get('simple'):
            query_type = 'simple_query_string'
            default_operator = 'AND'

        if qstr:
            # TODO: remove this bad hack
            qstr = _PUNCTUATION_REGEX.sub(' ', qstr)
            # Raw string: '\s' is not a valid escape in a normal literal.
            qstr = re.sub(r'\s+', ' ', qstr).rstrip()
            if not query_boosting:
                return Q(query_type,
                         query=qstr,
                         default_operator=default_operator)
            else:
                # Same query twice: once boosted on the configured fields,
                # once without field restriction.
                return Q('bool',
                         should=[
                             Q(query_type,
                               query=qstr,
                               boost=2,
                               fields=query_boosting,
                               default_operator=default_operator),
                             Q(query_type,
                               query=qstr,
                               default_operator=default_operator)
                         ])
        return Q()

    def _boosting_parser(query_boosting, search_index):
        """Elasticsearch boosting fields parser.

        :param query_boosting: Mapping of index name to a
            ``{field: boost}`` mapping.
        :param search_index: Index name to look up.
        :return: List of ``field^boost`` strings (empty when the index has
            no entry).
        """
        boosting = []
        if search_index in query_boosting:
            for field, boost in query_boosting[search_index].items():
                boosting.append('{}^{}'.format(field, boost))
        return boosting

    from invenio_records_rest.facets import default_facets_factory
    from invenio_records_rest.sorter import default_sorter_factory

    query_string = request.values.get('q')
    display_score = request.values.get('display_score')
    if display_score:
        search = search.extra(explain=True)
    query_parser = query_parser or _default_parser

    search_index = search._index[0]
    query_boosting = _boosting_parser(
        current_app.config['RERO_ILS_QUERY_BOOSTING'], search_index)

    try:
        search = search.query(query_parser(query_string, query_boosting))
    except SyntaxError:
        current_app.logger.debug(
            'Failed parsing query: {0}'.format(request.values.get('q', '')),
            exc_info=True,
        )
        raise InvalidQueryRESTError()

    search, urlkwargs = default_facets_factory(search, search_index)
    # i18n translated facets
    search = i18n_facets_factory(search, search_index)
    search, sortkwargs = default_sorter_factory(search, search_index)
    for key, value in sortkwargs.items():
        urlkwargs.add(key, value)
    urlkwargs.add('q', query_string)
    return search, urlkwargs