コード例 #1
0
ファイル: search.py プロジェクト: derekstrom/invenio
def autocomplete(field, q):
    """Autocomplete data from indexes.

    Look up at most 20 terms of the phrase index for ``field`` that contain
    ``q`` and return them as a JSON response.

    NOTE(review): an earlier version of this docstring said that ``q`` is a
    POSTed argument that has to be longer than 3 characters; nothing in this
    function enforces that, so presumably it is validated by the caller or
    a decorator -- confirm.

    :param field: index name
    :param q: query string for index term

    :return: JSON response whose ``results`` entry is a list of
        ``{'value': term}`` dictionaries (empty when no phrase index model
        is available for ``field``).
    """
    model = IdxINDEX.idxPHRASEF(field, fallback=False)
    if model is None:
        # No phrase index model was returned for this field: answer with an
        # empty list instead of failing on ``None.query``.
        return jsonify(results=[])

    rows = model.query.filter(model.term.contains(q)).limit(20).values('term')
    # Build a real list: under Python 3 ``map`` returns a lazy iterator,
    # which cannot be serialized to JSON.
    results = [{'value': row[0]} for row in rows]

    return jsonify(results=results)
コード例 #2
0
def autocomplete(field, q):
    """Autocomplete data from indexes.

    Look up at most 20 terms of the phrase index for ``field`` that contain
    ``q`` and return them as a JSON response.

    NOTE(review): an earlier version of this docstring said that ``q`` is a
    POSTed argument that has to be longer than 3 characters; nothing in this
    function enforces that, so presumably it is validated by the caller or
    a decorator -- confirm.

    :param field: index name
    :param q: query string for index term

    :return: JSON response whose ``results`` entry is a list of
        ``{'value': term}`` dictionaries (empty when no phrase index model
        is available for ``field``).
    """
    model = IdxINDEX.idxPHRASEF(field, fallback=False)
    if model is None:
        # No phrase index model was returned for this field: answer with an
        # empty list instead of failing on ``None.query``.
        return jsonify(results=[])

    rows = model.query.filter(model.term.contains(q)).limit(20).values('term')
    # Build a real list: under Python 3 ``map`` returns a lazy iterator,
    # which cannot be serialized to JSON.
    results = [{'value': row[0]} for row in rows]

    return jsonify(results=results)
コード例 #3
0
def search_unit_in_idxphrases(p, f, m, wl=0):
    """Search for phrase 'p' inside idxPHRASE*F table for field 'f'.

    The kind of search is selected by 'm' and by the shape of 'p':

    * ``m == 'r'``: regular expression search (SQL ``REGEXP`` on the term);
    * ``'a->b'`` with no space directly around the arrow: span search for
      terms between ``a`` and ``b``;
    * a pattern containing ``*`` (or ``%``): truncated search, ``*`` being
      translated to the SQL ``LIKE`` wildcard ``%``;
    * anything else: exact match on the term.

    Fields whose name ends with 'count' are delegated to
    ``search_unit_in_bibwords``.

    :param p: phrase pattern to search for
    :param f: field name used to pick the phrase index; when it is empty
        the index lookup is asked to use its fallback
    :param m: matching type; only ``'r'`` is treated specially here
    :param wl: wildcard limit; when greater than 0 it caps the number of
        index rows read by regexp, span and truncated searches (exact
        matches are never limited)

    :return: hitset (intbitset) of recIDs found; empty when the phrase
        index for 'f' does not exist.
    :raises InvenioWebSearchWildcardLimitError: when a limited query
        returned exactly ``wl`` rows; the exception is given the partial
        hitset so a message can be shown to the user.
    """
    # call word search method in some cases:
    if f.endswith('count'):
        return search_unit_in_bibwords(p, f, wl=wl)

    # deduce in which idxPHRASE table we will search:
    model = IdxINDEX.idxPHRASEF(f, fallback=not f)
    if model is None:
        return intbitset()  # phrase index f does not exist

    # detect query type (regexp, span, partial phrase, exact phrase);
    # every type except the exact match is subject to the query limit:
    term = model.term
    use_query_limit = True
    if m == 'r':
        condition = term.op('REGEXP')(p)
    else:
        p = p.replace('*', '%')  # we now use '*' as the truncation character
        ps = p.split("->", 1)  # check for span query:
        if len(ps) == 2 and not (ps[0].endswith(' ') or
                                 ps[1].startswith(' ')):
            condition = term.between(ps[0], ps[1])
        elif '%' in p:
            condition = term.like(p)
        else:
            use_query_limit = False
            condition = term == p

    # NOTE: the special washing of author names for the fuzzy author index
    # is not applied here.

    query = model.query.filter(condition)
    limited = use_query_limit and wl > 0
    if limited:
        query = query.limit(wl)

    # perform search; ``Query.values()`` returns an iterator, so the rows
    # are materialized to make the ``len()`` check below possible:
    rows = list(query.values('hitlist'))

    # fill the result set:
    hitset = intbitset()
    for row in rows:
        hitset |= intbitset(row[0])

    # check to see if the query limit was reached
    if limited and len(rows) == wl:
        # raise an exception, so we can print a nice message to the user
        raise InvenioWebSearchWildcardLimitError(hitset)
    # okay, return result set:
    return hitset
コード例 #4
0
ファイル: native.py プロジェクト: SCOAP3/invenio
def search_unit_in_idxphrases(p, f, m, wl=0):
    """Search for phrase 'p' inside idxPHRASE*F table for field 'f'.

    The kind of search is selected by 'm' and by the shape of 'p':

    * ``m == 'r'``: regular expression search (SQL ``REGEXP`` on the term);
    * ``'a->b'`` with no space directly around the arrow: span search for
      terms between ``a`` and ``b``;
    * a pattern containing ``*`` (or ``%``): truncated search, ``*`` being
      translated to the SQL ``LIKE`` wildcard ``%``;
    * anything else: exact match on the term.

    Fields whose name ends with 'count' are delegated to
    ``search_unit_in_bibwords``.

    :param p: phrase pattern to search for
    :param f: field name used to pick the phrase index; when it is empty
        the index lookup is asked to use its fallback
    :param m: matching type; only ``'r'`` is treated specially here
    :param wl: wildcard limit; when greater than 0 it caps the number of
        index rows read by regexp, span and truncated searches (exact
        matches are never limited)

    :return: hitset (intbitset) of recIDs found; empty when the phrase
        index for 'f' does not exist.
    :raises InvenioWebSearchWildcardLimitError: when a limited query
        returned exactly ``wl`` rows; the exception is given the partial
        hitset so a message can be shown to the user.
    """
    # call word search method in some cases:
    if f.endswith('count'):
        return search_unit_in_bibwords(p, f, wl=wl)

    # deduce in which idxPHRASE table we will search:
    model = IdxINDEX.idxPHRASEF(f, fallback=not f)
    if model is None:
        return intbitset()  # phrase index f does not exist

    # detect query type (regexp, span, partial phrase, exact phrase);
    # every type except the exact match is subject to the query limit:
    term = model.term
    use_query_limit = True
    if m == 'r':
        condition = term.op('REGEXP')(p)
    else:
        p = p.replace('*', '%')  # we now use '*' as the truncation character
        ps = p.split("->", 1)  # check for span query:
        if len(ps) == 2 and not (ps[0].endswith(' ') or
                                 ps[1].startswith(' ')):
            condition = term.between(ps[0], ps[1])
        elif '%' in p:
            condition = term.like(p)
        else:
            use_query_limit = False
            condition = term == p

    # NOTE: the special washing of author names for the fuzzy author index
    # is not applied here.

    query = model.query.filter(condition)
    limited = use_query_limit and wl > 0
    if limited:
        query = query.limit(wl)

    # perform search; ``Query.values()`` returns an iterator, so the rows
    # are materialized to make the ``len()`` check below possible:
    rows = list(query.values('hitlist'))

    # fill the result set:
    hitset = intbitset()
    for row in rows:
        hitset |= intbitset(row[0])

    # check to see if the query limit was reached
    if limited and len(rows) == wl:
        # raise an exception, so we can print a nice message to the user
        raise InvenioWebSearchWildcardLimitError(hitset)
    # okay, return result set:
    return hitset