Beispiel #1
0
def get_field_tags(field, tagtype="marc"):
    """Return the tags registered for the field code ``field`` as a list.

    The lookup is delegated to ``Field.get_field_tags``; ``tagtype`` is
    forwarded to it unchanged.

    Example: field='author', output=['100__%','700__%'].
    """
    from invenio.modules.search.models import Field

    # Materialise whatever iterable the model hands back.
    found = Field.get_field_tags(field, tagtype=tagtype)
    return list(found)
Beispiel #2
0
def get_field_tags(field, tagtype="marc"):
    """Look up the tags of field code ``field`` and return them in a list.

    ``tagtype`` is handed over to ``Field.get_field_tags`` as is.

    Example: field='author', output=['100__%','700__%'].
    """
    from invenio.modules.search.models import Field

    return [tag for tag in Field.get_field_tags(field, tagtype=tagtype)]
Beispiel #3
0
def get_field_tags(field, tagtype="marc"):
    """Return a list with the tags of the field code ``field``.

    Both MARC and nonMARC tags can be requested.

    Example: field='author', output=['100__%','700__%'].

    @param field: field code whose tags are wanted
    @param tagtype: can be "marc" or "nonmarc"; the default "marc" is kept
        for backward compatibility
    """
    from invenio.modules.search.models import Field

    tags = []
    tags.extend(Field.get_field_tags(field, tagtype=tagtype))
    return tags
Beispiel #4
0
def get_field_tags(field, tagtype="marc"):
    """Collect the tags that belong to the field code ``field``.

    Serves MARC as well as nonMARC tags, depending on ``tagtype``.

    Example: field='author', output=['100__%','700__%'].

    @param field: field code to resolve
    @param tagtype: "marc" or "nonmarc"; "marc" stays the default for
        backward compatibility
    @return: list of tags produced by ``Field.get_field_tags``
    """
    from invenio.modules.search.models import Field

    lookup = Field.get_field_tags
    return list(lookup(field, tagtype=tagtype))
Beispiel #5
0
def references_nb_counts():
    """Get number of references for the record `recid`.

    ``recid`` is read from ``request.view_args``.

    Returns:
        ``None`` when the request carries no ``recid``.  ``0`` when
        ``CFG_CERN_SITE`` is truthy, when no tag is configured for the
        ``reference`` field, or when the first configured tag has fewer
        than five characters.  Otherwise the number of field instances
        returned by ``record_get_field_instances`` for that tag.
    """
    recid = request.view_args.get("recid")
    if recid is None:
        return

    from invenio.legacy.bibrecord import record_get_field_instances
    from invenio.modules.search.models import Field
    from invenio.modules.records.api import get_record

    if CFG_CERN_SITE:
        return 0

    # Only the first tag configured for the "reference" field is used.
    reftags = list(Field.get_field_tags("reference"))
    reftag = reftags[0] if reftags else ""
    if not (reftag and len(reftag) > 4):
        return 0

    # Load the record only now that it is certain to be needed; previously
    # it was fetched even when the result was thrown away.
    tmprec = get_record(recid)
    # The tag is split into its first three characters plus the 4th and 5th
    # character (presumably MARC tag, indicator 1, indicator 2 -- confirm
    # against record_get_field_instances).
    instances = record_get_field_instances(
        tmprec, reftag[0:3], reftag[3], reftag[4])
    return len(instances)
Beispiel #6
0
def references_nb_counts():
    """Get number of references for the record `recid`.

    The record id comes from ``request.view_args['recid']``.

    :returns: ``None`` if the request has no ``recid``; ``0`` if
        ``CFG_CERN_SITE`` is truthy or no reference tag of at least five
        characters is configured; otherwise the count of field instances
        that ``record_get_field_instances`` reports for the first
        configured reference tag.
    """
    recid = request.view_args.get('recid')
    if recid is None:
        return

    from invenio.legacy.bibrecord import record_get_field_instances
    from invenio.modules.search.models import Field
    from invenio.modules.records.api import get_record

    if CFG_CERN_SITE:
        return 0

    # Take the first tag registered for the "reference" field, if any.
    reftags = list(Field.get_field_tags("reference"))
    if not reftags:
        return 0
    reftag = reftags[0]
    if not reftag or len(reftag) <= 4:
        return 0

    # Fetching the record is deferred to this point so that no lookup is
    # made when there is nothing to count.
    tmprec = get_record(recid)
    # Characters 0-2, 3 and 4 of the tag are passed separately (presumably
    # tag number and the two indicators -- verify against bibrecord).
    return len(
        record_get_field_instances(tmprec, reftag[0:3], reftag[3],
                                   reftag[4]))
Beispiel #7
0
def get_tags_from_sort_fields(sort_fields):
    """Return the tags associated with sort fields.

    An entry that starts with two digits is taken to be a MARC tag already
    and is kept unchanged.  Any other entry is resolved through
    ``Field.get_field_tags``.

    :param sort_fields: iterable of tags and/or field codes; may be empty
        or ``None``.
    :returns: ``(tags, '')`` when every entry could be resolved, otherwise
        ``([], sort_field)`` where ``sort_field`` is the first entry that
        has no tags associated.
    """
    if not sort_fields:
        return [], ''
    tags = []
    for sort_field in sort_fields:
        if (sort_field and len(sort_field) > 1
                and str(sort_field[0:2]).isdigit()):
            # sort_field starts by two digits, so this is probably
            # a MARC tag already
            tags.append(sort_field)
            continue
        # let us check the 'field' table; the result is materialised first
        # because a lazy iterable would be truthy even when it is empty,
        # which would hide the "no tags" case below.
        field_tags = list(Field.get_field_tags(sort_field))
        if not field_tags:
            return [], sort_field
        tags.extend(field_tags)
    return tags, ''
Beispiel #8
0
def get_tags_from_sort_fields(sort_fields):
    """Return the tags associated with sort fields.

    The result is a 2-tuple.  Its first item is the list of tags; its
    second item is ``''`` on success or the name of the first field that
    has no tags associated (in which case the list is empty).

    Entries beginning with two digits are treated as MARC tags and copied
    through; all others are looked up with ``Field.get_field_tags``.
    ``sort_fields`` may be empty or ``None``.
    """
    if not sort_fields:
        return [], ''

    def _is_marc_tag(candidate):
        # Two leading digits are taken as the mark of a MARC tag.
        return bool(candidate) and len(candidate) > 1 \
            and str(candidate[0:2]).isdigit()

    tags = []
    for sort_field in sort_fields:
        if _is_marc_tag(sort_field):
            tags.append(sort_field)
        else:
            # Turn the lookup result into a list before testing it: if it
            # is a lazy iterable it would always count as true and a field
            # without tags would go unnoticed.
            field_tags = list(Field.get_field_tags(sort_field))
            if field_tags:
                tags.extend(field_tags)
            else:
                return [], sort_field
    return tags, ''
Beispiel #9
0
def search_unit_in_bibxxx(p, f, m, wl=0):
    """Search for pattern 'p' inside bibxxx tables for field 'f'.

    :param p: pattern to search for.  Unless ``m`` is ``'r'``, ``*`` is the
        truncation character and ``a->b`` (without a space next to the
        arrow) is a span query.
    :param f: field code or MARC tag; ``*`` is accepted as wildcard.  The
        fields ``journal`` and ``*count`` are delegated to
        ``search_unit_in_bibwords``.  ``None`` yields an empty hitset.
    :param m: search type; ``'r'`` selects a regexp search, any other value
        an exact, wildcard or span match depending on ``p``.
    :param wl: row limit applied per tag to regexp, span and wildcard
        queries; values ``<= 0`` disable the limit.
    :returns: hitset of recIDs found.
    :raises InvenioWebSearchWildcardLimitError: if a limited query returned
        exactly ``wl`` distinct records; the exception is built from the
        collected hitset.
    """
    # call word search method in some cases:
    if f and (f == 'journal' or f.endswith('count')):
        return search_unit_in_bibwords(p, f, wl=wl)

    hitset = intbitset()
    # flag for knowing if the query limit has been reached
    limit_reached = False
    # flag for knowing if to limit the query results or not
    use_query_limit = False
    # replace truncation char '*' in field definition
    if f is not None:
        f = f.replace('*', '%')

    if m == 'r':
        use_query_limit = True
        column_filter = lambda column: column.op('REGEXP')(p)
    else:
        p = p.replace('*', '%')  # we now use '*' as the truncation character
        ps = p.split("->", 1)  # check for span query:
        if len(ps) == 2 and not (ps[0].endswith(' ') or ps[1].startswith(' ')):
            use_query_limit = True
            column_filter = lambda column: column.between(ps[0], ps[1])
        else:
            if p.find('%') > -1:
                use_query_limit = True
                column_filter = lambda column: column.like(p)
            else:
                column_filter = lambda column: column == p

    # construct 'tl' which defines the tag list (MARC tags) to search in:
    if f is None:
        # no field given: nothing to search in (len(None) used to raise
        # a TypeError here)
        tl = []
    elif len(f) >= 2 and str(f[0]).isdigit() and str(f[1]).isdigit():
        tl = [f]  # 'f' seems to be okay as it starts by two digits
    else:
        # deduce desired MARC tags on the basis of chosen 'f'; if the
        # f index does not exist the loop below simply does not run
        tl = Field.get_field_tags(f)
    # okay, start search:
    for t in tl:
        # construct and run query:
        if t == "001":
            column = Record.id
            query = Record.query.filter(column_filter(column))
        else:
            # deduce into which bibxxx table we will search:
            digit1, digit2 = int(t[0]), int(t[1])
            model = getattr(models, 'Bib{0}{1}x'.format(digit1, digit2))
            column_condition = column_filter(model.value)

            if len(t) != 6 or t[-1:] == '%':
                # only the beginning of field 't' is defined, so add wildcard
                # character:
                tag_condition = model.tag.like(t + '%')
            else:
                tag_condition = model.tag == t

            query = model.query.join(model.bibrecs).filter(
                column_condition, tag_condition)
            column = 'id_bibrec'

        if use_query_limit and wl > 0:
            query = query.limit(wl)
        res = query.values(column)
        res = intbitset([row[0] for row in res])
        limit_reached |= use_query_limit and len(res) > 0 and len(res) == wl
        hitset |= res

    # check to see if the query limit was reached
    if limit_reached:
        # raise an exception, so we can print a nice message to the user
        raise InvenioWebSearchWildcardLimitError(hitset)
    return hitset
Beispiel #10
0
def search_unit_in_bibxxx(p, f, m, wl=0):
    """Search for pattern 'p' inside bibxxx tables for field 'f'.

    Args:
        p: pattern to search for.  When ``m`` is not ``"r"``, ``*`` acts as
            truncation character and ``a->b`` (no space adjacent to the
            arrow) requests a span search.
        f: field code or MARC tag, ``*`` allowed as wildcard.  ``journal``
            and fields ending in ``count`` are handed to
            ``search_unit_in_bibwords``.  ``None`` gives an empty hitset.
        m: search type, ``"r"`` for a regexp search; other values select
            exact, wildcard or span matching based on ``p``.
        wl: per-tag row limit for regexp, span and wildcard queries; no
            limit is applied unless it is greater than zero.

    Returns:
        Hitset of recIDs found.

    Raises:
        InvenioWebSearchWildcardLimitError: when a limited query produced
            exactly ``wl`` distinct records; constructed with the hitset
            gathered over all tags.
    """
    # call word search method in some cases:
    if f and (f == "journal" or f.endswith("count")):
        return search_unit_in_bibwords(p, f, wl=wl)

    hitset = intbitset()
    # flag for knowing if the query limit has been reached
    limit_reached = False
    # flag for knowing if to limit the query results or not
    use_query_limit = False
    # replace truncation char '*' in field definition
    if f is not None:
        f = f.replace("*", "%")

    if m == "r":
        use_query_limit = True
        column_filter = lambda column: column.op("REGEXP")(p)
    else:
        p = p.replace("*", "%")  # we now use '*' as the truncation character
        ps = p.split("->", 1)  # check for span query:
        if len(ps) == 2 and not (ps[0].endswith(" ") or ps[1].startswith(" ")):
            use_query_limit = True
            column_filter = lambda column: column.between(ps[0], ps[1])
        elif p.find("%") > -1:
            use_query_limit = True
            column_filter = lambda column: column.like(p)
        else:
            column_filter = lambda column: column == p

    # construct 'tl' which defines the tag list (MARC tags) to search in:
    if f is None:
        # Without a field there are no tags to look at.  This branch also
        # keeps len() below from being called on None, which raised
        # TypeError before.
        tl = []
    elif len(f) >= 2 and str(f[0]).isdigit() and str(f[1]).isdigit():
        tl = [f]  # 'f' seems to be okay as it starts by two digits
    else:
        # deduce desired MARC tags on the basis of chosen 'f'; an unknown
        # f index just leaves nothing to iterate over
        tl = Field.get_field_tags(f)
    # okay, start search:
    for t in tl:
        # construct and run query:
        if t == "001":
            column = Record.id
            query = Record.query.filter(column_filter(column))
        else:
            # deduce into which bibxxx table we will search:
            digit1, digit2 = int(t[0]), int(t[1])
            model = getattr(models, "Bib{0}{1}x".format(digit1, digit2))
            column_condition = column_filter(model.value)

            if len(t) != 6 or t[-1:] == "%":
                # only the beginning of field 't' is defined, so add wildcard
                # character:
                tag_condition = model.tag.like(t + "%")
            else:
                tag_condition = model.tag == t

            query = model.query.join(model.bibrecs).filter(column_condition, tag_condition)
            column = "id_bibrec"

        if use_query_limit and wl > 0:
            query = query.limit(wl)
        res = query.values(column)
        res = intbitset([row[0] for row in res])
        limit_reached |= use_query_limit and len(res) > 0 and len(res) == wl
        hitset |= res

    # check to see if the query limit was reached
    if limit_reached:
        # raise an exception, so we can print a nice message to the user
        raise InvenioWebSearchWildcardLimitError(hitset)
    return hitset