Beispiel #1
0
def get_kbd_values(kbname, searchwith=""):
    """Return a list of values by searching a dynamic kb.

    The definition of the knowledge base supplies the field whose values
    are collected and, optionally, a search expression and a collection.
    A ``%`` in the expression is replaced by ``searchwith``; an expression
    without ``%`` is combined with ``searchwith`` using ``and``.

    :param kbname:     name of the knowledge base
    :param searchwith: a term to search with
    :return: list of values; empty when the kb is not dynamic, when its
        definition has no "field", or when no record matches
    """
    # first check that the kb in question is dynamic
    kb = get_kb_by_name(kbname)
    if not kb.id or kb.kbtype != 'd':
        return []
    # get the configuration so that we see what the field is
    confdict = kb.kbdefs.to_dict()
    if not confdict or 'field' not in confdict:
        return []
    field = confdict['field']
    # "expression" and "collection" are both optional: a definition without
    # an expression falls back to a plain search on the field (see below)
    # instead of failing with a KeyError.
    expression = confdict.get('expression', "")
    collection = confdict.get('collection', "")

    # imported lazily, only once a search really has to be performed
    from invenio.legacy import search_engine

    if expression:
        if not searchwith:
            pattern = expression
        elif '%' in expression:
            pattern = expression.replace("%", searchwith)
        else:
            # no %.. just make a combination
            pattern = expression + " and " + searchwith
        reclist = search_engine.perform_request_search(p=pattern,
                                                       cc=collection)
    else:
        # no expression: search on the field itself; without a search term
        # use a fake expression so that only records that have this field
        # will be returned
        reclist = search_engine.perform_request_search(
            f=field, p=searchwith or "/.*/", cc=collection)

    if not reclist:
        return []  # in case nothing worked
    return [
        val
        for (val, dummy) in search_engine.get_most_popular_field_values(
            reclist, field)
    ]
Beispiel #2
0
def _get_coauthors_fallback(personid, collabs):
    """Return the coauthors of a person, computed from search results.

    Records of the person that also match one of the given collaborations
    are left out before the coauthors are counted.

    :param personid: author identifier used in the ``exactauthor:`` query
    :param collabs: sequence whose items carry a collaboration name as
        their first element; may be empty
    :return: list of ``(name, name, number_of_records)`` tuples, one per
        coauthor; the person itself is skipped (case-insensitive match)
    """
    person = str(personid)

    if collabs:
        # only the first element of each item (the collaboration name) is used
        collab_terms = ' or '.join(
            ['collaboration:"%s"' % collab[0] for collab in collabs])
        query = 'exactauthor:"%s" and (%s)' % (personid, collab_terms)
        exclude_recs = perform_request_search(rg=0, p=query)
    else:
        exclude_recs = []

    recids = perform_request_search(rg=0, p='exactauthor:"%s"' % person)
    recids = list(set(recids) - set(exclude_recs))

    # NOTE(review): pickle.loads() is only safe while the 'WAPAFF' output
    # format is generated by trusted code; never feed it external data.
    formatted = format_records(recids, 'WAPAFF')
    records = [pickle.loads(chunk)
               for chunk in formatted.split('!---THEDELIMITER---!') if chunk]

    # map every name found in a record to the set of records it appears in
    coauthors = {}
    for rec, affs in records:
        for name in affs:
            coauthors.setdefault(name, set()).add(rec)

    own_name = person.lower()
    return [(name, name, len(recs))
            for name, recs in coauthors.items()
            if name.lower() != own_name]
Beispiel #3
0
def _get_hepnames_data_fallback(bibauthorid_data, person_id):
    '''
    Build the HepNames information dictionary of a person.

    The HepNames collection is searched for the canonical id (or, when
    there is none, for the person id).  Exactly one hit is formatted as the
    person's record; otherwise the hits, extended with a search on the
    person's known names, are offered as choices.

    @param bibauthorid_data: dict with 'is_baid':bool, 'cid':canonicalID, 'pid':personid
    @param person_id: id of the person
    @return: dict with the keys 'cid', 'pid', 'HaveHep', 'HaveChoices',
        'heprecord', 'bd' and, when choices are offered, 'HepChoices'
    '''
    canonical_id = bibauthorid_data['cid']
    cid = canonical_id if canonical_id else str(person_id)

    matches = perform_request_search(
        rg=0, cc='HepNames', p=cid)[:CFG_WEBAUTHORPROFILE_MAX_HEP_CHOICES]

    hepdict = {}
    hepdict['cid'] = cid
    hepdict['pid'] = person_id

    if len(matches) == 1:
        # a single, unambiguous record: show it
        hepdict['HaveHep'] = True
        hepdict['HaveChoices'] = False
        hepdict['heprecord'] = format_record(matches[0], 'hd')
        hepdict['bd'] = bibauthorid_data
        return hepdict

    # none or several records: collect alternatives by the person's names
    db_names = get_person_names_dicts(person_id)[0]['db_names_dict'].keys()
    names_query = ' or '.join(['"%s"' % str(name) for name in db_names])
    by_name = perform_request_search(
        rg=0, cc='HepNames',
        p=names_query)[:CFG_WEBAUTHORPROFILE_MAX_HEP_CHOICES]
    matches += by_name

    hepdict['HaveHep'] = False
    hepdict['HaveChoices'] = bool(matches)
    hepdict['HepChoices'] = [(format_record(recid, 'hb'), recid)
                             for recid in matches]
    hepdict['heprecord'] = matches
    hepdict['bd'] = bibauthorid_data
    return hepdict
Beispiel #4
0
def get_kbd_values(kbname, searchwith=""):
    """Return a list of values by searching a dynamic kb.

    The definition of the knowledge base supplies the field whose values
    are collected and, optionally, a search expression and a collection.
    A ``%`` in the expression is replaced by ``searchwith``; an expression
    without ``%`` is combined with ``searchwith`` using ``and``.

    :param kbname:     name of the knowledge base
    :param searchwith: a term to search with
    :return: list of values; empty when the kb is not dynamic, when its
        definition has no "field", or when no record matches
    """
    # first check that the kb in question is dynamic
    kb = get_kb_by_name(kbname)
    if not kb.id or kb.kbtype != 'd':
        return []
    # get the configuration so that we see what the field is
    confdict = kb.kbdefs.to_dict()
    if not confdict or 'field' not in confdict:
        return []
    field = confdict['field']
    # Only "field" is mandatory.  A missing "expression" is treated like an
    # empty one (plain search on the field) rather than raising KeyError.
    expression = confdict.get('expression', "")
    collection = confdict.get('collection', "")

    # imported lazily, only once a search really has to be performed
    from invenio.legacy import search_engine

    if expression and searchwith:
        if '%' in expression:
            search_kwargs = {'p': expression.replace("%", searchwith)}
        else:
            # no %.. just make a combination
            search_kwargs = {'p': expression + " and " + searchwith}
    elif expression:
        # only the expression is available
        search_kwargs = {'p': expression}
    else:
        # make a fake expression so that only records that have this field
        # will be returned, unless a search term was given
        search_kwargs = {'f': field, 'p': searchwith or "/.*/"}

    reclist = search_engine.perform_request_search(cc=collection,
                                                   **search_kwargs)
    if not reclist:
        return []  # in case nothing worked
    return [val for (val, dummy) in
            search_engine.get_most_popular_field_values(reclist, field)]
Beispiel #5
0
def get_kbd_values_by_def(confdict, searchwith=""):
    """Return a list of values by searching a dynamic kb.

    A ``%`` in the expression is replaced by ``searchwith``; an expression
    without ``%`` is combined with ``searchwith`` using ``and``.  Without
    an expression the field itself is searched.

    :param confdict: dictionary with keys "field", "expression"
        and "collection" name; only "field" is mandatory
    :param searchwith: a term to search with
    :return: list of values; empty when ``confdict`` is empty, has no
        "field", or no record matches
    """
    # get the configuration so that we see what the field is
    if not confdict or 'field' not in confdict:
        return []
    field = confdict['field']
    # a missing "expression" is handled like an empty one instead of
    # raising KeyError; "collection" was already optional
    expression = confdict.get('expression', "")
    collection = confdict.get('collection', "")

    # imported lazily so that the guards above need no search engine
    from invenio.legacy import search_engine

    if expression:
        if not searchwith:
            pattern = expression
        elif '%' in expression:
            pattern = expression.replace("%", searchwith)
        else:
            # no %.. just make a combination
            pattern = expression + " and " + searchwith
        reclist = search_engine.perform_request_search(p=pattern,
                                                       cc=collection)
    else:
        # make a fake expression so that only records that have this field
        # will be returned, unless a search term was given
        reclist = search_engine.perform_request_search(
            f=field, p=searchwith or "/.*/", cc=collection)

    if not reclist:
        return []  # in case nothing worked
    return [
        val
        for (val, dummy) in search_engine.get_most_popular_field_values(
            reclist, field)
    ]
Beispiel #6
0
def query_get_hot(comments, ln, top, user_collections, collection):
    """
    private function

    Select the records with the most comments (or reviews) and keep those
    that belong to the requested collection(s).

    @param comments:  boolean indicating if we want to retrieve comments or reviews
    @param ln: language (not used by this function)
    @param top: number of results to display
    @param user_collections: allowed collections for the user
    @param collection: collection to display, or 'Show all' for every
        collection of user_collections
    @return: tuple of tuples (id_bibrec, date_last_comment, users, count)
    """
    # NOTE(review): `top` is interpolated directly into the LIMIT clause;
    # callers must make sure it is an integer.
    query = """SELECT c.id_bibrec,
               DATE_FORMAT(max(c.date_creation), '%%Y-%%m-%%d %%H:%%i:%%S') as date_last_comment,
               count(distinct c.id_user) as users,
               count(*) as count
               FROM "cmtRECORDCOMMENT" c
               %s
               GROUP BY c.id_bibrec
               ORDER BY count(*) DESC
               LIMIT %s
    """
    # comments are stored with star_score 0, reviews with a positive score
    score_filter = 'c.star_score=0' if comments else 'c.star_score>0'
    where_clause = "WHERE " + score_filter + \
        """ AND c.status='ok' AND c.nb_abuse_reports < %s""" % CFG_WEBCOMMENT_NB_REPORTS_BEFORE_SEND_EMAIL_TO_ADMIN

    res = run_sql(query % (where_clause, top))

    # A set gives constant-time membership tests in the filter below.
    allowed_recids = set()
    if collection == 'Show all':
        for collection_name in user_collections:
            allowed_recids.update(perform_request_search(cc=collection_name))
    else:
        allowed_recids.update(perform_request_search(cc=collection))

    # row layout: (id_bibrec, date_last_comment, users, total_count)
    return tuple((row[0], row[1], row[2], row[3])
                 for row in res
                 if row[0] in allowed_recids)
Beispiel #7
0
def get_tweets(query):
    """
    Fetch the tweets matching ``query`` from the Twitter search API.

    Tweets already stored in the system are skipped by passing the id kept
    in field 970__a of the first stored record as ``since_id``.  Result
    pages of 100 tweets are requested until a page comes back with a
    different number of results.

    @param query: the search query sent to Twitter
    @return: list of all results collected over the pages
    """
    page_size = 100

    stored = perform_request_search(
        p='980__a:"TWEET" 980__b:"%s"' % query, sf='970__a', so='a')
    ## The first stored record provides the Tweet ID to resume from
    since_id = int(get_fieldvalues(stored[0], '970__a')[0]) if stored else 0

    batch = list(
        _TWITTER_API.Search(query, rpp=100, since_id=since_id).results)
    collected = list(batch)
    page = 1
    ## A page that is not full means there is nothing more to fetch
    while len(batch) == page_size:
        page += 1
        batch = list(
            _TWITTER_API.Search(query, rpp=100, since_id=since_id,
                                page=page).results)
        collected.extend(batch)
    return collected
Beispiel #8
0
def build_issns_from_local_site():
    """
    Print the "journal name -> issn" relation of the local database.

    The records of the 'Periodicals' collection are read; every journal
    name (fields 210__%) of a record with an ISSN (field 022__a) is mapped
    to the first ISSN of that record.

    Journal names are normalized a little bit:
        - all lower case
        - whitespace chars stripped (left and right)
        - "[Online]" suffix removed

    The result is pretty-printed as a Python dict structure.
    """
    online_suffix = "[online]"

    # Start from an existing ``issns`` dict instead of an empty one to
    # extend it (e.g. in case of manual addition).
    name_to_issn = {}
    for rec_id in perform_request_search(cc='Periodicals', of='id'):
        names = get_fieldvalues(rec_id, '210__%')
        issns = get_fieldvalues(rec_id, '022__a')
        if not issns:
            continue
        first_issn = issns[0]  # There should be only one ISSN
        for raw_name in names:
            # Depending on how journal names are entered into the database,
            # you might want to do some processing before saving:
            name = raw_name.lower().strip()
            if name.endswith(online_suffix):
                name = name[:-len(online_suffix)].rstrip()
            name_to_issn[name] = first_issn

    pprint.PrettyPrinter(indent=4).pprint(name_to_issn)
Beispiel #9
0
def perform_candidate_record_search(requestType, data):
    """Handle search requests.

    @param requestType: "searchCandidates" to search the records matching
        data['query'], or "searchRevisions" to list the revisions of the
        record data['recID1']
    @param data: dictionary with the arguments of the request
    @return: dictionary with 'resultCode' (0 on success, 1 on failure),
        'resultText' and, on success, 'results'
    """
    max_results = 999
    result = {
        'resultCode': 0,
        'resultText': ''
        }

    if requestType == "searchCandidates":
        recids = perform_request_search(p=data['query'])
        if len(recids) > max_results:
            result['resultCode'] = 1
            result['resultText'] = 'Too many results'
            return result
        captions = [search_result_info(x) for x in recids]
        alternative_titles = [remove_html_markup(print_record(x, "hs"))
                              for x in recids]
        search_results = [recids, captions, alternative_titles]
    elif requestType == "searchRevisions":
        revisions = get_record_revision_ids(data['recID1'])
        captions = [split_revid(x, 'datetext')[1] for x in revisions]
        search_results = [revisions, captions]
    else:
        # An unsupported request used to crash with UnboundLocalError on
        # `search_results`; report it as a failed request instead.
        result['resultCode'] = 1
        result['resultText'] = 'Unknown request type: %s' % (requestType,)
        return result

    result['results'] = search_results
    result['resultText'] = '%s results' % len(search_results[0])
    return result
Beispiel #10
0
def get_recids_for_set_spec(set_spec):
    """
    Returns the recids (as intbitset) of the records belonging to a set.

    Every definition of the set is run as a separate search and the
    results are merged.

    Parameters:

      set_spec - *str* the set_spec for which we would like to get the
                 recids
    """
    # entries of a set definition handed over unchanged to the search
    criteria_keys = ("p1", "f1", "m1", "op1",
                     "p2", "f2", "m2", "op2",
                     "p3", "f3", "m3")

    matched = intbitset()
    for definition in get_set_definitions(set_spec):
        collections = [name.strip() for name in definition["c"].split(",")]
        criteria = dict((key, definition[key]) for key in criteria_keys)
        found = perform_request_search(c=collections, ap=0, **criteria)
        matched |= intbitset(found)
    return matched
Beispiel #11
0
def _submit_changes_to_bibupload(search_criteria, update_commands, upload_mode, tag_list, collection, req, checked_records):
    """Submit the changes to the server through bibupload.

    Only the records that both match the search and appear in
    checked_records are updated and written to the upload file.

    @param search_criteria: the search criteria used for filtering the
    records. The changes will be applied to the records matching
    the criteria

    @param update_commands: the commands defining the changes. These
    commands perform the necessary changes before the records are submitted

    @param collection: collection to search in; "Any collection" means no
    restriction

    @param checked_records: ids of the records selected for the update

    @return: the value returned by _upload_file_with_bibupload
    """
    searched_collection = "" if collection == "Any collection" else collection
    matching_ids = search_engine.perform_request_search(
        p=search_criteria, c=searched_collection)

    # keep only the matching records that were also checked
    selected_ids = set(matching_ids) & set(checked_records)
    updated_records = [_get_updated_record(recid, update_commands)
                       for recid in selected_ids]

    file_path = _get_file_path_for_bibupload()
    _save_records_xml(updated_records, file_path, upload_mode, tag_list)
    # the number handed over is the number of records matching the search
    return _upload_file_with_bibupload(
        file_path, upload_mode, len(matching_ids), req)
def format_element(bfo, newline=False, show_doi=False):
    """Print link to proceedings if the proceedings exist.

    The proceedings are searched by the value of field 111__g.  A single
    hit gives one "Proceedings" link, several hits give a numbered list
    of links.

    @param newline: if True, add <br /> at the end
    @param show_doi: if True, show DOI of the proceeding in brackets
    @return: the HTML, or an empty string when there is nothing to link
    """
    cnum = str(bfo.field('111__g'))
    if not cnum:
        # something is wrong, return empty string
        return ""

    recids = perform_request_search(
        p="773__w:" + cnum + " and 980__a:proceedings")
    if not recids:
        return ""

    if len(recids) == 1:
        # only one proceeding
        out = '<a href="/record/' + str(recids[0]) + '">Proceedings</a>'
    else:
        # multiple proceedings
        links = []
        for number, recid in enumerate(recids, 1):
            # check for the DOI and put it in brackets in the output
            doi = get_fieldvalues(recid, '0247_a')
            if show_doi and doi:
                link = ('<a href="/record/%(ID)s">#%(number)s</a> (DOI: <a href="http://dx.doi.org/%(doi)s">%(doi)s</a>)'
                        % {'ID': recid, 'number': number, 'doi': doi[0]})
            else:
                link = '<a href="/record/%(ID)s">#%(number)s</a>' % {'ID': recid, 'number': number}
            links.append(link)
        out = 'Proceedings: ' + ', '.join(links)

    if newline:
        out += '<br/>'
    return out
Beispiel #13
0
def get_recids_for_set_spec(set_spec):
    """
    Returns the recids (as intbitset) of the records belonging to a set.

    One search is run per definition of the set; the union of all the
    results is returned.

    Parameters:

      set_spec - *str* the set_spec for which we would like to get the
                 recids
    """
    union = intbitset()

    for definition in get_set_definitions(set_spec):
        # the collections are stored as one comma-separated string
        collections = [part.strip() for part in definition['c'].split(',')]
        search_args = {'c': collections, 'ap': 0}
        for key in ('p1', 'f1', 'm1', 'op1',
                    'p2', 'f2', 'm2', 'op2',
                    'p3', 'f3', 'm3'):
            search_args[key] = definition[key]
        union |= intbitset(perform_request_search(**search_args))

    return union
Beispiel #14
0
def _get_pubs_fallback(person_id):
    '''
    Search the publications of a person with an exactauthor query.
    @param person_id: int person id
    @return: the result of the search
    '''
    author_query = 'exactauthor:"%s"' % str(person_id)
    return perform_request_search(rg=0, p=author_query)
Beispiel #15
0
def query_records(params):
    """Produce record IDs from given query parameters.

    By passing the appropriate CLI options, we can query here for
    additional records.

    :param params: dictionary with the keys 'field', 'collection',
        'pattern' and 'matching'
    :return: intbitset of record IDs; empty when neither field, collection
        nor pattern is set
    """
    write_message("Querying database (records query)...")

    if not (params['field'] or params['collection'] or params['pattern']):
        return intbitset()

    if params['collection']:
        # perform_request_search is needed when the '-c' argument has been
        # defined, as it is not supported by search_pattern()
        found = perform_request_search(req=None,
                                       of='id',
                                       c=params['collection'],
                                       p=params['pattern'],
                                       f=params['field'])
        return intbitset(found)

    # use search_pattern() whenever possible, as it can search
    # even in private collections
    return search_pattern(p=params['pattern'],
                          f=params['field'],
                          m=params['matching'])
Beispiel #16
0
def perform_candidate_record_search(requestType, data):
    """Handle search requests.

    @param requestType: "searchCandidates" to search the records matching
        data['query'], or "searchRevisions" to list the revisions of the
        record data['recID1']
    @param data: dictionary with the arguments of the request
    @return: dictionary with 'resultCode' (0 on success, 1 on failure),
        'resultText' and, on success, 'results'
    """
    max_results = 999

    def failure(text):
        # a failed request carries no 'results' entry
        return {'resultCode': 1, 'resultText': text}

    if requestType == "searchCandidates":
        recids = perform_request_search(p=data['query'])
        if len(recids) > max_results:
            return failure('Too many results')
        captions = [search_result_info(x) for x in recids]
        alternative_titles = [
            remove_html_markup(print_record(x, "hs")) for x in recids
        ]
        search_results = [recids, captions, alternative_titles]
    elif requestType == "searchRevisions":
        revisions = get_record_revision_ids(data['recID1'])
        captions = [split_revid(x, 'datetext')[1] for x in revisions]
        search_results = [revisions, captions]
    else:
        # An unsupported request used to crash with UnboundLocalError on
        # `search_results`; report it as a failed request instead.
        return failure('Unknown request type: %s' % (requestType,))

    return {
        'resultCode': 0,
        'resultText': '%s results' % len(search_results[0]),
        'results': search_results,
    }
Beispiel #17
0
def build_issns_from_local_site():
    """
    Retrieve the ISSNs from the local database and print the
    "journal name -> issn" relation as a Python dict structure.

    For every record of the 'Periodicals' collection that has an ISSN
    (field 022__a), each of its journal names (fields 210__%) is mapped to
    the first ISSN of the record.

    Journal names are normalized a little bit:
        - strip whitespace chars (left and right)
        - all lower case
        - remove "[Online]" suffix
    """

    def normalize(journal_name):
        # Depending on how journal names are entered into the database,
        # you might want to do some more processing here.
        normalized = journal_name.lower().strip()
        if normalized.endswith("[online]"):
            normalized = normalized[:-8].rstrip()
        return normalized

    # Initialize with an existing ``issns`` dict instead to extend it
    # (e.g. in case of manual addition).
    collected = {}
    for rec_id in perform_request_search(cc='Periodicals', of='id'):
        journal_names = get_fieldvalues(rec_id, '210__%')
        issn_values = get_fieldvalues(rec_id, '022__a')
        if issn_values:
            # There should be only one ISSN
            for journal_name in journal_names:
                collected[normalize(journal_name)] = issn_values[0]

    printer = pprint.PrettyPrinter(indent=4)
    printer.pprint(collected)
def get_child_htmls(this_recID, cc_val, c_val, record_url_pattern,
                    link_pattern):
    """Build the HTML links to the children of a record.

    Children aren't referenced by parents, so we need special treatment to
    find them: for each control number of the record, look for the records
    having a 510 field with that control number in subfield 4 and 't' in
    subfield w.

    All control numbers are processed and a list is always returned
    (earlier, only the first control number was looked at and None was
    returned for a record without control numbers).

    @param this_recID: id of the parent record
    @param cc_val: value passed as `cc` to the search
    @param c_val: value passed as `c` to the search
    @param record_url_pattern: pattern with one placeholder, the record id
    @param link_pattern: pattern with two placeholders, the url and the
        text to display
    @return: list of HTML links, one per child record
    """
    child_htmls = []
    linked_recIDs = set()  # a child is linked only once
    for control_no in get_control_nos_from_recID(this_recID):
        p_val = '510%4:"' + control_no + '" and 510%w:t'
        # find a first, fuzzy result set
        # narrowing down on a few possible recIDs
        candidates = perform_request_search(cc=cc_val,
                                            c=c_val,
                                            p=p_val)
        # now filter to find the ones where the subfield conditions of p_val
        # are both true within the exact same field
        sf_req = [('w', 't'), ('4', control_no)]
        for recID in candidates:
            if recID in linked_recIDs:
                continue
            if not match_all_subfields_for_tag(recID, '510', sf_req):
                continue
            linked_recIDs.add(recID)
            # proceed with assembling the html link
            url = record_url_pattern % str(recID)
            display = guess_main_name_from_authority_recID(recID) or str(recID)
            child_htmls.append(link_pattern % (url, display))
    return child_htmls
def goto(cc=CFG_SITE_NAME, p='', f='', sf='date', so='d',
         docname='', format=''):
    """
    Redirect the user to the latest record in the given collection.

    Redirect the user to the latest record in the given collection,
    optionally within the specified pattern and field. If docname
    and format are specified, redirect the user to the corresponding
    docname and format. If docname is not specified, but there is
    only a single bibdoc attached to the record, redirect to that
    one.

    @param cc: collection to search in
    @param p: search pattern
    @param f: search field
    @param sf: field to sort by
    @param so: sort order
    @param docname: name of the document to redirect to
    @param format: format of the file to redirect to
    @return: the URL of the file when it can be determined, otherwise the
        URL of the record; None when the search finds no record
    """
    recids = perform_request_search(cc=cc, p=p, f=f, sf=sf, so=so)
    if not recids:
        return None

    # The first is the most recent because they are sorted by date
    # descending.
    recid = recids[0]
    url = '/%s/%s' % (CFG_SITE_RECORD, recid)
    if not format:
        return url

    # one BibRecDocs instance serves both the name lookup and the retrieval
    bibrecdocs = BibRecDocs(recid)
    if not docname:
        docnames = bibrecdocs.get_bibdoc_names()
        if len(docnames) != 1:
            # no way to choose a document: fall back to the record
            return url
        docname = docnames[0]

    try:
        bibdoc = bibrecdocs.get_bibdoc(docname)
        return bibdoc.get_file(format=format).get_url()
    except InvenioBibDocFileError:
        # unknown document or format: fall back to the record
        return url
Beispiel #20
0
def get_recids_for_set_spec(set_spec):
    """
    Returns the recids (as intbitset) of the records belonging to a set.

    The searches described by the definitions of the set are run one after
    the other and their results are accumulated.

    Parameters:

      set_spec - *str* the set_spec for which we would like to get the
                 recids
    """
    def search_definition(set_def):
        # the collections are stored as one comma-separated string
        names = set_def['c'].split(',')
        return perform_request_search(
            c=[name.strip() for name in names],
            p1=set_def['p1'], f1=set_def['f1'], m1=set_def['m1'],
            op1=set_def['op1'],
            p2=set_def['p2'], f2=set_def['f2'], m2=set_def['m2'],
            op2=set_def['op2'],
            p3=set_def['p3'], f3=set_def['f3'], m3=set_def['m3'],
            ap=0)

    all_recids = intbitset()
    for set_def in get_set_definitions(set_spec):
        all_recids |= intbitset(search_definition(set_def))
    return all_recids
Beispiel #21
0
def query_records(params):
    """Produce record IDs from given query parameters.

    By passing the appropriate CLI options, we can query here for
    additional records.

    :param params: dictionary with the keys 'field', 'collection',
        'pattern' and 'matching'
    :return: intbitset of record IDs; empty when neither field, collection
        nor pattern is set
    """
    write_message("Querying database (records query)...")

    nothing_requested = not (params['field'] or params['collection']
                             or params['pattern'])
    if nothing_requested:
        return intbitset()

    if not params['collection']:
        # use search_pattern() whenever possible, as it can search
        # even in private collections
        return search_pattern(p=params['pattern'],
                              f=params['field'],
                              m=params['matching'])

    # use perform_request_search when '-c' argument has been
    # defined, as it is not supported by search_pattern()
    recids = perform_request_search(req=None,
                                    of='id',
                                    c=params['collection'],
                                    p=params['pattern'],
                                    f=params['field'])
    return intbitset(recids)
Beispiel #22
0
def get_kbd_values_by_def(confdict, searchwith=""):
    """Return a list of values by searching a dynamic kb.

    A ``%`` in the expression is replaced by ``searchwith``; an expression
    without ``%`` is combined with ``searchwith`` using ``and``.  Without
    an expression the field itself is searched.

    :param confdict: dictionary with keys "field", "expression"
        and "collection" name; only "field" is mandatory
    :param searchwith: a term to search with
    :return: list of values; empty when ``confdict`` is empty, has no
        "field", or no record matches
    """
    # get the configuration so that we see what the field is
    if not confdict or 'field' not in confdict:
        return []
    field = confdict['field']
    # Only "field" is mandatory.  A missing "expression" is treated like an
    # empty one (plain search on the field) rather than raising KeyError.
    expression = confdict.get('expression', "")
    collection = confdict.get('collection', "")

    # imported lazily so that the guards above need no search engine
    from invenio.legacy import search_engine

    if expression and searchwith:
        if '%' in expression:
            search_kwargs = {'p': expression.replace("%", searchwith)}
        else:
            # no %.. just make a combination
            search_kwargs = {'p': expression + " and " + searchwith}
    elif expression:
        # only the expression is available
        search_kwargs = {'p': expression}
    else:
        # make a fake expression so that only records that have this field
        # will be returned, unless a search term was given
        search_kwargs = {'f': field, 'p': searchwith or "/.*/"}

    reclist = search_engine.perform_request_search(cc=collection,
                                                   **search_kwargs)
    if not reclist:
        return []  # in case nothing worked
    return [val for (val, dummy) in
            search_engine.get_most_popular_field_values(reclist, field)]
Beispiel #23
0
def cb_parse_option(key, value, opts, args):
    """Parse one command-line option of a refextract task.

    Must be defined for bibtask to create a task.

    :param key: the option name as typed (e.g. ``-a`` or ``--kb-books``)
    :param value: the option value; used by the ``--kb-*``, collection and
        id options
    :param opts: not used by this function
    :param args: standalone arguments; any such argument is an error
    :return: True, including for keys that match no branch below
    :raise StandardError: for standalone arguments and for the retired
        ``--inspire``, ``-r``/``--recids`` and ``-f`` options
    """
    if args:
        # There should be no standalone arguments for any refextract job
        # This will catch args before the job is shipped to Bibsched
        raise StandardError("Error: Unrecognised argument '%s'." % args[0])

    if key in ('-a', '--new'):
        task_set_option('new', True)
        task_set_option('no-overwrite', True)
    elif key in ('-m', '--modified'):
        task_set_option('modified', True)
        task_set_option('no-overwrite', True)
    elif key == '--inspire':
        msg = """The --inspire option does not exist anymore.
Please set the config variable CFG_INSPIRE_SITE instead."""
        raise StandardError(msg)
    elif key in ('--kb-reports', '--kb-journals', '--kb-journals-re',
                 '--kb-authors', '--kb-books', '--kb-conferences'):
        # the task option is the key without its two leading dashes
        task_set_option(key[2:], value)
    elif key in ('--create-ticket', '--no-overwrite', '--arxiv'):
        # Plain flags.  '--arxiv' used to be tested with
        # ``key in ('--arxiv')``, which is a substring test on a string
        # and therefore also matched keys such as '--' or ''.
        task_set_option(key[2:], True)
    elif key in ('-c', '--collections'):
        collections = task_get_option('collections')
        if not collections:
            collections = set()
            task_set_option('collections', collections)
        for v in value.split(","):
            collections.update(perform_request_search(c=v))
    elif key in ('-i', '--id'):
        recids = task_get_option('recids')
        if not recids:
            recids = set()
            task_set_option('recids', recids)
        recids.update(split_ids(value))
    elif key in ('-r', '--recids'):
        msg = """The --recids has been renamed.
please use --id for specifying recids."""
        raise StandardError(msg)
    elif key == '-f':
        msg = """refextract is now used to run in daemon mode only.
If you would like to run reference extraction on a standalone PDF file,
please use "docextract file.pdf\""""
        raise StandardError(msg)

    return True
Beispiel #24
0
    def tmpl_papers_box(self, req, pubs, bibauthorid_data, num_downloads, ln, add_box=True, loading=False):
        """Build the "Papers" box of an author page.

        :param req: request object (not used in this method)
        :param pubs: record ids of the author's papers
        :param bibauthorid_data: dict read for its "cid" and "pid" keys
        :param num_downloads: download count, appended when
            CFG_BIBRANK_SHOW_DOWNLOAD_STATS is set and the count is non-zero
        :param ln: language code passed to gettext_set_language
        :param add_box: when false, return only the inner HTML instead of
            the complete result box
        :param loading: when true, show the loading placeholder
        :return: HTML string
        """
        _ = gettext_set_language(ln)
        if loading:
            line2 = self.loading_html()
        elif not pubs:
            line2 = _("No Papers")
        else:
            ib_pubs = intbitset(pubs)
            # Start from an empty author restriction: previously a profile
            # with neither a canonical id nor a person id above -1 left
            # baid_query unbound and raised UnboundLocalError.
            baid_query = ""
            if bibauthorid_data["cid"]:
                baid_query = 'exactauthor:%s' % wrap_author_name_in_quotes_if_needed(bibauthorid_data["cid"])
            elif bibauthorid_data["pid"] > -1:
                baid_query = 'exactauthor:%s' % wrap_author_name_in_quotes_if_needed(bibauthorid_data["pid"])
            baid_query = baid_query + " "

            rec_query = baid_query
            searchstr = create_html_link(websearch_templates.build_search_url(p=rec_query),
                                         {}, "<strong>" + "All papers (" + str(len(pubs)) + ")" + "</strong>",)

            line2 = searchstr

            if CFG_BIBRANK_SHOW_DOWNLOAD_STATS and num_downloads:
                line2 += " (" + _("downloaded") + " "
                line2 += str(num_downloads) + " " + _("times") + ")"

            if CFG_INSPIRE_SITE:
                CFG_COLLS = ['Book',
                             'ConferencePaper',
                             'Introductory',
                             'Lectures',
                             'Preprint',
                             'Published',
                             'Review',
                             'Thesis']
            else:
                CFG_COLLS = ['Article',
                             'Book',
                             'Preprint', ]

            # (collection name, number of the author's papers in it),
            # keeping only the collections that contain at least one paper
            coll_counts = []
            for coll in CFG_COLLS:
                coll_papers = list(ib_pubs & intbitset(perform_request_search(rg=0, f="collection", p=coll)))
                if coll_papers:
                    coll_counts.append((coll, len(coll_papers)))
            # Most populated collection first.  The sort is stable, so
            # collections with equal counts keep their CFG_COLLS order.
            coll_counts.sort(key=lambda pair: pair[1], reverse=True)
            for coll, count in coll_counts:
                rec_query = baid_query + 'collection:' + wrap_author_name_in_quotes_if_needed(coll)
                line2 += "<br />" + create_html_link(websearch_templates.build_search_url(p=rec_query),
                                                     {}, coll + " (" + str(count) + ")",)

        if not add_box:
            return line2
        line1 = "<strong>" + _("Papers") + "</strong>"
        papers_box = self.tmpl_print_searchresultbox("papers", line1, line2)
        return papers_box
Beispiel #25
0
def cb_parse_option(key, value, opts, args):
    """Parse one command-line option of a refextract task.

    Must be defined for bibtask to create a task.

    :param key: the option name as typed (e.g. ``-a`` or ``--kb-books``)
    :param value: the option value; used by the ``--kb-*``, collection and
        id options
    :param opts: not used by this function
    :param args: standalone arguments; any such argument is an error
    :return: True, including for keys that match no branch below
    :raise StandardError: for standalone arguments and for the retired
        ``--inspire``, ``-r``/``--recids`` and ``-f`` options
    """
    if args:
        # There should be no standalone arguments for any refextract job
        # This will catch args before the job is shipped to Bibsched
        raise StandardError("Error: Unrecognised argument '%s'." % args[0])

    if key in ('-a', '--new'):
        task_set_option('new', True)
        task_set_option('no-overwrite', True)
    elif key in ('-m', '--modified'):
        task_set_option('modified', True)
        task_set_option('no-overwrite', True)
    elif key == '--inspire':
        msg = """The --inspire option does not exist anymore.
Please set the config variable CFG_INSPIRE_SITE instead."""
        raise StandardError(msg)
    elif key in ('--kb-reports', '--kb-journals', '--kb-journals-re',
                 '--kb-authors', '--kb-books', '--kb-conferences'):
        # the task option is the key without its two leading dashes
        task_set_option(key[2:], value)
    elif key in ('--create-ticket', '--no-overwrite', '--arxiv'):
        # Plain flags.  '--arxiv' used to be tested with
        # ``key in ('--arxiv')``, which is a substring test on a string
        # and therefore also matched keys such as '--' or ''.
        task_set_option(key[2:], True)
    elif key in ('-c', '--collections'):
        collections = task_get_option('collections')
        if not collections:
            collections = set()
            task_set_option('collections', collections)
        for v in value.split(","):
            collections.update(perform_request_search(c=v))
    elif key in ('-i', '--id'):
        recids = task_get_option('recids')
        if not recids:
            recids = set()
            task_set_option('recids', recids)
        recids.update(split_ids(value))
    elif key in ('-r', '--recids'):
        msg = """The --recids has been renamed.
please use --id for specifying recids."""
        raise StandardError(msg)
    elif key == '-f':
        msg = """refextract is now used to run in daemon mode only.
If you would like to run reference extraction on a standalone PDF file,
please use "docextract file.pdf\""""
        raise StandardError(msg)

    return True
Beispiel #26
0
    def _get_records_modified_last_month(self):
        """Search ``self._collections`` for records dated within the last 31 days.

        The search is issued with ``dt="m"`` and a start date of today minus
        31 days; see the FIXME notes below for known limitations.
        """
        since = datetime.date.today() - datetime.timedelta(days=31)

        #FIXME: Return only records with full texts available for Google Scholar
        #FIXME: There is a problem with searching in modification date. It searches only in creation date
        search_args = dict(dt="m",
                           c=self._collections,
                           d1y=since.year,
                           d1m=since.month,
                           d1d=since.day)
        return perform_request_search(**search_args)
Beispiel #27
0
    def widget(self):
        """Render the deposits widget for the current user.

        Counts the records whose field "8560_" matches the user's e-mail
        address and passes the e-mail, the field name and that count to the
        'deposits_user_settings.html' template.
        """
        email_field = "8560_"
        email = User.query.get(current_user.get_id()).email
        matching_records = perform_request_search(f=email_field, p=email, of="id")

        template_vars = {
            'email': email,
            'email_field': email_field,
            'deposit_count': len(matching_records),
        }
        return render_template_to_string('deposits_user_settings.html',
                                         **template_vars)
Beispiel #28
0
    def __call__(self, req, form):
        """Run the search engine for the record ``self.recid``.

        Washes the URL arguments, applies the user's preferences and the
        site limits, checks that the user may view the record, and returns
        the output of perform_request_search (or a redirect / "not
        authorized" page).

        :param req: request object
        :param form: submitted form / URL arguments
        """
        argd = wash_search_urlargd(form)
        argd['recid'] = self.recid

        if self.format is not None:
            argd['of'] = self.format
        req.argd = argd

        uid = getUid(req)
        if uid == -1:
            return page_not_authorized(req, "../",
                                       text="You are not authorized to view this record.",
                                       navmenuid='search')
        if uid > 0:
            pref = get_user_preferences(uid)
            try:
                if 'rg' not in form:
                    # the URL did not set rg, so the stored preference applies
                    argd['rg'] = int(pref['websearch_group_records'])
            except (KeyError, ValueError):
                pass

        # May this user see the record at all?  The answer is acted upon
        # further down, once the arguments have been sanitised.
        user_info = collect_user_info(req)
        auth_code, auth_msg = check_user_can_view_record(user_info, self.recid)

        # cap the group size unless the user may run bibedit
        if argd['rg'] > CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS:
            if acc_authorize_action(req, 'runbibedit')[0] != 0:
                argd['rg'] = CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS

        # cap the wildcard limit the same way (0 means "no limit" requested)
        if CFG_WEBSEARCH_WILDCARD_LIMIT > 0 and (argd['wl'] > CFG_WEBSEARCH_WILDCARD_LIMIT or argd['wl'] == 0):
            if acc_authorize_action(req, 'runbibedit')[0] != 0:
                argd['wl'] = CFG_WEBSEARCH_WILDCARD_LIMIT

        # the verbose (debug) parameter is reserved to superadmins
        if not isUserSuperAdmin(user_info):
            argd['verbose'] = 0

        if auth_code:
            if user_info['email'] == 'guest':
                # guests are sent to the login page and back
                cookie = mail_cookie_create_authorize_action(
                    VIEWRESTRCOLL,
                    {'collection': guess_primary_collection_of_a_record(self.recid)})
                login_args = {'action': cookie,
                              'ln': argd['ln'],
                              'referer': CFG_SITE_SECURE_URL + req.unparsed_uri}
                target = CFG_SITE_SECURE_URL + '/youraccount/login' + \
                    make_canonical_urlargd(login_args, {})
                return redirect_to_url(req, target, norobot=True)
            return page_not_authorized(req, "../",
                                       text=auth_msg,
                                       navmenuid='search')

        # mod_python does not like to return [] in case when of=id:
        out = perform_request_search(req, **argd)
        if isinstance(out, intbitset):
            return out.fastdump()
        if out == []:
            return str(out)
        return out
Beispiel #29
0
    def __call__(self, req, form):
        """Run the search engine for the record ``self.recid``.

        Washes the URL arguments, applies the user's preferences and the
        site limits, refuses access when the record's primary collection is
        restricted and the user is not authorized for it, and returns the
        output of perform_request_search.

        :param req: request object
        :param form: submitted form / URL arguments
        """
        argd = wash_search_urlargd(form)
        argd['recid'] = self.recid
        if self.format is not None:
            argd['of'] = self.format

        req.argd = argd

        uid = getUid(req)
        user_info = collect_user_info(req)
        if uid == -1:
            return page_not_authorized(req, "../",
                                       text="You are not authorized to view this record.",
                                       navmenuid='search')
        if uid > 0:
            pref = get_user_preferences(uid)
            try:
                if 'rg' not in form:
                    # the URL did not set rg, so the stored preference applies
                    argd['rg'] = int(pref['websearch_group_records'])
            except (KeyError, ValueError):
                pass

        # cap the group size unless the user may run bibedit
        if argd['rg'] > CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS:
            if acc_authorize_action(req, 'runbibedit')[0] != 0:
                argd['rg'] = CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS

        # cap the wildcard limit the same way (0 means "no limit" requested)
        if CFG_WEBSEARCH_WILDCARD_LIMIT > 0 and (argd['wl'] > CFG_WEBSEARCH_WILDCARD_LIMIT or argd['wl'] == 0):
            if acc_authorize_action(req, 'runbibedit')[0] != 0:
                argd['wl'] = CFG_WEBSEARCH_WILDCARD_LIMIT

        # the verbose (debug) parameter is reserved to superadmins
        if not isUserSuperAdmin(user_info):
            argd['verbose'] = 0

        primary_coll = guess_primary_collection_of_a_record(self.recid)
        if collection_restricted_p(primary_coll):
            auth_code, dummy = acc_authorize_action(user_info, VIEWRESTRCOLL,
                                                    collection=primary_coll)
            if auth_code:
                return page_not_authorized(req, "../",
                                           text="You are not authorized to view this record.",
                                           navmenuid='search')

        # Keep all the arguments, they might be reused in the
        # record page itself to derive other queries
        req.argd = argd

        # mod_python does not like to return [] in case when of=id:
        out = perform_request_search(req, **argd)
        if isinstance(out, intbitset):
            return out.fastdump()
        if out == []:
            return str(out)
        return out
Beispiel #30
0
def get_widget_html(language, max_photos, collections, separator, ln):
    """Return the HTML content of the latest-photos widget.

    :param language: when "fr", the title is taken from field 246_1a
        first, falling back to 245__a; otherwise 245__a is used
    :param max_photos: maximum number of records to look at
    :param collections: collections passed to the search as ``c``
    :param separator: string placed between the generated links
    :param ln: language code put into the record links
    :return: the generated links joined by ``separator``; records without
        a usable icon URL are left out
    """
    latest_photo_ids = perform_request_search(c=collections,
                                              rg=max_photos,
                                              of='id')
    images_urls = []
    for recid in latest_photo_ids[:max_photos]:
        try:
            photo_record = BibFormatObject(recid)
        except Exception:
            # todo: Exception, no photo in this selection
            continue

        # ``fields(...)[0]`` raises IndexError when the field is absent
        # and the returned list is empty, so that error is handled too.
        if language == "fr":
            try:
                title = photo_record.fields('246_1a', escape=1)[0]
            except (KeyError, IndexError):
                try:
                    title = photo_record.fields('245__a', escape=1)[0]
                except Exception:
                    title = ""
        else:
            try:
                title = photo_record.fields('245__a', escape=1)[0]
            except (KeyError, IndexError):
                # todo: exception, picture with no title
                title = ""

        if CFG_CERN_SITE and photo_record.fields('8567_'):
            # Get from 8567_
            dfs_images = photo_record.fields('8567_')
            for image_block in dfs_images:
                if image_block.get("y", '') == "Icon":
                    if image_block.get("u", '').startswith("http://"):
                        images_urls.append((recid, image_block["u"], title))
                        break  # Just one image per record

        else:
            # Get from 8564_
            images = photo_record.fields('8564_')
            for image_block in images:
                if image_block.get("x", '').lower() == "icon":
                    if image_block.get("q", '').startswith("http://"):
                        images_urls.append((recid, image_block["q"], title))
                        break  # Just one image per record

    # Build output
    html_out = separator.join([
        '<a href="%s/%s/%i?ln=%s"><img class="phr" width="100" height="67" src="%s"/>%s</a>'
        % (CFG_SITE_URL, CFG_SITE_RECORD, recid, ln, photo_url, title)
        for (recid, photo_url, title) in images_urls
    ])

    return html_out
Beispiel #31
0
def get_record_ids(argstr, date_from, date_until):
    """Return the local and external records found for a query and timeframe.

    :param argstr: URL query string of the stored search
    :param date_from: start of the timeframe (converted by _date_to_tuple)
    :param date_until: end of the timeframe (converted by _date_to_tuple)
    :return: tuple ``(recids, external_records)``; ``external_records`` is
        always ``([], [])`` because hosted collections are not supported
        here.  ``([], ([], []))`` is returned when none of the collections
        named in the query exist any more.
    """

    argd = wash_urlargd(parse_qs(argstr), websearch_templates.search_results_default_urlargd)
    p       = argd.get('p', [])
    c       = argd.get('c', [])
    cc      = argd.get('cc', [])
    aas     = argd.get('aas', [])
    f       = argd.get('f', [])
    so      = argd.get('so', [])
    sp      = argd.get('sp', [])
    ot      = argd.get('ot', [])
    p1      = argd.get('p1', [])
    f1      = argd.get('f1', [])
    m1      = argd.get('m1', [])
    op1     = argd.get('op1', [])
    p2      = argd.get('p2', [])
    f2      = argd.get('f2', [])
    m2      = argd.get('m2', [])
    # was read from the key 'op3', which dropped the second operator
    op2     = argd.get('op2', [])
    p3      = argd.get('p3', [])
    f3      = argd.get('f3', [])
    m3      = argd.get('m3', [])
    sc      = argd.get('sc', [])

    d1y, d1m, d1d = _date_to_tuple(date_from)
    d2y, d2m, d2d = _date_to_tuple(date_until)

    #alerts might contain collections that have been deleted
    #check if such collections are in the query, and if yes, do not include them in the search
    cc = Collection.query.filter_by(name=cc).value('name')
    if not cc and not c: #the alarm was for an entire collection that does not exist anymore
        return ([], ([], []))
    if c: # some collections were defined in the query
        c = [c_norm_name for c_norm_name in [
            Collection.query.filter_by(name=c_name).value('name')
            for c_name in c] if c_norm_name]
        # remove unknown collections from c
        if not c: #none of the collection selected in the alert still exist
            return ([], ([], []))

    # washed_colls = wash_colls(cc, c, sc, 0)
    # hosted_colls = washed_colls[3]
    # if hosted_colls:
    #     req_args = "p=%s&f=%s&d1d=%s&d1m=%s&d1y=%s&d2d=%s&d2m=%s&d2y=%s&ap=%i" % (p, f, d1d, d1m, d1y, d2d, d2m, d2y, 0)
    #     external_records = calculate_external_records(req_args, [p, p1, p2, p3], f, hosted_colls, CFG_EXTERNAL_COLLECTION_TIMEOUT, CFG_EXTERNAL_COLLECTION_MAXRESULTS_ALERTS)
    # else:
    # FIXME: removed support for hosted collections
    external_records = ([], [])

    recids = perform_request_search(of='id', p=p, c=c, cc=cc, f=f, so=so, sp=sp, ot=ot,
                                  aas=aas, p1=p1, f1=f1, m1=m1, op1=op1, p2=p2, f2=f2,
                                  m2=m2, op2=op2, p3=p3, f3=f3, m3=m3, sc=sc, d1y=d1y,
                                  d1m=d1m, d1d=d1d, d2y=d2y, d2m=d2m, d2d=d2d)

    return (recids, external_records)
Beispiel #32
0
    def _get_records_modified_last_month(self, export_name, export_pattern):
        """Search for records matching a pattern, dated within the last 31 days.

        :param export_name: not used by this method
        :param export_pattern: search pattern passed as ``p``
        :return: the result of perform_request_search with ``dt="m"`` and a
            start date of today minus 31 days
        """
        since = datetime.date.today() - datetime.timedelta(days=31)
        search_args = dict(dt="m",
                           p=export_pattern,
                           d1y=since.year,
                           d1m=since.month,
                           d1d=since.day)
        return perform_request_search(**search_args)
Beispiel #33
0
def search(collection, p, of, ot, so, rm):
    """Render the search results page.

    Delegates to ``browse()`` for browse requests, redirects a bare
    ``?c=<name>`` request to the collection page, and otherwise runs the
    search and hands the record ids to ``response_formated_records``.
    """
    from invenio.legacy.search_engine import perform_request_search

    browsing = ('action_browse' in request.args
                or request.args.get('action', '') == 'browse')
    if browsing:
        return browse()

    # a request whose only argument is a single collection is a
    # collection page request
    if 'c' in request.args and len(request.args) == 1 \
            and len(request.args.getlist('c')) == 1:
        return redirect(url_for('.collection', name=request.args.get('c')))

    # argd and argd_orig are deliberately the same object, as before
    argd_orig = wash_search_urlargd(request.args)
    argd = argd_orig
    argd['of'] = 'id'

    # fix for queries like `/search?p=+ellis`
    if 'p' in argd:
        argd['p'] = argd['p'].strip()

    # the user's stored rg preference applies unless the request sets rg
    if 'rg' not in request.values and current_user.get('rg'):
        argd['rg'] = int(current_user.get('rg'))
    rg = int(argd['rg'])

    collection_breadcrumbs(collection)

    qid = get_search_query_id(**argd)
    recids = perform_request_search(req=request.get_legacy_request(), **argd)

    # back-to-search related code
    if request and not isinstance(request.get_legacy_request(),
                                  cStringIO.OutputType):
        # remember the URL of the last search results page
        session['websearch-last-query'] = \
            request.get_legacy_request().unparsed_uri
        hit_limit = current_app.config['CFG_WEBSEARCH_PREV_NEXT_HIT_LIMIT']
        # the hit list is only remembered while it stays within the limit
        session["websearch-last-query-hits"] = \
            None if len(recids) > hit_limit else recids

    ctx = {
        'facets': facets.get_facets_config(collection, qid),
        'records': len(get_current_user_records_that_can_be_displayed(qid)),
        'qid': qid,
        'rg': rg,
        'create_nearest_terms_box':
            lambda: _create_neareset_term_box(argd_orig),
        'easy_search_form': EasySearchForm(csrf_enabled=False),
        'ot': ot,
    }

    return response_formated_records(recids, collection, of, **ctx)
Beispiel #34
0
def search(collection, p, of, ot, so, rm):
    """Render the search results page.

    Delegates to ``browse()`` for browse requests, redirects a bare
    ``?c=<name>`` request to the collection page, and otherwise runs the
    search and hands the record ids to ``response_formated_records``.
    """
    from invenio.legacy.search_engine import perform_request_search

    browsing = ('action_browse' in request.args
                or request.args.get('action', '') == 'browse')
    if browsing:
        return browse()

    # a request whose only argument is a single collection is a
    # collection page request
    if 'c' in request.args and len(request.args) == 1 \
            and len(request.args.getlist('c')) == 1:
        return redirect(url_for('.collection', name=request.args.get('c')))

    # argd and argd_orig are deliberately the same object, as before
    argd_orig = wash_search_urlargd(request.args)
    argd = argd_orig
    argd['of'] = 'id'

    # fix for queries like `/search?p=+ellis`
    if 'p' in argd:
        argd['p'] = argd['p'].strip()

    # the user's stored rg preference applies unless the request sets rg
    if 'rg' not in request.values and current_user.get('rg'):
        argd['rg'] = int(current_user.get('rg'))
    rg = int(argd['rg'])

    collection_breadcrumbs(collection)

    qid = get_search_query_id(**argd)
    recids = perform_request_search(req=request.get_legacy_request(), **argd)

    # back-to-search related code
    if request and not isinstance(request.get_legacy_request(),
                                  cStringIO.OutputType):
        # remember the URL of the last search results page
        session['websearch-last-query'] = \
            request.get_legacy_request().unparsed_uri
        hit_limit = current_app.config['CFG_WEBSEARCH_PREV_NEXT_HIT_LIMIT']
        # the hit list is only remembered while it stays within the limit
        session["websearch-last-query-hits"] = \
            None if len(recids) > hit_limit else recids

    ctx = {
        'facets': facets.get_facets_config(collection, qid),
        'records': len(get_current_user_records_that_can_be_displayed(qid)),
        'qid': qid,
        'rg': rg,
        'create_nearest_terms_box':
            lambda: _create_neareset_term_box(argd_orig),
        'easy_search_form': EasySearchForm(csrf_enabled=False),
        'ot': ot,
    }

    return response_formated_records(recids, collection, of, **ctx)
Beispiel #35
0
def resolve_doi(req, doi, ln=CFG_SITE_LANG, verbose=0):
    """
    Redirect to given DOI, or display error page when DOI cannot be
    resolved.

    :param req: request object
    :param doi: the DOI to resolve
    :param ln: language of the generated pages
    :param verbose: verbosity passed on to the search and the error box
    :return: a redirect when exactly one record carries the DOI, otherwise
        a "not found" page or a page listing the multiple matches
    """
    _ = gettext_set_language(ln)
    # Fetch user ID:
    try:
        uid = getUid(req)
    except Error:
        register_exception(req=req, alert_admin=True)
        return page(title=_("Internal Error"),
                    body=create_error_box(req, verbose=verbose, ln=ln),
                    description="%s - Internal Error" % CFG_SITE_NAME,
                    keywords="%s, Internal Error" % CFG_SITE_NAME,
                    language=ln,
                    req=req,
                    navmenuid='search')
    # Resolve DOI: search, then keep only the records that really carry
    # this DOI (compared case-insensitively).  The inner loop variable
    # must not be called ``doi``: Python 2 list comprehensions leak their
    # variable, which used to overwrite the requested DOI.
    wanted_doi = doi.lower()
    candidates = perform_request_search(p='doi:"%s"' % doi, of="id", verbose=verbose)
    recids = []
    for recid in candidates:
        record_dois = [record_doi.lower()
                       for record_doi in get_record(recid).get('doi', '')
                       if record_doi]
        if wanted_doi in record_dois:
            recids.append(recid)

    # Answer
    if len(recids) == 1:
        # Found unique matching record
        return redirect_to_url(req, CFG_SITE_URL + '/' + CFG_SITE_RECORD + '/' + str(recids[0]))
    elif len(recids) == 0:
        # No corresponding record found.  The DOI comes from the request,
        # so it is escaped before being placed into the HTML body.
        page_body = '<p>' + (_("Sorry, DOI %(x_doi)s could not be resolved.", x_doi=('<strong>' + cgi.escape(str(doi)) + '</strong>'))) + '</p>'
        if req.header_only:
            raise apache.SERVER_RETURN(apache.HTTP_NOT_FOUND)
        return page(title=_('DOI "%(x_doi)s" Not Found', x_doi=cgi.escape(doi)),
                    body=page_body,
                    description=(CFG_SITE_NAME + ' - ' + _("Not found") + ': ' + cgi.escape(str(doi))),
                    keywords="%s" % CFG_SITE_NAME,
                    uid=uid,
                    language=ln,
                    req=req,
                    navmenuid='search')
    else:
        # Found multiple matching records: raise and catch an exception so
        # that register_exception is called from inside an except block
        try:
            raise Exception('DOI "%s" matched multiple records (%s) -- Please check' % (doi, ', '.join([str(recid) for recid in recids])))
        except Exception:
            register_exception(req=req, alert_admin=True)
        page_body = websearch_templates.tmpl_multiple_dois_found_page(doi, recids, ln)
        return page(title=_('Found multiple records matching DOI %(x_doi)s', x_doi=cgi.escape(doi)),
                    body=page_body,
                    description=(CFG_SITE_NAME + ' - ' + _("Found multiple records matching DOI") + ': ' + cgi.escape(str(doi))),
                    keywords="%s" % CFG_SITE_NAME,
                    uid=uid,
                    language=ln,
                    req=req,
                    navmenuid='search')
Beispiel #36
0
    def widget(self):
        """Render the deposits widget for the current user.

        Counts the records whose field "8560_" matches the user's e-mail
        address and passes the e-mail, the field name and that count to the
        'deposits_user_settings.html' template.
        """
        email_field = "8560_"
        email = User.query.get(current_user.get_id()).email
        matching_records = perform_request_search(f=email_field, p=email,
                                                  of="id")

        template_vars = {
            'email': email,
            'email_field': email_field,
            'deposit_count': len(matching_records),
        }
        return render_template_to_string('deposits_user_settings.html',
                                         **template_vars)
def get_main_htmls(see_also_dicts, cc_val, c_val, record_url_pattern,
                   search_url_pattern, link_pattern):
    """Build the parent / predecessor / successor HTML snippets.

    All three lists are derived from the "see also" dictionaries: entries
    whose ``w`` value is 't' go to the parents, 'a' to the predecessors and
    'b' to the successors.  Entries without a ``w`` key, or with nothing
    to display, are ignored.

    :return: tuple (parent_htmls, predecessor_htmls, successor_htmls)
    """
    f_val = CFG_BIBAUTHORITY_RECORD_CONTROL_NUMBER_FIELD
    sc_val = "1"
    parent_htmls, predecessor_htmls, successor_htmls = [], [], []
    # $w code -> list that receives the entry
    buckets = (('t', parent_htmls),
               ('a', predecessor_htmls),
               ('b', successor_htmls))

    for see_also_dict in see_also_dicts:
        if 'w' not in see_also_dict:
            continue
        w_subfield = see_also_dict.get('w')
        # $4 holds the control number of the linked authority record
        control_no = see_also_dict.get('4')
        # $a holds the display name; fall back to the control number
        out_string = see_also_dict.get('a') or control_no
        if not out_string:
            continue

        url = ''
        if control_no:
            # the whole $4 value is the search pattern (no prefix stripping)
            recIDs = perform_request_search(cc=cc_val,
                                            c=c_val,
                                            p=control_no,
                                            f=f_val)
            if len(recIDs) == 1:
                url = record_url_pattern % (recIDs[0])
            elif len(recIDs) > 1:
                p_val = " or ".join(["recid:" + str(r) for r in recIDs])
                url = search_url_pattern % (cc_val, c_val, p_val, sc_val)

        # link the text when at least one record matched the control number
        if url:
            out_string = link_pattern % (url, out_string)

        for code, bucket in buckets:
            if w_subfield == code:
                bucket.append(out_string)
                break

    return parent_htmls, predecessor_htmls, successor_htmls
Beispiel #38
0
def _get_self_pubs_bai(person_id):
    '''
    Search for the person's single-author publications.

    The canonical id is used in the query when one is returned for the
    person; otherwise the person id itself is used.
    @param person_id: int person id
    '''
    canonical_ids = get_canonical_id_from_personid(person_id)
    try:
        author_key = canonical_ids[0][0]
    except IndexError:
        author_key = person_id
    pattern = 'author:%s and authorcount:1' % author_key
    return perform_request_search(rg=0, p=pattern)
Beispiel #39
0
def query_get_hot(comments, ln, top, user_collections, collection):
    """
    private function
    Return the most commented (or most reviewed) records among those found
    in the selected collection(s).

    @param comments:  boolean indicating if we want to retrieve comments or reviews
    @param ln: language (not used by this function)
    @param top: number of results to display; used as the SQL LIMIT, so
        fewer rows may be returned after the collection filter
    @param user_collections: allowed collections for the user
    @param collection: collection to display, or 'Show all' to use every
        collection in user_collections
    @return: tuple of tuples (id_bibrec, date_last_comment, users, count)
    """
    qdict = {'id_bibrec': 0, 'date_last_comment': 1, 'users': 2, 'total_count': 3}
    query = """SELECT c.id_bibrec,
               DATE_FORMAT(max(c.date_creation), '%%Y-%%m-%%d %%H:%%i:%%S') as date_last_comment,
               count(distinct c.id_user) as users,
               count(*) as count
               FROM cmtRECORDCOMMENT c
               %s
               GROUP BY c.id_bibrec
               ORDER BY count(*) DESC
               LIMIT %s
    """
    # comments have star_score 0, reviews have a positive star_score
    score_filter = 'c.star_score=0' if comments else 'c.star_score>0'
    where_clause = "WHERE " + score_filter + ' AND c.status="ok" AND c.nb_abuse_reports < %s' % CFG_WEBCOMMENT_NB_REPORTS_BEFORE_SEND_EMAIL_TO_ADMIN

    # NOTE(review): `top` is interpolated into the SQL text as-is; confirm
    # that callers always pass a trusted integer.
    res = run_sql(query % (where_clause, top))

    # A set gives constant-time membership tests in the loop below
    # (a list was scanned once per result row before).
    collection_records = set()
    if collection == 'Show all':
        for collection_name in user_collections:
            collection_records.update(perform_request_search(cc=collection_name))
    else:
        collection_records.update(perform_request_search(cc=collection))

    output = []
    for qtuple in res:
        if qtuple[qdict['id_bibrec']] in collection_records:
            general_infos_tuple = (qtuple[qdict['id_bibrec']],
                                   qtuple[qdict['date_last_comment']],
                                   qtuple[qdict['users']],
                                   qtuple[qdict['total_count']])
            output.append(general_infos_tuple)
    return tuple(output)
Beispiel #40
0
    def authenticate(self, req, form):
        """Restricted search results pages.

        Checks every requested collection that is restricted: guests are
        redirected to the login page, other unauthorized users get a "not
        authorized" page.  Otherwise the search is run with sanitised
        arguments and its output returned.

        :param req: request object
        :param form: submitted form / URL arguments
        """
        argd = wash_search_urlargd(form)
        user_info = collect_user_info(req)

        for coll in argd['c'] + [argd['cc']]:
            if not collection_restricted_p(coll):
                continue
            auth_code, auth_msg = acc_authorize_action(user_info, VIEWRESTRCOLL, collection=coll)
            if not auth_code:
                continue
            if user_info['email'] == 'guest':
                # guests are sent to the login page and back
                cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection': coll})
                login_args = {'action': cookie,
                              'ln': argd['ln'],
                              'referer': CFG_SITE_SECURE_URL + req.unparsed_uri}
                target = CFG_SITE_SECURE_URL + '/youraccount/login' + \
                    make_canonical_urlargd(login_args, {})
                return redirect_to_url(req, target, norobot=True)
            return page_not_authorized(req, "../",
                                       text=auth_msg,
                                       navmenuid='search')

        # cap the wildcard limit unless the user may run bibedit
        # (0 means "no limit" requested)
        if CFG_WEBSEARCH_WILDCARD_LIMIT > 0 and (argd['wl'] > CFG_WEBSEARCH_WILDCARD_LIMIT or argd['wl'] == 0):
            auth_code, auth_message = acc_authorize_action(req, 'runbibedit')
            if auth_code != 0:
                argd['wl'] = CFG_WEBSEARCH_WILDCARD_LIMIT

        # the verbose (debug) parameter is reserved to superadmins
        if not isUserSuperAdmin(user_info):
            argd['verbose'] = 0

        # Keep all the arguments, they might be reused in the
        # search_engine itself to derive other queries
        req.argd = argd

        uid = getUid(req)
        if uid > 0:
            pref = get_user_preferences(uid)
            try:
                if 'rg' not in form:
                    # the URL did not set rg, so the stored preference applies
                    argd['rg'] = int(pref['websearch_group_records'])
            except (KeyError, ValueError):
                pass

        # mod_python does not like to return [] in case when of=id:
        out = perform_request_search(req, **argd)
        if isinstance(out, intbitset):
            return out.fastdump()
        if out == []:
            return str(out)
        return out
Beispiel #41
0
def get_main_htmls(see_also_dicts, cc_val, c_val, record_url_pattern,
                   search_url_pattern, link_pattern):
    """Build the parent, predecessor and successor HTML snippets.

    Each "see also" dictionary carrying a ``w`` subfield yields one snippet.
    The snippet text is the ``a`` subfield, or the ``4`` subfield (control
    number) when ``a`` is empty.  When the control number resolves to one or
    more records via ``perform_request_search``, the text is wrapped in a
    link built from ``link_pattern``.

    :param see_also_dicts: iterable of subfield dictionaries
    :param cc_val: value passed as ``cc`` to the search and the search URL
    :param c_val: value passed as ``c`` to the search and the search URL
    :param record_url_pattern: ``%`` pattern taking a single record id
    :param search_url_pattern: ``%`` pattern taking (cc, c, p, sc)
    :param link_pattern: ``%`` pattern taking (url, text)
    :return: tuple ``(parent_htmls, predecessor_htmls, successor_htmls)``
    """
    control_no_field = CFG_BIBAUTHORITY_RECORD_CONTROL_NUMBER_FIELD
    sc_flag = "1"
    parent_htmls, predecessor_htmls, successor_htmls = [], [], []

    for entry in see_also_dicts:
        # Entries without a $w subfield carry no relation and are ignored.
        if 'w' not in entry:
            continue

        relation = entry.get('w')
        control_no = entry.get('4')
        # Prefer the $a text; fall back to the control number itself.
        label = entry.get('a') or control_no
        if not label:
            continue

        target_url = ''
        if control_no:
            matches = perform_request_search(cc=cc_val,
                                             c=c_val,
                                             p=control_no,
                                             f=control_no_field)
            hit_count = len(matches)
            if hit_count == 1:
                # Exactly one record: link straight to it.
                target_url = record_url_pattern % (matches[0])
            elif hit_count > 1:
                # Several records: link to a search listing all of them.
                recid_query = " or ".join(
                    ["recid:" + str(r) for r in matches])
                target_url = search_url_pattern % (cc_val, c_val,
                                                   recid_query, sc_flag)

        if target_url:
            label = link_pattern % (target_url, label)

        # $w: 't' -> parent, 'a' -> predecessor, 'b' -> successor;
        # any other value is dropped.
        if relation == 't':
            parent_htmls.append(label)
        elif relation == 'a':
            predecessor_htmls.append(label)
        elif relation == 'b':
            successor_htmls.append(label)

    return parent_htmls, predecessor_htmls, successor_htmls
Beispiel #42
0
def get_record_ids(argstr, date_from, date_until):
    """Return the local and external records found for a query and timeframe.

    The query string is washed against the default search URL arguments,
    collections that no longer exist are removed, and the search is then run
    restricted to the given date range.

    :param argstr: URL-encoded query string of the stored search
    :param date_from: start of the timeframe, converted by ``_date_to_tuple``
    :param date_until: end of the timeframe, converted by ``_date_to_tuple``
    :return: tuple ``(recids, external_records)``; ``([], ([], []))`` when
        none of the collections named in the query exists any more
    """
    argd = wash_urlargd(parse_qs(argstr), websearch_templates.search_results_default_urlargd)
    p       = argd.get('p', [])
    c       = argd.get('c', [])
    cc      = argd.get('cc', [])
    aas     = argd.get('aas', [])
    f       = argd.get('f', [])
    so      = argd.get('so', [])
    sp      = argd.get('sp', [])
    ot      = argd.get('ot', [])
    p1      = argd.get('p1', [])
    f1      = argd.get('f1', [])
    m1      = argd.get('m1', [])
    op1     = argd.get('op1', [])
    p2      = argd.get('p2', [])
    f2      = argd.get('f2', [])
    m2      = argd.get('m2', [])
    # Fixed: this used to read the 'op3' key, so the operator joining the
    # second and third search units was never taken from the query.
    op2     = argd.get('op2', [])
    p3      = argd.get('p3', [])
    f3      = argd.get('f3', [])
    m3      = argd.get('m3', [])
    sc      = argd.get('sc', [])

    d1y, d1m, d1d = _date_to_tuple(date_from)
    d2y, d2m, d2d = _date_to_tuple(date_until)

    # Alerts might contain collections that have been deleted: check whether
    # such collections are in the query and, if so, leave them out.
    cc = get_coll_normalised_name(cc)
    if not cc and not c:
        # The alert was for an entire collection that does not exist anymore.
        return ([], ([], []))
    if c:
        # Some collections were defined in the query: drop the unknown ones.
        c = [c_norm_name for c_norm_name in [get_coll_normalised_name(c_name) for c_name in c] if c_norm_name]
        if not c:
            # None of the collections selected in the alert still exist.
            return ([], ([], []))

    washed_colls = wash_colls(cc, c, sc, 0)
    hosted_colls = washed_colls[3]
    if hosted_colls:
        req_args = "p=%s&f=%s&d1d=%s&d1m=%s&d1y=%s&d2d=%s&d2m=%s&d2y=%s&ap=%i" % (p, f, d1d, d1m, d1y, d2d, d2m, d2y, 0)
        external_records = calculate_external_records(req_args, [p, p1, p2, p3], f, hosted_colls, CFG_EXTERNAL_COLLECTION_TIMEOUT, CFG_EXTERNAL_COLLECTION_MAXRESULTS_ALERTS)
    else:
        external_records = ([], [])

    recids = perform_request_search(of='id', p=p, c=c, cc=cc, f=f, so=so, sp=sp, ot=ot,
                                  aas=aas, p1=p1, f1=f1, m1=m1, op1=op1, p2=p2, f2=f2,
                                  m2=m2, op2=op2, p3=p3, f3=f3, m3=m3, sc=sc, d1y=d1y,
                                  d1m=d1m, d1d=d1d, d2y=d2y, d2m=d2m, d2d=d2d)

    return (recids, external_records)
def get_widget_html(language, max_photos, collections, separator, ln):
    """Return the HTML content of the photo widget.

    Searches ``collections`` for record ids, keeps at most ``max_photos`` of
    them, and renders one linked thumbnail per record that has an icon URL
    starting with ``http://``.

    :param language: when ``"fr"``, field 246_1a is preferred for the title
        (falling back to 245__a); otherwise 245__a is used
    :param max_photos: maximum number of records to consider
    :param collections: value passed as ``c`` to the search
    :param separator: string placed between the rendered thumbnails
    :param ln: language code appended to each record link
    :return: the joined HTML string (empty when nothing could be rendered)
    """
    latest_photo_ids = perform_request_search(c=collections,
                                              rg=max_photos,
                                              of='id')
    images_urls = []
    for recid in latest_photo_ids[:max_photos]:
        try:
            photo_record = BibFormatObject(recid)
        except Exception:
            # Best effort: a record that cannot be loaded is skipped.
            continue

        # The lookups below index the first value of the field.  A record
        # without that field makes ``[0]`` raise IndexError on an empty
        # result, which the original ``except KeyError`` let escape;
        # KeyError is still caught for backward compatibility.
        if language == "fr":
            try:
                title = photo_record.fields('246_1a', escape=1)[0]
            except (KeyError, IndexError):
                try:
                    title = photo_record.fields('245__a', escape=1)[0]
                except Exception:
                    title = ""
        else:
            try:
                title = photo_record.fields('245__a', escape=1)[0]
            except (KeyError, IndexError):
                # Picture with no title.
                title = ""

        if CFG_CERN_SITE and photo_record.fields('8567_'):
            # Get the icon from 8567_ ($y == "Icon", URL in $u).
            dfs_images = photo_record.fields('8567_')
            for image_block in dfs_images:
                if image_block.get("y", '') == "Icon":
                    if image_block.get("u", '').startswith("http://"):
                        images_urls.append((recid, image_block["u"], title))
                        break  # Just one image per record

        else:
            # Get the icon from 8564_ ($x == "icon", URL in $q).
            images = photo_record.fields('8564_')
            for image_block in images:
                if image_block.get("x", '').lower() == "icon":
                    if image_block.get("q", '').startswith("http://"):
                        images_urls.append((recid, image_block["q"], title))
                        break  # Just one image per record

    # Build output
    html_out = separator.join(['<a href="%s/%s/%i?ln=%s"><img class="phr" width="100" height="67" src="%s"/>%s</a>' % (CFG_SITE_URL, CFG_SITE_RECORD, recid, ln, photo_url, title) for (recid, photo_url, title) in images_urls])

    return html_out
Beispiel #44
0
def get_recids_for_rules(rules):
    """
    Compute, for every rule, the set of record IDs it has to process.

    A rule with a ``filter_pattern`` is resolved through a search (restricted
    to ``filter_collection`` when given); a rule without one starts from an
    ``intbitset(trailing_bits=True)``.  The outcome is then intersected with
    the ``record_ids`` task option when that option is set, otherwise with
    the records modified since the rule's last run.

    @param rules dict of rules {rule_name: rule_dict}
    @type rules: dict of rules

    @return dict {rule_name: array of record IDs}
    """
    forced_recids = task_get_option("record_ids")
    recids_by_rule = {}

    for rule_name, rule in rules.iteritems():
        if "filter_pattern" not in rule:
            matched = intbitset(trailing_bits=True)
        else:
            pattern = rule["filter_pattern"]
            collections = None
            if "filter_collection" in rule:
                collections = rule["filter_collection"].split()
            write_message("Performing given search query: '%s'" % pattern)
            if collections:
                matched = perform_request_search(
                    p=pattern,
                    of='intbitset',
                    wl=rule.get('filter_limit', 0),
                    f=rule.get('filter_field', None),
                    c=collections)
            else:
                matched = search_pattern(p=pattern,
                                         wl=rule.get('filter_limit', 0),
                                         f=rule.get('filter_field', None))

        if forced_recids is None:
            # No explicit record list: restrict to what changed since the
            # rule last ran.
            changed = get_modified_records_since(get_rule_lastrun(rule_name))
            if "consider_deleted_records" not in rule:
                changed -= search_unit_in_bibxxx(p='DELETED',
                                                 f='980__%',
                                                 type='e')
                if CFG_CERN_SITE:
                    changed -= search_unit_in_bibxxx(p='DUMMY',
                                                     f='980__%',
                                                     type='e')
            matched.intersection_update(changed)
        else:
            matched.intersection_update(forced_recids)

        recids_by_rule[rule_name] = matched

    return recids_by_rule
Beispiel #45
0
    def _get_records(self, search_criteria):
        """Build the MARC XML for every record matching a search.

        The criteria are handed to the search engine as its pattern and the
        resulting record IDs are converted by C{_create_records_xml}.

        @param search_criteria: combination of search terms in Invenio

        @return: MARC XML containing all the records corresponding
        to the search criteria"""
        matching_ids = search_engine.perform_request_search(
            p=search_criteria)
        return self._create_records_xml(matching_ids)
Beispiel #46
0
    def _get_records(self, search_criteria):
        """Return the MARC XML of all records found for a search.

        The search engine is queried with the criteria as pattern; the IDs
        it returns are passed to C{_create_records_xml}.

        @param search_criteria: combination of search terms in Invenio

        @return: MARC XML containing all the records corresponding
        to the search criteria"""
        found_ids = search_engine.perform_request_search(p=search_criteria)
        return self._create_records_xml(found_ids)
Beispiel #47
0
def populate_cnums():
    """Populate table seqSTORE with the cnums present in CONFERENCE records.

    Every record of the "Conferences" collection matching ``111__g:C*`` is
    inspected; a cnum that is not stored yet is inserted and reported on
    standard output.
    """
    conference_recids = perform_request_search(cc="Conferences",
                                               p="111__g:C*", rg=0)

    for recid in conference_recids:
        cnum = record_get_field_value(get_bibrecord(recid),
                                      tag="111", ind1="", ind2="", code="g")
        # Nothing to do for records without a cnum or with a known one.
        if not cnum or _cnum_exists(cnum):
            continue
        _insert_cnum(cnum)
        print("cnum %s from record %s inserted" % (cnum, recid))
Beispiel #48
0
def get_recids_to_load():
    """
    Generates the final list of record IDs to load.

    The list is assembled from, in this order: the ``recids`` task option,
    the hits of the ``query`` task option, the hits for each entry of the
    ``reportnumbers`` task option (all of these paired with a ``None``
    date), and finally the new and/or modified records since the last
    "bibcatalog" update when the ``new`` / ``modified`` options are set.

    Returns a list of tuples like: (recid, date)
    """
    # Work on a copy: the list is extended below, and extending the object
    # handed back by task_get_option in place would leak the search results
    # into the task options (presumably the same list is returned on every
    # call -- confirm against task_get_option).
    recids_given = list(task_get_option("recids", default=[]) or [])
    query_given = task_get_option("query")
    reportnumbers_given = task_get_option("reportnumbers")
    if query_given:
        write_message("Performing given search query: %s" % (query_given,))
        result = perform_request_search(p=query_given,
                                        of='id',
                                        rg=0,
                                        wl=0)
        recids_given.extend(result)

    if reportnumbers_given:
        write_message("Searching for records referring to given reportnumbers")
        for reportnumber in reportnumbers_given:
            result = perform_request_search(p='reportnumber:%s' % (reportnumber,),
                                            of='id',
                                            rg=0,
                                            wl=0)
            recids_given.extend(result)

    # Explicitly requested records carry no date.
    recids_given = [(recid, None) for recid in recids_given]

    last_id, last_date = fetch_last_updated(name="bibcatalog")
    records_found = []
    if task_get_option("new", default=False):
        records_found.extend(get_all_new_records(since=last_date, last_id=last_id))
    if task_get_option("modified", default=False):
        records_found.extend(get_all_modified_records(since=last_date, last_id=last_id))

    for recid, date in records_found:
        recids_given.append((recid, date))
    return recids_given
    def answer(self, req, user_info, of, cc, colls_to_search, p, f,
               search_units, ln):
        """Answer question given by context.

        Return (relevance, html_string) where relevance is integer
        from 0 to 100 indicating how relevant to the question the
        answer is (see C{CFG_WEBSEARCH_SERVICE_MAX_SERVICE_ANSWER_RELEVANCE}
        for details), and html_string being a formatted answer.
        """
        from invenio.refextract_api import search_from_reference

        _ = gettext_set_language(ln)

        if f or not self.seems_a_journal_reference(p):
            return (0, "")

        (field, pattern) = search_from_reference(p.decode('utf-8'))

        if field is not "journal":
            return (0, "")

        recids = perform_request_search(req=req,
                                        p=pattern,
                                        f=field,
                                        cc=cc,
                                        c=colls_to_search)

        if not recids:
            return (0, "")

        if len(recids) == 1:
            recid = recids.pop()
            user_info = collect_user_info(req)
            return (100, """\
<p><span class="journalhint">%s</span></p>
<table style="padding: 5px; border: 2px solid #ccc; margin: 20px"><tr><td>
%s
</td></tr></table>""" % (escape(_("Were you looking for this paper?")),
                         print_record(recid, ln=ln, user_info=user_info)))

        query = "find rawref \"" + p + "\""
        query_link = CFG_SITE_URL + '/search?' + urlencode({'p': query})
        return (
            80, '<span class="journalhint">%s</span>' %
            (_("Were you looking for a journal reference? Try: %(x_href)s") % {
                "x_href":
                '<a href="{0}">{1}</a>'.format(escape(query_link, True),
                                               escape(query))
            }, ))
Beispiel #50
0
def get_latest_deposits():
    """Return display data for the first four records of a sorted search.

    The search asks for record ids sorted on field "005" in descending
    order.  Each returned dictionary holds the record id, creation date,
    authors, title, an abstract cut at 72 characters and the value of field
    980__a as category.
    """
    wanted = 4

    found_ids = perform_request_search(of="id", rg=wanted,
                                       sf="005", so="d")
    # Slice as well: the search result is not assumed to honour rg.
    bfo_list = [bibformat_engine.BibFormatObject(recid)
                for recid in found_ids[:wanted]]

    recs = []
    for bfo in bfo_list:
        recs.append({
            "id": bfo.recID,
            "date": bfe_creation_date.format_element(bfo),
            "author": bfe_authors.format_element(bfo, "1"),
            "title": bfe_title.format_element(bfo),
            "description": bfe_abstract.format_element(
                bfo,
                prefix_en="", prefix_fr="", suffix_en="", suffix_fr="",
                limit="", max_chars="72",
                extension_en="...", extension_fr="..."),
            "category": bfo.field("980__a"),
        })
    return recs
Beispiel #51
0
def get_unique_record_json(param):
    """API to query records from the database.

    :param param: search pattern handed to ``perform_request_search``
    :return: dictionary with the keys ``status`` (``'notfound'``,
        ``'success'`` or ``'multiplefound'``), ``source`` (always
        ``'database'``) and ``query`` (the dumped record on success,
        otherwise an empty dictionary)
    """
    from invenio.legacy.search_engine import perform_request_search
    data, query = {}, {}
    data['status'] = 'notfound'

    recids = perform_request_search(p=param)
    if len(recids) == 1:
        # Hand over the single record id itself; the whole one-element
        # result used to be passed to get_record.
        query = get_record(recids[0]).dumps(clean=True)
        data['status'] = 'success'
    elif len(recids) > 1:
        data['status'] = 'multiplefound'

    data['source'] = 'database'
    data['query'] = query

    return data
Beispiel #52
0
def get_valid_range(rank_method_code):
    """Return the records of the collections enabled for a rank method.

    The collection names attached to ``rank_method_code`` are read from the
    database; the records found when searching those collections are
    returned as an intbitset (empty when no collection is attached).
    """
    write_message("Getting records from collections enabled for rank method.",
                  verbose=9)

    rows = run_sql(
        "SELECT collection.name FROM collection, collection_rnkMETHOD, rnkMETHOD WHERE collection.id=id_collection and id_rnkMETHOD=rnkMETHOD.id and rnkMETHOD.name=%s",
        (rank_method_code, ))
    collection_names = [row[0] for row in rows]

    # Only search when at least one collection is enabled.
    if collection_names:
        matching = perform_request_search(c=collection_names)
    else:
        matching = []

    valid = intbitset()
    valid += matching
    return valid
Beispiel #53
0
def find_similar_videos(recid,
                        collection="Videos",
                        threshold=75,
                        maximum=3,
                        shuffle=True):
    """Return a list of video records similar to ``recid``.

    The records of ``collection`` are ranked with the 'wrd' method against
    ``recid``; those whose rank reaches ``threshold`` are kept.  At most
    ``maximum`` of them are returned: a random sample when ``shuffle`` is
    true, otherwise the first ones in ranking order.
    """
    candidates = intbitset(perform_request_search(cc=collection))
    ranking = rank_records('wrd', 0, candidates, ['recid:' + str(recid)])
    # ranking looks like ([6, 7], [81, 100], '(', ')', ''):
    # record ids first, their ranks second.
    similar_records = [ranking[0][position]
                       for position, score in enumerate(ranking[1])
                       if score >= threshold]

    if not shuffle:
        return similar_records[:maximum]
    # Never ask for a larger sample than there are candidates.
    return random.sample(similar_records,
                         min(maximum, len(similar_records)))
Beispiel #54
0
def rss(collection, p, jrec, so, rm):
    """Render RSS feed.

    The request arguments are washed, the search is run asking for record
    ids, and the result is rendered in the 'xr' output format.
    """
    from invenio.legacy.search_engine import perform_request_search
    output_format = 'xr'
    search_args = wash_search_urlargd(request.args)
    # The search itself must return ids; formatting happens afterwards.
    search_args['of'] = 'id'

    # An rg given in the request wins over the user's search preference.
    if 'rg' not in request.values and current_user.get('rg'):
        search_args['rg'] = current_user.get('rg')
    group_size = int(search_args['rg'])

    query_id = get_search_query_id(**search_args)
    found_recids = perform_request_search(req=request.get_legacy_request(),
                                          **search_args)

    displayable = get_current_user_records_that_can_be_displayed(query_id)
    context = {'records': len(displayable),
               'qid': query_id,
               'rg': group_size}

    return response_formated_records(found_recids, collection,
                                     output_format, **context)
Beispiel #55
0
def goto(cc=CFG_SITE_NAME,
         p='',
         f='',
         sf='date',
         so='d',
         docname='',
         format=''):
    """
    Redirect the user to the latest record in the given collection.

    Redirect the user to the latest record in the given collection,
    optionally within the specified pattern and field. If docname
    and format are specified, redirect the user to the corresponding
    docname and format. If docname is not specified, but there is
    only a single bibdoc attached to the record, redirect to that
    one.

    The parameters cc, p, f, sf and so are passed unchanged to
    perform_request_search. When a record is found, the value returned
    is either the record URL ('/<CFG_SITE_RECORD>/<recid>') or, when
    the requested document file can be resolved, the URL of that file.
    """
    recids = perform_request_search(cc=cc, p=p, f=f, sf=sf, so=so)
    if recids:
        # The first is the most recent because they are sorted by date
        # descending.
        recid = recids[0]
        url = '/%s/%s' % (CFG_SITE_RECORD, recid)
        if format:
            bibrecdocs = BibRecDocs(recid)
            if not docname:
                # Without an explicit docname, a file can only be chosen
                # when the record has exactly one document attached.
                if len(bibrecdocs.get_bibdoc_names()) == 1:
                    docname = bibrecdocs.get_bibdoc_names()[0]
                else:
                    return url
            # Any failure to resolve the document or the requested format
            # falls back to the plain record URL.
            try:
                bibdoc = BibRecDocs(recid).get_bibdoc(docname)
            except InvenioBibDocFileError:
                return url
            try:
                bibdocfile = bibdoc.get_file(format=format)
                return bibdocfile.get_url()
            except InvenioBibDocFileError:
                return url
        return url