Beispiel #1
0
def book_information_from_MARC(recid):
    """
    Collect the descriptive details of a book from its MARC record.

    @param recid: identify the record. Primary key of bibrec.
    @type recid: int

    @return: tuple (title, year, author, isbn, editor). The author values
             are joined with '; ', the ISBN values and the editor values
             (260__a followed by 260__b) with ', '.
    """
    # FIXME do the same that book_title_from_MARC

    title = book_title_from_MARC(recid)
    year = ''.join(get_fieldvalues(recid, "260__c"))

    # Gather the values of every author tag, keeping the tag order.
    authors = []
    for author_tag in ('100__a', '700__a', '721__a'):
        authors.extend(get_fieldvalues(recid, author_tag))
    author = '; '.join(authors)

    isbn = ', '.join(get_fieldvalues(recid, "020__a"))

    editor_parts = get_fieldvalues(recid, "260__a") + \
        get_fieldvalues(recid, "260__b")
    editor = ', '.join(editor_parts)

    return (title, year, author, isbn, editor)
Beispiel #2
0
def check_fresh_record(user_info, recid):
    """
    Tell whether a record was just submitted (it has a record id) but is not
    yet fully in the database.

    check_user_can_view_record performs a similar check, but it returns the
    same error code both when the user may not view the record and when the
    record is not yet fully submitted.

    @param user_info: the user_info dictionary that describe the user
                      (not used by this check).
    @type user_info: user_info dictionary
    @param recid: the record identifier; a string is converted to int.
    @type recid: positive integer
    @return: True if the record is fresh, False otherwise
    @rtype: bool
    """
    recid = int(recid) if isinstance(recid, str) else recid

    # A record that already carries an email (8560_f) or a title (245__a)
    # is no longer considered fresh.
    email_or_title = (get_fieldvalues(recid, '8560_f') or
                      get_fieldvalues(recid, '245__a'))
    return not email_or_title
Beispiel #3
0
def check_fresh_record(user_info, recid):
    """
    Report whether the record is freshly submitted: it has a record id but
    is not yet fully in the database.

    The check_user_can_view_record function does the same thing, but it
    cannot distinguish "user has no right to view the record" from "record
    not yet fully submitted" because both yield the same error code.

    @param user_info: the user_info dictionary that describe the user
                      (not read here).
    @type user_info: user_info dictionary
    @param recid: the record identifier (a str is cast to int).
    @type recid: positive integer
    @return: True if the record is fresh, False otherwise
    @rtype: bool
    """
    if isinstance(recid, str):
        recid = int(recid)

    # The email field (8560_f) is checked first, then the title (245__a);
    # the record is fresh only when both are empty.
    for tag in ('8560_f', '245__a'):
        if get_fieldvalues(recid, tag):
            return False
    return True
Beispiel #4
0
def build_issns_from_local_site():
    """
    Build the "journal name -> issn" mapping from the local database and
    pretty-print it as a Python dict.

    Journal names (210__% values of the records in the 'Periodicals'
    collection) are normalized a little bit:
        - all lower case
        - whitespace stripped on both sides
        - a trailing "[online]" removed
    """
    online_suffix = "[online]"

    name_to_issn = {}
    #name_to_issn = issns # Uncomment this to extend existing issns dict
    # (e.g. in case of manual addition)
    for rec_id in perform_request_search(cc='Periodicals', of='id'):
        names = get_fieldvalues(rec_id, '210__%')
        issns_found = get_fieldvalues(rec_id, '022__a')
        if not issns_found:
            # Records without an ISSN contribute nothing
            continue
        issn = issns_found[0]  # There should be only one ISSN
        for raw_name in names:
            # Depending on how journal names are entered into the database,
            # you might want to do some processing before saving:
            name = raw_name.lower().strip()
            if name.endswith(online_suffix):
                name = name[:-len(online_suffix)].rstrip()
            name_to_issn[name] = issn

    pprint.PrettyPrinter(indent=4).pprint(name_to_issn)
Beispiel #5
0
def book_information_from_MARC(recid):
    """
    Read the information describing a book out of its MARC record.

    @param recid: identify the record. Primary key of bibrec.
    @type recid: int

    @return: tuple with title, year, author, isbn and editor. Multiple
             authors are separated by '; ', multiple ISBNs and editor
             values (260__a, then 260__b) by ', '.
    """
    # FIXME do the same that book_title_from_MARC

    title = book_title_from_MARC(recid)
    year = ''.join(get_fieldvalues(recid, "260__c"))

    # One flat list of author values, tag after tag
    author_values = [value
                     for tag in ('100__a', '700__a', '721__a')
                     for value in get_fieldvalues(recid, tag)]
    isbn_values = get_fieldvalues(recid, "020__a")
    editor_values = get_fieldvalues(recid, "260__a") + \
        get_fieldvalues(recid, "260__b")

    return (title,
            year,
            '; '.join(author_values),
            ', '.join(isbn_values),
            ', '.join(editor_values))
Beispiel #6
0
def build_issns_from_local_site():
    """
    Retrieve the ISSNs from the local database, store them as a
    "journal name -> issn" relation and print the result as a Python dict.

    Journal names are slightly normalized before being used as keys:
        - converted to lower case
        - stripped of leading and trailing whitespace
        - stripped of a trailing "[online]"
    """
    periodical_ids = perform_request_search(cc='Periodicals',
                                            of='id')
    issn_by_journal = {}
    #issn_by_journal = issns # Uncomment this to extend existing issns dict
                             # (e.g. in case of manual addition)
    for rec_id in periodical_ids:
        journal_names = get_fieldvalues(rec_id, '210__%')
        issn_values = get_fieldvalues(rec_id, '022__a')
        if not issn_values:
            continue
        # There should be only one ISSN
        issn = issn_values[0]
        for journal_name in journal_names:
            # Depending on how journal names are entered into the database,
            # you might want to do some processing before saving:
            key = journal_name.lower().strip()
            if key.endswith("[online]"):
                key = key[:-8].rstrip()
            issn_by_journal[key] = issn

    printer = pprint.PrettyPrinter(indent=4)
    printer.pprint(issn_by_journal)
def get_video_thumbnail(recid):
    """Return the URL and ALT text for a video thumbnail of a given record.

    The 8564_z values are scanned in order; the first one that names a
    thumbnail kind selects the 8564_u (URL) and 8564_y (description) values
    found at the same position.

    @param recid: the record identifier
    @return: tuple (url, description); ("", "") when no thumbnail is found
    """
    thumbnail_kinds = ('SUGGESTIONTHUMB', 'BIGTHUMB', 'THUMB', 'SMALLTHUMB',
                       'POSTER')
    comments = get_fieldvalues(recid, '8564_z')
    descriptions = get_fieldvalues(recid, '8564_y')
    urls = get_fieldvalues(recid, '8564_u')
    for pos, comment in enumerate(comments):
        if comment not in thumbnail_kinds:
            continue
        if pos >= len(urls):
            # The three lists are fetched independently and may differ in
            # length; without a URL at this position there is nothing to
            # return, so keep looking instead of raising IndexError.
            continue
        # A missing description only costs the ALT text
        description = descriptions[pos] if pos < len(descriptions) else ""
        return (urls[pos], description)
    return ("", "")
Beispiel #8
0
def get_video_thumbnail(recid):
    """Return the URL and ALT text for a video thumbnail of a given record.

    The first 8564_z value naming a thumbnail kind determines the position
    at which the URL (8564_u) and the description (8564_y) are read.

    @param recid: the record identifier
    @return: tuple (url, description); ("", "") when no thumbnail is found
    """
    comments = get_fieldvalues(recid, '8564_z')
    descriptions = get_fieldvalues(recid, '8564_y')
    urls = get_fieldvalues(recid, '8564_u')
    for pos, comment in enumerate(comments):
        if comment in ('SUGGESTIONTHUMB', 'BIGTHUMB', 'THUMB', 'SMALLTHUMB',
                       'POSTER'):
            if pos >= len(urls):
                # The subfield lists are fetched separately and can have
                # different lengths; skip entries that have no URL rather
                # than failing with IndexError.
                continue
            if pos < len(descriptions):
                return (urls[pos], descriptions[pos])
            # URL present but no description at this position
            return (urls[pos], "")
    return ("", "")
Beispiel #9
0
def get_authors_from_record(recID, tags):
    """Return the set of hashed author names of a record.

    Authors are stored under three different MARC tags (first author,
    additional authors, alternative author names), so all three have to be
    looked up. Only the first 21 names found are taken into account.
    """
    author_names = list(chain(
        get_fieldvalues(recID, tags['first_author']),
        get_fieldvalues(recID, tags['additional_author']),
        get_fieldvalues(recID, tags['alternative_author_name'])))

    return set(hash(name) for name in author_names[:21])
Beispiel #10
0
def get_authors_from_record(recID, tags):
    """Get all authors for a record, as a set of name hashes.

    There are 3 different types of authors and each of them lives under
    its own MARC tag, hence the three lookups. At most the first 21 names
    are hashed.
    """
    first = get_fieldvalues(recID, tags['first_author'])
    additional = get_fieldvalues(recID, tags['additional_author'])
    alternative = get_fieldvalues(recID, tags['alternative_author_name'])

    considered = list(chain(first, additional, alternative))[:21]
    authors = set()
    for author in considered:
        authors.add(hash(author))
    return authors
Beispiel #11
0
def get_journal_info(recid, tags):
    """Build the formatted journal reference strings of a record.

    The first journal, volume, year and pages values of the record are
    collected; when every element required by tags['publication_format']
    is available, the formatted publication string is produced, followed by
    a variant using the alternative volume (if any) and one variant per
    CODEN mapped to the journal name.

    @param recid: the record identifier
    @param tags: dictionary with a 'publication' sub-dictionary (keys
                 'journal', 'volume', 'year', 'pages' giving the MARC tags)
                 and a 'publication_format' string
    @return: list of formatted strings; empty when required data is missing
    """
    record_info = []
    # TODO: handle records with multiple journals
    tagsvalues = {}  # we store the tags and their values here
                     # like c->444 y->1999 p->"journal of foo",
                     # v->20
    tmp = get_fieldvalues(recid, tags['publication']['journal'])
    if tmp:
        tagsvalues["p"] = tmp[0]
    tmp = get_fieldvalues(recid, tags['publication']['volume'])
    if tmp:
        tagsvalues["v"] = tmp[0]
    tmp = get_fieldvalues(recid, tags['publication']['year'])
    if tmp:
        tagsvalues["y"] = tmp[0]
    tmp = get_fieldvalues(recid, tags['publication']['pages'])
    if tmp:
        # if the page numbers have "x-y" take just x
        pages = tmp[0]
        hpos = pages.find("-")
        if hpos > 0:
            pages = pages[:hpos]
        tagsvalues["c"] = pages

    # check if we have the required data
    publication_format = tags['publication_format']
    missing = [c for c in publication_format
               if c in ('p', 'v', 'y', 'c') and c not in tagsvalues]
    if missing:
        return record_info

    record_info.append(format_journal(publication_format, tagsvalues))

    # The volume is only guaranteed to be present when the format asks for
    # it, so guard the lookup instead of raising KeyError.
    if 'v' in tagsvalues:
        alt_volume = get_alt_volume(tagsvalues['v'])
        if alt_volume:
            tagsvalues2 = tagsvalues.copy()
            tagsvalues2['v'] = alt_volume
            record_info.append(format_journal(publication_format,
                                              tagsvalues2))

    # Add codens (same guard: the journal name may be absent when the
    # format does not require it)
    if 'p' in tagsvalues:
        for coden in get_kb_mappings('CODENS',
                                     value=tagsvalues['p']):
            tagsvalues2 = tagsvalues.copy()
            tagsvalues2['p'] = coden['key']
            record_info.append(format_journal(publication_format,
                                              tagsvalues2))

    return record_info
Beispiel #12
0
def proceedings_link(record):
    """Return an HTML snippet linking to the proceedings of a conference.

    The proceedings are the records matching the record's 'cnum' that also
    have 980__a:proceedings.

    @param record: mapping providing the 'cnum' key
    @return: '' when there is no cnum or no proceedings record; a single
             'Proceedings' link when exactly one record matches; otherwise
             'Proceedings: ' followed by numbered links, each with its DOI
             (first 0247_a value) when the record has one
    """
    cnum = record['cnum']
    if not cnum:
        return ''
    search_result = Query("cnum:%s and 980__a:proceedings" % (cnum,)).\
        search().recids
    if not search_result:
        return ''
    if len(search_result) == 1:
        return '<a href="/record/' + str(search_result[0]) + \
            '">Proceedings</a>'

    from invenio.legacy.bibrecord import get_fieldvalues
    proceedings = []
    for i, recid in enumerate(search_result):
        doi = get_fieldvalues(recid, '0247_a')
        if doi:
            proceedings.append('<a href="/record/%(ID)s">#%(number)s</a> (DOI: <a href="http://dx.doi.org/%(doi)s">%(doi)s</a>)'
                               % {'ID': recid, 'number': i + 1, 'doi': doi[0]})
        else:
            proceedings.append(
                '<a href="/record/%(ID)s">#%(number)s</a>' % {'ID': recid, 'number': i + 1})
    # Join once, after all links are collected, instead of on every iteration
    return 'Proceedings: ' + ', '.join(proceedings)
Beispiel #13
0
def update_references(recid, overwrite=True):
    """Update references for a record

    First, we extract references from a record.
    Then, we are not updating the record directly but adding a bibupload
    task in -c mode which takes care of updating the record.

    Parameters:
    * recid: the id of the record
    * overwrite: when False, a record that already has a 999 field is
                 refused instead of being updated

    Raises:
    * RecordHasReferences: when overwrite is False and the record has a 999
                           field, or when the record has 999C59 values
                           (reported as "has been curated")
    """

    if not overwrite:
        # Check for references in record
        record = get_record(recid)
        if record and record_has_field(record, '999'):
            raise RecordHasReferences('Record has references and overwrite '
                                      'mode is disabled: %s' % recid)

    if get_fieldvalues(recid, '999C59'):
        raise RecordHasReferences('Record has been curated: %s' % recid)

    # Parse references
    references_xml = extract_references_from_record_xml(recid)

    # Save new record to file; the context manager closes the descriptor
    # even when the write fails
    (temp_fd, temp_path) = mkstemp(prefix=CFG_REFEXTRACT_FILENAME,
                                   dir=CFG_TMPSHAREDDIR)
    with os.fdopen(temp_fd, 'w') as temp_file:
        temp_file.write(references_xml)

    # Update record
    task_low_level_submission('bibupload', 'refextract', '-P', '4',
                              '-c', temp_path)
Beispiel #14
0
def get_index_strings_by_control_no(control_no):
    """extracts the index-relevant strings from the authority record referenced by
    the 'control_no' parameter and returns it as a list of strings

    @param control_no: a (INVENIO) MARC internal control_no to an authority record
    @type control_no: string (e.g. 'author:(ABC)1234')

    @return: list of index-relevant strings from the referenced authority
        record(s); empty when no record matches or when no subfields are
        configured for the authority type of control_no

    """

    from invenio.legacy.bibindex.engine import list_union

    #1. get recID and authority type corresponding to control_no
    rec_IDs = get_low_level_recIDs_from_control_no(control_no)
    if not rec_IDs:
        return []

    # The tags depend only on control_no, so look them up once rather than
    # for every record. An authority type without configured subfields
    # yields None from .get(); treat it as "nothing to index".
    tags = CFG_BIBAUTHORITY_AUTHORITY_SUBFIELDS_TO_INDEX.get(
        get_type_from_control_no(control_no)) or ()

    #2. concatenate and return all the info from the interesting fields for this record
    string_list = []
    for rec_id in rec_IDs: # in case we get multiple authority records
        for tag in tags:
            new_strings = get_fieldvalues(rec_id, tag)
            string_list = list_union(new_strings, string_list)
    return string_list
Beispiel #15
0
def is_user_viewer_of_record(user_info, recid):
    """
    Tell whether the user may view the record according to the MARC tags
    listed in CFG_ACC_GRANT_VIEWER_RIGHTS_TO_EMAILS_IN_TAGS, i.e. whether
    one of the values of those tags is a group the user belongs to or is
    the user's email address (compared stripped and lower-cased).

    :param user_info: the user_info dictionary that describe the user.
    :type user_info: user_info dictionary
    :param recid: the record identifier.
    :type recid: positive integer
    @return: True if the user is 'allow to view' the record; False otherwise
    @rtype: bool
    """
    viewers = []
    for tag in cfg.get('CFG_ACC_GRANT_VIEWER_RIGHTS_TO_EMAILS_IN_TAGS', []):
        from invenio.legacy.bibrecord import get_fieldvalues
        viewers += get_fieldvalues(recid, tag)

    for viewer in viewers:
        # Group membership is tested on the raw value
        if viewer in user_info['group']:
            return True
        # Email addresses are compared ignoring case and outer whitespace
        if viewer.strip().lower() == user_info['email'].strip().lower():
            return True
    return False
Beispiel #16
0
def is_user_owner_of_record(user_info, recid):
    """Tell whether the user owns the record.

    The values of the MARC tags listed in
    CFG_ACC_GRANT_AUTHOR_RIGHTS_TO_EMAILS_IN_TAGS are matched against the
    user's groups and email address.

    :param user_info: the user_info dictionary that describe the user.
    :type user_info: user_info dictionary
    :param recid: the record identifier.
    :type recid: positive integer
    :return: True if the user is 'owner' of the record; False otherwise
    """
    owners = []
    for tag in cfg.get('CFG_ACC_GRANT_AUTHOR_RIGHTS_TO_EMAILS_IN_TAGS', []):
        from invenio.legacy.bibrecord import get_fieldvalues
        owners += get_fieldvalues(recid, tag)

    for owner in owners:
        if owner in user_info['group']:
            return True
        # Emails are compared ignoring case and surrounding whitespace
        if owner.strip().lower() == user_info['email'].strip().lower():
            return True
        # On CERN sites the e-group might be given as an address ending in
        # @cern.ch; that part is replaced by ' [CERN]' before the value is
        # looked up among the user's groups.
        if cfg['CFG_CERN_SITE'] and \
                owner.replace('@cern.ch', ' [CERN]') in user_info['group']:
            return True
    return False
Beispiel #17
0
def is_user_owner_of_record(user_info, recid):
    """Check whether the user counts as owner of the record.

    A user is owner when one of the values found in the tags configured in
    CFG_ACC_GRANT_AUTHOR_RIGHTS_TO_EMAILS_IN_TAGS is one of the user's
    groups or equals the user's email address.

    :param user_info: the user_info dictionary that describe the user.
    :type user_info: user_info dictionary
    :param recid: the record identifier.
    :type recid: positive integer
    :return: True if the user is 'owner' of the record; False otherwise
    """
    granted = []
    for tag in cfg.get('CFG_ACC_GRANT_AUTHOR_RIGHTS_TO_EMAILS_IN_TAGS', []):
        from invenio.legacy.bibrecord import get_fieldvalues
        granted.extend(get_fieldvalues(recid, tag))

    for entry in granted:
        in_group = entry in user_info['group']
        if in_group:
            return True
        same_email = entry.strip().lower() == \
            user_info['email'].strip().lower()
        if same_email:
            return True
        if cfg['CFG_CERN_SITE']:
            # the egroup might be stored with an '@cern.ch' ending, which is
            # turned into ' [CERN]' for the group lookup
            cern_group = entry.replace('@cern.ch', ' [CERN]')
            if cern_group in user_info['group']:
                return True
    return False
Beispiel #18
0
def is_user_viewer_of_record(user_info, recid):
    """
    Check whether the user is allowed to view the record based on the MARC
    tags configured in CFG_ACC_GRANT_VIEWER_RIGHTS_TO_EMAILS_IN_TAGS: the
    user qualifies when a value of those tags is one of the user's groups
    or matches the user's email address.

    :param user_info: the user_info dictionary that describe the user.
    :type user_info: user_info dictionary
    :param recid: the record identifier.
    :type recid: positive integer
    @return: True if the user is 'allow to view' the record; False otherwise
    @rtype: bool
    """
    allowed = []
    for tag in cfg.get('CFG_ACC_GRANT_VIEWER_RIGHTS_TO_EMAILS_IN_TAGS', []):
        from invenio.legacy.bibrecord import get_fieldvalues
        allowed.extend(get_fieldvalues(recid, tag))

    for entry in allowed:
        in_group = entry in user_info['group']
        if in_group:
            return True
        # Case and surrounding whitespace are ignored for email addresses
        normalized = entry.strip().lower()
        if normalized == user_info['email'].strip().lower():
            return True
    return False
Beispiel #19
0
def is_periodical(recid):
    """Return True when one of the record's 690C_a values is 'PERI'."""
    return 'PERI' in get_fieldvalues(recid, "690C_a")
Beispiel #20
0
def get_authors_from_record(recID, tags,
                            use_bibauthorid=CFG_BIBRANK_SELFCITES_USE_BIBAUTHORID):
    """Return the authors of a record.

    With use_bibauthorid the result of get_personids_from_record is
    returned. Otherwise the three kinds of author tags (first author,
    additional authors, alternative author names) are read and the first
    20 names are returned as a set of hashes.
    """
    if use_bibauthorid:
        return get_personids_from_record(recID)

    author_names = list(chain(
        get_fieldvalues(recID, tags['first_author']),
        get_fieldvalues(recID, tags['additional_author']),
        get_fieldvalues(recID, tags['alternative_author_name'])))
    return set(hash(name) for name in author_names[:20])
Beispiel #21
0
def is_periodical(recid):
    """Tell whether 'PERI' is among the 690C_a values of the record."""
    record_types = get_fieldvalues(recid, "690C_a")
    return any(record_type == 'PERI' for record_type in record_types)
Beispiel #22
0
def _record_in_files_p(recid, filenames):
    """Search XML files for given record.

    Depending on CFG_BIBEDIT_QUEUE_CHECK_METHOD the files are either scanned
    with regular expressions (record id, OAI ids, system number) or parsed
    with create_records and compared through _record_has_id_p.
    Files that cannot be read (IOError) are skipped.

    @param recid: the record identifier
    @param filenames: iterable of paths of the XML files to inspect
    @return: True as soon as one file contains the record, False otherwise
    """
    # Get id tags of record in question
    # NOTE(review): rec_oaiid keeps its -1 placeholder in the non-regexp
    # branch; only the regexp branch looks at the record's OAI ids.
    rec_oaiid = rec_sysno = -1
    rec_oaiid_tag = get_fieldvalues(recid, OAIID_TAG)
    rec_sysno_tag = get_fieldvalues(recid, SYSNO_TAG)
    if rec_sysno_tag:
        rec_sysno = rec_sysno_tag[0]

    # Shared by the OAI id and the system number checks
    datafield_pattern = r'<datafield tag="%s" ind1=" " ind2=" ">(\s*<subfield code="a">\s*|\s*<subfield code="9">\s*.*\s*</subfield>\s*<subfield code="a">\s*)%s'

    # For each record in each file, compare ids and abort if match is found
    for filename in filenames:
        try:
            # Read through a context manager so the handle is released
            # even when a match makes us return early.
            with open(filename) as file_:
                file_content = file_.read()

            if CFG_BIBEDIT_QUEUE_CHECK_METHOD == 'regexp':
                # check via regexp: this is fast, but may not be precise
                re_match_001 = re.compile(
                    '<controlfield tag="001">%s</controlfield>' % (recid))
                if re_match_001.search(file_content):
                    return True
                for oaiid in rec_oaiid_tag:
                    re_match_oaiid = re.compile(
                        datafield_pattern
                        % (OAIID_TAG[0:3], re.escape(oaiid)))
                    if re_match_oaiid.search(file_content):
                        return True
                if rec_sysno_tag:
                    re_match_sysno = re.compile(
                        datafield_pattern
                        % (SYSNO_TAG[0:3], re.escape(str(rec_sysno))))
                    if re_match_sysno.search(file_content):
                        return True
            else:
                # by default, check via bibrecord: this is accurate, but may be slow
                for entry in create_records(file_content, 0, 0):
                    record, all_good = entry[:2]
                    if record and all_good:
                        if _record_has_id_p(record, recid, rec_oaiid,
                                            rec_sysno):
                            return True
        except IOError:
            continue
    return False
Beispiel #23
0
def _record_in_files_p(recid, filenames):
    """Search XML files for given record.

    With CFG_BIBEDIT_QUEUE_CHECK_METHOD set to "regexp" the raw file content
    is matched against the record id, each OAI id and the system number;
    otherwise the content is parsed with create_records and every valid
    record is compared using _record_has_id_p. Unreadable files (IOError)
    are ignored.

    @param recid: the record identifier
    @param filenames: iterable of paths of the XML files to inspect
    @return: True if any file contains the record, False otherwise
    """
    # Get id tags of record in question
    # NOTE(review): in the non-regexp branch rec_oaiid is always the -1
    # placeholder; the record's OAI ids are only used by the regexp branch.
    rec_oaiid = rec_sysno = -1
    rec_oaiid_tag = get_fieldvalues(recid, OAIID_TAG)
    rec_sysno_tag = get_fieldvalues(recid, SYSNO_TAG)
    if rec_sysno_tag:
        rec_sysno = rec_sysno_tag[0]

    # For each record in each file, compare ids and abort if match is found
    for filename in filenames:
        try:
            # "with" guarantees the file is closed, including on the early
            # "return True" paths below.
            with open(filename) as file_:
                file_content = file_.read()

            if CFG_BIBEDIT_QUEUE_CHECK_METHOD == "regexp":
                # check via regexp: this is fast, but may not be precise
                re_match_001 = re.compile('<controlfield tag="001">%s</controlfield>' % (recid))
                if re_match_001.search(file_content):
                    return True
                for oaiid in rec_oaiid_tag:
                    re_match_oaiid = re.compile(
                        r'<datafield tag="%s" ind1=" " ind2=" ">(\s*<subfield code="a">\s*|\s*<subfield code="9">\s*.*\s*</subfield>\s*<subfield code="a">\s*)%s'
                        % (OAIID_TAG[0:3], re.escape(oaiid))
                    )
                    if re_match_oaiid.search(file_content):
                        return True
                if rec_sysno_tag:
                    re_match_sysno = re.compile(
                        r'<datafield tag="%s" ind1=" " ind2=" ">(\s*<subfield code="a">\s*|\s*<subfield code="9">\s*.*\s*</subfield>\s*<subfield code="a">\s*)%s'
                        % (SYSNO_TAG[0:3], re.escape(str(rec_sysno)))
                    )
                    if re_match_sysno.search(file_content):
                        return True
            else:
                # by default, check via bibrecord: this is accurate, but may be slow
                records = create_records(file_content, 0, 0)
                for parsed in records:
                    record, all_good = parsed[:2]
                    if record and all_good:
                        if _record_has_id_p(record, recid, rec_oaiid, rec_sysno):
                            return True
        except IOError:
            continue
    return False
Beispiel #24
0
    def deleted(self):
        """Tell whether the record is marked as deleted.

        A record is deleted when "DELETED" is among its 980__% values or,
        if CFG_CERN_SITE is set in the application config, when "DUMMY" is.
        """
        from invenio.legacy.bibrecord import get_fieldvalues
        collection_ids = get_fieldvalues(self.id, "980__%")

        if "DELETED" in collection_ids:
            return True
        # Otherwise the outcome is that of the CERN-specific DUMMY check
        return (current_app.config.get('CFG_CERN_SITE')
                and "DUMMY" in collection_ids)
Beispiel #25
0
def search_result_info(recid):
    """Return the first report number (037__a) of a record or, if it has
    none, the recid itself prefixed with "#".
    """
    report_numbers = get_fieldvalues(recid, '037__a')
    if report_numbers:
        return report_numbers[0]
    return "#" + str(recid)
Beispiel #26
0
def search_result_info(recid):
    """Return the report number of a record; when the record has no 037__a
    value, fall back to "#" followed by the recid.
    """
    numbers = get_fieldvalues(recid, '037__a')
    return numbers[0] if len(numbers) > 0 else "#" + str(recid)
Beispiel #27
0
def check_authorized_tags(recid, tags, test_func):
    """Tell whether any value stored under the given tags passes a test.

    All values of all tags are fetched first; test_func is then applied to
    them in order until one returns a true value.
    """
    candidates = [value
                  for tag in tags
                  for value in get_fieldvalues(recid, tag)]
    return any(test_func(candidate) for candidate in candidates)
def get_video_duration(recid):
    """ Return the duration of a video in human readable form, based on
    the first 950__d value; "" when the record has none.
    """
    timecodes = get_fieldvalues(recid, '950__d')
    if not timecodes:
        return ""
    seconds = timecode_to_seconds(timecodes[0])
    return human_readable_time(seconds)
Beispiel #29
0
def check_authorized_tags(recid, tags, test_func):
    """Check if at least one value of the record's tags satisfies test_func.

    The values of every tag are collected before the first one is tested.
    """
    values = []
    for tag in tags:
        values += get_fieldvalues(recid, tag)

    matches = (True for value in values if test_func(value))
    return next(matches, False)
Beispiel #30
0
def get_video_duration(recid):
    """ Return the human readable duration of a video ("" when the record
    has no 950__d value)
    """
    values = get_fieldvalues(recid, '950__d')
    if values:
        # Only the first timecode is considered
        return human_readable_time(timecode_to_seconds(values[0]))
    return ""
Beispiel #31
0
def get_control_nos_from_recID(recID):
    """
    Fetch the control numbers stored in a record.

    @param recID: record ID
    @type recID: int

    @return: the values of the CFG_BIBAUTHORITY_RECORD_CONTROL_NUMBER_FIELD
             field of the record, fetched with repetitive_values=False
    """
    control_nos = get_fieldvalues(
        recID,
        CFG_BIBAUTHORITY_RECORD_CONTROL_NUMBER_FIELD,
        repetitive_values=False)
    return control_nos
Beispiel #32
0
def check_record_for_refextract(recid):
    """Tell whether references should be (re-)extracted for a record.

    @param recid: the record identifier
    @return: True when extraction is needed, False otherwise
    @rtype: bool
    """
    if get_fieldvalues(recid, '999C6v'):
        # References extracted by refextract: extract again from the new pdf
        # unless they have been curated (999C59 present).
        # Curated ones: to put in the HP and create ticket in the future
        return not get_fieldvalues(recid, '999C59')

    # Without any 999C5_ value there are no references in the record and we
    # can safely extract new ones. Otherwise it is an old record, with
    # either no curated references or references curated by SLAC; we cannot
    # distinguish, so we do nothing.
    return not get_fieldvalues(recid, '999C5_')
Beispiel #33
0
    def _next_merged_recid(recid):
        """ Returns the ID of record merged with record with ID = recid.

        The first 970__d value that parses as an integer is used; None is
        returned when there is none or when it is 0.
        """
        from invenio.legacy.bibrecord import get_fieldvalues
        for candidate in get_fieldvalues(recid, "970__d"):
            try:
                parsed = int(candidate)
            except ValueError:
                # Not a number: try the next value
                continue
            return parsed or None
        return None
Beispiel #34
0
    def _next_merged_recid(recid):
        """Return the ID of record merged with record with ID = recid.

        Only the first 970__d value that can be read as an integer counts;
        a missing or zero ID gives None.
        """
        from invenio.legacy.bibrecord import get_fieldvalues

        found = None
        for raw_value in get_fieldvalues(recid, "970__d"):
            try:
                found = int(raw_value)
            except ValueError:
                continue
            break

        return found if found else None
Beispiel #35
0
def get_merged_recid(recID):
    """ Return the record ID of the record with
    which the given record has been merged, i.e. the first 970__d
    value that parses as an integer.
    @param recID: deleted record recID
    @type recID: int
    @return: merged record recID
    @rtype: int or None
    """
    for candidate in get_fieldvalues(recID, "970__d"):
        try:
            return int(candidate)
        except ValueError:
            # Skip values that are not numbers
            continue
    return None
Beispiel #36
0
def get_merged_recid(recID):
    """ Look up the record into which the given record has been merged.
    The answer is the first 970__d value that is a valid integer.
    @param recID: deleted record recID
    @type recID: int
    @return: merged record recID
    @rtype: int or None
    """
    result = None
    values = iter(get_fieldvalues(recID, "970__d"))
    while result is None:
        try:
            raw_value = next(values)
        except StopIteration:
            break
        try:
            result = int(raw_value)
        except ValueError:
            pass
    return result
Beispiel #37
0
def record_exists(recID):
    """Return 1 if record RECID exists.
       Return 0 if it doesn't exist.
       Return -1 if it exists but is marked as deleted.
       Copy from search_engine"""
    # Pass recID as a query parameter instead of formatting it into the SQL
    # text, so that its value can never alter the statement.
    res = run_sql("SELECT id FROM bibrec WHERE id=%s", (recID,), 1)
    if not res:
        return 0
    # record exists; now check whether it isn't marked as deleted:
    dbcollids = get_fieldvalues(recID, "980__%")
    if ("DELETED" in dbcollids) or (CFG_CERN_SITE and "DUMMY" in dbcollids):
        return -1 # exists, but marked as deleted
    return 1 # exists fine
Beispiel #38
0
def guess_primary_collection_of_a_record(recID):
    """Guess the name of the primary collection a record belongs to.

       Each 980__a value of the record is turned into the query variants a
       collection may be defined with; the first collection whose dbquery
       equals one of them wins.
       May lead to bad guesses when a collection is defined dynamically
       via dbquery. In that case, return 'CFG_SITE_NAME'."""
    out = CFG_SITE_NAME
    dbcollids = get_fieldvalues(recID, "980__a")
    for dbcollid in dbcollids:
        quoted = '"' + dbcollid + '"'
        # plain and quoted form of the identifier for every prefix
        variants = tuple(prefix + term
                         for prefix in ("collection:", "980__a:", '980:')
                         for term in (dbcollid, quoted))
        res = run_sql(
            "SELECT name FROM collection WHERE dbquery IN (%s,%s,%s,%s,%s,%s)",
            variants)
        if res:
            out = res[0][0]
            break

    if not CFG_CERN_SITE:
        return out

    recID = int(recID)
    # dirty hack for ATLAS collections at CERN:
    if out in ('ATLAS Communications', 'ATLAS Internal Notes'):
        atlas_alternatives = (
            'ATLAS Communications Physics',
            'ATLAS Communications General',
            'ATLAS Internal Notes Physics',
            'ATLAS Internal Notes General',
        )
        for alternative_collection in atlas_alternatives:
            if recID in get_collection_reclist(alternative_collection):
                return alternative_collection

    # dirty hack for FP
    FP_collections = {
        'DO': ['Current Price Enquiries', 'Archived Price Enquiries'],
        'IT': [
            'Current Invitation for Tenders',
            'Archived Invitation for Tenders'
        ],
        'MS': ['Current Market Surveys', 'Archived Market Surveys']
    }
    for coll in dbcollids:
        # identifiers that are not FP codes contribute no candidates
        for coll_name in FP_collections.get(coll, ()):
            if recID in get_collection_reclist(coll_name):
                return coll_name

    return out
Beispiel #39
0
def guess_primary_collection_of_a_record(recID):
    """Return the best guess for the primary collection name of a record.

    Each 980__a value of the record is turned into the six ``dbquery``
    spellings a collection may be defined with, and the ``collection``
    table is asked for a matching name; the first value with a match
    decides.  Without any match the result is CFG_SITE_NAME.

    May lead to bad guesses when a collection is defined dynamically via
    dbquery.

    With CFG_CERN_SITE set, ATLAS and FP records are additionally mapped
    to a more specific collection, provided the record is found in that
    collection's record list.
    """
    field_prefixes = ("collection:", "980__a:", "980:")
    guess = CFG_SITE_NAME
    identifiers = get_fieldvalues(recID, "980__a")
    for identifier in identifiers:
        spellings = []
        for prefix in field_prefixes:
            # unquoted form first, then the double-quoted form
            spellings.append(prefix + identifier)
            spellings.append(prefix + '"' + identifier + '"')
        matches = run_sql("SELECT name FROM collection WHERE dbquery IN (%s,%s,%s,%s,%s,%s)", tuple(spellings))
        if matches:
            guess = matches[0][0]
            break

    if CFG_CERN_SITE:
        record_number = int(recID)

        # dirty hack for ATLAS collections at CERN:
        if guess in ("ATLAS Communications", "ATLAS Internal Notes"):
            atlas_refinements = (
                "ATLAS Communications Physics",
                "ATLAS Communications General",
                "ATLAS Internal Notes Physics",
                "ATLAS Internal Notes General",
            )
            for refined_name in atlas_refinements:
                if record_number in get_collection_reclist(refined_name):
                    return refined_name

        # dirty hack for FP
        fp_names_by_code = {
            "DO": ["Current Price Enquiries", "Archived Price Enquiries"],
            "IT": ["Current Invitation for Tenders", "Archived Invitation for Tenders"],
            "MS": ["Current Market Surveys", "Archived Market Surveys"],
        }
        for identifier in identifiers:
            for fp_name in fp_names_by_code.get(identifier, ()):
                if record_number in get_collection_reclist(fp_name):
                    return fp_name

    return guess
Beispiel #40
0
def record_exists(recID):
    """Tell whether record RECID exists and whether it is marked as deleted.

    Copy from search_engine.

    @param recID: record identifier
    @type recID: int (or any value accepted by int())

    @return: 1 if the record exists, 0 if it does not exist (or RECID is
        not a valid integer), -1 if it exists but is marked as deleted
    @rtype: int
    """
    try:
        # A value such as '123foo' must not be handed to the database: it
        # could be matched against record 123.  Treat it as "no such
        # record" instead.
        recID = int(recID)
    except (ValueError, TypeError):
        return 0

    out = 0
    # Bind the identifier as a query parameter rather than interpolating it
    # into the SQL text, so that caller-supplied input cannot alter the query.
    res = run_sql("SELECT id FROM bibrec WHERE id=%s", (recID,), 1)
    if res:
        # record exists; now check whether it isn't marked as deleted:
        dbcollids = get_fieldvalues(recID, "980__%")
        if ("DELETED" in dbcollids) or (CFG_CERN_SITE
                                        and "DUMMY" in dbcollids):
            out = -1  # exists, but marked as deleted
        else:
            out = 1  # exists fine
    return out
Beispiel #41
0
def get_field_count(recID, tags):
    """
    Count the field instances of record RECID that carry any of TAGS.

    @param recID: record ID
    @type recID: int
    @param tags: list of tags to count, e.g. ['100__a', '700__a']
    @type tags: list
    @return: total number of values found over all the given tags
    @rtype: int
    @note: Works internally via getting field values, which may not be
           very efficient.  Could use counts only, or else retrieve stored
           recstruct format of the record and walk through it.
    """
    return sum(len(get_fieldvalues(recID, tag)) for tag in tags)
Beispiel #42
0
def get_field_count(recID, tags):
    """
    Return how many field values record RECID holds for the given TAGS.

    @param recID: record ID
    @type recID: int
    @param tags: list of tags to count, e.g. ['100__a', '700__a']
    @type tags: list
    @return: sum, over all tags, of the number of values fetched
    @rtype: int
    @note: Works internally via getting field values, which may not be
           very efficient.  Could use counts only, or else retrieve stored
           recstruct format of the record and walk through it.
    """
    # one fetch per tag; only the number of values is of interest
    per_tag_counts = [len(get_fieldvalues(recID, tag)) for tag in tags]
    return sum(per_tag_counts)
Beispiel #43
0
def guess_authority_types(recID):
    """
    Guess the type(s) (e.g. AUTHOR, INSTITUTE, etc.) of an authority
    record from its 980__a values (should only have one value).

    The generic authority collection identifier
    (CFG_BIBAUTHORITY_AUTHORITY_COLLECTION_IDENTIFIER) and any value that
    is not purely alphabetic are left out of the result.

    @param recID: the record ID of the authority record
    @type recID: int

    @return: list of strings
    """
    # repetitive_values=False: remove possible duplicates !
    collection_ids = get_fieldvalues(recID, '980__a', repetitive_values=False)

    # Drop the generic authority identifier; done in place, so the fetched
    # list itself ends up without it.
    generic_identifier = CFG_BIBAUTHORITY_AUTHORITY_COLLECTION_IDENTIFIER
    collection_ids[:] = [collection_id for collection_id in collection_ids
                         if collection_id != generic_identifier]

    # keep purely alphabetic values only
    return [collection_id for collection_id in collection_ids
            if collection_id.isalpha()]
Beispiel #44
0
def guess_main_name_from_authority_recID(recID):
    """
    Get the main name of the authority record.

    The tags 100__a, 110__a, 130__a and 150__a are tried in that order;
    the first value of the first tag that has any value is the result.

    @param recID: the record ID of authority record
    @type recID: int

    @return: the main name of this authority record (string), or the
        empty string when none of the tags has a value
    """
    # tags where the main authority record name can be found, in the
    # order in which they are tried
    for candidate_tag in ('100__a', '110__a', '130__a', '150__a'):
        names = get_fieldvalues(recID, candidate_tag, repetitive_values=False)
        if names:
            # first match wins
            return names[0]
    return ''
Beispiel #45
0
def guess_primary_collection_of_a_record(recID):
    """Return primary collection name a record recid belongs to, by
       testing 980 identifier.

       For every 980__a value of the record, the collection table is
       searched for a collection whose dbquery is one of six spellings
       of that value; the name of the first hit is returned.

       May lead to bad guesses when a collection is defined dynamically
       via dbquery.
       When nothing matches, return CFG_SITE_NAME."""
    fallback = CFG_SITE_NAME
    for identifier in get_fieldvalues(recID, "980__a"):
        in_quotes = '"' + identifier + '"'
        spellings = (
            "collection:" + identifier,
            "collection:" + in_quotes,
            "980__a:" + identifier,
            "980__a:" + in_quotes,
            "980:" + identifier,
            "980:" + in_quotes,
        )
        matches = run_sql("SELECT name FROM collection WHERE dbquery IN (%s,%s,%s,%s,%s,%s)", spellings)
        if matches:
            # the first 980__a value with a matching collection decides
            return matches[0][0]

    return fallback
Beispiel #46
0
def record_exists(recID):
    """Report the existence status of record RECID.

       Return 1 if the record exists.
       Return 0 if it doesn't exist (or RECID is not an integer value).
       Return -1 if it exists but is marked as deleted.
    """
    from invenio.config import CFG_CERN_SITE
    # if recid is '123foo', mysql will return id=123, and we don't want
    # that, hence the explicit conversion up front
    try:
        record_id = int(recID)
    except (ValueError, TypeError):
        return 0

    if not run_sql("SELECT id FROM bibrec WHERE id=%s", (record_id,), 1):
        return 0

    # record exists; now check whether it isn't marked as deleted:
    collection_ids = get_fieldvalues(record_id, "980__%")
    if "DELETED" in collection_ids:
        return -1 # exists, but marked as deleted
    if CFG_CERN_SITE and "DUMMY" in collection_ids:
        return -1 # exists, but marked as deleted (CERN dummy record)
    return 1 # exists fine
Beispiel #47
0
def record_exists(recID):
    """Check whether record RECID is present in bibrec.

       Return 1 if record RECID exists.
       Return 0 if it doesn't exist, or if RECID cannot be converted to
       an integer.
       Return -1 if it exists but is marked as deleted.
    """
    from invenio.config import CFG_CERN_SITE
    try:
        # if recid is '123foo', mysql will return id=123, and we don't
        # want that
        recID = int(recID)
    except (ValueError, TypeError):
        return 0

    found = run_sql("SELECT id FROM bibrec WHERE id=%s", (recID, ), 1)
    if not found:
        return 0

    # record exists; now check whether it isn't marked as deleted:
    markers = get_fieldvalues(recID, "980__%")
    marked_as_deleted = ("DELETED" in markers) or (CFG_CERN_SITE
                                                   and "DUMMY" in markers)
    return -1 if marked_as_deleted else 1
Beispiel #48
0
def book_title_from_MARC(recid):
    """
    Retrieve book's title from MARC

    The tags returned by get_field_tags('title') are tried in order; the
    values of the first tag that has any are joined with ': '.

    @param recid: identify the record. Primary key of bibrec.
    @type recid: int

    @return book's title ('' when no title tag has a value)
    """

    for title_tag in get_field_tags('title'):
        title_parts = get_fieldvalues(recid, title_tag)
        if title_parts:
            # first tag with values decides; later tags are not consulted
            return ': '.join(title_parts)

    return ''
Beispiel #49
0
def book_title_from_MARC(recid):
    """
    Build the title of a book from its MARC title fields.

    Each tag listed by get_field_tags('title') is looked up in turn until
    one of them has values; those values, separated by ': ', form the
    title.

    @param recid: identify the record. Primary key of bibrec.
    @type recid: int

    @return book's title; empty string if no title value was found
    """

    title_values = []
    for tag in get_field_tags('title'):
        title_values = get_fieldvalues(recid, tag)
        if title_values:
            # stop at the first tag that has at least one value
            break

    return ': '.join(title_values)
Beispiel #50
0
def get_item_info_for_search_result(recid):
    """
    Get the item's info from MARC in order to create a
    search result with more details

    @param recid: identify the record. Primary key of bibrec.
    @type recid: int

    @return tuple (author, editor, copies) of strings:
        author joins the 100__a and 100__u values with two spaces,
        editor joins the 260__a, 260__b and 260__c values with ' , ',
        copies joins the 964__a values with two spaces.
    """

    author_values = get_fieldvalues(recid, "100__a")
    author_values = author_values + get_fieldvalues(recid, "100__u")
    book_author = '  '.join(author_values)

    editor_values = get_fieldvalues(recid, "260__a")
    editor_values = editor_values + get_fieldvalues(recid, "260__b")
    editor_values = editor_values + get_fieldvalues(recid, "260__c")
    book_editor = ' , '.join(editor_values)

    copies_values = get_fieldvalues(recid, "964__a")
    book_copies = '  '.join(copies_values)

    return (book_author, book_editor, book_copies)
Beispiel #51
0
 def check_arxiv(recid):
     """Tell whether record RECID has an 'arXiv' value in field 037__9.

     @return: True if any 037__9 value equals 'arXiv', False otherwise
     """
     return any(source == 'arXiv'
                for source in get_fieldvalues(recid, '037__9'))
Beispiel #52
0
def get_most_popular_field_values(recids, tags, exclude_values=None,
                                  count_repetitive_values=True, split_by=0):
    """Analyze RECIDS and look for TAGS and return most popular values.

    The values are returned together with the frequency with which they
    occur, sorted according to descending frequency (ties are ordered by
    lowercased value).

    If a value is found in EXCLUDE_VALUES, then do not count it.

    If COUNT_REPETITIVE_VALUES is True, then we count every occurrence
    of value in the tags.  If False, then we count the value only once
    regardless of the number of times it may appear in a record.
    (But, if the same value occurs in another record, we count it, of
    course.)  In that second mode values are compared case-insensitively
    and reported in one of the spellings found in the records.

    Example:

    .. code-block:: python

        >>> get_most_popular_field_values(range(11,20), '980__a')
        [('PREPRINT', 10), ('THESIS', 7), ...]
        >>> get_most_popular_field_values(range(11,20), ('100__a', '700__a'))
        [('Ellis, J', 10), ('Ellis, N', 7), ...]
        >>> get_most_popular_field_values(range(11,20), ('100__a', '700__a'),
        ... ('Ellis, J',))
        [('Ellis, N', 7), ...]

    :param recids: record IDs to analyze
    :param tags: a single tag (string) or a sequence of tags
    :param exclude_values: values not to count; a single string is treated
        as one value to exclude
    :param count_repetitive_values: see above
    :param split_by: handed over to ``get_fieldvalues`` (only used when
        COUNT_REPETITIVE_VALUES is True)
    :return: list of tuples containing value and its frequency
    """
    from invenio.legacy.bibrecord import get_fieldvalues

    valuefreqdict = {}
    # sanity check:
    if not exclude_values:
        exclude_values = []
    elif isinstance(exclude_values, string_types):
        # A bare string would otherwise be tested with ``in`` below, i.e.
        # as a substring match excluding every value contained in it.
        exclude_values = (exclude_values,)
    if isinstance(tags, string_types):
        tags = (tags,)
    # find values to count:
    vals_to_count = []
    displaytmp = {}  # lowercased value -> spelling to report
    if count_repetitive_values:
        # counting technique A: can look up many records at once: (very fast)
        for tag in tags:
            vals_to_count.extend(get_fieldvalues(recids, tag, sort=False,
                                                 split_by=split_by))
    else:
        # counting technique B: must count record-by-record: (slow)
        for recid in recids:
            vals_in_rec = []
            for tag in tags:
                vals_in_rec.extend(get_fieldvalues(recid, tag, False))
            # do not count repetitive values within this record
            # (even across various tags, so need to unify again):
            dtmp = {}
            for val in vals_in_rec:
                lowered = val.lower()
                dtmp[lowered] = 1
                displaytmp[lowered] = val
            vals_to_count.extend(dtmp.keys())
    # are we to exclude some of found values?
    # NOTE(review): with technique B the values tested here are already
    # lowercased, so a mixed-case entry of exclude_values never matches --
    # confirm whether that is intended.
    for val in vals_to_count:
        if val not in exclude_values:
            valuefreqdict[val] = valuefreqdict.get(val, 0) + 1
    # sort by descending frequency of values:
    f = []   # negated frequencies (so that ascending sort means descending)
    n = []   # original names
    ln = []  # lowercased names
    # build lists within one iteration
    for (val, freq) in iteritems(valuefreqdict):
        f.append(-1 * freq)
        n.append(displaytmp.get(val, val))
        ln.append(val.lower())
    # sort by frequency (desc) and then by lowercased name.
    # (lexsort uses the LAST key as the primary one.)
    return [(n[i], -1 * f[i]) for i in numpy.lexsort([ln, f])]