コード例 #1
0
ファイル: test_bibfield.py プロジェクト: PXke/inspire-next
    def test_get_legacy_recstruct(self):
        """bibfield - legacy functions

        Record 8 is loaded twice: once through ``get_record(...)`` followed
        by ``get_legacy_recstruct()`` and once through the legacy search
        engine ``get_record``.  Both structures must agree on the first
        ``100`` subfield ``a`` value and on the number of ``999`` fields.
        """
        from invenio.legacy.search_engine import \
            get_record as legacy_get_record
        from invenio.legacy.bibrecord import record_get_field_value

        recid = 8
        from_bibfield = get_record(recid).get_legacy_recstruct()
        from_search_engine = legacy_get_record(recid)

        # Same value for 100 $a in both representations.
        author_bibfield = record_get_field_value(
            from_bibfield, '100', code='a')
        author_legacy = record_get_field_value(
            from_search_engine, '100', code='a')
        self.assertEqual(author_bibfield, author_legacy)

        # Same number of 999 field instances in both representations.
        count_bibfield = len(from_bibfield['999'])
        count_legacy = len(from_search_engine['999'])
        self.assertEqual(count_bibfield, count_legacy)
コード例 #2
0
ファイル: utils.py プロジェクト: jiangmin9/invenio
def _record_has_id_p(record, recid, rec_oaiid, rec_sysno):
    """Tell whether ``record`` carries at least one of the given identifiers.

    Three identifiers are tried in order, each only when the corresponding
    field is present in the record:

    * control field ``001`` compared against ``str(recid)``;
    * the field addressed by ``OAIID_TAG`` compared against ``rec_oaiid``;
    * the field addressed by ``SYSNO_TAG`` compared against ``rec_sysno``.

    ``OAIID_TAG`` and ``SYSNO_TAG`` are read as 6-character MARC tags
    (3-character tag, indicator 1, indicator 2, subfield code).

    Returns True on the first match, False when nothing matches.
    """
    if (record_has_field(record, "001")
            and record_get_field_value(record, "001", "%", "%") == str(recid)):
        return True

    if (record_has_field(record, OAIID_TAG[0:3])
            and record_get_field_value(record,
                                       OAIID_TAG[0:3],
                                       OAIID_TAG[3],
                                       OAIID_TAG[4],
                                       OAIID_TAG[5]) == rec_oaiid):
        return True

    if (record_has_field(record, SYSNO_TAG[0:3])
            and record_get_field_value(record,
                                       SYSNO_TAG[0:3],
                                       SYSNO_TAG[3],
                                       SYSNO_TAG[4],
                                       SYSNO_TAG[5]) == rec_sysno):
        return True

    return False
コード例 #3
0
def _record_has_id_p(record, recid, rec_oaiid, rec_sysno):
    """Return True when the record matches the recid, OAI id or sysno.

    The control field ``001`` is compared with ``str(recid)`` first.  Then
    the fields addressed by the 6-character tags ``OAIID_TAG`` and
    ``SYSNO_TAG`` (tag + ind1 + ind2 + subfield code) are compared with
    ``rec_oaiid`` and ``rec_sysno`` respectively.  A field that is absent
    from the record is skipped without reading its value.
    """
    has_control_number = record_has_field(record, '001')
    if has_control_number:
        control_number = record_get_field_value(record, '001', '%', '%')
        if control_number == str(recid):
            return True

    # Both configurable identifiers are looked up the same way.
    for marc_tag, wanted in ((OAIID_TAG, rec_oaiid), (SYSNO_TAG, rec_sysno)):
        field = marc_tag[0:3]
        if not record_has_field(record, field):
            continue
        found = record_get_field_value(record, field, marc_tag[3],
                                       marc_tag[4], marc_tag[5])
        if found == wanted:
            return True

    return False
コード例 #4
0
def perform_get_holdings_information(recid,
                                     req,
                                     action="borrowal",
                                     ln=CFG_SITE_LANG):
    """
    Display all the copies of an item. If the parameter action is 'proposal', display
    appropriate information to the user.

    @param recid: identify the record. Primary key of bibrec.
    @type recid: int

    @param req: passed through unchanged to the holdings template
        (presumably the request object - confirm against callers).

    @param action: Specifies whether the current record is put up to solicit acquisition
    proposals(if "proposal") or not("borrowal").
    @type action: string

    @param ln: language used for the translated messages and the templates.

    @return body(html)
    """
    _ = gettext_set_language(ln)

    if action != "proposal":
        holdings_information = db.get_holdings_information(recid, False)
        return bc_templates.tmpl_holdings_information(
            recid=recid, req=req, holdings_info=holdings_information, ln=ln)

    tag = AMZ_BOOK_PUBLICATION_DATE_TAG
    publication_date = record_get_field_value(get_record(recid),
                                              tag[:3],
                                              ind1=tag[3],
                                              ind2=tag[4],
                                              code=tag[5])
    msg = ''
    if publication_date:
        cur_date = datetime.date.today()
        try:
            parsed = time.strptime(publication_date, '%d %b %Y')
            pub_date = datetime.date(parsed[0], parsed[1], parsed[2])
        except (ValueError, TypeError):
            # Unparsable date: fall back to the generic "no copies" message
            # instead of hiding every possible error behind a bare except.
            pub_date = None

        if pub_date is not None and cur_date < pub_date:
            msg += _(
                "The publication date of this book is %(x_date)s.",
                x_date=(publication_date))
            msg += "<br /><br />"
        else:
            msg += _("This book has no copies in the library. ")

    # The whitespace embedded by the line continuations is part of the
    # translation key, so the literal is kept exactly as it was.
    msg += _(
        "If you think this book is interesting, suggest it and tell us why you consider this \
                  book is important. The library will consider your opinion and if we decide to buy the \
                  book, we will issue a loan for you as soon as it arrives and send it by internal mail."
    )
    # Was "<br \><br \>": malformed markup and an invalid escape sequence.
    msg += "<br /><br />"
    msg += _(
        "In case we decide not to buy the book, we will offer you an interlibrary loan"
    )

    return bc_templates.tmpl_book_proposal_information(recid, msg, ln=ln)
コード例 #5
0
def get_xml_from_textmarc(recid, textmarc_record, uid=None):
    """
    Convert textmarc to marcxml and return the result of the conversion

    The textmarc is written to a temporary file (each line prefixed with the
    zero-padded recid), converted with transform_file while stdout is
    captured, and the temporary file is removed afterwards.

    @param recid: id of the record that is being converted
    @type: int

    @param textmarc_record: record content in textmarc format
    @type: string

    @param uid: id of the user, used to look up the cache file

    @return: dictionary with the following keys:
            * resultMsg: message describing conversion status
            * resultXML: xml resulting from conversion
            * parse_error: in case of error, a description of it
    @rtype: dict
    """
    response = {}
    # Let's remove empty lines
    textmarc_record = os.linesep.join(
        [s for s in textmarc_record.splitlines() if s])

    # Create temp file with textmarc to be converted by textmarc2xmlmarc
    (file_descriptor, file_name) = tempfile.mkstemp()
    try:
        # 'with' guarantees the handle is closed even if writing fails
        with os.fdopen(file_descriptor, "w") as f:
            # If there is a cache file, add the controlfields
            if cache_exists(recid, uid):
                record = get_cache_contents(recid, uid)[2]
                for tag in record:
                    # It is a controlfield
                    if tag.startswith("00") and tag != "001":
                        f.write(
                            "%09d %s %s\n" %
                            (recid, tag + "__",
                             record_get_field_value(record, tag)))

            # Write content appending sysno at beginning
            for line in textmarc_record.splitlines():
                f.write("%09d %s\n" %
                        (recid, re.sub(r"\s+", " ", line.strip())))

        # Redirect output, transform, restore old references
        old_stdout = sys.stdout
        new_stdout = StringIO()
        sys.stdout = new_stdout
        try:
            transform_file(file_name)
            response['resultMsg'] = 'textmarc_parsing_success'
            response['resultXML'] = new_stdout.getvalue()
        except ParseError as e:
            # Something went wrong, notify user
            response['resultXML'] = ""
            response['resultMsg'] = 'textmarc_parsing_error'
            response['parse_error'] = [
                e.lineno, " ".join(e.linecontent.split()[1:]), e.message
            ]
        finally:
            sys.stdout = old_stdout
    finally:
        # mkstemp leaves deletion to the caller; without this every call
        # left a file behind.  Best effort: a missing file is not an error.
        try:
            os.remove(file_name)
        except OSError:
            pass

    return response
コード例 #6
0
ファイル: marcxml_tasks.py プロジェクト: mhellmic/b2share
def add_metadata_to_extra_data(obj, eng):
    """
    Build a bibrecord from ``obj.data`` and copy three of its values into
    ``obj.extra_data['redis_search']``:

    * ``category`` from 037 subfield c
    * ``title`` from 245 subfield a
    * ``source`` from 035 subfield 9

    ``obj.extra_data['_last_task_name']`` is set to this task's name first.

    :param obj: Bibworkflow Object to process
    :param eng: BibWorkflowEngine processing the object (not used here)
    """
    obj.extra_data["_last_task_name"] = "add_metadata_to_extra_data"
    from invenio.legacy.bibrecord import create_record as old_create_record, record_get_field_value

    record = old_create_record(obj.data)

    # (extra_data key, MARC tag, subfield code)
    wanted_values = (
        ('category', '037', 'c'),
        ('title', '245', 'a'),
        ('source', '035', '9'),
    )
    for key, marc_tag, subfield_code in wanted_values:
        obj.extra_data['redis_search'][key] = \
            record_get_field_value(record[0], marc_tag, code=subfield_code)
コード例 #7
0
ファイル: engine.py プロジェクト: derekstrom/invenio
    def __init__(self, recID, ln=CFG_SITE_LANG, search_pattern=None,
                 xml_record=None, user_info=None, output_format=''):
        """
        Creates a new bibformat object, with given record.

        You can either specify a record ID to format, or give its xml representation.
        if 'xml_record' is not None, use 'xml_record' instead of recID for the record;
        the record ID is then read from control field 001 of the parsed record
        (None when that field is empty).

        'user_info' allows to grant access to some functionalities on
        a page depending on the user's privileges. It is a dictionary
        in the following form::

            user_info = {
                'remote_ip' : '',
                'remote_host' : '',
                'referer' : '',
                'uri' : '',
                'agent' : '',
                'uid' : -1,
                'nickname' : '',
                'email' : '',
                'group' : [],
                'guest' : '1'
                }

        :param recID: the id of a record
        :param ln: the language in which the record has to be formatted
        :param search_pattern: list of string representing the request used by the user in web interface
        :param xml_record: a xml string of the record to format
        :param user_info: the information of the user who will view the formatted page
        :param output_format: the output_format used for formatting this record
        """
        self.xml_record = None # *Must* remain empty if recid is given
        if xml_record is not None:
            # If record is given as parameter
            self.xml_record = xml_record
            self.record = create_record(xml_record)[0]
            recID = record_get_field_value(self.record, "001") or None
            recID = int(recID) if recID is not None else recID

        try:
            # An explicit raise (not ``assert``) so the check is not stripped
            # when Python runs with -O.  The raise stays inside ``try`` so an
            # exception is active when register_exception is called
            # (presumably it reports the current exception - confirm).
            if not isinstance(recID, (int, long, type(None))):
                raise TypeError('Argument of wrong type!')
        except TypeError:
            register_exception(prefix="recid needs to be an integer in BibFormatObject",
                               alert_admin=True)
            recID = int(recID)
        self.recID = recID
        self.lang = wash_language(ln)
        if search_pattern is None:
            search_pattern = []
        self.search_pattern = search_pattern
        self.output_format = output_format
        self.user_info = user_info
        if self.user_info is None:
            from invenio.ext.login.legacy_user import UserInfo
            self.user_info = UserInfo(None)
コード例 #8
0
def retrieve_field_values(curdir,
                          field_name,
                          separator=None,
                          system_number_file='SN',
                          tag=None):
    """
    This is a handy function to retrieve values either from the current
    submission directory, when a form has been just submitted, or from
    an existing record (e.g. during MBI action).

    @param curdir: is the current submission directory.
    @type curdir: string
    @param field_name: is the form field name that might exists on disk.
    @type field_name: string
    @param separator: is an optional separator. If it exists, it will be used
        to retrieve multiple values contained in the field.
    @type separator: string
    @param system_number_file: is the name of the file on disk in curdir, that
        is supposed to contain the record id.
    @type system_number_file: string
    @param tag: is the full MARC tag (tag+ind1+ind2+code) that should
        contain values. If not specified, only values in curdir will
        be retrieved.
    @type tag: 6-chars
    @return: the field value(s); an empty list when neither the file in
        curdir nor the record can provide a value.
    @rtype: list of strings.

    @note: if field_name exists in curdir it will take precedence over
        retrieving the values from the record.
    """
    field_file = os.path.join(curdir, field_name)
    if os.path.exists(field_file):
        # Read through a context manager so the handle is always closed.
        with open(field_file) as field_fd:
            field_value = field_fd.read()
        if separator is not None:
            stripped_values = [
                value.strip() for value in field_value.split(separator)
            ]
            # Empty chunks (e.g. between two consecutive separators) are dropped.
            return [value for value in stripped_values if value]
        return [field_value.strip()]

    if tag is not None:
        system_number_file = os.path.join(curdir, system_number_file)
        if os.path.exists(system_number_file):
            with open(system_number_file) as sysno_fd:
                recid = int(sysno_fd.read().strip())
            record = get_record(recid)
            if separator:
                return record_get_field_values(record, tag[:3], tag[3], tag[4],
                                               tag[5])
            return [
                record_get_field_value(record, tag[:3], tag[3], tag[4],
                                       tag[5])
            ]
    return []
コード例 #9
0
ファイル: utils.py プロジェクト: jiangmin9/invenio
def get_xml_from_textmarc(recid, textmarc_record, uid=None):
    """
    Convert textmarc to marcxml and return the result of the conversion

    The textmarc is written to a temporary file (each line prefixed with the
    zero-padded recid), converted with transform_file while stdout is
    captured, and the temporary file is removed afterwards.

    @param recid: id of the record that is being converted
    @type: int

    @param textmarc_record: record content in textmarc format
    @type: string

    @param uid: id of the user, used to look up the cache file

    @return: dictionary with the following keys:
            * resultMsg: message describing conversion status
            * resultXML: xml resulting from conversion
            * parse_error: in case of error, a description of it
    @rtype: dict
    """
    response = {}
    # Let's remove empty lines
    textmarc_record = os.linesep.join([s for s in textmarc_record.splitlines() if s])

    # Create temp file with textmarc to be converted by textmarc2xmlmarc
    (file_descriptor, file_name) = tempfile.mkstemp()
    try:
        # The context manager closes the handle even when a write fails
        with os.fdopen(file_descriptor, "w") as f:
            # If there is a cache file, add the controlfields
            if cache_exists(recid, uid):
                record = get_cache_contents(recid, uid)[2]
                for tag in record:
                    if tag.startswith("00") and tag != "001":  # It is a controlfield
                        f.write("%09d %s %s\n" % (recid, tag + "__", record_get_field_value(record, tag)))

            # Write content appending sysno at beginning
            for line in textmarc_record.splitlines():
                f.write("%09d %s\n" % (recid, re.sub(r"\s+", " ", line.strip())))

        # Redirect output, transform, restore old references
        old_stdout = sys.stdout
        new_stdout = StringIO()
        sys.stdout = new_stdout
        try:
            transform_file(file_name)
            response["resultMsg"] = "textmarc_parsing_success"
            response["resultXML"] = new_stdout.getvalue()
        except ParseError as e:
            # Something went wrong, notify user
            response["resultXML"] = ""
            response["resultMsg"] = "textmarc_parsing_error"
            response["parse_error"] = [e.lineno, " ".join(e.linecontent.split()[1:]), e.message]
        finally:
            sys.stdout = old_stdout
    finally:
        # mkstemp never deletes the file itself; remove it here so repeated
        # conversions do not pile up temp files.  Best effort only.
        try:
            os.remove(file_name)
        except OSError:
            pass

    return response
コード例 #10
0
ファイル: cnum.py プロジェクト: osub3/invenio
def populate_cnums():
    """Register the cnums found in CONFERENCE records.

    Every record of the "Conferences" collection matching ``111__g:C*`` is
    inspected; its 111 $g value, when non-empty and not yet known to
    ``_cnum_exists``, is stored through ``_insert_cnum`` (the seqSTORE
    table, according to the original description) and reported on stdout.
    """
    # First get all records from conference collection
    conference_recids = perform_request_search(
        cc="Conferences", p="111__g:C*", rg=0)

    for recid in conference_recids:
        bibrecord = get_bibrecord(recid)
        cnum = record_get_field_value(
            bibrecord, tag="111", ind1="", ind2="", code="g")
        # Nothing to do without a cnum or when it is already registered.
        if not cnum or _cnum_exists(cnum):
            continue
        _insert_cnum(cnum)
        print("cnum %s from record %s inserted" % (cnum, recid))
コード例 #11
0
ファイル: cnum.py プロジェクト: chokribr/invenio-1
    def _next_value(self, recid=None, xml_record=None, start_date=None):
        """
        Compute the next free cnum for a conference record.

        The record is taken from ``recid`` when given, otherwise parsed from
        ``xml_record``.  Unless ``start_date`` is passed explicitly it is
        read from 111 $x of that record.

        @param recid: id of the record where the cnum will be generated
        @type recid: int

        @param xml_record: record in xml format
        @type xml_record: string

        @param start_date: use given start date
        @type start_date: string

        @return: "C" followed by the start date without its first two
            characters, plus ".<n>" when cnums with that base already exist
        @rtype: string

        @raises ConferenceNoStartDateError: no start date was given or found
        """
        if recid is not None:
            bibrecord = get_bibrecord(recid)
        elif xml_record is not None:
            bibrecord = create_record(xml_record)[0]
        else:
            bibrecord = None

        if start_date is None and bibrecord is not None:
            start_date = record_get_field_value(
                bibrecord, tag="111", ind1="", ind2="", code="x")

        if not start_date:
            raise ConferenceNoStartDateError

        base_cnum = "C" + start_date[2:]

        record_cnums = self._get_record_cnums(base_cnum)
        if not record_cnums:
            return base_cnum

        # Existing cnums look like Cyy-mm-dd, Cyy-mm-dd.1, Cyy-mm-dd.2;
        # find the highest numeric suffix (0 when none has a suffix).
        highest_revision = 0
        for row in record_cnums:
            existing = row[0]
            if '.' in existing:
                revision = int(existing.split('.')[1])
                highest_revision = max(highest_revision, revision)

        return base_cnum + '.' + str(highest_revision + 1)
コード例 #12
0
ファイル: api.py プロジェクト: SCOAP3/invenio
def perform_get_holdings_information(recid, req, action="borrowal", ln=CFG_SITE_LANG):
    """
    Display all the copies of an item. If the parameter action is 'proposal', display
    appropriate information to the user.

    @param recid: identify the record. Primary key of bibrec.
    @type recid: int

    @param req: passed through unchanged to the holdings template
        (presumably the request object - confirm against callers).

    @param action: Specifies whether the current record is put up to solicit acquisition
    proposals(if "proposal") or not("borrowal").
    @type action: string

    @param ln: language used for the translated messages and the templates.

    @return body(html)
    """
    _ = gettext_set_language(ln)

    if action == "proposal":
        tag = AMZ_BOOK_PUBLICATION_DATE_TAG
        publication_date = record_get_field_value(get_record(recid), tag[:3],
                                                  ind1=tag[3], ind2=tag[4],
                                                  code=tag[5])
        msg = ''
        if publication_date:
            cur_date = datetime.date.today()
            try:
                parsed = time.strptime(publication_date, '%d %b %Y')
                pub_date = datetime.date(parsed[0], parsed[1], parsed[2])
            except (ValueError, TypeError):
                # Date not in the expected format: use the generic message
                # rather than swallowing every exception with a bare except.
                pub_date = None

            if pub_date is not None and cur_date < pub_date:
                msg += _("The publication date of this book is %(x_date)s.", x_date=(publication_date))
                msg += "<br /><br />"
            else:
                msg += _("This book has no copies in the library. ")

        # The whitespace embedded by the line continuations belongs to the
        # translation key, so the literal is reproduced unchanged.
        msg += _("If you think this book is interesting, suggest it and tell us why you consider this \
                  book is important. The library will consider your opinion and if we decide to buy the \
                  book, we will issue a loan for you as soon as it arrives and send it by internal mail.")
        # Was "<br \><br \>": malformed markup and an invalid escape sequence.
        msg += "<br /><br />"
        msg += _("In case we decide not to buy the book, we will offer you an interlibrary loan")

        body = bc_templates.tmpl_book_proposal_information(recid, msg, ln=ln)
    else:
        holdings_information = db.get_holdings_information(recid, False)
        body = bc_templates.tmpl_holdings_information(recid=recid,
                                                      req=req,
                                                      holdings_info=holdings_information,
                                                      ln=ln)

    return body
コード例 #13
0
ファイル: cnum.py プロジェクト: SCOAP3/invenio
    def _next_value(self, recid=None, xml_record=None, start_date=None):
        """
        Work out the next cnum to assign for a conference.

        A record is obtained from ``recid`` (preferred) or from
        ``xml_record``; when no ``start_date`` is supplied it is looked up
        in 111 $x of that record.

        @param recid: id of the record where the cnum will be generated
        @type recid: int

        @param xml_record: record in xml format
        @type xml_record: string

        @param start_date: use given start date
        @type start_date: string

        @return: the base cnum ("C" + start date minus its first two
            characters) when unused, otherwise the base followed by "." and
            the next revision number
        @rtype: string

        @raises ConferenceNoStartDateError: no start date was given or found
        """
        bibrecord = None
        if recid is not None:
            bibrecord = get_bibrecord(recid)
        elif xml_record is not None:
            bibrecord = create_record(xml_record)[0]

        date_is_missing = start_date is None
        if date_is_missing and bibrecord is not None:
            start_date = record_get_field_value(bibrecord, tag="111", ind1="",
                                                ind2="", code="x")

        if not start_date:
            raise ConferenceNoStartDateError

        base_cnum = "C" + start_date[2:]
        record_cnums = self._get_record_cnums(base_cnum)

        if record_cnums:
            # cnums are in format Cyy-mm-dd, Cyy-mm-dd.1, Cyy-mm-dd.2:
            # collect the numeric suffixes already in use.
            used_revisions = [0]
            for rev in record_cnums:
                if '.' in rev[0]:
                    used_revisions.append(int(rev[0].split('.')[1]))
            next_revision = max(used_revisions) + 1
            new_cnum = base_cnum + '.' + str(next_revision)
        else:
            new_cnum = base_cnum

        return new_cnum
コード例 #14
0
ファイル: Shared_Functions.py プロジェクト: SCOAP3/invenio
def retrieve_field_values(curdir, field_name, separator=None, system_number_file='SN', tag=None):
    """
    This is a handy function to retrieve values either from the current
    submission directory, when a form has been just submitted, or from
    an existing record (e.g. during MBI action).

    @param curdir: is the current submission directory.
    @type curdir: string
    @param field_name: is the form field name that might exists on disk.
    @type field_name: string
    @param separator: is an optional separator. If it exists, it will be used
        to retrieve multiple values contained in the field.
    @type separator: string
    @param system_number_file: is the name of the file on disk in curdir, that
        is supposed to contain the record id.
    @type system_number_file: string
    @param tag: is the full MARC tag (tag+ind1+ind2+code) that should
        contain values. If not specified, only values in curdir will
        be retrieved.
    @type tag: 6-chars
    @return: the field value(s); an empty list when neither the file in
        curdir nor the record can provide a value.
    @rtype: list of strings.

    @note: if field_name exists in curdir it will take precedence over
        retrieving the values from the record.
    """
    field_file = os.path.join(curdir, field_name)
    if os.path.exists(field_file):
        # Context manager: the original left the handle open until GC.
        with open(field_file) as field_fd:
            field_value = field_fd.read()
        if separator is not None:
            stripped_values = [value.strip() for value in field_value.split(separator)]
            # Empty chunks (e.g. between two consecutive separators) are dropped.
            return [value for value in stripped_values if value]
        return [field_value.strip()]

    if tag is not None:
        system_number_file = os.path.join(curdir, system_number_file)
        if os.path.exists(system_number_file):
            with open(system_number_file) as sysno_fd:
                recid = int(sysno_fd.read().strip())
            record = get_record(recid)
            if separator:
                return record_get_field_values(record, tag[:3], tag[3], tag[4], tag[5])
            return [record_get_field_value(record, tag[:3], tag[3], tag[4], tag[5])]
    return []
コード例 #15
0
ファイル: utils.py プロジェクト: Theer108/invenio
def add_record_cnum(recid, uid):
    """
    Give the cached record a cnum when it does not have one yet.

    The record is read from the cache file of (recid, uid).  If 111 $g is
    empty, a new cnum is requested from CnumSeq, added as subfield g of the
    first 111 field and the cache is updated.

    @param recid: recid of the record under check. Used to retrieve cache file
    @type recid: int

    @param uid: id of the user. Used to retrieve cache file
    @type uid: int

    @return: the new cnum; None if a cnum is already present or if the
        sequence generator reports that the record has no start date
    @rtype: None or string
    """
    # Import placed here to avoid circular dependency
    from invenio.modules.sequencegenerator.cnum import CnumSeq, ConferenceNoStartDateError

    (record_revision, record, pending_changes, deactivated_hp_changes,
     undo_list, redo_list) = get_cache_contents(recid, uid)[1:]

    record_strip_empty_volatile_subfields(record)

    # Nothing to do when the record already carries a cnum.
    if record_get_field_value(record, "111", " ", " ", "g"):
        return None

    cnum_seq = CnumSeq()
    try:
        new_cnum = cnum_seq.next_value(
            xml_record=wash_for_xml(print_rec(record)))
    except ConferenceNoStartDateError:
        return None

    field_add_subfield(record['111'][0], 'g', new_cnum)
    update_cache_contents(recid, uid, record_revision, record,
                          pending_changes, deactivated_hp_changes,
                          undo_list, redo_list)
    return new_cnum
コード例 #16
0
ファイル: engine.py プロジェクト: derekstrom/invenio
    def control_field(self, tag, escape=0):
        """
        Look up the control field ``tag`` in this object's record.

        :param tag: the marc code of a field
        :param escape: 0 to return the raw value; any other value is handed
            to escape_field together with the value
        @return: value of field tag in record, or '' when no record is
            available
        """
        if self.get_record() is None:
            # Case where BibRecord could not parse object
            return ''

        parsed_tag = parse_tag(tag)
        raw_value = record_get_field_value(
            self.get_record(),
            parsed_tag[0], parsed_tag[1], parsed_tag[2], parsed_tag[3])

        if escape != 0:
            return escape_field(raw_value, escape)
        return raw_value
コード例 #17
0
def add_record_cnum(recid, uid):
    """
    Generate and store a cnum for the cached record if it lacks one.

    Works on the record held in the cache file of (recid, uid): when
    111 $g has no value, CnumSeq supplies the next cnum, which is appended
    as subfield g to the first 111 field before the cache is rewritten.

    @param recid: recid of the record under check. Used to retrieve cache file
    @type recid: int

    @param uid: id of the user. Used to retrieve cache file
    @type uid: int

    @return: None if cnum already present (or no start date is available
        to build one), new cnum otherwise
    @rtype: None or string
    """
    # Import placed here to avoid circular dependency
    from invenio.modules.sequencegenerator.cnum import CnumSeq, ConferenceNoStartDateError

    cache_tail = get_cache_contents(recid, uid)[1:]
    record_revision, record, pending_changes, deactivated_hp_changes, \
        undo_list, redo_list = cache_tail

    record_strip_empty_volatile_subfields(record)

    existing_cnum = record_get_field_value(record, "111", " ", " ", "g")
    if not existing_cnum:
        generator = CnumSeq()
        try:
            record_xml = wash_for_xml(print_rec(record))
            new_cnum = generator.next_value(xml_record=record_xml)
        except ConferenceNoStartDateError:
            # Without a start date no cnum can be built.
            return None
        first_111_field = record['111'][0]
        field_add_subfield(first_111_field, 'g', new_cnum)
        update_cache_contents(recid, uid, record_revision,
                              record,
                              pending_changes,
                              deactivated_hp_changes,
                              undo_list, redo_list)
        return new_cnum

    # Record already has a cnum
    return None
コード例 #18
0
ファイル: texkey.py プロジェクト: chokribr/invenio-1
    def _next_value(self, recid=None, xml_record=None, bibrecord=None):
        """
        Returns the next texkey for the given recid

        The key has the form ``<author>:<year><chars>``: the part of the
        author name before the first comma (spaces removed), the part of the
        year value before the first "-", and the characters returned by
        _texkey_random_chars.

        @param recid: id of the record where the texkey will be generated
        @type recid: int

        @param xml_record: record in xml format; only used when recid is None
        @type xml_record: string

        @param bibrecord: already parsed record; used when given and
            xml_record does not take over (see above)

        @return: next texkey for the given recid. If every candidate tried
            (at most TEXKEY_MAXTRIES regenerations) already exists, the last
            candidate is returned anyway.
        @rtype: string

        @raises TexkeyNoAuthorError: No main author (100__a) or collaboration
        (710__g) in the given recid
        @raises TexkeyNoYearError: No year in 269__c, 260__c, 773__y or 502__d
        """
        if recid is None and xml_record is not None:
            bibrecord = create_record(xml_record)[0]
        elif bibrecord is None:
            bibrecord = get_bibrecord(recid)

        main_author = record_get_field_value(bibrecord,
                                             tag="100",
                                             ind1="",
                                             ind2="",
                                             code="a")

        if not main_author:
            # Try with collaboration name
            main_author = record_get_field_value(bibrecord,
                                                 tag="710",
                                                 ind1="",
                                                 ind2="",
                                                 code="g")
            main_author = "".join([
                p for p in main_author.split() if p.lower() != "collaboration"
            ])

        if not main_author:
            # Try with corporate author
            # NOTE(review): this re-reads 100__a, the same subfield as the
            # first lookup - possibly another tag was intended; confirm.
            main_author = record_get_field_value(bibrecord,
                                                 tag="100",
                                                 ind1="",
                                                 ind2="",
                                                 code="a")
            if not main_author:
                raise TexkeyNoAuthorError

        # Remove utf-8 special characters
        main_author = unidecode(main_author.decode('utf-8'))
        # str.split always yields at least one element, so no exception
        # handling is needed around the [0] access.
        texkey_first_part = main_author.split(',')[0].replace(" ", "")

        # Year sources in order of preference; the first non-empty one wins.
        year_sources = (("269", "c"), ("260", "c"), ("773", "y"), ("502", "d"))
        for year_tag, year_code in year_sources:
            year = record_get_field_value(bibrecord,
                                          tag=year_tag,
                                          ind1="",
                                          ind2="",
                                          code=year_code)
            if year:
                break
        else:
            raise TexkeyNoYearError

        texkey_second_part = year.split("-")[0]

        texkey_third_part = _texkey_random_chars(recid)

        texkey = texkey_first_part + ":" + texkey_second_part + texkey_third_part

        tries = 0
        while self._value_exists(texkey) and tries < TEXKEY_MAXTRIES:
            # Key is already in the DB, generate a new one
            texkey_third_part = _texkey_random_chars(recid, use_random=True)
            texkey = texkey_first_part + ":" + texkey_second_part + texkey_third_part
            tries += 1

        return texkey
コード例 #19
0
def get_doi_for_records(records):
    """
    Query crossref to obtain the DOI of a set of records

    One pipe-separated query line is built per record from the family name
    of the first author (100__a), the journal reference (773__p, v, n, c
    and y) and the value of the 001 field, which is sent as the query key.
    Records lacking a journal title, volume or page are skipped.  The query
    lines are posted to CrossRef ten at a time.  A batch whose request
    still fails after ten attempts is abandoned; the remaining batches are
    tried regardless.

    @param records: List of records
    @returns dict {record_id : doi}, where record_id is the "key"
             attribute CrossRef echoes back on each doi_record element
    """
    batch_size = 10
    max_attempts = 10
    url = "http://doi.crossref.org/servlet/query"

    pipes = []
    for record in records:
        data = [
            "",  # ISSN
            "",  # JOURNAL TITLE (773__p)
            "",  # AUTHOR (Family name of 100__a)
            "",  # VOLUME (773__v)
            "",  # ISSUE (773__n)
            "",  # PAGE (773__c)
            "",  # YEAR  (773__y)
            "",  # RESOURCE TYPE
            "",  # KEY
            ""  # DOI
        ]

        # str.split always yields at least one element, so [0] is safe
        # even when the record has no 100__a.
        data[2] = record_get_field_value(record, "100", "", "",
                                         "a").split(",")[0]

        # NOTE(review): index 3 of a field tuple is presumably the
        # controlfield value (the record ID) -- confirm against bibrecord.
        data[8] = str(record["001"][0][3])

        for subfield, position in (("p", 1), ("v", 3), ("n", 4), ("c", 5),
                                   ("y", 6)):
            val = record_get_field_value(record, "773", "", "", subfield)
            if not val:
                continue
            if subfield == "c":
                # strip page range to send only starting page
                val = val.split('-')[0]
            data[position] = val

        if not data[1] or not data[3] or not data[5]:
            continue  # We need journal title, volume and page

        pipes.append("|".join(data))

    dois = {}
    for start in range(0, len(pipes), batch_size):
        params = {
            "usr": CFG_CROSSREF_USERNAME,
            "pwd": CFG_CROSSREF_PASSWORD,
            "format": "unixref",
            "qdata": "\n".join(pipes[start:start + batch_size])
        }
        post_data = urllib.urlencode(params)

        document = None
        for _attempt in range(max_attempts):
            try:
                document = parse(CROSSREF_OPENER.open(url, post_data))
                break
            except (urllib2.URLError, urllib2.HTTPError):
                sleep(5)

        if document is None:
            # Every attempt failed.  Skip this batch instead of reading an
            # unbound (or stale, from the previous batch) document.
            continue

        for result in document.getElementsByTagName("doi_record"):
            doi_tags = result.getElementsByTagName("doi")
            # Only unambiguous answers are kept; an empty <doi/> element
            # has no text node to read.
            if len(doi_tags) == 1 and doi_tags[0].firstChild is not None:
                dois[result.getAttribute("key")] = \
                    doi_tags[0].firstChild.nodeValue

        # Avoid sending too many requests
        sleep(0.5)
    return dois
コード例 #20
0
ファイル: metadata_curation.py プロジェクト: SCOAP3/invenio
def generate_ticket(ticket, record):
    """
    Populate a BibCatalogTicket with subject, body and queue for a record.

    The body lists the record's date, external IDs, title, at most ten
    authors, categories, comments and abstract, followed by a link to the
    record editor.  When the record carries an arXiv ID, links to the
    arXiv abstract and PDF are included and the subject is built from
    that ID; otherwise the subject is built from the record ID and the
    external IDs.

    @param ticket: a ticket object as created by BibCatalogTicket() whose
                   subject, body and queue attributes get filled in.
    @type ticket: record object of BibCatalogTicket.

    @param record: a recstruct object as created by bibrecord.create_record()
    @type record: record object of BibRecord.

    @return: the ticket object passed in, after modification.
    @rtype: BibCatalogTicket
    """
    def _tag_code(name, fallback_name):
        # Resolve a tag code by name; when the name is unknown, retry
        # under the fallback name (whose failure propagates).
        try:
            return load_tag_code_from_name(name)
        except BibCatalogTagNotFound:
            return load_tag_code_from_name(fallback_name)

    title_code = load_tag_code_from_name("title")
    abstract_code = load_tag_code_from_name("abstract")
    date_code = _tag_code("date", "year")
    category_code = load_tag_code_from_name("subject")
    notes_code = _tag_code("note", "comment")
    first_author_code = load_tag_code_from_name("first author name")
    additional_author_code = load_tag_code_from_name("additional author name")
    external_id_code = _tag_code("ext system ID", "primary report number")

    recid = record_id_from_record(record)
    arxiv_id = _get_minimal_arxiv_id(record, external_id_code)

    if not arxiv_id:
        # Not an arXiv record: fall back to generic information.
        extra_info = []
        categories = record_get_value_with_provenence(
            record=record,
            provenence_code="2",
            provenence_value="SzGeCERN",
            **split_tag_code(category_code))
        comments = record_get_field_values(rec=record,
                                           **split_tag_code(notes_code))
        external_ids = ", ".join(
            record_get_field_values(rec=record,
                                    **split_tag_code(external_id_code)))
        subject = "Record #%s %s" % (recid, external_ids)
    else:
        # arXiv record: extra links go into the ticket body.
        extra_info = [
            "ABSTRACT: http://arxiv.org/abs/%s" % (arxiv_id,),
            "PDF: http://arxiv.org/pdf/%s" % (arxiv_id,),
        ]
        categories = record_get_value_with_provenence(
            record=record,
            provenence_code="2",
            provenence_value="arXiv",
            **split_tag_code(category_code))
        comments = record_get_value_with_provenence(
            record=record,
            provenence_code="9",
            provenence_value="arXiv",
            **split_tag_code(notes_code))
        external_ids = arxiv_id
        subject = "ARXIV:" + arxiv_id

    first_authors = record_get_field_values(
        record, **split_tag_code(first_author_code))
    other_authors = record_get_field_values(
        record, **split_tag_code(additional_author_code))
    authors = first_authors + other_authors

    template = """
%(submitdate)s

External IDs: %(external_ids)s

Title: %(title)s

Authors: %(authors)s

Categories: %(categories)s

Comments: %(comments)s

%(abstract)s

%(extra_info)s

Edit the record now: %(editurl)s

"""
    values = {
        'external_ids': external_ids,
        'submitdate': record_get_field_value(record, **split_tag_code(date_code)),
        'extra_info': "\n".join(extra_info),
        'title': record_get_field_value(record, **split_tag_code(title_code)),
        'comments': "; ".join(comments),
        'categories': " ".join(categories),
        'authors': " / ".join(authors[:10]),
        'abstract': record_get_field_value(record, **split_tag_code(abstract_code)),
        'editurl': "%s/record/edit/%s" % (CFG_SITE_URL, recid),
    }
    body = template % values

    ticket.subject = subject
    # Percent signs are doubled to avoid errors with string formatting
    # applied to the body later on.
    ticket.body = body.replace('%', '%%')
    ticket.queue = "Test"
    return ticket
コード例 #21
0
ファイル: updater.py プロジェクト: mhellmic/b2share
def oairepositoryupdater_task():
    """Main business logic code of oai_archive

    Determines which records have to gain or lose OAI set membership, or
    should be exported but lack an OAI identifier, and writes the updated
    OAI fields for them as MARCXML into temporary files holding up to
    CFG_OAI_REPOSITORY_MARCXML_SIZE records each.  Unless the "no_upload"
    task option is set, every file is submitted to bibupload with -c
    (plus -n when the "notimechange" option is set).

    When the "report" task option is greater than 1, only the repository
    status is printed.  The function always returns True.
    """
    def _marc_args(field):
        # Split a 6-character MARC field spec into tag/ind1/ind2/code kwargs.
        return dict(tag=field[:3], ind1=field[3], ind2=field[4], code=field[5])

    def _open_chunk():
        # Create a new temporary output file and write the opening tag.
        name_prefix = 'oairepository_' + time.strftime("%Y%m%d_%H%M%S_",
                                                       time.localtime())
        handle, path = mkstemp(dir=CFG_TMPDIR, prefix=name_prefix)
        stream = os.fdopen(handle, "w")
        stream.write("<collection>")
        return stream, path

    def _close_chunk(stream, path):
        # Terminate the XML document and report where it was written.
        stream.write("</collection>")
        stream.close()
        write_message("Wrote to file %s" % path)

    def _submit_chunk(path):
        # Hand the file over to bibupload in correct (-c) mode.
        arguments = ['bibupload', 'oairepository', '-c', path]
        if task_get_option("notimechange"):
            arguments.append('-n')
        task_low_level_submission(*arguments)

    skip_upload = task_get_option("no_upload")
    report_level = task_get_option("report")

    if report_level > 1:
        print_repository_status(verbose=report_level)
        return True

    snapshot = dict((spec, get_set_definitions(spec))
                    for spec in all_set_specs())
    write_message("Initial set snapshot: %s" % pformat(snapshot), verbose=2)

    task_update_progress("Fetching records to process")

    recids_with_oaiid = search_unit_in_bibxxx(p='*', f=CFG_OAI_ID_FIELD,
                                              type='e')
    write_message("%s recids have an OAI ID" % len(recids_with_oaiid),
                  verbose=2)

    exported_recids = search_unit_in_bibxxx(p='*', f=CFG_OAI_SET_FIELD,
                                            type='e')
    # Starts as everything currently exported; each set below removes the
    # records it should keep.
    no_more_exported_recids = intbitset(exported_recids)
    write_message("%s recids are currently exported" % (len(exported_recids)),
                  verbose=2)

    all_affected_recids = intbitset()
    all_should_recids = intbitset()
    recids_for_set = {}
    for spec in all_set_specs():
        if not spec:
            spec = CFG_OAI_REPOSITORY_GLOBAL_SET_SPEC
        wanted = get_recids_for_set_spec(spec)
        recids_for_set[spec] = wanted
        no_more_exported_recids -= wanted
        all_should_recids |= wanted
        present = search_unit_in_bibxxx(p=spec, f=CFG_OAI_SET_FIELD, type='e')

        missing_from_set = wanted - present
        surplus_in_set = present - wanted
        changed_for_set = missing_from_set | surplus_in_set

        write_message("%s recids should be in %s. Currently %s are in %s" %
                      (len(wanted), spec, len(present), spec),
                      verbose=2)
        write_message("%s recids should be added to %s" %
                      (len(missing_from_set), spec),
                      verbose=2)
        write_message("%s recids should be removed from %s" %
                      (len(surplus_in_set), spec),
                      verbose=2)
        write_message("%s recids should be hence updated for %s" %
                      (len(changed_for_set), spec),
                      verbose=2)
        all_affected_recids |= changed_for_set

    missing_oaiid = all_should_recids - recids_with_oaiid
    write_message("%s recids are missing an oaiid" % len(missing_oaiid))
    write_message("%s recids should no longer be exported" %
                  len(no_more_exported_recids))

    ## Let's add records with missing OAI ID
    all_affected_recids |= missing_oaiid | no_more_exported_recids
    write_message("%s recids should updated" % (len(all_affected_recids)),
                  verbose=2)

    if not all_affected_recids:
        write_message("Nothing to do!")
        return True

    # Prepare to save results in a tmp file
    oai_out, filename = _open_chunk()

    total = len(all_affected_recids)
    written = 0
    for position, recid in enumerate(all_affected_recids):
        task_sleep_now_if_required(can_stop_too=True)
        task_update_progress("Done %s out of %s records." % (position, total))

        write_message("Elaborating recid %s" % recid, verbose=3)
        record = get_record(recid)
        if not record:
            write_message("Record %s seems empty. Let's skip it." % recid,
                          verbose=3)
            continue

        # Reuse the OAI identifier stored in the record, or mint a new one.
        oai_id_entry = record_get_field_value(record,
                                              **_marc_args(CFG_OAI_ID_FIELD))
        needs_oai_id = not oai_id_entry
        if needs_oai_id:
            oai_id_entry = "oai:%s:%s" % (CFG_OAI_ID_PREFIX, recid)
            write_message("Setting new oai_id %s for record %s" %
                          (oai_id_entry, recid),
                          verbose=3)
        else:
            write_message("Already existing oai_id %s for record %s" %
                          (oai_id_entry, recid),
                          verbose=3)

        # Sets the record belongs to according to its own metadata.
        current_oai_sets = set(
            record_get_field_values(record, **_marc_args(CFG_OAI_SET_FIELD)))
        write_message("Record %s currently belongs to these oai_sets: %s" %
                      (recid, ", ".join(current_oai_sets)),
                      verbose=3)

        current_previous_oai_sets = set(
            record_get_field_values(record,
                                    **_marc_args(CFG_OAI_PREVIOUS_SET_FIELD)))
        write_message("Record %s currently doesn't belong anymore to these oai_sets: %s" %
                      (recid, ", ".join(current_previous_oai_sets)),
                      verbose=3)

        # Sets the record should belong to according to the settings.
        updated_oai_sets = set(name
                               for name, members in iteritems(recids_for_set)
                               if recid in members)
        write_message("Record %s now belongs to these oai_sets: %s" %
                      (recid, ", ".join(updated_oai_sets)),
                      verbose=3)

        updated_previous_oai_sets = set(
            name
            for name in (current_previous_oai_sets - updated_oai_sets) |
            (current_oai_sets - updated_oai_sets))
        write_message("Record %s now doesn't belong anymore to these oai_sets: %s" %
                      (recid, ", ".join(updated_previous_oai_sets)),
                      verbose=3)

        # Same sets as before and no identifier to add: nothing to write
        # for this record.
        if not needs_oai_id and current_oai_sets == updated_oai_sets:
            write_message("Nothing has changed for record %s, let's move on!" % recid,
                          verbose=3)
            continue  # Jump to next recid

        write_message("Something has changed for record %s, let's update it!" % recid,
                      verbose=3)
        subfields = [(CFG_OAI_ID_FIELD[5], oai_id_entry)]
        subfields.extend((CFG_OAI_SET_FIELD[5], name)
                         for name in updated_oai_sets)
        subfields.extend((CFG_OAI_PREVIOUS_SET_FIELD[5], name)
                         for name in updated_previous_oai_sets)

        new_record = {}
        record_add_field(new_record, tag="001", controlfield_value=str(recid))
        record_add_field(new_record,
                         tag=CFG_OAI_ID_FIELD[:3],
                         ind1=CFG_OAI_ID_FIELD[3],
                         ind2=CFG_OAI_ID_FIELD[4],
                         subfields=subfields)
        oai_out.write(record_xml_output(new_record))
        written += 1

        if written == CFG_OAI_REPOSITORY_MARCXML_SIZE:
            # The current file is full: ship it and start a new one.
            _close_chunk(oai_out, filename)
            if not skip_upload:
                _submit_chunk(filename)
            # Prepare to save results in a tmp file
            oai_out, filename = _open_chunk()
            written = 0
            task_sleep_now_if_required(can_stop_too=True)

    _close_chunk(oai_out, filename)

    if written > 0:
        if not skip_upload:
            task_sleep_now_if_required(can_stop_too=True)
            _submit_chunk(filename)
    else:
        # The last file holds no records, so there is nothing to keep.
        os.remove(filename)

    return True
コード例 #22
0
ファイル: revisionverifier.py プロジェクト: SCOAP3/invenio
    def verify_revision(self, verify_record, original_record, opt_mode=None):
        """
        Check an uploaded record against the archived revision it stems from.

        The uploaded record is compared with the archived revision whose
        date matches the upload's 005 field; the differences form the
        patch.  When the current (original) record carries a different
        revision, its own differences from the archived revision are
        computed too and passed to detect_conflict together with the
        upload's patch (per the original documentation, conflicting
        records raise an error and stop the bibupload process).

        Returns None when opt_mode is neither 'replace' nor 'correct';
        otherwise the tuple ('correct', record_patch, affected_tags).

        Raises InvenioBibUploadMissing005Error when either record lacks a
        005 field, InvenioBibUploadInvalidRevisionError when the uploaded
        revision date is not among the record's revisions, and
        InvenioBibUploadUnchangedRecordError when the upload does not
        differ from the archived revision.
        """
        # No need for revision check for other operations
        if opt_mode not in ('replace', 'correct'):
            return

        if '001' in verify_record:
            self.rec_id = record_get_field_value(verify_record, '001')

        # Revision of the uploaded record
        if '005' not in verify_record:
            raise InvenioBibUploadMissing005Error(self.rec_id)
        upload_rev = record_get_field_value(verify_record, '005')
        # The revision date is the part of the 005 value before the first dot.
        r_date = upload_rev.split('.')[0]
        known_dates = [revision[1]
                       for revision in get_record_revisions(self.rec_id)]
        if r_date not in known_dates:
            raise InvenioBibUploadInvalidRevisionError(self.rec_id, r_date)

        # Revision of the record as it currently stands
        if '005' not in original_record:
            raise InvenioBibUploadMissing005Error(self.rec_id)
        original_rev = record_get_field_value(original_record, '005')

        # Retrieving the archived version
        marc_xml = get_marcxml_of_record_revision(self.rec_id, r_date)
        archived_record = create_record(zlib.decompress(marc_xml[0][0]))[0]

        # Comparing Upload and Archive record
        curr_patch = self.compare_records(verify_record, archived_record,
                                          opt_mode)
        if not curr_patch:
            # Nothing changed with respect to the archived revision:
            # raise so that bibupload skips this record.
            raise InvenioBibUploadUnchangedRecordError(self.rec_id, upload_rev)

        if original_rev == upload_rev:
            # Upload, Archive and Original Records have same Revisions.
            affected_tags = self.retrieve_affected_tags_with_ind(curr_patch)
            final_patch = self.generate_final_patch(curr_patch, self.rec_id)
            return ('correct', final_patch, affected_tags)

        # Comparing Original and Archive record
        orig_patch = self.compare_records(original_record, archived_record,
                                          opt_mode)

        # An empty original patch means the Original Record equals the
        # Archived Record, so there is nothing to conflict with.
        if orig_patch:
            curr_patch = self.detect_conflict(verify_record, curr_patch,
                                              upload_rev, original_record,
                                              orig_patch, original_rev)

        final_patch = self.generate_final_patch(curr_patch, self.rec_id)
        affected_tags = self.retrieve_affected_tags_with_ind(curr_patch)

        # Returning patch in case of no conflicting fields
        return ('correct', final_patch, affected_tags)
コード例 #23
0
def generate_ticket(ticket, record):
    """
    Fill the subject, body and queue of the given BibCatalogTicket.

    The ticket body is a plain-text summary of the record: date, external
    IDs, title, the first ten authors, categories, comments, abstract and
    a link for editing the record.  For records with an arXiv ID the
    categories and comments are those with arXiv provenance and links to
    the arXiv abstract and PDF are added.

    @param ticket: a ticket object as created by BibCatalogTicket(); its
                   subject, body and queue attributes are overwritten.
    @type ticket: record object of BibCatalogTicket.

    @param record: a recstruct object as created by bibrecord.create_record()
    @type record: record object of BibRecord.

    @return: the modified ticket object to create.
    @rtype: BibCatalogTicket
    """
    def _resolve(preferred, alternative):
        # Return the tag code for the preferred name, or for the
        # alternative name when the preferred one is not defined.
        try:
            code = load_tag_code_from_name(preferred)
        except BibCatalogTagNotFound:
            code = load_tag_code_from_name(alternative)
        return code

    title_code = load_tag_code_from_name("title")
    abstract_code = load_tag_code_from_name("abstract")
    date_code = _resolve("date", "year")
    category_code = load_tag_code_from_name("subject")
    notes_code = _resolve("note", "comment")
    first_author_code = load_tag_code_from_name("first author name")
    additional_author_code = load_tag_code_from_name("additional author name")
    external_id_code = _resolve("ext system ID", "primary report number")

    # Extra lines printed in the ticket; only arXiv records have any.
    extra_lines = []
    recid = record_id_from_record(record)

    arxiv_id = _get_minimal_arxiv_id(record, external_id_code)
    if arxiv_id:
        extra_lines += [
            "ABSTRACT: http://arxiv.org/abs/%s" % (arxiv_id, ),
            "PDF: http://arxiv.org/pdf/%s" % (arxiv_id, ),
        ]
        categories = record_get_value_with_provenence(
            record=record,
            provenence_code="2",
            provenence_value="arXiv",
            **split_tag_code(category_code))
        comments = record_get_value_with_provenence(
            record=record,
            provenence_code="9",
            provenence_value="arXiv",
            **split_tag_code(notes_code))
        external_ids = arxiv_id
        subject = "ARXIV:" + arxiv_id
    else:
        # No arXiv ID: use the generic categories, notes and external IDs.
        categories = record_get_value_with_provenence(
            record=record,
            provenence_code="2",
            provenence_value="SzGeCERN",
            **split_tag_code(category_code))
        comments = record_get_field_values(rec=record,
                                           **split_tag_code(notes_code))
        id_values = record_get_field_values(
            rec=record, **split_tag_code(external_id_code))
        external_ids = ", ".join(id_values)
        subject = "Record #%s %s" % (recid, external_ids)

    authors = record_get_field_values(record,
                                      **split_tag_code(first_author_code))
    authors = authors + record_get_field_values(
        record, **split_tag_code(additional_author_code))

    body_template = """
%(submitdate)s

External IDs: %(external_ids)s

Title: %(title)s

Authors: %(authors)s

Categories: %(categories)s

Comments: %(comments)s

%(abstract)s

%(extra_info)s

Edit the record now: %(editurl)s

"""
    body = body_template % {
        'external_ids': external_ids,
        'submitdate': record_get_field_value(record, **split_tag_code(date_code)),
        'extra_info': "\n".join(extra_lines),
        'title': record_get_field_value(record, **split_tag_code(title_code)),
        'comments': "; ".join(comments),
        'categories': " ".join(categories),
        'authors': " / ".join(authors[:10]),
        'abstract': record_get_field_value(record, **split_tag_code(abstract_code)),
        'editurl': "%s/record/edit/%s" % (CFG_SITE_URL, recid),
    }

    ticket.subject = subject
    # To avoid errors with string formatting later, %'s in the body are
    # escaped by doubling them.
    ticket.body = body.replace('%', '%%')
    ticket.queue = "Test"
    return ticket
コード例 #24
0
def oairepositoryupdater_task():
    """Main business logic code of oai_archive

    Determines which records have to gain or lose OAI set membership, or
    should be exported but lack an OAI identifier, and writes the updated
    OAI fields for them as MARCXML into temporary files holding up to
    CFG_OAI_REPOSITORY_MARCXML_SIZE records each.  Unless the "no_upload"
    task option is set, every file is submitted to bibupload.

    The run is skipped when a previously submitted
    'bibupload:oairepository' task is still waiting.  When the "report"
    task option is greater than 1, only the repository status is printed.
    The function always returns True.
    """
    def _open_chunk():
        # Create a new temporary output file and write the opening tag.
        name_prefix = 'oairepository_' + time.strftime("%Y%m%d_%H%M%S_",
                                                       time.localtime())
        handle, path = mkstemp(dir=CFG_TMPSHAREDDIR, prefix=name_prefix)
        stream = os.fdopen(handle, "w")
        stream.write("<collection>")
        return stream, path

    def _close_chunk(stream, path):
        # Terminate the XML document and report where it was written.
        stream.write("</collection>")
        stream.close()
        write_message("Wrote to file %s" % path)

    def _submit_chunk(path):
        # Hand the file over to bibupload in correct (-c) mode.  Every
        # submission passes -Noairepository: the check at the start of
        # this task looks for waiting 'bibupload:oairepository' tasks,
        # so an unnamed submission would presumably go unnoticed by it
        # (TODO confirm against bibsched's task naming).
        arguments = ['bibupload', 'oairepository', '-c', path]
        if task_get_option("notimechange"):
            arguments.append('-n')
        arguments.extend(['-Noairepository', '-P', '-1'])
        task_low_level_submission(*arguments)

    no_upload = task_get_option("no_upload")
    report = task_get_option("report")

    if report > 1:
        print_repository_status(verbose=report)
        return True

    if run_sql(
            "SELECT id FROM schTASK WHERE proc='bibupload:oairepository' AND status='WAITING'"
    ):
        write_message(
            "Previous requests of oairepository still being elaborated. Let's skip this execution."
        )
        return True

    initial_snapshot = {}
    for set_spec in all_set_specs():
        initial_snapshot[set_spec] = get_set_definitions(set_spec)
    write_message("Initial set snapshot: %s" % pformat(initial_snapshot),
                  verbose=2)

    task_update_progress("Fetching records to process")

    recids_with_oaiid = search_unit_in_bibxxx(p='*',
                                              f=CFG_OAI_ID_FIELD,
                                              type='e')
    write_message("%s recids have an OAI ID" % len(recids_with_oaiid),
                  verbose=2)

    all_current_recids = search_unit_in_bibxxx(p='*',
                                               f=CFG_OAI_SET_FIELD,
                                               type='e')
    # Starts as everything currently exported; each set below removes the
    # records it should keep.
    no_more_exported_recids = intbitset(all_current_recids)
    write_message("%s recids are currently exported" %
                  (len(all_current_recids)),
                  verbose=2)

    all_affected_recids = intbitset()
    all_should_recids = intbitset()
    recids_for_set = {}
    for set_spec in all_set_specs():
        if not set_spec:
            set_spec = CFG_OAI_REPOSITORY_GLOBAL_SET_SPEC
        should_recids = get_recids_for_set_spec(set_spec)
        recids_for_set[set_spec] = should_recids
        no_more_exported_recids -= should_recids
        all_should_recids |= should_recids
        current_recids = search_unit_in_bibxxx(p=set_spec,
                                               f=CFG_OAI_SET_FIELD,
                                               type='e')
        write_message(
            "%s recids should be in %s. Currently %s are in %s" %
            (len(should_recids), set_spec, len(current_recids), set_spec),
            verbose=2)
        to_add = should_recids - current_recids
        write_message("%s recids should be added to %s" %
                      (len(to_add), set_spec),
                      verbose=2)
        to_remove = current_recids - should_recids
        write_message("%s recids should be removed from %s" %
                      (len(to_remove), set_spec),
                      verbose=2)
        affected_recids = to_add | to_remove
        write_message("%s recids should be hence updated for %s" %
                      (len(affected_recids), set_spec),
                      verbose=2)
        all_affected_recids |= affected_recids

    missing_oaiid = all_should_recids - recids_with_oaiid
    write_message("%s recids are missing an oaiid" % len(missing_oaiid))
    write_message("%s recids should no longer be exported" %
                  len(no_more_exported_recids))

    ## Let's add records with missing OAI ID
    all_affected_recids |= missing_oaiid | no_more_exported_recids
    write_message("%s recids should updated" % (len(all_affected_recids)),
                  verbose=2)

    if not all_affected_recids:
        write_message("Nothing to do!")
        return True

    # Prepare to save results in a tmp file
    oai_out, filename = _open_chunk()

    total = len(all_affected_recids)
    tot = 0
    # Iterate over the recids
    for i, recid in enumerate(all_affected_recids):
        task_sleep_now_if_required(can_stop_too=True)
        task_update_progress("Done %s out of %s records." % (i, total))

        write_message("Elaborating recid %s" % recid, verbose=3)
        record = get_record(recid)
        if not record:
            write_message("Record %s seems empty. Let's skip it." % recid,
                          verbose=3)
            continue
        new_record = {}

        # Check if an OAI identifier is already in the record or
        # not.
        assign_oai_id_entry = False
        oai_id_entry = record_get_field_value(record,
                                              tag=CFG_OAI_ID_FIELD[:3],
                                              ind1=CFG_OAI_ID_FIELD[3],
                                              ind2=CFG_OAI_ID_FIELD[4],
                                              code=CFG_OAI_ID_FIELD[5])
        if not oai_id_entry:
            assign_oai_id_entry = True
            oai_id_entry = "oai:%s:%s" % (CFG_OAI_ID_PREFIX, recid)
            write_message("Setting new oai_id %s for record %s" %
                          (oai_id_entry, recid),
                          verbose=3)
        else:
            write_message("Already existing oai_id %s for record %s" %
                          (oai_id_entry, recid),
                          verbose=3)

        # Get the sets to which this record already belongs according
        # to the metadata
        current_oai_sets = set(
            record_get_field_values(record,
                                    tag=CFG_OAI_SET_FIELD[:3],
                                    ind1=CFG_OAI_SET_FIELD[3],
                                    ind2=CFG_OAI_SET_FIELD[4],
                                    code=CFG_OAI_SET_FIELD[5]))
        write_message("Record %s currently belongs to these oai_sets: %s" %
                      (recid, ", ".join(current_oai_sets)),
                      verbose=3)

        current_previous_oai_sets = set(
            record_get_field_values(record,
                                    tag=CFG_OAI_PREVIOUS_SET_FIELD[:3],
                                    ind1=CFG_OAI_PREVIOUS_SET_FIELD[3],
                                    ind2=CFG_OAI_PREVIOUS_SET_FIELD[4],
                                    code=CFG_OAI_PREVIOUS_SET_FIELD[5]))
        write_message(
            "Record %s currently doesn't belong anymore to these oai_sets: %s"
            % (recid, ", ".join(current_previous_oai_sets)),
            verbose=3)

        # Get the sets that should be in this record according to
        # settings
        updated_oai_sets = set(_set
                               for _set, _recids in iteritems(recids_for_set)
                               if recid in _recids)
        write_message("Record %s now belongs to these oai_sets: %s" %
                      (recid, ", ".join(updated_oai_sets)),
                      verbose=3)

        updated_previous_oai_sets = set(
            _set for _set in (current_previous_oai_sets - updated_oai_sets)
            | (current_oai_sets - updated_oai_sets))
        write_message(
            "Record %s now doesn't belong anymore to these oai_sets: %s" %
            (recid, ", ".join(updated_previous_oai_sets)),
            verbose=3)

        # Ok, we have the old sets and the new sets. If they are equal
        # and oai ID does not need to be added, then great, nothing to
        # change . Otherwise apply the new sets.
        if current_oai_sets == updated_oai_sets and not assign_oai_id_entry:
            write_message("Nothing has changed for record %s, let's move on!" %
                          recid,
                          verbose=3)
            continue  # Jump to next recid

        write_message("Something has changed for record %s, let's update it!" %
                      recid,
                      verbose=3)
        subfields = [(CFG_OAI_ID_FIELD[5], oai_id_entry)]
        for oai_set in updated_oai_sets:
            subfields.append((CFG_OAI_SET_FIELD[5], oai_set))
        for oai_set in updated_previous_oai_sets:
            subfields.append((CFG_OAI_PREVIOUS_SET_FIELD[5], oai_set))

        record_add_field(new_record, tag="001", controlfield_value=str(recid))
        record_add_field(new_record,
                         tag=CFG_OAI_ID_FIELD[:3],
                         ind1=CFG_OAI_ID_FIELD[3],
                         ind2=CFG_OAI_ID_FIELD[4],
                         subfields=subfields)
        oai_out.write(record_xml_output(new_record))
        tot += 1
        if tot == CFG_OAI_REPOSITORY_MARCXML_SIZE:
            # The current file is full: ship it and start a new one.
            _close_chunk(oai_out, filename)
            if not no_upload:
                _submit_chunk(filename)
            # Prepare to save results in a tmp file
            oai_out, filename = _open_chunk()
            tot = 0
            task_sleep_now_if_required(can_stop_too=True)

    _close_chunk(oai_out, filename)

    if tot > 0:
        if not no_upload:
            task_sleep_now_if_required(can_stop_too=True)
            _submit_chunk(filename)
    else:
        # The last file holds no records, so there is nothing to keep.
        os.remove(filename)

    return True
コード例 #25
0
ファイル: texkey.py プロジェクト: dset0x/invenio
    def _next_value(self, recid=None, xml_record=None, bibrecord=None):
        """Return the next texkey for the given record.

        The key is built as ``<author>:<year><suffix>``: the text before the
        first comma of the author name (passed through ``unidecode``, spaces
        removed), the text before the first ``-`` of the date, and the
        characters returned by ``_texkey_random_chars``.

        :param recid: id of the record where the texkey will be generated
        :type recid: int

        :param xml_record: record in xml format; it is parsed only when
            ``recid`` is None
        :type xml_record: string

        :param bibrecord: already parsed record structure; when it is not
            given (and ``xml_record`` is not used) the record is loaded
            with ``get_bibrecord(recid)``

        :return: next texkey for the given recid.
        :rtype: string

        :raises TexkeyNoAuthorError: No main author (100__a), collaboration
        (710__g) or corporate author (110__a) in the given record
        :raises TexkeyNoYearError: No date in 269__c, 260__c, 773__y or
        502__d of the given record
        """
        if recid is None and xml_record is not None:
            bibrecord = create_record(xml_record)[0]
        elif bibrecord is None:
            bibrecord = get_bibrecord(recid)

        main_author = record_get_field_value(bibrecord,
                                             tag="100",
                                             ind1="",
                                             ind2="",
                                             code="a")

        if not main_author:
            # Try with collaboration name, dropping the word "collaboration"
            collaboration = record_get_field_value(bibrecord,
                                                   tag="710",
                                                   ind1="",
                                                   ind2="",
                                                   code="g")
            main_author = "".join([p for p in collaboration.split()
                                   if p.lower() != "collaboration"])

        if not main_author:
            # Try with corporate author. This used to query 100__a a second
            # time, which is already known to be empty at this point, so the
            # fallback could never succeed; 110 is the MARC corporate-name
            # main entry.
            main_author = record_get_field_value(bibrecord,
                                                 tag="110",
                                                 ind1="",
                                                 ind2="",
                                                 code="a")

        if not main_author:
            raise TexkeyNoAuthorError

        # Remove utf-8 special characters
        main_author = unidecode(main_author.decode('utf-8'))
        # str.split() always returns at least one element, so [0] is safe.
        texkey_first_part = main_author.split(',')[0].replace(" ", "")

        # Date sources, in order of preference; the first non-empty one wins.
        year_sources = (("269", "c"), ("260", "c"), ("773", "y"), ("502", "d"))
        for year_tag, year_code in year_sources:
            year = record_get_field_value(bibrecord,
                                          tag=year_tag,
                                          ind1="",
                                          ind2="",
                                          code=year_code)
            if year:
                break
        else:
            # None of the date fields carried a value.
            raise TexkeyNoYearError

        texkey_second_part = year.split("-")[0]

        texkey_third_part = _texkey_random_chars(recid)

        texkey = texkey_first_part + ":" + \
            texkey_second_part + texkey_third_part

        # NOTE(review): once TEXKEY_MAXTRIES is reached the last candidate is
        # returned even if it still exists - confirm callers handle that.
        tries = 0
        while self._value_exists(texkey) and tries < TEXKEY_MAXTRIES:
            # Key is already in the DB, generate a new one
            texkey_third_part = _texkey_random_chars(recid, use_random=True)
            texkey = texkey_first_part + ":" + \
                texkey_second_part + texkey_third_part
            tries += 1

        return texkey
コード例 #26
0
ファイル: crossref.py プロジェクト: SCOAP3/invenio
def get_doi_for_records(records):
    """
    Query crossref to obtain the DOI of a set of records

    Records without a journal title (773__p), volume (773__v) or page
    (773__c) are not queried. Queries are sent in batches of 10 records;
    a batch whose request still fails after 10 attempts is skipped, so
    the returned dict may be partial.

    @params records: List of records
    @returns dict {record_id : doi}
    """
    batch_size = 10
    max_attempts = 10
    # NOTE(review): the username/password are posted to a plain http URL.
    url = "http://doi.crossref.org/servlet/query"

    pipes = []
    for record in records:
        data = [
            "", # ISSN
            "", # JOURNAL TITLE (773__p)
            "", # AUTHOR (Family name of 100__a)
            "", # VOLUME (773__v)
            "", # ISSUE (773__n)
            "", # PAGE (773__c)
            "", # YEAR  (773__y)
            "", # RESOURCE TYPE
            "", # KEY
            ""  # DOI
        ]

        # split() always yields at least one element, so [0] is safe.
        data[2] = record_get_field_value(record, "100", "", "", "a").split(",")[0]

        data[8] = str(record["001"][0][3])

        for subfield, position in ("p", 1), ("v", 3), ("n", 4), ("c", 5), ("y", 6):
            val = record_get_field_value(record, "773", "", "", subfield)
            if not val:
                continue
            if subfield == "c":
                # strip page range to send only starting page
                val = val.split('-')[0]
            data[position] = val

        if not data[1] or not data[3] or not data[5]:
            continue  # We need journal title, volume and page

        pipes.append("|".join(data))

    dois = {}
    for start in range(0, len(pipes), batch_size):
        params = {
            "usr": CFG_CROSSREF_USERNAME,
            "pwd": CFG_CROSSREF_PASSWORD,
            "format": "unixref",
            "qdata": "\n".join(pipes[start:start + batch_size])
        }
        post_data = urllib.urlencode(params)

        # Reset per batch so a failed batch can neither hit an unbound name
        # nor silently reuse the previous batch's response.
        document = None
        for _ in range(max_attempts):
            try:
                document = parse(CROSSREF_OPENER.open(url, post_data))
                break
            except (urllib2.URLError, urllib2.HTTPError):
                sleep(5)

        if document is None:
            # Every attempt failed: give up on this batch only.
            continue

        for result in document.getElementsByTagName("doi_record"):
            record_id = result.getAttribute("key")
            doi_tags = result.getElementsByTagName("doi")
            if len(doi_tags) == 1:
                dois[record_id] = doi_tags[0].firstChild.nodeValue

        # Avoid sending too many requests
        sleep(0.5)
    return dois
コード例 #27
0
    def verify_revision(self, verify_record, original_record, opt_mode=None):
        """
        Check the uploaded record against the archived revision it claims
        to be based on (its 005 field) and build the patch to apply.

        Only the 'replace' and 'correct' modes are checked; for any other
        mode nothing is done and None is returned.

        Returns a ('correct', record_patch, affected_tags) tuple.

        Raises InvenioBibUploadMissing005Error when either record has no
        005 field, InvenioBibUploadInvalidRevisionError when the uploaded
        revision date is not among the record's known revisions, and
        InvenioBibUploadUnchangedRecordError when the upload does not
        differ from the archived revision.
        """
        # Revision checking only applies to these two modes
        if opt_mode not in ('replace', 'correct'):
            return

        if '001' in verify_record:
            self.rec_id = record_get_field_value(verify_record, '001')

        # The upload must say which revision it was derived from
        if '005' not in verify_record:
            raise InvenioBibUploadMissing005Error(self.rec_id)

        upload_rev = record_get_field_value(verify_record, '005')
        r_date = upload_rev.split('.')[0]

        known_dates = [revision[1]
                       for revision in get_record_revisions(self.rec_id)]
        if r_date not in known_dates:
            raise InvenioBibUploadInvalidRevisionError(self.rec_id, r_date)

        if '005' not in original_record:
            raise InvenioBibUploadMissing005Error(self.rec_id)

        original_rev = record_get_field_value(original_record, '005')

        # Load the archived revision matching the upload's revision date
        marc_xml = get_marcxml_of_record_revision(self.rec_id, r_date)
        archived_record = create_record(zlib.decompress(marc_xml[0][0]))[0]

        # Differences between the upload and the archived revision
        curr_patch = self.compare_records(verify_record, archived_record,
                                          opt_mode)

        # An empty patch means the upload changes nothing: raise so that
        # the bibupload of this record is skipped
        if not curr_patch:
            raise InvenioBibUploadUnchangedRecordError(self.rec_id, upload_rev)

        if original_rev == upload_rev:
            # Upload, archive and original share one revision
            affected_tags = self.retrieve_affected_tags_with_ind(curr_patch)
            final_patch = self.generate_final_patch(curr_patch, self.rec_id)
            return ('correct', final_patch, affected_tags)

        # Differences between the original and the archived revision
        orig_patch = self.compare_records(original_record, archived_record,
                                          opt_mode)

        # A non-empty original patch means the original has moved on from
        # the archived revision, so the two patches must be reconciled
        if orig_patch:
            curr_patch = self.detect_conflict(verify_record, curr_patch,
                                              upload_rev, original_record,
                                              orig_patch, original_rev)

        record_patch = self.generate_final_patch(curr_patch, self.rec_id)
        affected_tags = self.retrieve_affected_tags_with_ind(curr_patch)

        # Hand back the patch once no conflicting fields remain
        return ('correct', record_patch, affected_tags)