Beispiel #1
0
def main():
    """Interactively rewrite the language value stored in 690 fields.

    For each record ID in RECORDS, every 690 field whose first subfield is
    ('a', 'language_code') and whose second subfield has code 'b' gets a new
    second subfield carrying VALUES[record ID] (only when such a value
    exists).  When at least one field changed, the old and new 690 fields are
    printed and, after the operator answers 'y', the record is uploaded first
    in 'delete' mode and then in 'replace' mode.

    Processing stops at the first record ID for which no record is returned.
    """
    from invenio.legacy.search_engine import get_record
    from invenio.legacy.bibupload.engine import bibupload
    from invenio.legacy.bibrecord import record_add_field, record_delete_field

    for recid in RECORDS:
        # Two independent fetches: one stays untouched for the "before" dump.
        original = get_record(recid)
        working = get_record(recid)

        if not working:
            break

        print('Processing record: {0}'.format(recid))

        # Keep only the subfield list of every 690 field instance.
        current_690 = [field[0] for field in working.get('690', [])]
        updated_690 = []
        for subfields in current_690:
            first = subfields[0]
            second = subfields[1]
            if (first[0] == 'a' and
                    first[1] == 'language_code' and
                    second[0] == 'b' and
                    VALUES.get(recid)):
                # The rewritten field keeps only its first two subfields.
                updated_690.append([first, (second[0], VALUES.get(recid))])
            else:
                updated_690.append(subfields)

        if updated_690 == current_690:
            # Nothing to change for this record.
            continue

        record_delete_field(working, '690')
        for subfields in updated_690:
            record_add_field(working, '690', subfields=subfields)

        print('\nOld 690:')
        pprint(original.get('690'))
        print('\nNew 690:')
        pprint(working.get('690'))

        answer = raw_input('Bibupload (y/n)? ')
        if answer == 'y':
            bibupload(working, 'delete')
            sleep(5)
            bibupload(working, 'replace')
Beispiel #2
0
def match_all_subfields_for_tag(recID, field_tag, subfields_required=[]):
    """
    Test whether the record with recID has at least one field with 'field_tag'
    where all of the required subfields in subfields_required match a subfield
    in the given field both in code and value.

    @param recID: record ID
    @type recID: int

    @param field_tag: a 3 digit code for the field tag code
    @type field_tag: string

    @param subfields_required: a list of subfield code/value tuples
    @type subfields_required: list of tuples of strings.
        same format as in get_record():
            e.g. [('w', 't'),
                  ('4', 'XYZ123')]

    @return: True when some field with that tag contains every required
        subfield; False otherwise, including when no record is returned or
        the record has no field with that tag.
    @rtype: boolean
    """
    rec = get_record(recID) or {}
    # Build the required set once instead of once per field instance.
    required = set(subfields_required)
    # A missing tag means "no match" rather than a KeyError.
    for field in rec.get(field_tag, []):
        # field[0] holds the (code, value) subfield tuples of this instance.
        if required.issubset(field[0]):
            return True
    return False
def match_all_subfields_for_tag(recID, field_tag, subfields_required=[]):
    """
    Test whether the record with recID has at least one field with 'field_tag'
    where all of the required subfields in subfields_required match a subfield
    in the given field both in code and value.

    @param recID: record ID
    @type recID: int

    @param field_tag: a 3 digit code for the field tag code
    @type field_tag: string

    @param subfields_required: a list of subfield code/value tuples
    @type subfields_required: list of tuples of strings.
        same format as in get_record():
            e.g. [('w', 't'),
                  ('4', 'XYZ123')]

    @return: True when some field with that tag contains every required
        subfield; False otherwise, including when no record is returned or
        the record has no field with that tag.
    @rtype: boolean
    """
    rec = get_record(recID) or {}
    # Build the required set once instead of once per field instance.
    required = set(subfields_required)
    # A missing tag means "no match" rather than a KeyError.
    for field in rec.get(field_tag, []):
        # field[0] holds the (code, value) subfield tuples of this instance.
        if required.issubset(field[0]):
            return True
    return False
Beispiel #4
0
def replace_references(recid):
    """Return the record's MARCXML with its 999 fields replaced.

    References are extracted for the record, parsed, and their 999 field
    instances substituted for the 999 fields of the stored record.  The
    stored record itself is not updated; only the resulting XML is returned.

    Parameters:
    * recid: the id of the record

    Returns the MARCXML produced by record_xml_output, or None when the
    parsed references structure is empty.
    """
    # Parse the extracted references and fetch the stored record
    extracted_xml = extract_references_from_record_xml(recid)
    parsed = create_record(extracted_xml)
    record = get_record(recid)

    references_record = parsed[0]
    if not references_record:
        return None

    new_fields = record_get_field_instances(references_record,
                                            tag='999',
                                            ind1='%',
                                            ind2='%')
    # Swap the old 999 fields for the freshly extracted ones
    record_delete_fields(record, '999')
    record_add_fields(record, '999', new_fields)
    return record_xml_output(record)
Beispiel #5
0
def update_references(recid, overwrite=True):
    """Update references for a record

    First, we extract references from a record.
    Then, we are not updating the record directly but adding a bibupload
    task in -c mode which takes care of updating the record.

    Parameters:
    * recid: the id of the record
    * overwrite: when false, a record that already has a 999 field is
                 left alone and RecordHasReferences is raised

    Raises RecordHasReferences when overwrite is disabled and the record
    already has 999 fields, or when get_fieldvalues returns anything for
    '999C59' (reported as "curated").
    """

    if not overwrite:
        # Check for references in record
        record = get_record(recid)
        if record and record_has_field(record, '999'):
            raise RecordHasReferences('Record has references and overwrite '
                                      'mode is disabled: %s' % recid)

    if get_fieldvalues(recid, '999C59'):
        raise RecordHasReferences('Record has been curated: %s' % recid)

    # Parse references
    references_xml = extract_references_from_record_xml(recid)

    # Save new record to file; the context manager closes the handle even
    # when the write fails.
    (temp_fd, temp_path) = mkstemp(prefix=CFG_REFEXTRACT_FILENAME,
                                   dir=CFG_TMPSHAREDDIR)
    with os.fdopen(temp_fd, 'w') as temp_file:
        temp_file.write(references_xml)

    # Update record
    task_low_level_submission('bibupload', 'refextract', '-P', '4',
                              '-c', temp_path)
Beispiel #6
0
def perform_get_holdings_information(recid,
                                     req,
                                     action="borrowal",
                                     ln=CFG_SITE_LANG):
    """
    Display all the copies of an item. If the parameter action is 'proposal', display
    appropriate information to the user.

    @param recid: identify the record. Primary key of bibrec.
    @type recid: int

    @param req: request object, handed over to the holdings template.

    @param action: Specifies whether the current record is put up to solicit acquisition
    proposals(if "proposal") or not("borrowal").
    @type action: string

    @param ln: language code used for the messages and templates.

    @return body(html)
    """
    _ = gettext_set_language(ln)

    if action == "proposal":
        tag = AMZ_BOOK_PUBLICATION_DATE_TAG
        publication_date = record_get_field_value(get_record(recid),
                                                  tag[:3],
                                                  ind1=tag[3],
                                                  ind2=tag[4],
                                                  code=tag[5])
        msg = ''
        if publication_date:
            cur_date = datetime.date.today()
            try:
                parsed = time.strptime(publication_date, '%d %b %Y')
                pub_date = datetime.date(parsed[0], parsed[1], parsed[2])
            except Exception:
                # Best effort: a date that cannot be parsed is treated like
                # a book that is already published.  Unlike a bare except,
                # this lets SystemExit/KeyboardInterrupt propagate.
                pub_date = None

            if pub_date is not None and cur_date < pub_date:
                msg += _(
                    "The publication date of this book is %(x_date)s.",
                    x_date=(publication_date))
                msg += "<br /><br />"
            else:
                msg += _("This book has no copies in the library. ")

        # NOTE: the message below is a translation key; its exact text
        # (including the embedded whitespace) must stay untouched.
        msg += _(
            "If you think this book is interesting, suggest it and tell us why you consider this \
                  book is important. The library will consider your opinion and if we decide to buy the \
                  book, we will issue a loan for you as soon as it arrives and send it by internal mail."
        )
        # Was "<br \><br \>", which emitted a literal backslash in the tag.
        msg += "<br /><br />"
        msg += _(
            "In case we decide not to buy the book, we will offer you an interlibrary loan"
        )

        body = bc_templates.tmpl_book_proposal_information(recid, msg, ln=ln)
    else:
        holdings_information = db.get_holdings_information(recid, False)
        body = bc_templates.tmpl_holdings_information(
            recid=recid, req=req, holdings_info=holdings_information, ln=ln)

    return body
Beispiel #7
0
    def check_arxiv(recid):
        """Tell whether any 037 $a value of the record starts with 'arXiv'.

        @param recid: ID of the record to inspect
        @return: True on the first matching value, False when none matches
        """
        record = get_record(recid)
        return any(
            value.startswith('arXiv')
            for field in record_get_field_instances(record, "037")
            for value in field_get_subfield_values(field, 'a')
        )
Beispiel #8
0
    def check_arxiv(recid):
        """Tell whether any 037 $a value of the record starts with 'arXiv'.

        @param recid: ID of the record to inspect
        @return: True on the first matching value, False when none matches
        """
        record = get_record(recid)
        return any(
            value.startswith('arXiv')
            for field in record_get_field_instances(record, "037")
            for value in field_get_subfield_values(field, 'a')
        )
Beispiel #9
0
def get_bibrecord(recid):
    """Return record in BibRecord wrapping.

    When revisions are known for the record, the structure is built from the
    MARCXML of the revision with the greatest ID; otherwise get_record() is
    used.  Returns None when record_exists() reports the record as absent.
    """
    if not record_exists(recid):
        return None

    revision_ids = get_record_revision_ids(recid)
    if not revision_ids:
        return get_record(recid)

    latest_revision = max(revision_ids)
    return create_record(get_marcxml_of_revision_id(latest_revision))[0]
 def get_recstruct_record(recid):
     """Store the marshal-serialized record as a 'recstruct' Bibfmt row.

     The record returned by get_record() is serialized, wrapped in a new
     Bibfmt entry stamped with the database's current time, added to the
     session and committed.  Nothing is returned.
     """
     serialized = serialize_via_marshal(get_record(recid))
     db.session.add(Bibfmt(id_bibrec=recid,
                           format='recstruct',
                           last_updated=db.func.now(),
                           value=serialized))
     db.session.commit()
Beispiel #11
0
def get_bibrecord(recid):
    """Return record in BibRecord wrapping.

    When revisions are known for the record, the structure is built from the
    MARCXML of the revision with the greatest ID; otherwise get_record() is
    used.  Returns None when record_exists() reports the record as absent.
    """
    if not record_exists(recid):
        return None

    revision_ids = get_record_revision_ids(recid)
    if not revision_ids:
        return get_record(recid)

    latest_revision = max(revision_ids)
    return create_record(get_marcxml_of_revision_id(latest_revision))[0]
Beispiel #12
0
def resolve_doi(req, doi, ln=CFG_SITE_LANG, verbose=0):
    """
    Redirect to given DOI, or display error page when DOI cannot be
    resolved.

    @param req: request object
    @param doi: the DOI to resolve (comes from the request, so it is
        escaped before being placed into HTML)
    @param ln: language code used for messages and pages
    @param verbose: verbosity level passed on to the search and to the
        error box
    @return: a redirect when exactly one record matches, otherwise an
        HTML page (not found / multiple matches / internal error)
    """
    _ = gettext_set_language(ln)
    # Fetch user ID:
    try:
        uid = getUid(req)
    except Error:
        register_exception(req=req, alert_admin=True)
        return page(title=_("Internal Error"),
                    body=create_error_box(req, verbose=verbose, ln=ln),
                    description="%s - Internal Error" % CFG_SITE_NAME,
                    keywords="%s, Internal Error" % CFG_SITE_NAME,
                    language=ln,
                    req=req,
                    navmenuid='search')
    # Resolve DOI
    recids = perform_request_search(p='doi:"%s"' % doi, of="id", verbose=verbose)
    # Keep only exact (case-insensitive) matches.  The inner loop variable
    # must not be called `doi`: Python 2 list comprehensions leak their
    # variable into the function scope, which used to overwrite the
    # requested DOI for later iterations and for the messages below.
    wanted_doi = doi.lower()
    recids = [recid for recid in recids
              if wanted_doi in [record_doi.lower()
                                for record_doi in get_record(recid).get('doi', '')
                                if record_doi]]

    # Answer
    if len(recids) == 1:
        # Found unique matching record
        return redirect_to_url(req, CFG_SITE_URL + '/' + CFG_SITE_RECORD + '/' + str(recids[0]))
    elif len(recids) == 0:
        # No corresponding record found; the DOI is user input, so escape it
        # before embedding it in the page body.
        page_body = '<p>' + (_("Sorry, DOI %(x_doi)s could not be resolved.", x_doi=('<strong>' + cgi.escape(str(doi)) + '</strong>'))) + '</p>'
        if req.header_only:
            # Same as the Python 2 form `raise SERVER_RETURN, HTTP_NOT_FOUND`
            raise apache.SERVER_RETURN(apache.HTTP_NOT_FOUND)
        return page(title=_('DOI "%(x_doi)s" Not Found', x_doi=cgi.escape(doi)),
                    body=page_body,
                    description=(CFG_SITE_NAME + ' - ' + _("Not found") + ': ' + cgi.escape(str(doi))),
                    keywords="%s" % CFG_SITE_NAME,
                    uid=uid,
                    language=ln,
                    req=req,
                    navmenuid='search')
    else:
        # Found multiple matching records.  The exception is raised and
        # caught on purpose so that register_exception() is called from
        # inside an except block.
        try:
            raise Exception('DOI "%s" matched multiple records (%s) -- Please check' % (doi, ', '.join([str(recid) for recid in recids])))
        except Exception:
            register_exception(req=req, alert_admin=True)
        page_body = websearch_templates.tmpl_multiple_dois_found_page(doi, recids, ln)
        return page(title=_('Found multiple records matching DOI %(x_doi)s', x_doi=cgi.escape(doi)),
                    body=page_body,
                    description=(CFG_SITE_NAME + ' - ' + _("Found multiple records matching DOI") + ': ' + cgi.escape(str(doi))),
                    keywords="%s" % CFG_SITE_NAME,
                    uid=uid,
                    language=ln,
                    req=req,
                    navmenuid='search')
Beispiel #13
0
 def test_simple_insert(self):
     """batchuploader - robotupload simple insert

     Sends the prepared request, runs bibupload on the last task and checks
     that a new record with the expected 245 subfields was created.
     """
     from invenio.legacy.search_engine import get_record
     response = urllib2.urlopen(self.req).read()
     self.failUnless("[INFO]" in response)
     task_id = get_last_taskid()
     run_shell_command("%s/bibupload %%s" % CFG_BINDIR, [str(task_id)])
     newest_recid = run_sql("SELECT MAX(id) FROM bibrec")[0][0]
     # A new record must have been created since self.last_recid was taken
     self.failIfEqual(self.last_recid, newest_recid)
     inserted = get_record(newest_recid)
     self.assertEqual(inserted['245'][0][0], [('a', 'The title')])
Beispiel #14
0
    def _modify_record(self,
                       recid,
                       test_func,
                       replace_func,
                       include_func,
                       append_colls=[],
                       replace_colls=[]):
        """Build a minimal record carrying the new 980 fields of a record.

        If replace_colls is non-empty, the new 980 fields are exactly one
        ('a', value) field per entry of replace_colls.  Otherwise every
        existing 980 field is inspected through its first subfield, and the
        entries of append_colls are added afterwards.

        @param recid: ID of the record whose 980 fields are read
        @param test_func: Function to test if a collection id should be changed
        @param replace_func: Function to replace the collection id.
        @param include_func: Function to test if collection should be included
        @param append_colls: values added as extra ('a', value) 980 fields
        @param replace_colls: values that replace all existing 980 fields
        @return: False when the record has no 980 field or nothing changed;
            otherwise a new record structure holding only the 001
            controlfield and the resulting 980 fields.
        """
        from invenio.legacy.search_engine import get_record
        rec = get_record(recid)
        newcolls = []
        dirty = False

        # NOTE(review): this KeyError handler also covers the callbacks
        # invoked inside the block, not only the rec['980'] lookup.
        try:
            colls = rec['980']
            if replace_colls:
                for c in replace_colls:
                    newcolls.append([('a', c)])
                    dirty = True
            else:
                for c in colls:
                    try:
                        # We are only interested in subfield 'a'
                        # (c[0] is the subfield list; only its first entry
                        # is looked at)
                        code, val = c[0][0]
                        if test_func(code, val):
                            # presumably replace_func returns a (code, value)
                            # tuple -- TODO confirm against callers
                            c[0][0] = replace_func(code, val)
                            dirty = True
                        # include_func sees the original code/val, even when
                        # the subfield was just replaced
                        if include_func(code, val):
                            newcolls.append(c[0])
                        else:
                            # Dropping a field counts as a modification
                            dirty = True
                    except IndexError:
                        # Entry without a first subfield: skipped silently
                        pass
                for c in append_colls:
                    newcolls.append([('a', c)])
                    dirty = True
        except KeyError:
            return False

        if not dirty:
            return False

        # Start a fresh record that only carries the ID and the 980 fields
        rec = {}
        record_add_field(rec, '001', controlfield_value=str(recid))

        for subfields in newcolls:
            record_add_field(rec, '980', subfields=subfields)

        return rec
Beispiel #15
0
def retrieve_field_values(curdir,
                          field_name,
                          separator=None,
                          system_number_file='SN',
                          tag=None):
    """
    This is a handy function to retrieve values either from the current
    submission directory, when a form has been just submitted, or from
    an existing record (e.g. during MBI action).

    @param curdir: is the current submission directory.
    @type curdir: string
    @param field_name: is the form field name that might exists on disk.
    @type field_name: string
    @param separator: is an optional separator. If it exists, it will be used
        to retrieve multiple values contained in the field.
    @type separator: string
    @param system_number_file: is the name of the file on disk in curdir, that
        is supposed to contain the record id.
    @type system_number_file: string
    @param tag: is the full MARC tag (tag+ind1+ind2+code) that should
        contain values. If not specified, only values in curdir will
        be retrieved.
    @type tag: 6-chars
    @return: the field value(s).
    @rtype: list of strings.

    @note: if field_name exists in curdir it will take precedence over
        retrieving the values from the record.
    """
    field_file = os.path.join(curdir, field_name)
    if os.path.exists(field_file):
        # Use a context manager so the file handle is always closed.
        with open(field_file) as field_fd:
            field_value = field_fd.read()
        if separator is not None:
            stripped_values = (value.strip()
                               for value in field_value.split(separator))
            # Empty chunks (e.g. between two consecutive separators) are
            # dropped.
            return [value for value in stripped_values if value]
        return [field_value.strip()]

    if tag is not None:
        system_number_path = os.path.join(curdir, system_number_file)
        if os.path.exists(system_number_path):
            with open(system_number_path) as system_number_fd:
                recid = int(system_number_fd.read().strip())
            record = get_record(recid)
            if separator:
                return record_get_field_values(record, tag[:3], tag[3], tag[4],
                                               tag[5])
            return [
                record_get_field_value(record, tag[:3], tag[3], tag[4],
                                       tag[5])
            ]
    return []
Beispiel #16
0
def get_record_collections(recid=0, recstruct=None):
    """ Returns all collections of a record, field 980
    @param recid: record id to get collections from; only used when no
        record structure is supplied
    @param recstruct: record structure to read from; when it is empty or
        None the record is fetched with get_record(recid)

    @return: list of collections (values of 980__a)
    @rtype: list
    """
    record = recstruct if recstruct else get_record(recid)
    values = record_get_field_values(record, tag="980", ind1=" ", ind2=" ", code="a")
    return list(values)
Beispiel #17
0
 def test_simple_insert(self):
     """batchuploader - robotupload simple insert

     Does nothing unless CFG_LOCALHOST_OK is set.  Otherwise sends the
     prepared request, runs bibupload on the last task and checks that a
     new record with the expected 245 subfields was created.
     """
     if not CFG_LOCALHOST_OK:
         return
     from invenio.legacy.search_engine import get_record
     response = urllib2.urlopen(self.req).read()
     self.failUnless("[INFO]" in response)
     task_id = get_last_taskid()
     run_shell_command("%s/bibupload %%s" % cfg['CFG_BINDIR'], [str(task_id)])
     newest_recid = run_sql("SELECT MAX(id) FROM bibrec")[0][0]
     # A new record must have been created since self.last_recid was taken
     self.failIfEqual(self.last_recid, newest_recid)
     inserted = get_record(newest_recid)
     self.assertEqual(inserted['245'][0][0], [('a', 'The title')])
Beispiel #18
0
def can_record_have_physical_copies(recid):
    """Determine if the record can have physical copies
    (addable through the bibCirculation module).

    NOTE(review): as written, this variant looks up the ID of the record's
    primary collection but never uses it, and returns False for every
    record.

    @param recid: ID of the record
    @return: always False
    """
    record = get_record(recid)
    if record is None:
        return False

    primary_collection = guess_primary_collection_of_a_record(recid)
    # The looked-up ID is not used below; the query is kept as-is.
    col_id = Collection.query.filter_by(name=primary_collection).value("id")
    return False
Beispiel #19
0
def _get_updated_record(record_id, update_commands):
    """Fetch a record and run every update command on it, in order.

    @param record_id: identifier of the record that will be updated
    @param update_commands: list of commands used to update record contents;
        each one is invoked through its process_record() method
    @return: the record structure after all commands were applied
    """
    updated = search_engine.get_record(recid=record_id)
    for command in update_commands:
        command.process_record(updated)
    return updated
Beispiel #20
0
def can_record_have_physical_copies(recid):
    """Determine if the record can have physical copies
    (addable through the bibCirculation module).

    NOTE(review): as written, this variant looks up the ID of the record's
    primary collection but never uses it, and returns False for every
    record.

    @param recid: ID of the record
    @return: always False
    """
    record = get_record(recid)
    if record is None:
        return False

    primary_collection = guess_primary_collection_of_a_record(recid)
    # The looked-up ID is not used below; the query is kept as-is.
    col_id = Collection.query.filter_by(name=primary_collection).value('id')
    return False
Beispiel #21
0
def get_record_collections(recid=0, recstruct=None):
    """ Returns all collections of a record, field 980
    @param recid: record id to get collections from; only used when no
        record structure is supplied
    @param recstruct: record structure to read from; when it is empty or
        None the record is fetched with get_record(recid)

    @return: list of collections (values of 980__a)
    @rtype: list
    """
    record = recstruct if recstruct else get_record(recid)
    values = record_get_field_values(
        record, tag="980", ind1=" ", ind2=" ", code="a")
    return list(values)
Beispiel #22
0
    def _modify_record(self, recid, test_func, replace_func, include_func,
                       append_colls=[], replace_colls=[]):
        """
        Build a minimal record carrying the new 980 fields of a record.

        If replace_colls is non-empty, the new 980 fields are exactly one
        ('a', value) field per entry of replace_colls.  Otherwise every
        existing 980 field is inspected through its first subfield, and the
        entries of append_colls are added afterwards.

        @param recid: ID of the record whose 980 fields are read
        @param test_func: Function to test if a collection id should be changed
        @param replace_func: Function to replace the collection id.
        @param include_func: Function to test if collection should be included
        @param append_colls: values added as extra ('a', value) 980 fields
        @param replace_colls: values that replace all existing 980 fields
        @return: False when the record has no 980 field or nothing changed;
            otherwise a new record structure holding only the 001
            controlfield and the resulting 980 fields.
        """
        from invenio.legacy.search_engine import get_record
        rec = get_record(recid)
        newcolls = []
        dirty = False

        # NOTE(review): this KeyError handler also covers the callbacks
        # invoked inside the block, not only the rec['980'] lookup.
        try:
            colls = rec['980']
            if replace_colls:
                for c in replace_colls:
                    newcolls.append([('a', c)])
                    dirty = True
            else:
                for c in colls:
                    try:
                        # We are only interested in subfield 'a'
                        # (c[0] is the subfield list; only its first entry
                        # is looked at)
                        code, val = c[0][0]
                        if test_func(code, val):
                            # presumably replace_func returns a (code, value)
                            # tuple -- TODO confirm against callers
                            c[0][0] = replace_func(code, val)
                            dirty = True
                        # include_func sees the original code/val, even when
                        # the subfield was just replaced
                        if include_func(code, val):
                            newcolls.append(c[0])
                        else:
                            # Dropping a field counts as a modification
                            dirty = True
                    except IndexError:
                        # Entry without a first subfield: skipped silently
                        pass
                for c in append_colls:
                    newcolls.append([('a', c)])
                    dirty = True
        except KeyError:
            return False

        if not dirty:
            return False

        # Start a fresh record that only carries the ID and the 980 fields
        rec = {}
        record_add_field(rec, '001', controlfield_value=str(recid))

        for subfields in newcolls:
            record_add_field(rec, '980', subfields=subfields)

        return rec
Beispiel #23
0
def perform_get_holdings_information(recid, req, action="borrowal", ln=CFG_SITE_LANG):
    """
    Display all the copies of an item. If the parameter action is 'proposal', display
    appropriate information to the user.

    @param recid: identify the record. Primary key of bibrec.
    @type recid: int

    @param req: request object, handed over to the holdings template.

    @param action: Specifies whether the current record is put up to solicit acquisition
    proposals(if "proposal") or not("borrowal").
    @type action: string

    @param ln: language code used for the messages and templates.

    @return body(html)
    """
    _ = gettext_set_language(ln)

    if action == "proposal":
        tag = AMZ_BOOK_PUBLICATION_DATE_TAG
        publication_date = record_get_field_value(get_record(recid), tag[:3],
                                                  ind1=tag[3], ind2=tag[4],
                                                  code=tag[5])
        msg = ''
        if publication_date:
            cur_date = datetime.date.today()
            try:
                parsed = time.strptime(publication_date, '%d %b %Y')
                pub_date = datetime.date(parsed[0], parsed[1], parsed[2])
            except Exception:
                # Best effort: a date that cannot be parsed is treated like
                # a book that is already published.  Unlike a bare except,
                # this lets SystemExit/KeyboardInterrupt propagate.
                pub_date = None

            if pub_date is not None and cur_date < pub_date:
                msg += _("The publication date of this book is %(x_date)s.", x_date=(publication_date))
                msg += "<br /><br />"
            else:
                msg += _("This book has no copies in the library. ")

        # NOTE: the message below is a translation key; its exact text
        # (including the embedded whitespace) must stay untouched.
        msg += _("If you think this book is interesting, suggest it and tell us why you consider this \
                  book is important. The library will consider your opinion and if we decide to buy the \
                  book, we will issue a loan for you as soon as it arrives and send it by internal mail.")
        # Was "<br \><br \>", which emitted a literal backslash in the tag.
        msg += "<br /><br />"
        msg += _("In case we decide not to buy the book, we will offer you an interlibrary loan")

        body = bc_templates.tmpl_book_proposal_information(recid, msg, ln=ln)
    else:
        holdings_information = db.get_holdings_information(recid, False)
        body = bc_templates.tmpl_holdings_information(recid=recid,
                                            req=req,
                                            holdings_info=holdings_information,
                                            ln=ln)

    return body
Beispiel #24
0
    def get_record(self):
        """
        Returns the record structure of this L{BibFormatObject} instance

        The structure is fetched for self.recID on first use and kept in
        self.record for later calls.

        @return: the record structure as defined by BibRecord library
        """
        from invenio.legacy.search_engine import get_record

        if self.record is not None:
            # Already loaded on a previous call
            return self.record

        self.record = get_record(self.recID)
        return self.record
Beispiel #25
0
def _get_formated_record(record_id, output_format, update_commands, language, outputTags="",
                         checked=True, displayed_records=None):
    """Returns a record in a given format

    @param record_id: the ID of record to format
    @param output_format: an output format code (or short identifier for the output format)
    @param update_commands: list of commands used to update record contents
    @param language: the language to use to format the record
    @param outputTags: the tags to be shown to the user
    @param checked: is the record checked by the user?
    @param displayed_records: records to be displayed on a given page

    @returns: record formated to be displayed or None
    """
    if update_commands and checked:
        # Modify the bibrecord object with the appropriate actions
        updated_record = _get_updated_record(record_id, update_commands)

    textmarc_options = {"aleph-marc":0, "correct-mode":1, "append-mode":0,
                        "delete-mode":0, "insert-mode":0, "replace-mode":0,
                        "text-marc":1}

    if record_id not in displayed_records:
        return

    old_record = search_engine.get_record(recid=record_id)
    old_record_textmarc = xmlmarc2textmarc.create_marc_record(old_record, sysno="", options=textmarc_options)
    if "hm" == output_format:
        if update_commands and checked:
            updated_record_textmarc = xmlmarc2textmarc.create_marc_record(updated_record, sysno="", options=textmarc_options)
            result = _get_record_diff(old_record_textmarc, updated_record_textmarc, outputTags, record_id)
        else:
            filter_tags = "All tags" not in outputTags and outputTags
            result = ['<pre>']
            for line in old_record_textmarc.splitlines():
                if not filter_tags or line.split()[0].replace('_', '') in outputTags:
                    result.append("%09d " % record_id + line.strip())
            result.append('</pre>')
            result = '\n'.join(result)
    else:
        if update_commands and checked:
            # No coloring of modifications in this case
            xml_record = bibrecord.record_xml_output(updated_record)
        else:
            xml_record = bibrecord.record_xml_output(old_record)
        result = bibformat.format_record(recID=None,
                                        of=output_format,
                                        xml_record=xml_record,
                                        ln=language)
    return result
Beispiel #26
0
def get_current_record(curdir, system_number_file='SN'):
    """
    Return the current record (in case it's being modified).

    @param curdir: the path to the current directory.
    @type curdir: string
    @param system_number_file: is the name of the file on disk in curdir, that
        is supposed to contain the record id.
    @type system_number_file: string
    @return: the record, or an empty dictionary when the file is missing
        or contains only whitespace.
    @rtype: as in L{get_record}
    """
    system_number_path = os.path.join(curdir, system_number_file)
    if not os.path.exists(system_number_path):
        return {}

    # Use a context manager so the file handle is always closed.
    with open(system_number_path) as system_number_fd:
        recid = system_number_fd.read().strip()

    if not recid:
        return {}
    return get_record(int(recid))
Beispiel #27
0
def can_record_have_physical_copies(recid):
    """Determine if the record can have physical copies
    (addable through the bibCirculation module).

    The answer is derived from the tabs displayed for the record: it is
    True only when the tab information of the record's primary collection
    has a "holdings" entry whose "visible" value is exactly True.

    @param recid: ID of the record
    @return: True or False
    """
    if get_record(recid) is None:
        return False

    primary_collection = guess_primary_collection_of_a_record(recid)
    tabs = get_detailed_page_tabs(get_colID(primary_collection), recid)

    return ("holdings" in tabs
            and "visible" in tabs["holdings"]
            and tabs["holdings"]["visible"] is True)
Beispiel #28
0
def get_current_record(curdir, system_number_file='SN'):
    """
    Return the current record (in case it's being modified).

    @param curdir: the path to the current directory.
    @type curdir: string
    @param system_number_file: is the name of the file on disk in curdir, that
        is supposed to contain the record id.
    @type system_number_file: string
    @return: the record, or an empty dictionary when the file is missing
        or contains only whitespace.
    @rtype: as in L{get_record}
    """
    system_number_path = os.path.join(curdir, system_number_file)
    if not os.path.exists(system_number_path):
        return {}

    # Use a context manager so the file handle is always closed.
    with open(system_number_path) as system_number_fd:
        recid = system_number_fd.read().strip()

    if not recid:
        return {}
    return get_record(int(recid))
Beispiel #29
0
def can_record_have_physical_copies(recid):
    """Determine if the record can have physical copies
    (addable through the bibCirculation module).

    The answer is derived from the tabs displayed for the record: it is
    True only when the tab information of the record's primary collection
    has a "holdings" entry whose "visible" value is exactly True.

    @param recid: ID of the record
    @return: True or False
    """
    if get_record(recid) is None:
        return False

    primary_collection = guess_primary_collection_of_a_record(recid)
    tabs = get_detailed_page_tabs(get_colID(primary_collection), recid)

    return ("holdings" in tabs
            and "visible" in tabs["holdings"]
            and tabs["holdings"]["visible"] is True)
Beispiel #30
0
def update(recid, form):
    """Validate a metadata form and write changed keys back to the record.

    Aborts with 401 when the record is not editable.  When the metadata form
    validates, the record's files are modified, the basic fields derived
    from the form are compared with the stored record, and the record is
    uploaded in 'replace' mode if at least one key differs.

    @param recid: ID of the record being edited
    @param form: submitted form data
    @return: JSON response; valid=True with the new URL and a waiting page
        on success, valid=False with the re-rendered form otherwise
    """
    if not is_record_editable(recid):
        abort(401)

    from invenio.legacy.search_engine import get_record
    from invenio.legacy.bibupload.engine import bibupload
    from invenio.modules.formatter import engine as bibformat_engine

    format_object = bibformat_engine.BibFormatObject(recid)
    domain = read_basic_metadata_field_from_marc(format_object, 'domain')
    metaclass, meta, meta_form = _get_meta_form_data(domain, form)

    if not meta_form.validate_on_submit():
        # Validation failed: send the form back with its errors
        form_html = render_template('b2share-addmeta-table.html',
                                    recid=recid,
                                    metadata=meta,
                                    form=meta_form,
                                    domain=metaclass,
                                    getattr=getattr)
        return jsonify(valid=False, html=form_html)

    current_app.logger.info("Updating record {}".format(recid))

    _bibdoc_modify_files(recid, form)

    rec_changes = {}
    add_basic_fields(rec_changes, form, meta)

    rec = get_record(recid)
    updated = False
    for key, new_value in rec_changes.items():
        old_value = rec.get(key)
        if old_value != new_value:
            current_app.logger.info("Updating key {} from {} to {}".format(
                key, old_value, new_value))
            rec[key] = new_value
            updated = True

    # Only upload when something actually differs from the stored record
    if updated:
        bibupload(rec, 'replace')

    return jsonify(valid=True,
                   newurl=url_for("record.metadata", recid=recid),
                   html=render_template('record_waitforit.html',
                                        recid=recid))
Beispiel #31
0
def retrieve_field_values(curdir, field_name, separator=None, system_number_file='SN', tag=None):
    """
    This is a handy function to retrieve values either from the current
    submission directory, when a form has been just submitted, or from
    an existing record (e.g. during MBI action).

    @param curdir: is the current submission directory.
    @type curdir: string
    @param field_name: is the form field name that might exist on disk.
    @type field_name: string
    @param separator: is an optional separator. If it exists, it will be used
        to retrieve multiple values contained in the field.
    @type separator: string
    @param system_number_file: is the name of the file on disk in curdir, that
        is supposed to contain the record id.
    @type system_number_file: string
    @param tag: is the full MARC tag (tag+ind1+ind2+code) that should
        contain values. If not specified, only values in curdir will
        be retrieved.
    @type tag: 6-chars
    @return: the field value(s); an empty list when neither the field file
        nor (with C{tag} given) the system number file exists.
    @rtype: list of strings.

    @note: if field_name exists in curdir it will take precedence over
        retrieving the values from the record.
    """
    field_file = os.path.join(curdir, field_name)
    if os.path.exists(field_file):
        # Use a context manager so the handle is closed deterministically
        # instead of being left to the garbage collector.
        with open(field_file) as field_fd:
            field_value = field_fd.read()
        if separator is not None:
            # Drop chunks that are empty once stripped.
            return [value.strip() for value in field_value.split(separator) if value.strip()]
        return [field_value.strip()]

    if tag is not None:
        system_number_path = os.path.join(curdir, system_number_file)
        if os.path.exists(system_number_path):
            with open(system_number_path) as sn_fd:
                recid = int(sn_fd.read().strip())
            record = get_record(recid)
            # tag is split into tag (3 chars), ind1, ind2 and subfield code.
            if separator:
                return record_get_field_values(record, tag[:3], tag[3], tag[4], tag[5])
            return [record_get_field_value(record, tag[:3], tag[3], tag[4], tag[5])]
    return []
Beispiel #32
0
def get_record_provenance(recid):
    """
    Build the provenance XML of a record, suitable to be put in the about
    tag.

    Each field instance of the external OAI id tag that carries a base URL
    subfield yields one <provenance> element; instances without a base URL
    are ignored. The elements are concatenated and returned as one string
    ("" when there are none).
    """
    record = get_record(recid)
    oaiid_tag = CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG
    # Ordered (subfield code, slot) pairs: for a given subfield only the
    # first pair whose code matches is used.
    slots = (
        (CFG_OAI_PROVENANCE_BASEURL_SUBFIELD, 'base_url'),
        (oaiid_tag[5], 'identifier'),
        (CFG_OAI_PROVENANCE_DATESTAMP_SUBFIELD, 'datestamp'),
        (CFG_OAI_PROVENANCE_METADATANAMESPACE_SUBFIELD, 'metadata_namespace'),
        (CFG_OAI_PROVENANCE_ORIGINDESCRIPTION_SUBFIELD, 'origin_description'),
        (CFG_OAI_PROVENANCE_HARVESTDATE_SUBFIELD, 'harvest_date'),
        (CFG_OAI_PROVENANCE_ALTERED_SUBFIELD, 'altered'),
    )
    out = ""
    for instance in record_get_field_instances(
            record, oaiid_tag[:3], oaiid_tag[3], oaiid_tag[4]):
        found = dict.fromkeys([slot_name for _code, slot_name in slots], "")
        for (code, value) in instance[0]:
            for slot_code, slot_name in slots:
                if code == slot_code:
                    found[slot_name] = value
                    break
        if not found['base_url']:
            continue
        out += """<provenance xmlns="http://www.openarchives.org/OAI/2.0/provenance" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/provenance http://www.openarchives.org/OAI/2.0/provenance.xsd">"""
        out += X.originDescription(
            harvestDate=found['harvest_date'], altered=found['altered'])(
                X.baseURL()(found['base_url']),
                X.identifier()(found['identifier']),
                X.datestamp()(found['datestamp']),
                X.metadataNamespace()(found['metadata_namespace']),
                found['origin_description']
                and X.originDescription(found['origin_description'])
                or ''  ## This is already XML
            )
        out += """</provenance>"""
    return out
Beispiel #33
0
def main():
    """Interactively re-upload a stored revision of each record in RECORDS.

    For every record id the known revisions are printed, the first entry
    returned by ``get_record_revisions`` is parsed back into a record and
    shown, and the operator is asked whether to upload it in 'replace' mode.
    """
    from invenio.legacy.search_engine import get_record
    from invenio.legacy.bibupload.engine import bibupload
    from invenio.legacy.bibrecord import create_record
    from invenio.legacy.bibedit.db_layer import get_record_revisions
    from invenio.legacy.bibedit.utils import (
        get_record_revision_ids,
        get_marcxml_of_revision,
    )

    for recid in RECORDS:
        # A falsy record ends the whole run, not just this iteration.
        if not get_record(recid):
            break

        print('Processing record: {0}'.format(recid))

        print(get_record_revision_ids(recid))
        # NOTE(review): bare `print` presumably emits a blank line under the
        # Python 2 print statement - confirm before porting.
        print

        revisions = get_record_revisions(recid)
        print(revisions)
        print

        # Only the first entry of the revision list is considered.
        for _revision_id, revision in revisions[:1]:
            restored = create_record(
                get_marcxml_of_revision(recid, revision))[0]
            pprint(restored)

            answer = raw_input('Bibupload (y/n)? ')
            if answer == 'y':
                bibupload(restored, 'replace')
Beispiel #34
0
def load_ticket_templates(recId):
    """
    Run every loaded ticket plugin against the record and collect the
    templates they produce.

    @param recId: id of the record handed to each plugin
    @return dictionary with the following structure:
        key: string: name of queue
        value: dict: a dictionary with 2 keys,
        the template subject and content of the queue
    @rtype dict
    @raise BibEditPluginException: when a loaded plugin entry is falsy
    """
    templates = {}
    available_plugins = load_ticket_plugins()
    record = get_record(recId)
    for plugin_name, plugin in available_plugins.items():
        if not plugin:
            raise BibEditPluginException("Plugin not valid in %s" % (plugin_name,))
        template_data = plugin["get_template_data"](record)
        if not template_data:
            # The plugin has nothing to offer for this record.
            continue
        # template_data is indexed as (queue, subject, content).
        entry = {"subject": template_data[1], "content": template_data[2]}
        templates[template_data[0]] = entry
    return templates
Beispiel #35
0
def load_ticket_templates(recId):
    """
    Load all enabled ticket plugins and call each of them on the record.

    @param recId: id of the record passed to the plugins
    @return dictionary with the following structure:
        key: string: name of queue
        value: dict: a dictionary with 2 keys,
        the template subject and content of the queue
    @rtype dict
    @raise BibEditPluginException: if one of the loaded plugins is falsy
    """
    result = {}
    loaded = load_ticket_plugins()
    marc_record = get_record(recId)
    for plugin_id, plugin_entry in loaded.items():
        if plugin_entry:
            produced = plugin_entry['get_template_data'](marc_record)
            if produced:
                # produced[0] is the queue; [1] and [2] are subject and content.
                result[produced[0]] = dict(subject=produced[1],
                                           content=produced[2])
            continue
        raise BibEditPluginException("Plugin not valid in %s" % (plugin_id,))
    return result
Beispiel #36
0
def get_record_provenance(recid):
    """
    Render the provenance information of a record as XML, suitable to be
    put in the about tag.

    One <provenance> element is produced for every field instance of the
    external OAI id tag that has a base URL subfield; the concatenation of
    these elements is returned ("" if there are none).
    """
    record = get_record(recid)
    oaiid_tag = CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG
    instances = record_get_field_instances(
        record, oaiid_tag[:3], oaiid_tag[3], oaiid_tag[4])

    xml = ""
    for instance in instances:
        # Reset all subfield slots for this field instance.
        base_url = ""
        identifier = ""
        datestamp = ""
        metadata_namespace = ""
        origin_description = ""
        harvest_date = ""
        altered = ""
        for (code, value) in instance[0]:
            if code == CFG_OAI_PROVENANCE_BASEURL_SUBFIELD:
                base_url = value
            elif code == oaiid_tag[5]:
                identifier = value
            elif code == CFG_OAI_PROVENANCE_DATESTAMP_SUBFIELD:
                datestamp = value
            elif code == CFG_OAI_PROVENANCE_METADATANAMESPACE_SUBFIELD:
                metadata_namespace = value
            elif code == CFG_OAI_PROVENANCE_ORIGINDESCRIPTION_SUBFIELD:
                origin_description = value
            elif code == CFG_OAI_PROVENANCE_HARVESTDATE_SUBFIELD:
                harvest_date = value
            elif code == CFG_OAI_PROVENANCE_ALTERED_SUBFIELD:
                altered = value

        # Without a base URL no provenance element is emitted.
        if not base_url:
            continue

        xml += """<provenance xmlns="http://www.openarchives.org/OAI/2.0/provenance" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/provenance http://www.openarchives.org/OAI/2.0/provenance.xsd">"""
        make_description = X.originDescription(
            harvestDate=harvest_date, altered=altered)
        xml += make_description(
            X.baseURL()(base_url),
            X.identifier()(identifier),
            X.datestamp()(datestamp),
            X.metadataNamespace()(metadata_namespace),
            # The stored origin description is already XML.
            origin_description and X.originDescription(origin_description) or "",
        )
        xml += """</provenance>"""
    return xml
Beispiel #37
0
    def from_recid(cls, recid, provisional=False):
        """Return the user communities referenced by a record.

        Looks at the first subfield of every 980 field; when it is an 'a'
        subfield whose value starts with the (provisional or regular)
        community id prefix, the remainder is used as the id for a
        ``cls.query`` lookup.

        :param recid: id of the record to inspect.
        :param provisional: use the provisional id prefix when true.
        :return: list of the objects found.
        """
        from invenio.legacy.search_engine import get_record
        rec = get_record(recid)
        if provisional:
            prefix_key = 'COMMUNITIES_ID_PREFIX_PROVISIONAL'
        else:
            prefix_key = 'COMMUNITIES_ID_PREFIX'
        prefix = "%s-" % cfg[prefix_key]

        matches = []
        for field in rec.get('980', []):
            try:
                # Only the first subfield is examined, and only if it is 'a'.
                code, val = field[0][0]
                if code != 'a' or not val.startswith(prefix):
                    continue
                community = cls.query.filter_by(id=val[len(prefix):]).first()
                if community:
                    matches.append(community)
            except IndexError:
                pass
        return matches
Beispiel #38
0
def update(recid, form):
    """Validate the submitted metadata form and write changes to the record.

    ``abort(401)`` is called when ``is_record_editable`` returns a falsy
    value. On a valid form the record files are processed through
    ``_bibdoc_modify_files`` and every field that differs from the stored
    record is replaced; the record is uploaded in 'replace' mode only if
    something changed.

    :param recid: identifier of the record to update.
    :param form: the submitted form data.
    :return: JSON response with ``valid`` set to True (plus ``newurl`` and
        ``html``) or False (plus the re-rendered form ``html``).
    """
    if not is_record_editable(recid):
        abort(401)

    from invenio.legacy.search_engine import get_record
    from invenio.legacy.bibupload.engine import bibupload
    from invenio.modules.formatter import engine as bibformat_engine

    bfo = bibformat_engine.BibFormatObject(recid)
    domain = read_basic_metadata_field_from_marc(bfo, 'domain')
    metaclass, meta, meta_form = _get_meta_form_data(domain, form)

    if meta_form.validate_on_submit():
        current_app.logger.info("Updating record {}".format(recid))

        _bibdoc_modify_files(recid, form)

        rec_changes = {}
        add_basic_fields(rec_changes, form, meta)

        rec = get_record(recid)
        # Keep only the fields whose submitted value differs from the record.
        pending = [(field, value) for (field, value) in rec_changes.items()
                   if rec.get(field) != value]
        for field, value in pending:
            message = "Updating key {} from {} to {}".format(
                field, rec.get(field), value)
            current_app.logger.info(message)
            rec[field] = value

        if pending:
            bibupload(rec, 'replace')

        wait_page = render_template('record_waitforit.html', recid=recid)
        return jsonify(valid=True,
                       newurl=url_for("record.metadata", recid=recid),
                       html=wait_page)

    # Validation failed: return the form so the user can correct it.
    return jsonify(
        valid=False,
        html=render_template('b2share-addmeta-table.html', recid=recid,
                             metadata=meta, form=meta_form,
                             domain=metaclass, getattr=getattr))
Beispiel #39
0
def main():
    """Offer to restore, record by record, a stored revision from RECORDS.

    Prints the revision ids and revision list of each record, rebuilds a
    record from the MARCXML of the first listed revision, pretty-prints it
    and uploads it in 'replace' mode when the operator answers 'y'.
    """
    from invenio.legacy.search_engine import get_record
    from invenio.legacy.bibupload.engine import bibupload
    from invenio.legacy.bibrecord import create_record
    from invenio.legacy.bibedit.db_layer import get_record_revisions
    from invenio.legacy.bibedit.utils import get_record_revision_ids
    from invenio.legacy.bibedit.utils import get_marcxml_of_revision

    for record_id in RECORDS:
        current = get_record(record_id)
        if not current:
            # Processing stops completely at the first falsy record.
            break

        print('Processing record: {0}'.format(record_id))

        revision_ids = get_record_revision_ids(record_id)
        print(revision_ids)
        # NOTE(review): bare `print` presumably outputs an empty line with
        # the Python 2 print statement - confirm.
        print

        revs = get_record_revisions(record_id)
        print(revs)
        print

        # Iterate over at most one entry: the first revision listed.
        for _unused_id, rev in revs[:1]:
            marcxml = get_marcxml_of_revision(record_id, rev)
            candidate = create_record(marcxml)[0]
            pprint(candidate)

            if raw_input('Bibupload (y/n)? ') == 'y':
                bibupload(candidate, 'replace')
Beispiel #40
0
def tarballs_by_recids(recids, sdir):
    """
    Take a string representing one recid or several and get the associated
    tarballs for those ids.

    Accepted forms are a single id ("12"), a range ("10-20") or a
    comma-separated mix of both ("3,10-20,42"). A range "low-high" is
    expanded with range(low, high), i.e. the upper bound is excluded.

    @param: recids (string): the record id or ids
    @param: sdir (string): where the tarballs should live

    @return: tarballs ([string, string, ...]): locations of tarballs;
        an empty list when recids is empty
    """
    if not recids:
        return []

    # A string without commas splits into a single chunk, so one loop
    # handles both the single and the comma-separated form.
    list_of_ids = []
    for chunk in recids.split(','):
        if '-' in chunk:
            low, high = chunk.split('-')
            list_of_ids.extend(range(int(low), int(high)))
        else:
            list_of_ids.append(int(chunk))

    arXiv_ids = []

    for recid in list_of_ids:
        rec = get_record(recid)
        for afieldinstance in record_get_field_instances(rec, tag='037'):
            # Not every 037 field has a source ($9) subfield; skip those
            # instead of failing with IndexError.
            sources = field_get_subfield_values(afieldinstance, '9')
            if sources and sources[0] == 'arXiv':
                arXiv_id = field_get_subfield_values(afieldinstance, 'a')[0]
                arXiv_ids.append(arXiv_id)

    return tarballs_by_arXiv_id(arXiv_ids, sdir)
    def tokenize(self, recID):
        """Collect the subfield values of a record selected by self.rules.

        Each rule is a (tag_to_index, necessary_tag, necessary_value)
        triple. tag_to_index is sliced as a 3-character tag, two
        indicators and a subfield code. For every matching field instance
        the value of that subfield is collected when:
          - necessary_tag is '' or is a subfield code present in the field;
          - necessary_value is '' or equals the field's necessary_tag value.

        Subfields are folded into a dict per field instance, so for a
        repeated subfield code only the last value is seen.

        @param recID: id of the record to tokenize
        @return: list of collected values; an empty list if a KeyError is
            raised while processing
        """
        phrases = []
        try:
            rec = get_record(recID)

            for rule in self.rules:
                tag_to_index, necessary_tag, necessary_value = rule
                core_tag = tag_to_index[0:3]
                ind = tag_to_index[3:5]
                sub_tag = tag_to_index[5]

                fields = [dict(instance[0]) for instance in record_get_field_instances(rec, core_tag, ind[0], ind[1])]
                for field in fields:
                    # `in` replaces dict.has_key(), which Python 3 removed.
                    tag_condition = (necessary_tag and necessary_tag in field) or necessary_tag == ''
                    value_condition = (necessary_value and field.get(necessary_tag, '') == necessary_value) or \
                                      necessary_value == ''
                    if tag_condition and sub_tag in field and value_condition:
                        phrases.append(field[sub_tag])
            return phrases
        except KeyError:
            return []
Beispiel #42
0
    def from_recid(cls, recid, provisional=False):
        """Get the user communities specified in a record.

        The first subfield of each 980 field is inspected; an 'a' subfield
        whose value begins with the selected id prefix is stripped of that
        prefix and looked up through ``Community.query``.

        :param recid: id of the record to read.
        :param provisional: select the provisional id prefix when true.
        :return: list of the communities found.
        """
        from invenio.legacy.search_engine import get_record
        rec = get_record(recid)
        raw_prefix = (cfg['COMMUNITIES_ID_PREFIX_PROVISIONAL'] if provisional
                      else cfg['COMMUNITIES_ID_PREFIX'])
        prefix = "%s-" % raw_prefix
        prefix_length = len(prefix)

        usercomm = []
        for collection_field in rec.get('980', []):
            try:
                # We are only interested in subfield 'a'
                code, val = collection_field[0][0]
                if code == 'a' and val.startswith(prefix):
                    found = Community.query.filter_by(
                        id=val[prefix_length:]).first()
                    if found:
                        usercomm.append(found)
            except IndexError:
                # Field without subfields: nothing to look at.
                pass
        return usercomm
Beispiel #43
0
def task_run_core():
    """Assign a texkey to the HEP records that do not have one yet.

    Searches for candidate records, double-checks each record's 035 fields
    for an existing texkey, generates a new key where none is found and
    hands all generated XML to ``process_chunk``.

    Always returns True.
    """
    recids = perform_request_search(
        p='-035:spirestex -035:inspiretex', cc='HEP')

    write_message("Found %s records to assign texkeys" % len(recids))
    processed_recids = []
    xml_to_process = []
    texkey_provenances = ["SPIRESTeX", "INSPIRETeX"]
    for count, recid in enumerate(recids):
        write_message("processing recid %s" % recid)

        # Check that the record does not have already a texkey
        has_texkey = False
        for instance in record_get_field_instances(get_record(recid),
                                                   tag="035",
                                                   ind1="", ind2=""):
            try:
                provenance = field_get_subfield_values(instance, "9")[0]
            except IndexError:
                provenance = ""
            # Prefer subfield z, fall back to a, else the empty string.
            value = ""
            for code in ("z", "a"):
                try:
                    value = field_get_subfield_values(instance, code)[0]
                except IndexError:
                    continue
                break
            if value and provenance in texkey_provenances:
                has_texkey = True
                write_message(
                    "INFO: Record %s has already texkey %s" % (recid, value))

        if not has_texkey:
            TexKeySeq = TexkeySeq()
            try:
                new_texkey = TexKeySeq.next_value(recid)
            except TexkeyNoAuthorError:
                # `continue` also skips the progress update below.
                write_message(
                    "WARNING: Record %s has no first author or collaboration"
                    % recid)
                continue
            except TexkeyNoYearError:
                write_message("WARNING: Record %s has no year" % recid)
                continue
            write_message("Created texkey %s for record %d" %
                          (new_texkey, recid))
            processed_recids.append(recid)
            xml_to_process.append(create_xml(recid, new_texkey))

        task_update_progress("Done %d out of %d." % (count, len(recids)))
        task_sleep_now_if_required()

    # sequence ID to be used in all subsequent tasks
    sequence_id = str(random.randrange(1, 4294967296))
    if xml_to_process:
        process_chunk(xml_to_process, sequence_id)

    # Indexing of processed_recids is intentionally not triggered here.
    # FIXME: Waiting for sequence id to be fixed

    return True
Beispiel #44
0
def task_run_core():
    """Find records without a texkey, generate one and upload the changes.

    Each candidate returned by the search is re-checked against its 035
    fields. Records that really lack a texkey get a newly generated one,
    and the XML created for them is passed on to ``process_chunk``.

    Always returns True.
    """

    def first_subfield_value(instance, *codes):
        """Return the first value of the first code that has one, else ''."""
        for code in codes:
            try:
                return field_get_subfield_values(instance, code)[0]
            except IndexError:
                pass
        return ""

    recids = perform_request_search(p='-035:spirestex -035:inspiretex',
                                    cc='HEP')

    write_message("Found %s records to assign texkeys" % len(recids))
    processed_recids = []
    xml_to_process = []
    for count, recid in enumerate(recids):
        write_message("processing recid %s" % recid)

        # Check that the record does not have already a texkey
        has_texkey = False
        recstruct = get_record(recid)
        candidates = record_get_field_instances(recstruct, tag="035",
                                                ind1="", ind2="")
        for instance in candidates:
            provenance = first_subfield_value(instance, "9")
            value = first_subfield_value(instance, "z", "a")
            if provenance in ("SPIRESTeX", "INSPIRETeX") and value:
                has_texkey = True
                write_message("INFO: Record %s has already texkey %s" %
                              (recid, value))

        if not has_texkey:
            TexKeySeq = TexkeySeq()
            try:
                new_texkey = TexKeySeq.next_value(recid)
            except TexkeyNoAuthorError:
                # Skipped records do not reach the progress update below.
                write_message(
                    "WARNING: Record %s has no first author or collaboration" %
                    recid)
                continue
            except TexkeyNoYearError:
                write_message("WARNING: Record %s has no year" % recid)
                continue
            write_message("Created texkey %s for record %d" %
                          (new_texkey, recid))
            xml = create_xml(recid, new_texkey)
            processed_recids.append(recid)
            xml_to_process.append(xml)

        progress = "Done %d out of %d." % (count, len(recids))
        task_update_progress(progress)
        task_sleep_now_if_required()

    # sequence ID to be used in all subsequent tasks
    sequence_id = str(random.randrange(1, 4294967296))
    if xml_to_process:
        process_chunk(xml_to_process, sequence_id)

    # The processed records are not indexed from here.
    # FIXME: Waiting for sequence id to be fixed

    return True
Beispiel #45
0
def tarballs_by_recids(recids, sdir, docname=None, doctype=None, docformat=None):
    """
    Resolve a recid specification to the tarballs of the matching records.

    Without any of C{docname}, C{doctype}, C{docformat} the arXiv report
    numbers (037 fields whose $9 is 'arXiv') are collected and handed to
    C{tarballs_by_arXiv_id}. With at least one of them, the latest attached
    files of each record are filtered on the given criteria instead.

    @param: recids (string): the record id or ids; single ids and
        "low-high" ranges (expanded with range(low, high)) may be combined
        with commas
    @param: sdir (string): where the tarballs should live
    @param docname: select tarball for given recid(s) that match docname
    @param doctype: select tarball for given recid(s) that match doctype
    @param docformat: select tarball for given recid(s) that match docformat
    @return: an empty list for empty recids; a list of (path, recid) tuples
        when a doc* filter is given; otherwise the result of
        C{tarballs_by_arXiv_id}
    """
    if not recids:
        return []

    tokens = recids.split(',') if ',' in recids else [recids]
    wanted_ids = []
    for token in tokens:
        if '-' in token:
            first, last = token.split('-')
            wanted_ids.extend(range(int(first), int(last)))
        else:
            wanted_ids.append(int(token))

    use_local_files = bool(doctype or docname or docformat)

    arXiv_ids = []
    local_files = []
    for recid in wanted_ids:
        rec = get_record(recid)
        if use_local_files:
            # Keep only the latest files that satisfy every given criterion.
            selected = [
                docfile
                for docfile in BibRecDocs(recid).list_latest_files()
                if (not doctype or docfile.get_type() == doctype)
                and (not docname or docfile.get_name() == docname)
                and (not docformat or docfile.get_format() == docformat)
            ]
            local_files.extend([(docfile.get_path(), recid) for docfile in selected])
            continue
        for report_field in record_get_field_instances(rec, tag='037'):
            sources = field_get_subfield_values(report_field, '9')
            if len(sources) > 0 and 'arXiv' == sources[0]:
                arXiv_ids.append(field_get_subfield_values(report_field, 'a')[0])

    if use_local_files:
        return local_files

    return tarballs_by_arXiv_id(arXiv_ids, sdir)
Beispiel #46
0
def oairepositoryupdater_task():
    """Main business logic code of oai_archive.

    Compares, set by set, the records that should belong to each OAI set
    with the records currently tagged as belonging to it, and writes a
    MARCXML correction for every record whose OAI id or set membership has
    to change. Corrections are written to temporary files under CFG_TMPDIR
    and, unless the "no_upload" task option is set, each file is submitted
    as a bibupload task.

    When the "report" task option is greater than 1 only the repository
    status is printed.

    Returns True on every path.
    """
    no_upload = task_get_option("no_upload")
    report = task_get_option("report")

    # Report-only mode: print the status and stop.
    if report > 1:
        print_repository_status(verbose=report)
        return True

    # The snapshot is only used for the log message below.
    initial_snapshot = {}
    for set_spec in all_set_specs():
        initial_snapshot[set_spec] = get_set_definitions(set_spec)
    write_message("Initial set snapshot: %s" % pformat(initial_snapshot), verbose=2)

    task_update_progress("Fetching records to process")

    recids_with_oaiid = search_unit_in_bibxxx(p='*', f=CFG_OAI_ID_FIELD, type='e')
    write_message("%s recids have an OAI ID" % len(recids_with_oaiid), verbose=2)

    all_current_recids = search_unit_in_bibxxx(p='*', f=CFG_OAI_SET_FIELD, type='e')
    # Starts as "everything currently exported"; the records that should
    # stay exported are subtracted set by set in the loop below.
    no_more_exported_recids = intbitset(all_current_recids)
    write_message("%s recids are currently exported" % (len(all_current_recids)), verbose=2)

    all_affected_recids = intbitset()
    all_should_recids = intbitset()
    recids_for_set = {}
    for set_spec in all_set_specs():
        # An empty set spec is replaced by the global set spec.
        if not set_spec:
            set_spec = CFG_OAI_REPOSITORY_GLOBAL_SET_SPEC
        should_recids = get_recids_for_set_spec(set_spec)
        recids_for_set[set_spec] = should_recids
        no_more_exported_recids -= should_recids
        all_should_recids |= should_recids
        current_recids = search_unit_in_bibxxx(p=set_spec, f=CFG_OAI_SET_FIELD, type='e')
        write_message("%s recids should be in %s. Currently %s are in %s" % (len(should_recids), set_spec, len(current_recids), set_spec), verbose=2)
        to_add = should_recids - current_recids
        write_message("%s recids should be added to %s" % (len(to_add), set_spec), verbose=2)
        to_remove = current_recids - should_recids
        write_message("%s recids should be removed from %s" % (len(to_remove), set_spec), verbose=2)
        affected_recids = to_add | to_remove
        write_message("%s recids should be hence updated for %s" % (len(affected_recids), set_spec), verbose=2)
        all_affected_recids |= affected_recids

    missing_oaiid = all_should_recids - recids_with_oaiid
    write_message("%s recids are missing an oaiid" % len(missing_oaiid))
    write_message("%s recids should no longer be exported" % len(no_more_exported_recids))

    ## Let's add records with missing OAI ID
    all_affected_recids |= missing_oaiid | no_more_exported_recids
    write_message("%s recids should updated" % (len(all_affected_recids)), verbose=2)

    if not all_affected_recids:
        write_message("Nothing to do!")
        return True

    # Prepare to save results in a tmp file
    (fd, filename) = mkstemp(dir=CFG_TMPDIR,
                                  prefix='oairepository_' + \
                                  time.strftime("%Y%m%d_%H%M%S_",
                                                time.localtime()))
    oai_out = os.fdopen(fd, "w")
    oai_out.write("<collection>")

    # Number of records written to the currently open file.
    tot = 0
    # Iterate over the recids
    for i, recid in enumerate(all_affected_recids):
        task_sleep_now_if_required(can_stop_too=True)
        task_update_progress("Done %s out of %s records." % \
                             (i, len(all_affected_recids)))

        write_message("Elaborating recid %s" % recid, verbose=3)
        record = get_record(recid)
        if not record:
            write_message("Record %s seems empty. Let's skip it." % recid, verbose=3)
            continue
        new_record = {}

        # Check if an OAI identifier is already in the record or
        # not.
        assign_oai_id_entry = False
        oai_id_entry = record_get_field_value(record, tag=CFG_OAI_ID_FIELD[:3], ind1=CFG_OAI_ID_FIELD[3], ind2=CFG_OAI_ID_FIELD[4], code=CFG_OAI_ID_FIELD[5])
        if not oai_id_entry:
            assign_oai_id_entry = True
            oai_id_entry = "oai:%s:%s" % (CFG_OAI_ID_PREFIX, recid)
            write_message("Setting new oai_id %s for record %s" % (oai_id_entry, recid), verbose=3)
        else:
            write_message("Already existing oai_id %s for record %s" % (oai_id_entry, recid), verbose=3)

        # Get the sets to which this record already belongs according
        # to the metadata
        current_oai_sets = set(record_get_field_values(record, tag=CFG_OAI_SET_FIELD[:3], ind1=CFG_OAI_SET_FIELD[3], ind2=CFG_OAI_SET_FIELD[4], code=CFG_OAI_SET_FIELD[5]))
        write_message("Record %s currently belongs to these oai_sets: %s" % (recid, ", ".join(current_oai_sets)), verbose=3)

        current_previous_oai_sets = set(record_get_field_values(record, tag=CFG_OAI_PREVIOUS_SET_FIELD[:3], ind1=CFG_OAI_PREVIOUS_SET_FIELD[3], ind2=CFG_OAI_PREVIOUS_SET_FIELD[4], code=CFG_OAI_PREVIOUS_SET_FIELD[5]))
        write_message("Record %s currently doesn't belong anymore to these oai_sets: %s" % (recid, ", ".join(current_previous_oai_sets)), verbose=3)

        # Get the sets that should be in this record according to
        # settings
        updated_oai_sets = set(_set for _set, _recids in iteritems(recids_for_set)
             if recid in _recids)
        write_message("Record %s now belongs to these oai_sets: %s" % (recid, ", ".join(updated_oai_sets)), verbose=3)

        # Previous sets = sets recorded as previous or current in the
        # metadata that the record no longer belongs to.
        updated_previous_oai_sets = set(_set for _set in (current_previous_oai_sets - updated_oai_sets) |
             (current_oai_sets - updated_oai_sets))
        write_message("Record %s now doesn't belong anymore to these oai_sets: %s" % (recid, ", ".join(updated_previous_oai_sets)), verbose=3)

        # Ok, we have the old sets and the new sets. If they are equal
        # and oai ID does not need to be added, then great, nothing to
        # change . Otherwise apply the new sets.
        if current_oai_sets == updated_oai_sets and not assign_oai_id_entry:
            write_message("Nothing has changed for record %s, let's move on!" % recid, verbose=3)
            continue # Jump to next recid

        write_message("Something has changed for record %s, let's update it!" % recid, verbose=3)
        # The OAI id, the current sets and the previous sets all go into
        # one field, each under its own subfield code.
        subfields = [(CFG_OAI_ID_FIELD[5], oai_id_entry)]
        for oai_set in updated_oai_sets:
            subfields.append((CFG_OAI_SET_FIELD[5], oai_set))
        for oai_set in updated_previous_oai_sets:
            subfields.append((CFG_OAI_PREVIOUS_SET_FIELD[5], oai_set))

        # The correction record holds only the record id (001) and the
        # rebuilt OAI field.
        record_add_field(new_record, tag="001", controlfield_value=str(recid))
        record_add_field(new_record, tag=CFG_OAI_ID_FIELD[:3], ind1=CFG_OAI_ID_FIELD[3], ind2=CFG_OAI_ID_FIELD[4], subfields=subfields)
        oai_out.write(record_xml_output(new_record))
        tot += 1
        # File is full: close it, submit it and start a new one.
        if tot == CFG_OAI_REPOSITORY_MARCXML_SIZE:
            oai_out.write("</collection>")
            oai_out.close()
            write_message("Wrote to file %s" % filename)
            if not no_upload:
                # The "notimechange" option adds '-n' to the submission.
                if task_get_option("notimechange"):
                    task_low_level_submission('bibupload', 'oairepository', '-c', filename, '-n')
                else:
                    task_low_level_submission('bibupload', 'oairepository', '-c', filename)
            # Prepare to save results in a tmp file
            (fd, filename) = mkstemp(dir=CFG_TMPDIR,
                                        prefix='oairepository_' + \
                                        time.strftime("%Y%m%d_%H%M%S_",
                                                        time.localtime()))
            oai_out = os.fdopen(fd, "w")
            oai_out.write("<collection>")
            tot = 0
            task_sleep_now_if_required(can_stop_too=True)

    # Close the last (possibly empty) file.
    oai_out.write("</collection>")
    oai_out.close()
    write_message("Wrote to file %s" % filename)

    if tot > 0:
        if not no_upload:
            task_sleep_now_if_required(can_stop_too=True)
            if task_get_option("notimechange"):
                task_low_level_submission('bibupload', 'oairepository', '-c', filename, '-n')
            else:
                task_low_level_submission('bibupload', 'oairepository', '-c', filename)
    else:
        # No record was written to the last file: delete it.
        os.remove(filename)

    return True
Beispiel #47
0
 def get_recstruct_record(recid):
     """Store the record, passed through serialize_via_marshal, as a new
     'recstruct' Bibfmt row and commit the session."""
     serialized = serialize_via_marshal(get_record(recid))
     row = Bibfmt(
         id_bibrec=recid,
         format='recstruct',
         last_updated=db.func.now(),
         value=serialized,
     )
     db.session.add(row)
     db.session.commit()
Beispiel #48
0
def record_get_keywords(record, main_field=bconfig.CFG_MAIN_FIELD, others=bconfig.CFG_OTHER_FIELDS):
    """Collect the keywords stored in a marc record.

    Each field whose tag comes from ``main_field`` contributes one keyword:
    the text is read from subfield ``a`` (the last one wins), the weight
    from subfield ``n`` and the type from subfield ``9``.  The value stored
    for it is a one-element list holding ``weight`` copies of ``(0, 0)``,
    i.e. an empty inner list when the field has no ``n`` subfield.

    Each field whose tag comes from ``others`` contributes the value of its
    first ``a`` subfield, typed ``"f<code>"`` and stored as ``[[(0, 0)]]``.

    Only the tag part of every MARC code is used for the lookup; the
    indicators returned by ``_parse_marc_code`` are ignored.

    :param record: int or marc record; an int is loaded through
        ``get_record``, anything else is used as it is
    :param main_field: MARC code, or list of codes, of the main keyword
        fields
    :param others: MARC code, or list of codes, of the additional keyword
        fields
    :return: tuple (found, keywords, rec)
        found - int, number of keywords found in the main fields
            (the other fields are not counted)
        keywords - dictionary keyed by bor.KeywordToken objects
        rec - the marc record the keywords were read from
    """
    if isinstance(main_field, six.string_types):
        main_field = [main_field]
    if isinstance(others, six.string_types):
        others = [others]

    rec = get_record(record) if isinstance(record, int) else record

    keywords = {}
    found = 0
    for marc_code in main_field:
        tag, _ind1, _ind2 = _parse_marc_code(marc_code)
        for field in rec.get(tag, []):
            text, weight, kw_type = "", 0, ""
            for subfield in field[0]:
                code = subfield[0]
                if code == "a":
                    text = subfield[1]
                elif code == "n":
                    weight = int(subfield[1])
                elif code == "9":
                    kw_type = subfield[1]
            if not text:
                continue
            found += 1
            keywords[bor.KeywordToken(text, type=kw_type)] = [[(0, 0)] * weight]

    # A falsy ``others`` (None, empty list) simply means no extra fields.
    for field_code in others or ():
        tag, _ind1, _ind2 = _parse_marc_code(field_code)
        kw_type = "f%s" % field_code
        for field in rec.get(tag, []):
            for subfield in field[0]:
                if subfield[0] != "a":
                    continue
                # Only the first 'a' subfield of the field is recorded.
                keywords[bor.KeywordToken(subfield[1], type=kw_type)] = [[(0, 0)]]
                break

    return found, keywords, rec
Beispiel #49
0
def tarballs_by_recids(recids,
                       sdir,
                       docname=None,
                       doctype=None,
                       docformat=None):
    """
    Take a string representing one recid or several and get the associated
    tarballs for those ids. By default look at the 037 fields of each record
    whose source subfield ($9) is 'arXiv' and fetch the tarballs for the
    report numbers found in their $a subfield. This can be changed
    with C{docname}, C{doctype}, C{docformat}: as soon as one of them is
    given, the latest files attached to each record are filtered by the
    given criteria and returned instead.

    The string is a comma separated list of items, each item being either a
    single id ('12') or a range ('10-20').

    NOTE(review): the upper bound of a 'low-high' range is excluded (plain
    C{range(low, high)}), exactly as before; confirm whether callers expect
    an inclusive range.

    @param: recids (string): the record id or ids
    @param: sdir (string): where the tarballs should live
    @param docname: select tarball for given recid(s) that match docname
    @param doctype: select tarball for given recid(s) that match doctype
    @param docformat: select tarball for given recid(s) that match docformat
    @return: without filters, whatever C{tarballs_by_arXiv_id} returns for
        the collected arXiv ids; with at least one filter, a list of
        (file path, recid) tuples. An empty list when C{recids} is empty.
    @raise ValueError: when an item is not an integer or a 'low-high' pair
    """
    if not recids:
        return []

    # A string without commas splits into a single item, so one code path
    # covers both the single-item and the comma separated form.
    list_of_ids = []
    for item in recids.split(','):
        if '-' in item:
            low, high = item.split('-')
            list_of_ids.extend(range(int(low), int(high)))
        else:
            list_of_ids.append(int(item))

    use_local_files = bool(doctype or docname or docformat)

    arXiv_ids = []
    local_files = []
    for recid in list_of_ids:
        if not use_local_files:
            # The record itself is only needed to inspect its 037 fields.
            rec = get_record(recid)
            for afieldinstance in record_get_field_instances(rec, tag='037'):
                sources = field_get_subfield_values(afieldinstance, '9')
                if not sources or sources[0] != 'arXiv':
                    continue
                report_numbers = field_get_subfield_values(
                    afieldinstance, 'a')
                # An arXiv field without a report number cannot be resolved
                # to a tarball: skip it instead of failing on an empty list.
                if report_numbers:
                    arXiv_ids.append(report_numbers[0])
        else:
            bibarchive = BibRecDocs(recid)
            all_files = bibarchive.list_latest_files()
            if doctype:
                all_files = [
                    docfile for docfile in all_files
                    if docfile.get_type() == doctype
                ]
            if docname:
                all_files = [
                    docfile for docfile in all_files
                    if docfile.get_name() == docname
                ]
            if docformat:
                all_files = [
                    docfile for docfile in all_files
                    if docfile.get_format() == docformat
                ]
            local_files.extend([(docfile.get_path(), recid)
                                for docfile in all_files])

    if use_local_files:
        return local_files

    return tarballs_by_arXiv_id(arXiv_ids, sdir)
Beispiel #50
0
def record_get_keywords(record,
                        main_field=bconfig.CFG_MAIN_FIELD,
                        others=bconfig.CFG_OTHER_FIELDS):
    """Return the keywords found in a marc record as KeywordToken objects.

    Fields listed in ``main_field`` are read first: subfield ``a`` gives the
    keyword, ``n`` its weight and ``9`` its type.  A keyword is registered
    with a one-element list containing ``weight`` times ``(0, 0)``; when the
    field carries no ``n`` subfield the weight stays 0 and that inner list
    is empty.

    Fields listed in ``others`` are read next: the first ``a`` subfield of
    each field is registered with type ``'f<code>'`` and value
    ``[[(0, 0)]]``.

    The indicators produced by ``_parse_marc_code`` are not used; fields are
    selected by tag only.

    :param record: int or marc record, if int - marc record is loaded
        with ``get_record``. If you pass record instance, keywords are
        extracted from it
    :param main_field: a MARC code or a list of MARC codes
    :param others: a MARC code or a list of MARC codes
    :return: tuple (found, keywords, rec)
        found - int indicating how many main_field keywords were found
            the other fields are not counted
        keywords - standard dictionary keyed by keywordToken objects
        rec - marc record object the keywords were taken from
    """
    main_codes = main_field
    if isinstance(main_codes, six.string_types):
        main_codes = [main_codes]
    other_codes = others
    if isinstance(other_codes, six.string_types):
        other_codes = [other_codes]

    if isinstance(record, int):
        rec = get_record(record)
    else:
        rec = record

    keywords = {}
    found = 0

    for main_code in main_codes:
        tag, _unused_ind1, _unused_ind2 = _parse_marc_code(main_code)
        for field in rec.get(tag, []):
            # Defaults used when the matching subfield is absent.
            seen = {'a': '', 'n': 0, '9': ''}
            for subfield in field[0]:
                code = subfield[0]
                if code == 'n':
                    seen['n'] = int(subfield[1])
                elif code in ('a', '9'):
                    seen[code] = subfield[1]
            if seen['a']:
                found += 1
                token = bor.KeywordToken(seen['a'], type=seen['9'])
                keywords[token] = [[(0, 0)] * seen['n']]

    if other_codes:
        for other_code in other_codes:
            tag, _unused_ind1, _unused_ind2 = _parse_marc_code(other_code)
            token_type = 'f%s' % other_code
            for field in rec.get(tag, []):
                for subfield in field[0]:
                    if subfield[0] == 'a':
                        # Stop at the first 'a' subfield of this field.
                        token = bor.KeywordToken(subfield[1], type=token_type)
                        keywords[token] = [[(0, 0)]]
                        break

    return found, keywords, rec
Beispiel #51
0
def main():
    """Move resource-type values into field 337, record by record.

    Record ids are walked from 1 upwards until ``get_record`` returns an
    empty record.  For every record:

    * each ``a`` subfield of a 980 field whose value is in ``TYPES`` becomes
      a 337 field; every other 980 subfield is kept as a 980 field of its
      own (duplicates are dropped);
    * each 690 field whose first subfield is ``a = 'ling_resource_type'``
      has the value of its second subfield turned into a 337 ``a`` field
      when that value is in ``TYPES``; otherwise a warning is printed and
      the field is not carried over.  All remaining 690 fields are kept,
      without duplicates.

    A record is only touched when its 337 content changed: fields 337, 980
    and 690 are rebuilt, old and new values are printed, and after an
    interactive confirmation the record is sent to ``bibupload`` in
    ``replace`` mode.
    """
    import invenio.modules.editor.models
    import invenio.modules.editor.views

    from invenio.legacy.search_engine import get_record
    from invenio.legacy.bibrecord import (
        record_delete_field,
        record_add_field,
    )
    from invenio.legacy.bibupload.engine import (
        bibupload, )

    for recid in itertools.count(1):
        # Two independent fetches: one is kept untouched for the report.
        old_rec = get_record(recid)
        rec = get_record(recid)

        if not rec:
            break

        print('Processing record: {0}'.format(recid))

        old_337 = [field[0] for field in rec.get('337', [])]
        new_337 = old_337[:]
        new_690 = []
        new_980 = []

        for field in rec.get('980', []):
            for subfield in field[0]:
                is_type = subfield[0] == 'a' and subfield[1] in TYPES
                target = new_337 if is_type else new_980
                if [subfield] not in target:
                    target.append([subfield])

        for field in rec.get('690', []):
            subfields = field[0]
            head = subfields[0]
            if not (head[0] == 'a' and head[1] == 'ling_resource_type'):
                if subfields not in new_690:
                    new_690.append(subfields)
                continue

            res_type = subfields[1][1]
            if res_type not in TYPES:
                print("Unrecognized 'ling_resource_type' value! '{0}'".
                      format(res_type))
                continue

            entry = [('a', res_type)]
            if entry not in new_337:
                new_337.append(entry)

        if new_337 == old_337:
            continue

        for tag in ('337', '980', '690'):
            record_delete_field(rec, tag)
        rebuilt = (('337', new_337), ('980', new_980), ('690', new_690))
        for tag, fields in rebuilt:
            for subfields in fields:
                record_add_field(rec, tag, subfields=subfields)

        report = (
            ('337', '\nOld 337:', 'New 337:'),
            ('690', '\nOld 690:', 'New 690:'),
            ('980', '\nOld 980:', 'New 980:'),
        )
        for tag, old_title, new_title in report:
            print(old_title)
            pprint(old_rec.get(tag))
            print(new_title)
            pprint(rec.get(tag))

        if raw_input('Bibupload (y/n)? ') == 'y':
            bibupload(rec, 'replace')
Beispiel #52
0
def _oairepository_open_batch_file():
    """Create a new temporary batch file and open its ``<collection>``.

    :return: tuple (file object opened for writing, path of the file)
    """
    (fd, filename) = mkstemp(dir=CFG_TMPSHAREDDIR,
                             prefix='oairepository_' +
                             time.strftime("%Y%m%d_%H%M%S_",
                                           time.localtime()))
    oai_out = os.fdopen(fd, "w")
    oai_out.write("<collection>")
    return oai_out, filename


def _oairepository_submit_upload(filename):
    """Submit the bibupload task that applies the batch in ``filename``.

    Every batch is submitted with the same arguments.  Presumably
    ``-Noairepository`` is what makes the task appear as
    ``proc='bibupload:oairepository'``, the value the updater looks for
    before starting a new run -- confirm against the bibsched documentation.
    """
    arguments = ['bibupload', 'oairepository', '-c', filename]
    if task_get_option("notimechange"):
        arguments.append('-n')
    arguments.extend(['-Noairepository', '-P', '-1'])
    task_low_level_submission(*arguments)


def oairepositoryupdater_task():
    """Main business logic code of oai_archive.

    Compares, for every OAI set, the records that should belong to it with
    the records currently tagged with it, and writes corrections to
    temporary MARCXML files that are handed to bibupload.

    Steps:

    1. With the ``report`` task option greater than 1, only print the
       repository status.
    2. Skip the run when a ``bibupload:oairepository`` task is still
       waiting in ``schTASK``.
    3. Collect the affected records: records to add to or remove from a
       set, records that should be exported but have no OAI ID, and
       records that are exported but belong to no set any more.
    4. For each affected record that really changed, write a record
       containing field 001 and one OAI field holding the OAI ID, the
       current sets and the previous sets.
    5. Close a batch every ``CFG_OAI_REPOSITORY_MARCXML_SIZE`` records and,
       unless the ``no_upload`` task option is set, submit it to bibupload.
       A trailing batch without records is deleted instead.

    :return: True in every case
    """
    no_upload = task_get_option("no_upload")
    report = task_get_option("report")

    if report > 1:
        print_repository_status(verbose=report)
        return True

    if run_sql(
            "SELECT id FROM schTASK WHERE proc='bibupload:oairepository' AND status='WAITING'"
    ):
        write_message(
            "Previous requests of oairepository still being elaborated. Let's skip this execution."
        )
        return True

    initial_snapshot = {}
    for set_spec in all_set_specs():
        initial_snapshot[set_spec] = get_set_definitions(set_spec)
    write_message("Initial set snapshot: %s" % pformat(initial_snapshot),
                  verbose=2)

    task_update_progress("Fetching records to process")

    recids_with_oaiid = search_unit_in_bibxxx(p='*',
                                              f=CFG_OAI_ID_FIELD,
                                              type='e')
    write_message("%s recids have an OAI ID" % len(recids_with_oaiid),
                  verbose=2)

    all_current_recids = search_unit_in_bibxxx(p='*',
                                               f=CFG_OAI_SET_FIELD,
                                               type='e')
    # Starts as "everything exported"; every set removes its own members,
    # leaving the records that no set wants any more.
    no_more_exported_recids = intbitset(all_current_recids)
    write_message("%s recids are currently exported" %
                  (len(all_current_recids)),
                  verbose=2)

    all_affected_recids = intbitset()
    all_should_recids = intbitset()
    recids_for_set = {}
    for set_spec in all_set_specs():
        if not set_spec:
            set_spec = CFG_OAI_REPOSITORY_GLOBAL_SET_SPEC
        should_recids = get_recids_for_set_spec(set_spec)
        recids_for_set[set_spec] = should_recids
        no_more_exported_recids -= should_recids
        all_should_recids |= should_recids
        current_recids = search_unit_in_bibxxx(p=set_spec,
                                               f=CFG_OAI_SET_FIELD,
                                               type='e')
        write_message(
            "%s recids should be in %s. Currently %s are in %s" %
            (len(should_recids), set_spec, len(current_recids), set_spec),
            verbose=2)
        to_add = should_recids - current_recids
        write_message("%s recids should be added to %s" %
                      (len(to_add), set_spec),
                      verbose=2)
        to_remove = current_recids - should_recids
        write_message("%s recids should be removed from %s" %
                      (len(to_remove), set_spec),
                      verbose=2)
        affected_recids = to_add | to_remove
        write_message("%s recids should be hence updated for %s" %
                      (len(affected_recids), set_spec),
                      verbose=2)
        all_affected_recids |= affected_recids

    missing_oaiid = all_should_recids - recids_with_oaiid
    write_message("%s recids are missing an oaiid" % len(missing_oaiid))
    write_message("%s recids should no longer be exported" %
                  len(no_more_exported_recids))

    ## Let's add records with missing OAI ID
    all_affected_recids |= missing_oaiid | no_more_exported_recids
    write_message("%s recids should updated" % (len(all_affected_recids)),
                  verbose=2)

    if not all_affected_recids:
        write_message("Nothing to do!")
        return True

    # Prepare to save results in a tmp file
    oai_out, filename = _oairepository_open_batch_file()

    tot = 0
    try:
        # Iterate over the recids
        for i, recid in enumerate(all_affected_recids):
            task_sleep_now_if_required(can_stop_too=True)
            task_update_progress("Done %s out of %s records." % \
                                 (i, len(all_affected_recids)))

            write_message("Elaborating recid %s" % recid, verbose=3)
            record = get_record(recid)
            if not record:
                write_message("Record %s seems empty. Let's skip it." % recid,
                              verbose=3)
                continue
            new_record = {}

            # Check if an OAI identifier is already in the record or
            # not.
            assign_oai_id_entry = False
            oai_id_entry = record_get_field_value(record,
                                                  tag=CFG_OAI_ID_FIELD[:3],
                                                  ind1=CFG_OAI_ID_FIELD[3],
                                                  ind2=CFG_OAI_ID_FIELD[4],
                                                  code=CFG_OAI_ID_FIELD[5])
            if not oai_id_entry:
                assign_oai_id_entry = True
                oai_id_entry = "oai:%s:%s" % (CFG_OAI_ID_PREFIX, recid)
                write_message("Setting new oai_id %s for record %s" %
                              (oai_id_entry, recid),
                              verbose=3)
            else:
                write_message("Already existing oai_id %s for record %s" %
                              (oai_id_entry, recid),
                              verbose=3)

            # Get the sets to which this record already belongs according
            # to the metadata
            current_oai_sets = set(
                record_get_field_values(record,
                                        tag=CFG_OAI_SET_FIELD[:3],
                                        ind1=CFG_OAI_SET_FIELD[3],
                                        ind2=CFG_OAI_SET_FIELD[4],
                                        code=CFG_OAI_SET_FIELD[5]))
            write_message("Record %s currently belongs to these oai_sets: %s" %
                          (recid, ", ".join(current_oai_sets)),
                          verbose=3)

            current_previous_oai_sets = set(
                record_get_field_values(record,
                                        tag=CFG_OAI_PREVIOUS_SET_FIELD[:3],
                                        ind1=CFG_OAI_PREVIOUS_SET_FIELD[3],
                                        ind2=CFG_OAI_PREVIOUS_SET_FIELD[4],
                                        code=CFG_OAI_PREVIOUS_SET_FIELD[5]))
            write_message(
                "Record %s currently doesn't belong anymore to these oai_sets: %s"
                % (recid, ", ".join(current_previous_oai_sets)),
                verbose=3)

            # Get the sets that should be in this record according to
            # settings
            updated_oai_sets = set(_set
                                   for _set, _recids in iteritems(recids_for_set)
                                   if recid in _recids)
            write_message("Record %s now belongs to these oai_sets: %s" %
                          (recid, ", ".join(updated_oai_sets)),
                          verbose=3)

            # Previous sets = sets already marked as previous plus sets the
            # record is leaving now, minus any set it (re)joins.
            updated_previous_oai_sets = set(
                _set for _set in (current_previous_oai_sets - updated_oai_sets)
                | (current_oai_sets - updated_oai_sets))
            write_message(
                "Record %s now doesn't belong anymore to these oai_sets: %s" %
                (recid, ", ".join(updated_previous_oai_sets)),
                verbose=3)

            # Ok, we have the old sets and the new sets. If they are equal
            # and oai ID does not need to be added, then great, nothing to
            # change . Otherwise apply the new sets.
            if current_oai_sets == updated_oai_sets and not assign_oai_id_entry:
                write_message("Nothing has changed for record %s, let's move on!" %
                              recid,
                              verbose=3)
                continue  # Jump to next recid

            write_message("Something has changed for record %s, let's update it!" %
                          recid,
                          verbose=3)
            subfields = [(CFG_OAI_ID_FIELD[5], oai_id_entry)]
            for oai_set in updated_oai_sets:
                subfields.append((CFG_OAI_SET_FIELD[5], oai_set))
            for oai_set in updated_previous_oai_sets:
                subfields.append((CFG_OAI_PREVIOUS_SET_FIELD[5], oai_set))

            record_add_field(new_record, tag="001", controlfield_value=str(recid))
            record_add_field(new_record,
                             tag=CFG_OAI_ID_FIELD[:3],
                             ind1=CFG_OAI_ID_FIELD[3],
                             ind2=CFG_OAI_ID_FIELD[4],
                             subfields=subfields)
            oai_out.write(record_xml_output(new_record))
            tot += 1
            if tot == CFG_OAI_REPOSITORY_MARCXML_SIZE:
                # The batch is full: finish it, hand it over and start a
                # new one.
                oai_out.write("</collection>")
                oai_out.close()
                write_message("Wrote to file %s" % filename)
                if not no_upload:
                    _oairepository_submit_upload(filename)
                # Prepare to save results in a tmp file
                oai_out, filename = _oairepository_open_batch_file()
                tot = 0
                task_sleep_now_if_required(can_stop_too=True)

        oai_out.write("</collection>")
    finally:
        # Also reached when the loop is interrupted, so the file handle is
        # never left open; closing an already closed file is harmless.
        oai_out.close()
    write_message("Wrote to file %s" % filename)

    if tot > 0:
        if not no_upload:
            task_sleep_now_if_required(can_stop_too=True)
            _oairepository_submit_upload(filename)
    else:
        # The last batch holds no record: nothing to upload.
        os.remove(filename)

    return True