Example #1
0
def check_record(record, texkey_field="035__a", extra_subfields=()):
    """
    Add a tex key to a record, checking that it doesn't have one already.

    :param record: bibcheck record object (amended in place)
    :param texkey_field: 6-char MARC spec: tag (3 chars) + indicators
        (2 chars) + subfield code (1 char) where the texkey is stored
    :param extra_subfields: iterable of (code, value) pairs stored
        alongside the generated texkey
    """
    tag = texkey_field[:3]
    # The remaining three characters are the two indicators and the
    # subfield code.
    ind1, ind2, subfield = texkey_field[3:]

    # If the field's provenance subfield ($$9) names a texkey source and
    # a $$z value is present, the record already has a texkey -- bail out.
    provenances = list(record.iterfield(texkey_field[:5] + "9"))
    if len(provenances) and provenances[0][1] in ("SPIRESTeX", "INSPIRETeX"):
        for _, val in record.iterfield(texkey_field[:5] + "z"):
            if val:
                return  # Record already has a texkey

    # Only generate and add a texkey when the target field is empty.
    if len(list(record.iterfield(texkey_field))) == 0:
        try:
            texkey = TexkeySeq().next_value(bibrecord=record)
        except TexkeyNoAuthorError:
            # Cannot build a texkey without an author/collaboration name.
            record.warn("No first author or collaboration")
            return
        subfields_to_add = [(subfield, texkey)] + map(tuple, extra_subfields)
        record_add_field(record,
                         tag=tag,
                         ind1=ind1,
                         ind2=ind2,
                         subfields=subfields_to_add)
        record.set_amended("Added Tex key '%s' to field %s" %
                           (texkey, texkey_field))
Example #2
0
def merge_record_with_template(rec, template_name, is_hp_record=False):
    """ Extend the record rec with the contents of the template and return it

    Fields missing from ``rec`` are copied wholesale from the template;
    for fields ``rec`` already has, only the subfield codes missing from
    each existing field instance are filled in from the template.

    :param rec: bibrecord structure to extend (modified in place)
    :param template_name: name of the record template to load
    :param is_hp_record: if True (holding-pen record), mark every
        template subfield volatile before merging
    :return: the extended record, or None when the template is not found
    """
    template = get_record_template(template_name)
    if not template:
        return
    template_bibrec = create_record(template)[0]
    # if the record is a holding pen record make all subfields volatile
    if is_hp_record:
        record_make_all_subfields_volatile(template_bibrec)
    for field_tag in template_bibrec:
        if not record_has_field(rec, field_tag):
            # Field absent from rec: copy every template instance
            # (field_instance layout: (subfields, ind1, ind2, ...)).
            for field_instance in template_bibrec[field_tag]:
                record_add_field(rec, field_tag, field_instance[1], field_instance[2], subfields=field_instance[0])
        else:
            # Field present: top up each existing instance with the
            # subfield codes it is missing, taking the first template
            # value for each code.
            for template_field_instance in template_bibrec[field_tag]:
                subfield_codes_template = field_get_subfield_codes(template_field_instance)
                for field_instance in rec[field_tag]:
                    subfield_codes = field_get_subfield_codes(field_instance)
                    for code in subfield_codes_template:
                        if code not in subfield_codes:
                            field_add_subfield(
                                field_instance, code, field_get_subfield_values(template_field_instance, code)[0]
                            )
    record_order_subfields(rec)
    return rec
def bst_openaire_altmetric():
    """
    """
    recids = search_pattern(p="0->Z", f="0247_a")
    a = Altmetric()

    for recid in recids:
        try:
            # Check if we already have an Altmetric id
            sysno_inst = get_fieldvalues(recid, "035__9")
            if ['Altmetric'] in sysno_inst:
                continue

            doi_val = get_fieldvalues(recid, "0247_a")[0]
            json_res = a.doi(doi_val)

            rec = {}
            record_add_field(rec, "001", controlfield_value=str(recid))

            if json_res:
                record_add_field(rec,
                                 '035',
                                 subfields=[('a',
                                             str(json_res['altmetric_id'])),
                                            ('9', 'Altmetric')])
                bibupload(rec, opt_mode='correct')
        except AltmetricHTTPException, e:
            register_exception(prefix='Altmetric error (status code %s): %s' %
                               (e.status_code, str(e)),
                               alert_admin=False)
Example #4
0
def add_epic_pid(rec, recid, checksum):
    """ Adds EPIC PID to the record. If registration fails, can
    also fail the request if CFG_FAIL_ON_MISSING_PID is set to True.

    :param rec: bibrecord structure to add the 024 7_ PID field to
    :param recid: record id used to build the handle location URL
    :param checksum: checksum passed to the EPIC handle registration
    :raises HTTPException: re-raised only when CFG_FAIL_ON_MISSING_PID
        is configured truthy
    """
    CFG_SITE_SECURE_URL = current_app.config.get("CFG_SITE_SECURE_URL")
    location = CFG_SITE_SECURE_URL + '/record/' + str(recid)
    try:
        pid = createHandle(location, checksum)
        record_add_field(rec,
                         '024',
                         ind1='7',
                         subfields=[('2', 'PID'), ('a', pid)])
    except HTTPException as e:
        # If CFG_FAIL_ON_MISSING_PID is not found in invenio-local.conf,
        # default is to assume False
        try:
            from config import CFG_FAIL_ON_MISSING_PID
            fail = bool(CFG_FAIL_ON_MISSING_PID)
        except ImportError:
            fail = False

        current_app.logger.error(
            "Unable to obtain PID from EPIC server {0} {1}: {2}".format(
                e.code, e.name, e))
        if fail:
            # Bare ``raise`` re-raises the active exception with its
            # original traceback (``raise e`` would reset it).
            raise
Example #5
0
        def generate_columns_longer(ds):
            """Build a record with one 910 field per column of *ds*.

            Each column gets subfield n (column number) and, when
            non-empty, t (title) and d (header).
            """
            from invenio.legacy.bibrecord import record_add_field
            rec = {}
            # One (number, header, title) triple per column.
            cols = [[idx, "", ""] for idx in xrange(ds.num_columns)]
            pos = 0
            for header in ds.column_headers:
                # A header spanning several columns fills each of them.
                for _ in xrange(header["colspan"]):
                    cols[pos][1] = header["content"].strip()
                    pos += 1
            pos = 0
            for title in ds.column_titles:
                for _ in xrange(title["colspan"]):
                    cols[pos][2] = title["content"].strip()
                    pos += 1
            for number, header, title in cols:
                subs = [("n", str(number))]
                if title != "":
                    subs.append(("t", title))
                if header != "":
                    subs.append(("d", header))
                record_add_field(rec, "910", subfields = subs)
            return rec
Example #6
0
def check_records(records, doi_field="0247_a", extra_subfields=(("2", "DOI"), ("9", "bibcheck"))):
    """
    Find the DOI for the records using crossref and add it to the specified
    field.

    This plugin won't ask for the DOI if it's already set.

    :param records: iterable of bibcheck record objects (amended in place)
    :param doi_field: 6-char MARC spec for the DOI target: tag (3 chars)
        + indicators (2 chars) + subfield code (1 char)
    :param extra_subfields: (code, value) pairs added next to the DOI
    """
    records_to_check = {}
    for record in records:
        has_doi = False
        # 0247_2 holds the identifier type; "doi" (any case) means the
        # record already has one.
        for position, value in record.iterfield("0247_2"):
            if value.lower() == "doi":
                has_doi = True
                break
        if not has_doi:
            records_to_check[record.record_id] = record

    dois = get_doi_for_records(records_to_check.values())
    for record_id, doi in dois.iteritems():
        record = records_to_check[record_id]
        # Avoid creating duplicates: warn and skip when another record
        # already carries this DOI.
        dup_doi_recid = find_record_from_doi(doi)
        if dup_doi_recid:
            record.warn("DOI %s to be added to record %s already exists in record/s %s" % (doi, record_id, dup_doi_recid))
            continue
        # doi_field[5] is the subfield code, [:3] the tag, [3]/[4] the
        # two indicators.
        subfields = [(doi_field[5], doi.encode("utf-8"))] + map(tuple, extra_subfields)
        record_add_field(record, tag=doi_field[:3], ind1=doi_field[3],
                ind2=doi_field[4], subfields=subfields)
        record.set_amended("Added DOI in field %s" % doi_field)
Example #7
0
    def generate_final_patch(self, patch_dict, recid):
        """
        Generates patch by merging modified patch and added patch

        Returns the final merged patch containing modified and added fields

        :param patch_dict: dict possibly containing 'MOD', 'ADD' and
            'DEL' entries, each mapping tag -> list of field data tuples
        :param recid: record id stored in the patch's 001 controlfield
        """
        def _add_to_record(record, patch):
            # Copy every field instance of every tag in *patch* into
            # *record* (data_tuple layout: (subfields, ind1, ind2, ...)).
            for tag in patch:
                for data_tuple in patch[tag]:
                    record_add_field(record,
                                     tag,
                                     data_tuple[1],
                                     data_tuple[2],
                                     '',
                                     subfields=data_tuple[0])
            return record

        final_patch = {}
        #tag_list = []

        # merge processed and added fields into one patch
        if 'MOD' in patch_dict:
            # tag_list = tag_list + patch_dict['MOD'].items()
            final_patch = _add_to_record(final_patch, patch_dict['MOD'])
        if 'ADD' in patch_dict:
            #tag_list = tag_list + patch_dict['ADD'].items()
            final_patch = _add_to_record(final_patch, patch_dict['ADD'])
        if 'DEL' in patch_dict:
            #tag_list = tag_list + patch_dict['DEL'].items()
            final_patch = _add_to_record(final_patch, patch_dict['DEL'])
        record_add_field(final_patch, '001', ' ', ' ', recid)
        return final_patch
def add_file_info(rec, form, email, sub_id, recid):
    """
    Adds the path to the file and access rights to the record.

    :param rec: bibrecord structure (FFT and 856 fields added in place)
    :param form: submitted form; presence of 'open_access' controls the
        firerole access rule
    :param email: submitter email, used in the restricted firerole rule
    :param sub_id: submission id whose deposited files are attached
    :param recid: record id used to build the public file URLs
    """
    # Firerole rule: world-readable when open access, otherwise only the
    # submitter's email may read.
    if 'open_access' in form:
        fft_status = 'firerole: allow any\n'
    else:
        fft_status = 'firerole: allow email "{0}"\ndeny all'.format(
            email)
    for metadata in get_depositing_files_metadata(sub_id):
        path = metadata['file']
        record_add_field(rec, 'FFT',
                         subfields=[('a', path),
                         ('n', metadata['name']), # name of the file
                         #('t', 'Type'), # TODO
                         # unfortunately s is used for a timestamp, not file size
                         #('s', 'timestamp'), # s is a timestamp
                         #('w', str(metadata['size'])), # size should be derived automatically,
                         #                              # but storing it into 'document_moreinfo' field
                         ('r', fft_status)])

        #seems to be impossible to add file size data, thought this would work

        # Public URL of the file, stored in 856 4_ together with the
        # on-disk size (s) and display name (y).
        CFG_SITE_SECURE_URL = current_app.config.get("CFG_SITE_SECURE_URL")
        url = u"{0}/record/{1}/files/{2}".format(CFG_SITE_SECURE_URL, recid, metadata['name'])
        record_add_field(rec, '856', ind1='4',
                         subfields=[('u', url),
                                    ('s', str(os.path.getsize(path))),
                                    ('y', metadata['name'])])
Example #9
0
def merge_record_with_template(rec, template_name, is_hp_record=False):
    """ Extend the record rec with the contents of the template and return it

    Fields missing from ``rec`` are copied wholesale from the template;
    for fields ``rec`` already has, only the subfield codes missing from
    each existing field instance are filled in from the template.

    :param rec: bibrecord structure to extend (modified in place)
    :param template_name: name of the record template to load
    :param is_hp_record: if True (holding-pen record), mark every
        template subfield volatile before merging
    :return: the extended record, or None when the template is not found
    """
    template = get_record_template(template_name)
    if not template:
        return
    template_bibrec = create_record(template)[0]
    # if the record is a holding pen record make all subfields volatile
    if is_hp_record:
        record_make_all_subfields_volatile(template_bibrec)
    for field_tag in template_bibrec:
        if not record_has_field(rec, field_tag):
            # Field absent from rec: copy every template instance
            # (field_instance layout: (subfields, ind1, ind2, ...)).
            for field_instance in template_bibrec[field_tag]:
                record_add_field(rec,
                                 field_tag,
                                 field_instance[1],
                                 field_instance[2],
                                 subfields=field_instance[0])
        else:
            # Field present: top up each existing instance with the
            # subfield codes it is missing, taking the first template
            # value for each code.
            for template_field_instance in template_bibrec[field_tag]:
                subfield_codes_template = field_get_subfield_codes(
                    template_field_instance)
                for field_instance in rec[field_tag]:
                    subfield_codes = field_get_subfield_codes(field_instance)
                    for code in subfield_codes_template:
                        if code not in subfield_codes:
                            field_add_subfield(
                                field_instance, code,
                                field_get_subfield_values(
                                    template_field_instance, code)[0])
    record_order_subfields(rec)
    return rec
def bst_openaire_altmetric():
    """
    """
    recids = search_pattern(p="0->Z", f="0247_a")
    a = Altmetric()

    for recid in recids:
        try:
            # Check if we already have an Altmetric id
            sysno_inst = get_fieldvalues(recid, "035__9")
            if ['Altmetric'] in sysno_inst:
                continue

            doi_val = get_fieldvalues(recid, "0247_a")[0]
            json_res = a.doi(doi_val)

            rec = {}
            record_add_field(rec, "001", controlfield_value=str(recid))

            if json_res:
                record_add_field(rec, '035', subfields=[
                    ('a', str(json_res['altmetric_id'])), ('9', 'Altmetric')]
                )
                bibupload(rec, opt_mode='correct')
        except AltmetricHTTPException, e:
            register_exception(prefix='Altmetric error (status code %s): %s' %
                              (e.status_code, str(e)), alert_admin=False)
Example #11
0
    def generate_final_patch(self, patch_dict, recid):
        """
        Generates patch by merging modified patch and added patch

        Returns the final merged patch containing modified and added fields

        :param patch_dict: dict possibly containing 'MOD', 'ADD' and
            'DEL' entries, each mapping tag -> list of field data tuples
        :param recid: record id stored in the patch's 001 controlfield
        """
        def _add_to_record(record, patch):
            # Copy every field instance of every tag in *patch* into
            # *record* (data_tuple layout: (subfields, ind1, ind2, ...)).
            for tag in patch:
                for data_tuple in patch[tag]:
                    record_add_field(record, tag, data_tuple[1], data_tuple[2], '', subfields=data_tuple[0])
            return record

        final_patch = {}
        #tag_list = []

        # merge processed and added fields into one patch
        if 'MOD' in patch_dict:
            # tag_list = tag_list + patch_dict['MOD'].items()
            final_patch = _add_to_record(final_patch, patch_dict['MOD'])
        if 'ADD' in patch_dict:
            #tag_list = tag_list + patch_dict['ADD'].items()
            final_patch = _add_to_record(final_patch, patch_dict['ADD'])
        if 'DEL' in patch_dict:
            #tag_list = tag_list + patch_dict['DEL'].items()
            final_patch = _add_to_record(final_patch, patch_dict['DEL'])
        record_add_field(final_patch, '001', ' ', ' ', recid)
        return final_patch
Example #12
0
def add_doi_to_record(recid, doi):
    """Attach *doi* to record *recid* as an 024 7_ field (2=DOI) and
    upload the change in correct ('-c') mode.

    :return: the bibrecord structure that was uploaded
    """
    rec = {}
    record_add_field(rec, '001', controlfield_value=str(recid))
    record_add_field(rec, tag='024', ind1='7',
                     subfields=[('a', doi), ('2', 'DOI')])

    from invenio.legacy.bibupload.utils import bibupload_record
    bibupload_record(record=rec, file_prefix='doi', mode='-c',
                     opts=[], alias="doi")
    return rec
Example #13
0
 def _add_to_record(record, patch):
     """Copy every field instance of every tag in *patch* into *record*.

     data_tuple layout: (subfields, ind1, ind2, ...); the positional
     empty string is the controlfield value (unused for datafields).
     Returns the updated record.
     """
     for tag in patch:
         for data_tuple in patch[tag]:
             record_add_field(record,
                              tag,
                              data_tuple[1],
                              data_tuple[2],
                              '',
                              subfields=data_tuple[0])
     return record
def rule_add_recid(header, record):
    """Ensure *record* carries a 001 recid.

    If the record already has one, report it and return unchanged.
    Otherwise take the recid from the bibmatch results in *header*,
    but only when exactly one match was reported (unambiguous).
    """
    # Idiomatic membership test on the dict itself (no .keys() copy).
    if '001' in record:
        recid = str(record['001'][0][3])
        _print("Record already has recid %s" % (recid,))
        return record
    recids = REGEX_BIBMATCH_RESULTS.findall(header)
    # Only add a recid on a single, unambiguous bibmatch result.
    if len(recids) == 1:
        record_add_field(record, '001', controlfield_value=recids[0])
    return record
def rule_create_fft(header, record):
    """For each 856 4_ field with a ``u`` (URL) subfield, add an FFT
    field pointing at that URL as an INSPIRE-PUBLIC fulltext."""
    for inst in record_get_field_instances(record, '856', ind1='4'):
        link = None
        # Take the first 'u' subfield of this instance, if any.
        for sub_code, sub_value in field_get_subfield_instances(inst):
            if sub_code == 'u':
                link = sub_value
                break
        if link:
            record_add_field(
                record, 'FFT',
                subfields=[('a', link),
                           ('t', 'INSPIRE-PUBLIC'),
                           ('d', 'Fulltext')])
    return record
Example #16
0
    def _modify_record(self,
                       recid,
                       test_func,
                       replace_func,
                       include_func,
                       append_colls=[],
                       replace_colls=[]):
        """Generate record a MARCXML file.

        @param test_func: Function to test if a collection id should be changed
        @param replace_func: Function to replace the collection id.
        @param include_func: Function to test if collection should be included
        @param append_colls: collection ids appended as new 980__a fields
        @param replace_colls: when non-empty, replaces ALL existing 980
            fields instead of transforming them
        @return: a minimal correction record ({001, new 980 fields}), or
            False when the record has no 980 field or nothing changed

        NOTE(review): append_colls/replace_colls use mutable default
        arguments; harmless here since they are never mutated.
        """
        from invenio.legacy.search_engine import get_record
        rec = get_record(recid)
        newcolls = []
        dirty = False

        try:
            colls = rec['980']
            if replace_colls:
                # Full replacement: ignore existing 980s entirely.
                for c in replace_colls:
                    newcolls.append([('a', c)])
                    dirty = True
            else:
                for c in colls:
                    try:
                        # We are only interested in subfield 'a'
                        code, val = c[0][0]
                        if test_func(code, val):
                            # Rewrite the collection id in place.
                            c[0][0] = replace_func(code, val)
                            dirty = True
                        if include_func(code, val):
                            newcolls.append(c[0])
                        else:
                            # Dropping a field is also a modification.
                            dirty = True
                    except IndexError:
                        pass
                for c in append_colls:
                    newcolls.append([('a', c)])
                    dirty = True
        except KeyError:
            return False

        if not dirty:
            return False

        # Build a fresh correction record with only 001 + the new 980s.
        rec = {}
        record_add_field(rec, '001', controlfield_value=str(recid))

        for subfields in newcolls:
            record_add_field(rec, '980', subfields=subfields)

        return rec
Example #17
0
def add_domain_fields(rec, form, meta):
    """
    Adds a domain specific fields. These are just added as name value pairs
    to field 690.
    """
    for fieldset in meta.fieldsets:
        # TODO: this is brittle; get from somewhere
        if fieldset.name == 'Generic':
            continue
        for key in fieldset.optional_fields + fieldset.basic_fields:
            if key not in form or not form[key]:
                continue
            for value in form.getlist(key):
                # Skip empty or whitespace-only values.
                if value and not value.isspace():
                    record_add_field(rec, '690',
                                     subfields=[('a', key), ('b', value)])
Example #18
0
    def _modify_record(self, recid, test_func, replace_func, include_func,
                       append_colls=[], replace_colls=[]):
        """
        Generate record a MARCXML file

        @param test_func: Function to test if a collection id should be changed
        @param replace_func: Function to replace the collection id.
        @param include_func: Function to test if collection should be included
        @param append_colls: collection ids appended as new 980__a fields
        @param replace_colls: when non-empty, replaces ALL existing 980
            fields instead of transforming them
        @return: a minimal correction record ({001, new 980 fields}), or
            False when the record has no 980 field or nothing changed

        NOTE(review): append_colls/replace_colls use mutable default
        arguments; harmless here since they are never mutated.
        """
        from invenio.legacy.search_engine import get_record
        rec = get_record(recid)
        newcolls = []
        dirty = False

        try:
            colls = rec['980']
            if replace_colls:
                # Full replacement: ignore existing 980s entirely.
                for c in replace_colls:
                    newcolls.append([('a', c)])
                    dirty = True
            else:
                for c in colls:
                    try:
                        # We are only interested in subfield 'a'
                        code, val = c[0][0]
                        if test_func(code, val):
                            # Rewrite the collection id in place.
                            c[0][0] = replace_func(code, val)
                            dirty = True
                        if include_func(code, val):
                            newcolls.append(c[0])
                        else:
                            # Dropping a field is also a modification.
                            dirty = True
                    except IndexError:
                        pass
                for c in append_colls:
                    newcolls.append([('a', c)])
                    dirty = True
        except KeyError:
            return False

        if not dirty:
            return False

        # Build a fresh correction record with only 001 + the new 980s.
        rec = {}
        record_add_field(rec, '001', controlfield_value=str(recid))

        for subfields in newcolls:
            record_add_field(rec, '980', subfields=subfields)

        return rec
Example #19
0
def openaire_altmetric_update(recids, upload=True):
    """
    Retrieve Altmetric information for a record.

    :param recids: iterable of record ids to look up
    :param upload: when True, bibupload the generated correction records
    :return: list of correction records ({001, 035 with Altmetric id})
    """
    logger.debug("Checking Altmetric for recids %s" % recids)
    a = Altmetric()

    records = []
    for recid in recids:
        logger.debug("Checking Altmetric for recid %s" % recid)
        try:
            # Check if we already have an Altmetric id.
            # get_fieldvalues() returns a flat list of subfield value
            # strings, so test for the bare string: the old check
            # ``['Altmetric'] in sysno_inst`` compared a *list* against
            # string elements and could never match.
            sysno_inst = get_fieldvalues(recid, "035__9")
            if 'Altmetric' in sysno_inst:
                continue

            doi_val = get_fieldvalues(recid, "0247_a")[0]
            logger.debug("Found DOI %s" % doi_val)
            json_res = a.doi(doi_val)
            logger.debug("Altmetric response: %s" % json_res)

            rec = {}
            record_add_field(rec, "001", controlfield_value=str(recid))

            if json_res:
                record_add_field(rec,
                                 '035',
                                 subfields=[('a',
                                             str(json_res['altmetric_id'])),
                                            ('9', 'Altmetric')])
                records.append(rec)
        except AltmetricHTTPException as e:
            logger.warning(
                'Altmetric error for recid %s with DOI %s (status code %s): %s'
                % (recid, doi_val, e.status_code, str(e)))
            register_exception(prefix='Altmetric error (status code %s): %s' %
                               (e.status_code, str(e)),
                               alert_admin=False)
        except IndexError:
            # Record has no 0247_a DOI value; skip it.
            logger.debug("No DOI found")

    if upload and records:
        # Single record vs. collection upload use different kwargs.
        if len(records) == 1:
            bibupload(record=records[0], file_prefix="altmetric")
        else:
            bibupload(collection=records, file_prefix="altmetric")

    return records
Example #20
0
def modify_record_timestamp(revision_xml, last_revision_ts):
    """ Set tag 005 of a record to the given revision timestamp.
    @param revision_xml: marcxml representation of the record to modify
    @type revision_xml: string
    @param last_revision_ts: timestamp to store in the 005 tag
    @type last_revision_ts: string

    @return: marcxml with the 005 tag updated
    """
    record = create_record(revision_xml)[0]
    if "005" not in record:
        # No 005 yet: create the controlfield.
        record_add_field(record, "005", controlfield_value=last_revision_ts)
    else:
        # Overwrite the first (and only) existing 005 instance.
        record_modify_controlfield(record, "005", last_revision_ts,
                                   field_position_local=0)
    return record_xml_output(record)
Example #21
0
def _prepare_marcxml(recid_a, rn_a, recid_b, rn_b, what_is_a_for_b,
                     what_is_b_for_a, display_in_a=True, display_in_b=True):
    """Build the MARCXML linking two records with reciprocal relations.

    Each record gets a CFG_OTHER_RELATIONSHIP_ENTRY field describing the
    other record: i = relation text, r = report number, w = recid.

    :param display_in_a: show the relation on record A (ind1 "0") or
        hide it (ind1 "1"); likewise display_in_b for record B
    :return: a ``<collection>`` string with both records' MARCXML
    """
    record_a = {}
    record_b = {}
    record_add_field(record_a, "001", controlfield_value=str(recid_a))
    # Conditional expression replaces the fragile ``cond and "0" or "1"``
    # idiom (equivalent here since "0" is truthy, but clearer).
    record_add_field(record_a, CFG_OTHER_RELATIONSHIP_ENTRY,
                     ind1="0" if display_in_a else "1",
                     subfields=[('i', what_is_b_for_a), ('r', rn_b),
                                ('w', str(recid_b))])
    record_add_field(record_b, "001", controlfield_value=str(recid_b))
    record_add_field(record_b, CFG_OTHER_RELATIONSHIP_ENTRY,
                     ind1="0" if display_in_b else "1",
                     subfields=[('i', what_is_a_for_b), ('r', rn_a),
                                ('w', str(recid_a))])
    return "<collection>\n%s\n%s</collection>" % (
        record_xml_output(record_a), record_xml_output(record_b))
Example #22
0
def add_domain_fields(rec, form, meta):
    """
    Adds a domain specific fields. These are just added as name value pairs
    to field 690.

    :param rec: bibrecord structure (690 fields added in place)
    :param form: submitted form; multi-valued fields read via getlist()
    :param meta: domain metadata whose fieldsets list the field names
    """
    for fs in meta.fieldsets:
        if fs.name != 'Generic':  # TODO: this is brittle; get from somewhere
            for k in (fs.optional_fields + fs.basic_fields):
                if k in form and form[k]:
                    fields = form.getlist(k)
                    for f in fields:
                        # Skip empty or whitespace-only values.
                        if f and not f.isspace():
                            record_add_field(rec, '690',
                                     subfields=[('a', k), ('b', f)])
Example #23
0
def main():
    """Interactively fix the 690 language_code value of each record in
    RECORDS.

    For every recid, rebuild the 690 fields, replacing the ``b`` value
    of any (a='language_code') field with the value from VALUES, then
    show the old/new fields and bibupload (delete + replace) after a
    'y' confirmation.
    """
    from invenio.legacy.search_engine import get_record
    from invenio.legacy.bibupload.engine import (
        bibupload,
    )
    from invenio.legacy.bibrecord import (
        record_add_field,
        record_delete_field,
    )

    # Loop through list of records
    for r in RECORDS:
        # Two copies: old_rec is kept untouched for the before/after diff.
        old_rec = get_record(r)
        rec = get_record(r)

        if not rec:
            break

        print('Processing record: {0}'.format(r))
        # pprint(rec)

        # Each 690 instance's subfield list; f[0]/f[1] are expected to be
        # the ('a', name) and ('b', value) subfield pairs.
        old_690 = [f[0] for f in rec.get('690', [])]
        new_690 = []
        for f in old_690:
            a = f[0]
            b = f[1]
            # Replace the b-value only for (a='language_code') fields
            # with a configured replacement in VALUES; keep others as-is.
            t = [a, (b[0], VALUES.get(r))] if (a[0] == 'a' and
                                               a[1] == 'language_code' and
                                               b[0] == 'b' and
                                               VALUES.get(r)) \
                else f
            new_690.append(t)

        if not new_690 == old_690:
            # Rewrite all 690 fields with the updated list.
            record_delete_field(rec, '690')
            for f in new_690:
                record_add_field(rec, '690', subfields=f)

            # pprint(rec)
            print('\nOld 690:')
            pprint(old_rec.get('690'))
            print('\nNew 690:')
            pprint(rec.get('690'))

            if raw_input('Bibupload (y/n)? ') == 'y':
                bibupload(rec, 'delete')
                sleep(5)
                bibupload(rec, 'replace')
Example #24
0
def modify_record_timestamp(revision_xml, last_revision_ts):
    """ Modify tag 005 to add the revision passed as parameter.
    @param revision_xml: marcxml representation of the record to modify
    @type revision_xml: string
    @param last_revision_ts: timestamp to add to 005 tag
    @type last_revision_ts: string

    @return: marcxml with 005 tag modified
    """
    recstruct = create_record(revision_xml)[0]
    if "005" in recstruct:
        # Overwrite the first existing 005 instance in place.
        record_modify_controlfield(recstruct, "005", last_revision_ts,
                                   field_position_local=0)
    else:
        # No 005 yet: create the controlfield.
        record_add_field(recstruct, '005', controlfield_value=last_revision_ts)
    return record_xml_output(recstruct)
Example #25
0
def create_xml(recid, texkey):
    """ Create the marcxml snippet with the new texkey

    @param recid: recid of the record to be updated
    @type: int
    @param texkey: texkey that has been generated
    @type: str

    @return: marcxml with the fields to be record_add_field
    @rtype: str
    """
    rec = {}
    record_add_field(rec, '001', controlfield_value=str(recid))
    # Store the texkey in 035 with INSPIRETeX provenance ($$9).
    record_add_field(rec, tag='035',
                     subfields=[('a', texkey), ('9', 'INSPIRETeX')])
    return print_rec(rec)
Example #26
0
def create_marc(form, sub_id, email):
    """
    Generates MARC data used by Invenio from the filled out form, then
    submits it to the Invenio system.

    :param form: submitted deposit form
    :param sub_id: submission id whose uploaded files are attached
    :param email: submitter email (access rules, ownership)
    :return: (recid, marcxml string) for the newly reserved record
    """
    rec = {}
    # Reserve a fresh recid and make it the 001 controlfield.
    recid = create_recid()
    record_add_field(rec, '001', controlfield_value=str(recid))
    add_basic_fields(rec, form, email)
    add_domain_fields(rec, form)
    add_file_info(rec, form, email, sub_id, recid)
    # Checksum over the record content, then register the EPIC PID.
    checksum = create_checksum(rec, sub_id)
    add_epic_pid(rec, recid, checksum)
    marc = record_xml_output(rec)

    return recid, marc
Example #27
0
def create_xml(recid, texkey):
    """Create the marcxml snippet with the new texkey.

    :param recid: recid of the record to be updated
    :type: int
    :param texkey: texkey that has been generated
    :type: str

    :return: marcxml with the fields to be record_add_field
    :rtype: str
    """
    record = {}
    record_add_field(record, '001', controlfield_value=str(recid))
    # Store the texkey in 035 with INSPIRETeX provenance ($$9).
    subfields_toadd = [('a', texkey), ('9', 'INSPIRETeX')]
    record_add_field(record, tag='035', subfields=subfields_toadd)
    return print_rec(record)
Example #28
0
def openaire_altmetric_update(recids, upload=True):
    """
    Retrieve Altmetric information for a record.

    :param recids: iterable of record ids to look up
    :param upload: when True, bibupload the generated correction records
    :return: list of correction records ({001, 035 with Altmetric id})
    """
    logger.debug("Checking Altmetric for recids %s" % recids)
    a = Altmetric()

    records = []
    for recid in recids:
        logger.debug("Checking Altmetric for recid %s" % recid)
        try:
            # Check if we already have an Altmetric id.
            # get_fieldvalues() returns a flat list of subfield value
            # strings, so test for the bare string: the old check
            # ``["Altmetric"] in sysno_inst`` compared a *list* against
            # string elements and could never match.
            sysno_inst = get_fieldvalues(recid, "035__9")
            if "Altmetric" in sysno_inst:
                continue

            doi_val = get_fieldvalues(recid, "0247_a")[0]
            logger.debug("Found DOI %s" % doi_val)
            json_res = a.doi(doi_val)
            logger.debug("Altmetric response: %s" % json_res)

            rec = {}
            record_add_field(rec, "001", controlfield_value=str(recid))

            if json_res:
                record_add_field(rec, "035", subfields=[("a", str(json_res["altmetric_id"])), ("9", "Altmetric")])
                records.append(rec)
        except AltmetricHTTPException as e:
            logger.warning(
                "Altmetric error for recid %s with DOI %s (status code %s): %s"
                % (recid, doi_val, e.status_code, str(e))
            )
            register_exception(
                prefix="Altmetric error (status code %s): %s" % (e.status_code, str(e)), alert_admin=False
            )
        except IndexError:
            # Record has no 0247_a DOI value; skip it.
            logger.debug("No DOI found")

    if upload and records:
        # Single record vs. collection upload use different kwargs.
        if len(records) == 1:
            bibupload(record=records[0], file_prefix="altmetric")
        else:
            bibupload(collection=records, file_prefix="altmetric")

    return records
Example #29
0
def add_file_info(rec, form, email, sub_id, recid):
    """
    Adds the path to the file and access rights to the record.

    :param rec: bibrecord structure (FFT and 856 fields added in place)
    :param form: submitted form; presence of 'open_access' controls the
        firerole access rule
    :param email: submitter email, used in the restricted firerole rule
    :param sub_id: submission id naming the upload folder to scan
    :param recid: record id used to build the public file URLs
    """
    CFG_B2SHARE_UPLOAD_FOLDER = current_app.config.get("CFG_B2SHARE_UPLOAD_FOLDER")
    upload_dir = os.path.join(CFG_B2SHARE_UPLOAD_FOLDER, sub_id)
    files = os.listdir(upload_dir)
    # Firerole rule: world-readable when open access, otherwise only the
    # submitter's email may read.
    if 'open_access' in form:
        fft_status = 'firerole: allow any\n'
    else:
        fft_status = 'firerole: allow email "{0}"\ndeny all'.format(
            email)
    for f in files:
        path = os.path.join(upload_dir, f)
        if f.startswith('metadata_'):
            # we do not want to do load file metadata into Invenio as files, will extract into MARC fields
            continue
        # load corresponding metadata file
        metadata = {}
        metadata_filename = os.path.join(upload_dir, 'metadata_' + f)
        if os.path.isfile(metadata_filename):
            # expecting to load a dict with the following structure: dict(name=name, file=file_path, size=size)
            # NOTE(review): pickle.load on a file is unsafe for untrusted
            # input; assumed safe here because the metadata file is
            # written by this application -- confirm.
            metadata = pickle.load(open(metadata_filename, 'rb'))
        else:
            current_app.logger.error('Submitted file \'%s\' is missing metadata file, using default' % f)
            metadata = dict(name=f, file=path, size=str(os.path.getsize(path)))

        record_add_field(rec, 'FFT',
                         subfields=[('a', path),
                         ('n', metadata['name']), # name of the file
                         #('t', 'Type'), # TODO
                         # unfortunately s is used for a timestamp, not file size
                         #('s', 'timestamp'), # s is a timestamp
                         #('w', str(metadata['size'])), # size should be derived automatically,
                         #                              # but storing it into 'document_moreinfo' field
                         ('r', fft_status)])

        #seems to be impossible to add file size data, thought this would work

        # Public URL of the file, stored in 856 4_ together with the
        # on-disk size (s) and display name (y).
        CFG_SITE_SECURE_URL = current_app.config.get("CFG_SITE_SECURE_URL")
        url = "{0}/record/{1}/files/{2}".format(CFG_SITE_SECURE_URL, recid, f)
        record_add_field(rec, '856', ind1='4',
                         subfields=[('u', url),
                                    ('s', str(os.path.getsize(path))),
                                    ('y',metadata['name'])])
Example #30
0
    def process_record(self, record):
        """@see: BaseFieldCommand.process_record

        Adds the command's field (tag/ind1/ind2) to *record* and applies
        the subfield commands to the newly created field instance.
        """
        # If the tag is empty or unset, we don't make any changes.
        # (``is None`` replaces the non-idiomatic ``== None`` identity
        # comparison.)
        if self._tag == "" or self._tag is None:
            return

        field_number = bibrecord.record_add_field(record, self._tag,
                                                  self._ind1, self._ind2)
        self._apply_subfield_commands_to_field(record, field_number)
Example #31
0
def openaire_altmetric_update(recids, upload=True):
    """
    Retrieve Altmetric information for a set of records.

    For each recid without a stored Altmetric id (035 $9 = 'Altmetric'),
    look up the record's DOI (0247 $a) in the Altmetric API and build a
    correction record storing the Altmetric id in 035.

    :param recids: iterable of record ids to check
    :param upload: kept for interface compatibility (upload handling is
        outside this chunk)
    """
    logger.debug("Checking Altmetric for recids %s" % recids)
    a = Altmetric()

    records = []
    for recid in recids:
        logger.debug("Checking Altmetric for recid %s" % recid)
        try:
            # Check if we already have an Altmetric id.
            # BUG FIX: the original tested "['Altmetric'] in sysno_inst",
            # i.e. whether the *list* ['Altmetric'] is an element of a list
            # of strings -- always False, so records were re-queried on
            # every run. Compare the string itself instead.
            sysno_inst = get_fieldvalues(recid, "035__9")
            if 'Altmetric' in sysno_inst:
                continue

            # Raises IndexError when the record has no DOI (handled below).
            doi_val = get_fieldvalues(recid, "0247_a")[0]
            logger.debug("Found DOI %s" % doi_val)
            json_res = a.doi(doi_val)
            logger.debug("Altmetric response: %s" % json_res)

            rec = {}
            record_add_field(rec, "001", controlfield_value=str(recid))

            if json_res:
                record_add_field(rec, '035', subfields=[
                    ('a', str(json_res['altmetric_id'])),
                    ('9', 'Altmetric')
                ])
                records.append(rec)
        except AltmetricHTTPException as e:
            # "except X as e" works on Python 2.6+ and 3.x, unlike the
            # legacy "except X, e" form.
            logger.warning(
                'Altmetric error for recid %s with DOI %s (status code %s): %s'
                % (recid, doi_val, e.status_code, str(e))
            )
            register_exception(
                prefix='Altmetric error (status code %s): %s' % (
                    e.status_code, str(e)),
                alert_admin=False
            )
        except IndexError:
            logger.debug("No DOI found")
Example #32
0
def create_marc(form, sub_id, email, meta):
    """
    Generates MARC data used by Invenio from the filled out form, then
    submits it to the Invenio system.

    Returns a (recid, marcxml) tuple.
    """
    record = {}
    new_recid = create_recid()

    # Control field 001 carries the record identifier.
    record_add_field(record, '001', controlfield_value=str(new_recid))
    add_basic_fields(record, form, meta)
    # Submitter's e-mail goes into 856 $f.
    record_add_field(record, '856', ind1='0', subfields=[('f', email)])

    add_domain_fields(record, form, meta)
    add_file_info(record, form, email, sub_id, new_recid)
    # The checksum covers the deposited files and must exist before the
    # EPIC PID is registered, since the PID registration uses it.
    file_checksum = create_checksum(record, sub_id)
    add_epic_pid(record, new_recid, file_checksum)

    return new_recid, record_xml_output(record)
Example #33
0
def create_checksum(rec, sub_id, buffersize=64 * 1024):
    """
    Creates a checksum of all the files in the record, and adds it
    to the MARC.
    Returns: checksum as a hex string
    """
    digest = hashlib.sha256()
    # Sort the paths so the combined digest is independent of the order
    # in which the metadata entries come back.
    paths = sorted(md['file'] for md in get_depositing_files_metadata(sub_id))
    for path in paths:
        with open(path, 'rb', buffering=0) as stream:
            # Read in fixed-size chunks; iter() stops on the empty bytes
            # sentinel at EOF.
            for chunk in iter(lambda: stream.read(buffersize), b''):
                digest.update(chunk)
    checksum = digest.hexdigest()
    record_add_field(rec, '024', ind1='7',
                     subfields=[('2', 'checksum'), ('a', checksum)])
    return checksum
Example #34
0
def add_file_info(rec, form, email, sub_id, recid):
    """
    Adds the path to each deposited file and its access rights (firerole
    rules) to the record, plus an 856 link to the file.
    """
    # Default rule: world-readable for open access, otherwise restricted to
    # the submitter's e-mail address.
    if 'open_access' in form:
        fft_status = 'firerole: allow any\n'
    else:
        fft_status = 'firerole: allow email "{0}"\ndeny all'.format(email)

    # An embargo date, when present, replaces either rule above.
    if 'embargo_till' in form:
        embargo = parser.parse(form['embargo_till'])
        fft_status = ('firerole: deny until "%s"\nallow any\n'
                      % datetime.strftime(embargo, '%Y-%m-%d'))

    for file_meta in get_depositing_files_metadata(sub_id):
        filepath = file_meta['file']
        fft_subfields = [
            ('a', filepath),
            ('n', file_meta['name']),  # name of the file
            #('t', 'Type'), # TODO
            # unfortunately s is used for a timestamp, not file size
            #('s', 'timestamp'), # s is a timestamp
            #('w', str(metadata['size'])), # size should be derived automatically,
            #                              # but storing it into 'document_moreinfo' field
            ('r', fft_status),
        ]
        record_add_field(rec, 'FFT', subfields=fft_subfields)

        #seems to be impossible to add file size data, thought this would work

        CFG_SITE_SECURE_URL = current_app.config.get("CFG_SITE_SECURE_URL")
        url = u"{0}/record/{1}/files/{2}".format(CFG_SITE_SECURE_URL, recid,
                                                 file_meta['name'])
        record_add_field(rec, '856', ind1='4',
                         subfields=[('u', url),
                                    ('s', str(os.path.getsize(filepath))),
                                    ('y', file_meta['name'])])
Example #35
0
def check_record(record, texkey_field="035__a", extra_subfields=()):
    """
    Add a tex key to a record, checking that it doesn't have one already.

    :param record: bibcheck record object (supports iterfield/warn/set_amended)
    :param texkey_field: 6-character MARC spec: tag (3) + ind1 + ind2 + subfield
    :param extra_subfields: extra (code, value) pairs added alongside the key
    """
    tag = texkey_field[:3]
    # Split the remaining three characters of the MARC spec.
    ind1, ind2, subfield = texkey_field[3:]

    # A SPIRES/INSPIRE provenance ($9) plus a non-empty $z means the record
    # already carries a texkey; nothing to do.
    provenances = list(record.iterfield(texkey_field[:5] + "9"))
    if len(provenances) and provenances[0][1] in ("SPIRESTeX", "INSPIRETeX"):
        for _, val in record.iterfield(texkey_field[:5] + "z"):
            if val:
                return  # Record already has a texkey

    if len(list(record.iterfield(texkey_field))) == 0:
        try:
            texkey = TexkeySeq().next_value(bibrecord=record)
        except TexkeyNoAuthorError:
            record.warn("No first author or collaboration")
            return
        # BUG FIX: wrap map() in list() -- under Python 3 map() returns an
        # iterator and "list + iterator" raises TypeError; list(map(...))
        # behaves identically on Python 2.
        subfields_to_add = [(subfield, texkey)] + list(map(tuple,
                                                           extra_subfields))
        record_add_field(record,
                         tag=tag,
                         ind1=ind1,
                         ind2=ind2,
                         subfields=subfields_to_add)
        record.set_amended("Added Tex key '%s' to field %s" %
                           (texkey, texkey_field))
Example #36
0
def add_domain_fields(rec, form, meta=None):
    """
    Adds domain specific fields. These are just added as name value pairs
    to field 690.

    :param rec: record to mutate
    :param form: submitted form data (supports getlist)
    :param meta: optional pre-built domain metadata object. BUG FIX: the
        caller `create_marc` in this file invokes
        `add_domain_fields(rec, form, meta)` with three arguments, which
        made the original two-argument signature raise TypeError. Accepting
        `meta` (and deriving it from the form when omitted) keeps both call
        styles working.
    """
    if meta is None:
        domain = form['domain'].lower()
        if domain in metadata_classes():
            meta = metadata_classes()[domain]()
        else:
            #no domain stuff
            return

    for fs in meta.fieldsets:
        if fs.name != 'Generic':  # TODO: this is brittle; get from somewhere
            for k in (fs.optional_fields + fs.basic_fields):
                if k in form and form[k]:
                    fields = form.getlist(k)
                    for f in fields:
                        if f and not f.isspace():
                            record_add_field(rec, '690',
                                     subfields=[('a', k), ('b', f)])
Example #37
0
def create_checksum(rec, sub_id, buffersize=64 * 1024):
    """
    Creates a checksum of all the files in the record, and adds it
    to the MARC.
    Returns: checksum as a hex string
    """
    hasher = hashlib.sha256()
    # Sorted iteration makes the digest independent of metadata ordering.
    for path in sorted(entry['file']
                       for entry in get_depositing_files_metadata(sub_id)):
        with open(path, 'rb', buffering=0) as handle:
            block = handle.read(buffersize)
            while block:
                hasher.update(block)
                block = handle.read(buffersize)
    digest = hasher.hexdigest()
    # 024 7_ $2 names the scheme, $a carries the value.
    record_add_field(rec,
                     '024',
                     ind1='7',
                     subfields=[('2', 'checksum'), ('a', digest)])
    return digest
Example #38
0
def create_checksum(rec, sub_id, buffersize=64 * 1024):
    """
    Creates a checksum of all the files in the record, and adds it
    to the MARC.
    Returns: checksum as a hex string
    """
    hasher = hashlib.sha256()
    upload_root = current_app.config.get("CFG_B2SHARE_UPLOAD_FOLDER")
    upload_dir = os.path.join(upload_root, sub_id)
    # Sorted directory listing keeps the combined digest deterministic.
    for name in sorted(os.listdir(upload_dir)):
        with open(os.path.join(upload_dir, name), 'rb', buffering=0) as fh:
            # iter() stops on the empty-bytes sentinel at EOF.
            for chunk in iter(lambda: fh.read(buffersize), b''):
                hasher.update(chunk)
    digest = hasher.hexdigest()
    record_add_field(rec, '024', ind1='7',
                     subfields=[('2', 'checksum'), ('a', digest)])
    return digest
Example #39
0
def merge_record_with_template(rec, template_name):
    """ Extend the record rec with the contents of the template and return it.

    Fields present only in the template are copied wholesale; for fields
    present in both, each existing field instance of `rec` gains any
    subfield codes it is missing, taking the first value from the template.
    `rec` is mutated in place and also returned (None when the template
    cannot be loaded).
    """
    template = get_record_template(template_name)
    if not template:
        return
    # create_record returns a (record, status, errors)-style tuple; [0] is
    # the parsed bibrecord structure.
    template_bibrec = create_record(template)[0]

    for field_tag in template_bibrec:
        if not record_has_field(rec, field_tag):
            # Field missing entirely: copy every template instance.
            # field_instance layout: [0]=subfields, [1]=ind1, [2]=ind2.
            for field_instance in template_bibrec[field_tag]:
                record_add_field(rec, field_tag, field_instance[1],
                                 field_instance[2], subfields=field_instance[0])
        else:
            # Field exists: fill in only the subfield codes each existing
            # instance lacks. NOTE(review): [0] assumes the template always
            # provides at least one value for the code -- confirm templates
            # never contain empty subfields.
            for template_field_instance in template_bibrec[field_tag]:
                subfield_codes_template = field_get_subfield_codes(template_field_instance)
                for field_instance in rec[field_tag]:
                    subfield_codes = field_get_subfield_codes(field_instance)
                    for code in subfield_codes_template:
                        if code not in subfield_codes:
                            field_add_subfield(field_instance, code,
                                               field_get_subfield_values(template_field_instance,
                                               code)[0])
    return rec
Example #40
0
def add_epic_pid(rec, recid, checksum):
    """ Adds EPIC PID to the record. If registration fails, can
    also fail the request if CFG_FAIL_ON_MISSING_PID is set to True"""
    CFG_SITE_SECURE_URL = current_app.config.get("CFG_SITE_SECURE_URL")
    location = CFG_SITE_SECURE_URL + '/record/' + str(recid)
    try:
        pid = createHandle(location, checksum)
        # 024 7_ $2 names the scheme ('PID'), $a carries the handle.
        record_add_field(rec, '024', ind1='7',
                         subfields=[('2', 'PID'), ('a', pid)])
    except HTTPException as e:
        # If CFG_FAIL_ON_MISSING_PID is not found in invenio-local.conf,
        # default is to assume False
        try:
            from config import CFG_FAIL_ON_MISSING_PID
            fail = bool(CFG_FAIL_ON_MISSING_PID)
        except ImportError:
            fail = False

        current_app.logger.error(
            "Unable to obtain PID from EPIC server {0} {1}: {2}".
            format(e.code, e.name, e))
        if fail:
            # BUG FIX: bare `raise` re-raises the active exception with its
            # original traceback; `raise e` would reset the traceback to
            # this line and lose the real failure location.
            raise
Example #41
0
def check_records(records,
                  doi_field="0247_a",
                  extra_subfields=(("2", "DOI"), ("9", "bibcheck"))):
    """
    Find the DOI for the records using crossref and add it to the specified
    field.

    This plugin won't ask for the DOI if it's already set.
    """
    # Collect only the records that do not yet carry a DOI (any 0247 $2
    # whose value is "doi", case-insensitively, counts as "already set").
    pending = {}
    for rec in records:
        already_has_doi = any(value.lower() == "doi"
                              for _, value in rec.iterfield("0247_2"))
        if not already_has_doi:
            pending[rec.record_id] = rec

    dois = get_doi_for_records(pending.values())
    for rec_id, doi in dois.iteritems():
        rec = pending[rec_id]
        # Never attach a DOI that some other record already owns.
        duplicate_recid = find_record_from_doi(doi)
        if duplicate_recid:
            rec.warn(
                "DOI %s to be added to record %s already exists in record/s %s"
                % (doi, rec_id, duplicate_recid))
            continue
        # doi_field is tag(3) + ind1 + ind2 + subfield-code.
        new_subfields = ([(doi_field[5], doi.encode("utf-8"))]
                         + map(tuple, extra_subfields))
        record_add_field(rec,
                         tag=doi_field[:3],
                         ind1=doi_field[3],
                         ind2=doi_field[4],
                         subfields=new_subfields)
        rec.set_amended("Added DOI in field %s" % doi_field)
Example #42
0
def control_actions(record, curate=None, archive=None, publish=None):
    """Upload the project-control flags (curation/publication/archival) of
    `record` as 983 fields via bibupload.

    Flags default to the values already stored on the record.
    """
    rec = {}
    record_add_field(rec, '001', controlfield_value=str(record['recid']))
    if curate is None:
        curate = record.get('record_curated_in_project', False)
    if archive is None:
        archive = record.get('record_selected_for_archive', False)
    if publish is None:
        publish = record.get('record_public_from_project', False)

    # One 983 instance per flag: $a = curated, $b = published, $c = archived.
    for code, flag in (('a', curate), ('b', publish), ('c', archive)):
        record_add_field(rec, tag='983', ind1='_', ind2='_',
                         subfields=[(code, '%s' % flag)])

    from invenio.legacy.bibupload.utils import bibupload_record
    bibupload_record(record=rec, file_prefix='project_info', mode='-c',
                     opts=[], alias="project_info")
Example #43
0
    def find_modified_tags(self, common_tags, record1, record2):
        """
        For each tag common to Record1 and Record2, checks for modifications
        at field-level, indicator-level and subfield-level.

        Returns a dictionary of tags and corresponding fields from Record1
        that have been found to have modified.

        :param common_tags: iterable of MARC tags present in both records
        :param record1: the uploaded record (source of the values kept)
        :param record2: the existing record being compared against
        """

        result = {}
        for tag in common_tags:
            # retrieve tag instances of record1 and record2
            rec1_tag_val = record_get_field_instances(record1, tag, '%', '%')
            rec2_tag_val = record_get_field_instances(record2, tag, '%', '%')
            if rec1_tag_val:
                rec1_ind = self.group_tag_values_by_indicator(rec1_tag_val)
            if rec2_tag_val:
                rec2_ind = self.group_tag_values_by_indicator(rec2_tag_val)

            # NOTE: At this point rec1_ind and rec2_ind will be dictionary
            # Key ==> (ind1, ind2) tuple
            # Val ==> list of data_tuple => [dt1,dt2]
            # dt(n) => ([sfl],ind1,ind2,ctrlfield,fn)

            # Generating 3 different dictionaries
            # common/added/deleted ind pairs in record1 based on record2
            (com_ind, add_ind,
             del_ind) = self.compare_tags_by_ind(rec1_ind, rec2_ind)

            # Indicator pairs present only in record1: copy them into the
            # result verbatim (data_tuple[0] is the subfield list).
            if add_ind:
                for ind_pair in add_ind:
                    for data_tuple in add_ind[ind_pair]:
                        subfield_list = data_tuple[0]
                        record_add_field(result,
                                         tag,
                                         ind_pair[0],
                                         ind_pair[1],
                                         '',
                                         subfields=subfield_list)

            # Indicators that are deleted from record1 w.r.t record2 will be added with special code
            if del_ind:
                for ind_pair in del_ind:
                    record_add_field(result, tag, ind_pair[0], ind_pair[1], '',
                                     [(CFG_BIBUPLOAD_DELETE_CODE,
                                       CFG_BIBUPLOAD_DELETE_VALUE)])

            # Common modified fields. Identifying changes at subfield level
            if com_ind:
                for ind_pair in com_ind:
                    # NOTE: sf_rec1 and sf_rec2 are list of list of subfields
                    # A simple list comparison is sufficient in this scneario
                    # Any change in the order of fields or changes in subfields
                    # will cause the entire list of data_tuple for that ind_pair
                    # to be copied from record1(upload) to result.
                    if tag in CFG_BIBUPLOAD_CONTROLFIELD_TAGS:
                        # Control fields: compare the control values
                        # (data_tuple[3]) instead of subfield lists.
                        cf_rec1 = [
                            data_tuple[3] for data_tuple in rec1_ind[ind_pair]
                        ]
                        cf_rec2 = [
                            data_tuple[3] for data_tuple in rec2_ind[ind_pair]
                        ]
                        if cf_rec1 != cf_rec2:
                            for data_tuple in com_ind[ind_pair]:
                                record_add_field(
                                    result,
                                    tag,
                                    controlfield_value=data_tuple[3])
                    else:
                        sf_rec1 = [
                            data_tuple[0] for data_tuple in rec1_ind[ind_pair]
                        ]
                        sf_rec2 = [
                            data_tuple[0] for data_tuple in rec2_ind[ind_pair]
                        ]
                        if sf_rec1 != sf_rec2:
                            # change at subfield level/ re-oredered fields
                            for data_tuple in com_ind[ind_pair]:
                                # com_ind will have data_tuples of record1(upload) and not record2
                                subfield_list = data_tuple[0]
                                record_add_field(result,
                                                 tag,
                                                 ind_pair[0],
                                                 ind_pair[1],
                                                 '',
                                                 subfields=subfield_list)

        return result
Example #44
0
def tweet_to_record(tweet, query):
    """
    Transform a tweet into a record.
    @note: you may want to highly customize this.

    :param tweet: a tweet object from the Twitter API
    :param query: the search query that produced this tweet (stored in 980$b)
    :return: the record serialized as MARCXML
    """
    rec = {}
    ## Let's normalize the body of the tweet.
    text = tweet.text.encode('UTF-8')
    text = text.replace('&gt;', '>')
    text = text.replace('&lt;', '<')
    text = text.replace('&quot;', "'")
    text = text.replace('&amp;', '&')

    ## Let's add the creation date
    try:
        creation_date = time.strptime(tweet.created_at,
                                      '%a, %d %b %Y %H:%M:%S +0000')
    except ValueError:
        # The Twitter API has used two different date formats over time.
        creation_date = time.strptime(tweet.created_at,
                                      '%a %b %d %H:%M:%S +0000 %Y')
    # BUG FIX: the original call was record_add_field(rec, '260__c', <date>),
    # which passes a 6-character string as the 3-character tag and the date
    # as ind1. MARC tags are 3 characters; the 'c' belongs in a subfield,
    # matching every other record_add_field call in this module.
    record_add_field(rec, '260',
                     subfields=[('c', time.strftime('%Y-%m-%dZ%H:%M:%ST',
                                                    creation_date))])

    ## Let's add the Tweet ID
    record_add_field(rec, '970', subfields=[('a', str(tweet.id))])

    ## Let's add the body of the tweet as an abstract
    record_add_field(rec, '520', subfields=[('a', text)])

    ## Let's re-add the body of the tweet as a title.
    record_add_field(rec, '245', subfields=[('a', text)])

    ## Let's fetch information about the user
    try:
        user = _TWITTER_API.GetUser(tweet.from_user)

        ## Let's add the user name as author of the tweet
        record_add_field(rec,
                         '100',
                         subfields=[('a', str(user.name.encode('UTF-8')))])

        ## Let's fetch the icon of the user profile, and let's upload it as
        ## an image (and an icon of itself)
        record_add_field(rec,
                         'FFT',
                         subfields=[
                             ('a', user.profile.image_url.encode('UTF-8')),
                             ('x', user.profile.image_url.encode('UTF-8'))
                         ])
    except Exception as err:
        # Best-effort enrichment: a failed user lookup should not lose the
        # tweet itself, so we only log a warning.
        write_message("WARNING: issue when fetching the user: %s" % err,
                      stream=sys.stderr)
    if hasattr(tweet, 'iso_language_code'):
        ## Let's add the language of the Tweet if available (also this depends)
        ## on the kind of Twitter API call we used
        record_add_field(rec,
                         '045',
                         subfields=[('a',
                                     tweet.iso_language_code.encode('UTF-8'))])

    ## Let's tag this record as a TWEET so that later we can build a collection
    ## out of these records.
    record_add_field(rec, '980', subfields=[('a', 'TWEET'), ('b', query)])

    ## Some smart manipulations: let's parse out URLs and tags from the body
    ## of the Tweet.
    for url in _RE_GET_HTTP.findall(text):
        url = url[0]
        record_add_field(rec, '856', '4', subfields=[('u', url)])

    for tag in _RE_TAGS.findall(text):
        ## And here we add the keywords.
        record_add_field(rec,
                         '653',
                         '1',
                         subfields=[('a', tag), ('9', 'TWITTER')])

    ## Finally we shall serialize everything to MARCXML
    return record_xml_output(rec)
Example #45
0
def add_basic_fields(rec, form, meta):
    """
    Adds the basic fields from the form. Note that these fields are mapped
    to specific MARC fields. For information on the fields see the www.loc.gov
    website. For example http://www.loc.gov/marc/bibliographic/bd260.html
    contains information on field 260 for publication data.

    :param rec: record (bibrecord dict) mutated in place
    :param form: submitted form data (supports .get / .getlist, i.e. a
        werkzeug MultiDict-like object)
    :param meta: domain metadata object supplying defaults
        (publisher_default, language_default)
    :raises: re-raises any exception after logging it
    """
    # why aren't subfields a dictionary?!
    try:
        # 245: title statement
        if form.get('title'):
            record_add_field(rec,
                             '245',
                             subfields=[('a',
                                         remove_html_markup(form['title']))])

        # 100: one field instance per non-blank creator
        if form.get('creator'):
            fields = form.getlist('creator')
            for f in fields:
                if f and not f.isspace():
                    record_add_field(rec,
                                     '100',
                                     subfields=[
                                         ('a', remove_html_markup(f.strip()))
                                     ])

        # 980: collection / domain tag
        if form.get('domain'):
            record_add_field(rec,
                             '980',
                             subfields=[('a',
                                         remove_html_markup(form['domain']))])

        # 260: publication data -- $b publisher (falls back to the domain
        # default), $c publication date when supplied
        pubfields = []
        pubfields.append(
            ('b',
             remove_html_markup(form.get('publisher',
                                         meta.publisher_default))))
        if form.get('publication_date'):
            pubfields.append(
                ('c', remove_html_markup(form['publication_date'])))
        if pubfields:
            record_add_field(rec, '260', subfields=pubfields)

        # 542: access status flag
        if 'open_access' in form:
            record_add_field(rec, '542', subfields=[('l', 'open')])
        else:
            record_add_field(rec, '542', subfields=[('l', 'restricted')])

        # 540: licence terms
        if form.get('licence'):
            record_add_field(rec,
                             '540',
                             subfields=[('a',
                                         remove_html_markup(form['licence']))])
        # 520: description/abstract -- note: 'description' is accessed
        # directly, so a missing key raises KeyError (caught and re-raised
        # below); it is effectively a required form field.
        record_add_field(rec,
                         '520',
                         subfields=[('a',
                                     remove_html_markup(form['description']))])

        # 270: contact address
        if form.get('contact_email'):
            record_add_field(rec,
                             '270',
                             subfields=[
                                 ('m',
                                  remove_html_markup(form['contact_email']))
                             ])

        # 653 1_: keywords -- each form value may itself be a comma-separated
        # list, so split before adding one field per keyword
        if form.get('keywords'):
            for f in form.getlist('keywords'):
                for kw in f.split(','):
                    if kw and not kw.isspace():
                        record_add_field(rec,
                                         '653',
                                         ind1='1',
                                         subfields=[
                                             ('a',
                                              remove_html_markup(kw.strip()))
                                         ])

        # 700: one field instance per non-blank contributor
        if form.get('contributors'):
            fields = form.getlist('contributors')
            for f in fields:
                if f and not f.isspace():
                    record_add_field(rec,
                                     '700',
                                     subfields=[
                                         ('a', remove_html_markup(f.strip()))
                                     ])

        # 546: language (always written, defaulting from meta)
        record_add_field(rec,
                         '546',
                         subfields=[('a',
                                     remove_html_markup(
                                         form.get('language',
                                                  meta.language_default)))])

        # 337: resource type(s)
        if form.get('resource_type'):
            fields = form.getlist('resource_type')
            for f in fields:
                record_add_field(rec,
                                 '337',
                                 subfields=[('a', remove_html_markup(f))])
        # Special case for the 'Linguistics' domain:
        # All the ling_resource_type(s) are also resource_type(s), going into '337'
        if form.get('ling_resource_type'):
            fields = form.getlist('ling_resource_type')
            for f in fields:
                record_add_field(rec,
                                 '337',
                                 subfields=[('a', remove_html_markup(f))])

        # 024: alternate identifier
        if form.get('alternate_identifier'):
            record_add_field(rec,
                             '024',
                             subfields=[('a',
                                         remove_html_markup(
                                             form['alternate_identifier']))])

        # 250: edition/version statement
        if form.get('version'):
            record_add_field(rec,
                             '250',
                             subfields=[('a',
                                         remove_html_markup(form['version']))])

        # 526: discipline(s)
        if form.get('discipline'):
            fields = form.getlist('discipline')
            for f in fields:
                record_add_field(rec,
                                 '526',
                                 subfields=[('a', remove_html_markup(f))])

        # 264: production notice -- site name plus the submission timestamp
        CFG_SITE_NAME = current_app.config.get("CFG_SITE_NAME")
        record_add_field(rec,
                         '264',
                         subfields=[('b', CFG_SITE_NAME),
                                    ('c', str(datetime.utcnow()) + " UTC")])
    except Exception as e:
        # Log for diagnosis, then propagate so the caller can abort the
        # submission.
        current_app.logger.error(e)
        raise
Example #46
0
def oairepositoryupdater_task():
    """Main business logic code of oai_archive"""
    no_upload = task_get_option("no_upload")
    report = task_get_option("report")

    if report > 1:
        print_repository_status(verbose=report)
        return True

    if run_sql(
            "SELECT id FROM schTASK WHERE proc='bibupload:oairepository' AND status='WAITING'"
    ):
        write_message(
            "Previous requests of oairepository still being elaborated. Let's skip this execution."
        )
        return True

    initial_snapshot = {}
    for set_spec in all_set_specs():
        initial_snapshot[set_spec] = get_set_definitions(set_spec)
    write_message("Initial set snapshot: %s" % pformat(initial_snapshot),
                  verbose=2)

    task_update_progress("Fetching records to process")

    recids_with_oaiid = search_unit_in_bibxxx(p='*',
                                              f=CFG_OAI_ID_FIELD,
                                              type='e')
    write_message("%s recids have an OAI ID" % len(recids_with_oaiid),
                  verbose=2)

    all_current_recids = search_unit_in_bibxxx(p='*',
                                               f=CFG_OAI_SET_FIELD,
                                               type='e')
    no_more_exported_recids = intbitset(all_current_recids)
    write_message("%s recids are currently exported" %
                  (len(all_current_recids)),
                  verbose=2)

    all_affected_recids = intbitset()
    all_should_recids = intbitset()
    recids_for_set = {}
    for set_spec in all_set_specs():
        if not set_spec:
            set_spec = CFG_OAI_REPOSITORY_GLOBAL_SET_SPEC
        should_recids = get_recids_for_set_spec(set_spec)
        recids_for_set[set_spec] = should_recids
        no_more_exported_recids -= should_recids
        all_should_recids |= should_recids
        current_recids = search_unit_in_bibxxx(p=set_spec,
                                               f=CFG_OAI_SET_FIELD,
                                               type='e')
        write_message(
            "%s recids should be in %s. Currently %s are in %s" %
            (len(should_recids), set_spec, len(current_recids), set_spec),
            verbose=2)
        to_add = should_recids - current_recids
        write_message("%s recids should be added to %s" %
                      (len(to_add), set_spec),
                      verbose=2)
        to_remove = current_recids - should_recids
        write_message("%s recids should be removed from %s" %
                      (len(to_remove), set_spec),
                      verbose=2)
        affected_recids = to_add | to_remove
        write_message("%s recids should be hence updated for %s" %
                      (len(affected_recids), set_spec),
                      verbose=2)
        all_affected_recids |= affected_recids

    missing_oaiid = all_should_recids - recids_with_oaiid
    write_message("%s recids are missing an oaiid" % len(missing_oaiid))
    write_message("%s recids should no longer be exported" %
                  len(no_more_exported_recids))

    ## Let's add records with missing OAI ID
    all_affected_recids |= missing_oaiid | no_more_exported_recids
    write_message("%s recids should updated" % (len(all_affected_recids)),
                  verbose=2)

    if not all_affected_recids:
        write_message("Nothing to do!")
        return True

    # Prepare to save results in a tmp file
    (fd, filename) = mkstemp(dir=CFG_TMPSHAREDDIR,
                                  prefix='oairepository_' + \
                                  time.strftime("%Y%m%d_%H%M%S_",
                                                time.localtime()))
    oai_out = os.fdopen(fd, "w")
    oai_out.write("<collection>")

    tot = 0
    # Iterate over the recids
    for i, recid in enumerate(all_affected_recids):
        task_sleep_now_if_required(can_stop_too=True)
        task_update_progress("Done %s out of %s records." % \
                             (i, len(all_affected_recids)))

        write_message("Elaborating recid %s" % recid, verbose=3)
        record = get_record(recid)
        if not record:
            write_message("Record %s seems empty. Let's skip it." % recid,
                          verbose=3)
            continue
        new_record = {}

        # Check if an OAI identifier is already in the record or
        # not.
        assign_oai_id_entry = False
        oai_id_entry = record_get_field_value(record,
                                              tag=CFG_OAI_ID_FIELD[:3],
                                              ind1=CFG_OAI_ID_FIELD[3],
                                              ind2=CFG_OAI_ID_FIELD[4],
                                              code=CFG_OAI_ID_FIELD[5])
        if not oai_id_entry:
            assign_oai_id_entry = True
            oai_id_entry = "oai:%s:%s" % (CFG_OAI_ID_PREFIX, recid)
            write_message("Setting new oai_id %s for record %s" %
                          (oai_id_entry, recid),
                          verbose=3)
        else:
            write_message("Already existing oai_id %s for record %s" %
                          (oai_id_entry, recid),
                          verbose=3)

        # Get the sets to which this record already belongs according
        # to the metadata
        current_oai_sets = set(
            record_get_field_values(record,
                                    tag=CFG_OAI_SET_FIELD[:3],
                                    ind1=CFG_OAI_SET_FIELD[3],
                                    ind2=CFG_OAI_SET_FIELD[4],
                                    code=CFG_OAI_SET_FIELD[5]))
        write_message("Record %s currently belongs to these oai_sets: %s" %
                      (recid, ", ".join(current_oai_sets)),
                      verbose=3)

        current_previous_oai_sets = set(
            record_get_field_values(record,
                                    tag=CFG_OAI_PREVIOUS_SET_FIELD[:3],
                                    ind1=CFG_OAI_PREVIOUS_SET_FIELD[3],
                                    ind2=CFG_OAI_PREVIOUS_SET_FIELD[4],
                                    code=CFG_OAI_PREVIOUS_SET_FIELD[5]))
        write_message(
            "Record %s currently doesn't belong anymore to these oai_sets: %s"
            % (recid, ", ".join(current_previous_oai_sets)),
            verbose=3)

        # Get the sets that should be in this record according to
        # settings
        updated_oai_sets = set(_set
                               for _set, _recids in iteritems(recids_for_set)
                               if recid in _recids)
        write_message("Record %s now belongs to these oai_sets: %s" %
                      (recid, ", ".join(updated_oai_sets)),
                      verbose=3)

        updated_previous_oai_sets = set(
            _set for _set in (current_previous_oai_sets - updated_oai_sets)
            | (current_oai_sets - updated_oai_sets))
        write_message(
            "Record %s now doesn't belong anymore to these oai_sets: %s" %
            (recid, ", ".join(updated_previous_oai_sets)),
            verbose=3)

        # Ok, we have the old sets and the new sets. If they are equal
        # and oai ID does not need to be added, then great, nothing to
        # change . Otherwise apply the new sets.
        if current_oai_sets == updated_oai_sets and not assign_oai_id_entry:
            write_message("Nothing has changed for record %s, let's move on!" %
                          recid,
                          verbose=3)
            continue  # Jump to next recid

        write_message("Something has changed for record %s, let's update it!" %
                      recid,
                      verbose=3)
        subfields = [(CFG_OAI_ID_FIELD[5], oai_id_entry)]
        for oai_set in updated_oai_sets:
            subfields.append((CFG_OAI_SET_FIELD[5], oai_set))
        for oai_set in updated_previous_oai_sets:
            subfields.append((CFG_OAI_PREVIOUS_SET_FIELD[5], oai_set))

        record_add_field(new_record, tag="001", controlfield_value=str(recid))
        record_add_field(new_record,
                         tag=CFG_OAI_ID_FIELD[:3],
                         ind1=CFG_OAI_ID_FIELD[3],
                         ind2=CFG_OAI_ID_FIELD[4],
                         subfields=subfields)
        oai_out.write(record_xml_output(new_record))
        tot += 1
        if tot == CFG_OAI_REPOSITORY_MARCXML_SIZE:
            oai_out.write("</collection>")
            oai_out.close()
            write_message("Wrote to file %s" % filename)
            if not no_upload:
                if task_get_option("notimechange"):
                    task_low_level_submission('bibupload', 'oairepository',
                                              '-c', filename, '-n',
                                              '-Noairepository', '-P', '-1')
                else:
                    task_low_level_submission('bibupload', 'oairepository',
                                              '-c', filename,
                                              '-Noairepository', '-P', '-1')
            # Prepare to save results in a tmp file
            (fd, filename) = mkstemp(dir=CFG_TMPSHAREDDIR,
                                        prefix='oairepository_' + \
                                        time.strftime("%Y%m%d_%H%M%S_",
                                                        time.localtime()))
            oai_out = os.fdopen(fd, "w")
            oai_out.write("<collection>")
            tot = 0
            task_sleep_now_if_required(can_stop_too=True)

    oai_out.write("</collection>")
    oai_out.close()
    write_message("Wrote to file %s" % filename)

    if tot > 0:
        if not no_upload:
            task_sleep_now_if_required(can_stop_too=True)
            if task_get_option("notimechange"):
                task_low_level_submission('bibupload', 'oairepository', '-c',
                                          filename, '-n')
            else:
                task_low_level_submission('bibupload', 'oairepository', '-c',
                                          filename)
    else:
        os.remove(filename)

    return True
Example #47
0
def oairepositoryupdater_task():
    """Main business logic code of oai_archive.

    Recompute OAI set membership for every affected record, assign OAI
    identifiers to records that lack one, and submit the resulting
    MARCXML corrections to bibupload in batches of
    CFG_OAI_REPOSITORY_MARCXML_SIZE records.

    Honours the task options ``no_upload`` (write files only),
    ``report`` (print repository status and exit) and ``notimechange``
    (upload with ``-n``).

    :return: True on completion (also when there is nothing to do).
    """
    no_upload = task_get_option("no_upload")
    report = task_get_option("report")

    # Report-only mode: print the repository status and stop here.
    if report > 1:
        print_repository_status(verbose=report)
        return True

    # Snapshot the current set definitions (for the verbose log only).
    initial_snapshot = {}
    for set_spec in all_set_specs():
        initial_snapshot[set_spec] = get_set_definitions(set_spec)
    write_message("Initial set snapshot: %s" % pformat(initial_snapshot), verbose=2)

    task_update_progress("Fetching records to process")

    # Records that already carry an OAI identifier in their metadata.
    recids_with_oaiid = search_unit_in_bibxxx(p='*', f=CFG_OAI_ID_FIELD, type='e')
    write_message("%s recids have an OAI ID" % len(recids_with_oaiid), verbose=2)

    # Records currently exported in at least one OAI set; entries are
    # removed from no_more_exported_recids as sets claim them below.
    all_current_recids = search_unit_in_bibxxx(p='*', f=CFG_OAI_SET_FIELD, type='e')
    no_more_exported_recids = intbitset(all_current_recids)
    write_message("%s recids are currently exported" % (len(all_current_recids)), verbose=2)

    all_affected_recids = intbitset()
    all_should_recids = intbitset()
    recids_for_set = {}
    # For each set spec, compute the desired membership and collect the
    # records whose metadata must change (additions and removals).
    for set_spec in all_set_specs():
        if not set_spec:
            set_spec = CFG_OAI_REPOSITORY_GLOBAL_SET_SPEC
        should_recids = get_recids_for_set_spec(set_spec)
        recids_for_set[set_spec] = should_recids
        # A record wanted by any set is still exported.
        no_more_exported_recids -= should_recids
        all_should_recids |= should_recids
        current_recids = search_unit_in_bibxxx(p=set_spec, f=CFG_OAI_SET_FIELD, type='e')
        write_message("%s recids should be in %s. Currently %s are in %s" % (len(should_recids), set_spec, len(current_recids), set_spec), verbose=2)
        to_add = should_recids - current_recids
        write_message("%s recids should be added to %s" % (len(to_add), set_spec), verbose=2)
        to_remove = current_recids - should_recids
        write_message("%s recids should be removed from %s" % (len(to_remove), set_spec), verbose=2)
        affected_recids = to_add | to_remove
        write_message("%s recids should be hence updated for %s" % (len(affected_recids), set_spec), verbose=2)
        all_affected_recids |= affected_recids

    missing_oaiid = all_should_recids - recids_with_oaiid
    write_message("%s recids are missing an oaiid" % len(missing_oaiid))
    write_message("%s recids should no longer be exported" % len(no_more_exported_recids))

    ## Also process records with a missing OAI ID and records that must
    ## stop being exported altogether.
    all_affected_recids |= missing_oaiid | no_more_exported_recids
    write_message("%s recids should updated" % (len(all_affected_recids)), verbose=2)

    if not all_affected_recids:
        write_message("Nothing to do!")
        return True

    # Prepare to save results in a tmp file
    (fd, filename) = mkstemp(dir=CFG_TMPDIR,
                                  prefix='oairepository_' + \
                                  time.strftime("%Y%m%d_%H%M%S_",
                                                time.localtime()))
    oai_out = os.fdopen(fd, "w")
    oai_out.write("<collection>")

    tot = 0
    # Iterate over the recids
    for i, recid in enumerate(all_affected_recids):
        task_sleep_now_if_required(can_stop_too=True)
        task_update_progress("Done %s out of %s records." % \
                             (i, len(all_affected_recids)))

        write_message("Elaborating recid %s" % recid, verbose=3)
        record = get_record(recid)
        if not record:
            write_message("Record %s seems empty. Let's skip it." % recid, verbose=3)
            continue
        new_record = {}

        # Check if an OAI identifier is already in the record or
        # not.
        assign_oai_id_entry = False
        oai_id_entry = record_get_field_value(record, tag=CFG_OAI_ID_FIELD[:3], ind1=CFG_OAI_ID_FIELD[3], ind2=CFG_OAI_ID_FIELD[4], code=CFG_OAI_ID_FIELD[5])
        if not oai_id_entry:
            assign_oai_id_entry = True
            oai_id_entry = "oai:%s:%s" % (CFG_OAI_ID_PREFIX, recid)
            write_message("Setting new oai_id %s for record %s" % (oai_id_entry, recid), verbose=3)
        else:
            write_message("Already existing oai_id %s for record %s" % (oai_id_entry, recid), verbose=3)

        # Get the sets to which this record already belongs according
        # to the metadata
        current_oai_sets = set(record_get_field_values(record, tag=CFG_OAI_SET_FIELD[:3], ind1=CFG_OAI_SET_FIELD[3], ind2=CFG_OAI_SET_FIELD[4], code=CFG_OAI_SET_FIELD[5]))
        write_message("Record %s currently belongs to these oai_sets: %s" % (recid, ", ".join(current_oai_sets)), verbose=3)

        current_previous_oai_sets = set(record_get_field_values(record, tag=CFG_OAI_PREVIOUS_SET_FIELD[:3], ind1=CFG_OAI_PREVIOUS_SET_FIELD[3], ind2=CFG_OAI_PREVIOUS_SET_FIELD[4], code=CFG_OAI_PREVIOUS_SET_FIELD[5]))
        write_message("Record %s currently doesn't belong anymore to these oai_sets: %s" % (recid, ", ".join(current_previous_oai_sets)), verbose=3)

        # Get the sets that should be in this record according to
        # settings
        updated_oai_sets = set(_set for _set, _recids in iteritems(recids_for_set)
             if recid in _recids)
        write_message("Record %s now belongs to these oai_sets: %s" % (recid, ", ".join(updated_oai_sets)), verbose=3)

        # Sets the record leaves now, plus sets it had already left.
        updated_previous_oai_sets = set(_set for _set in (current_previous_oai_sets - updated_oai_sets) |
             (current_oai_sets - updated_oai_sets))
        write_message("Record %s now doesn't belong anymore to these oai_sets: %s" % (recid, ", ".join(updated_previous_oai_sets)), verbose=3)

        # Ok, we have the old sets and the new sets. If they are equal
        # and oai ID does not need to be added, then great, nothing to
        # change . Otherwise apply the new sets.
        if current_oai_sets == updated_oai_sets and not assign_oai_id_entry:
            write_message("Nothing has changed for record %s, let's move on!" % recid, verbose=3)
            continue # Jump to next recid

        write_message("Something has changed for record %s, let's update it!" % recid, verbose=3)
        subfields = [(CFG_OAI_ID_FIELD[5], oai_id_entry)]
        for oai_set in updated_oai_sets:
            subfields.append((CFG_OAI_SET_FIELD[5], oai_set))
        for oai_set in updated_previous_oai_sets:
            subfields.append((CFG_OAI_PREVIOUS_SET_FIELD[5], oai_set))

        # Emit a minimal correction record: controlfield 001 plus one
        # field carrying the OAI id / set / previous-set subfields.
        record_add_field(new_record, tag="001", controlfield_value=str(recid))
        record_add_field(new_record, tag=CFG_OAI_ID_FIELD[:3], ind1=CFG_OAI_ID_FIELD[3], ind2=CFG_OAI_ID_FIELD[4], subfields=subfields)
        oai_out.write(record_xml_output(new_record))
        tot += 1
        # Batch full: close this file, submit it, and start a new one.
        if tot == CFG_OAI_REPOSITORY_MARCXML_SIZE:
            oai_out.write("</collection>")
            oai_out.close()
            write_message("Wrote to file %s" % filename)
            if not no_upload:
                if task_get_option("notimechange"):
                    task_low_level_submission('bibupload', 'oairepository', '-c', filename, '-n')
                else:
                    task_low_level_submission('bibupload', 'oairepository', '-c', filename)
            # Prepare to save results in a tmp file
            (fd, filename) = mkstemp(dir=CFG_TMPDIR,
                                        prefix='oairepository_' + \
                                        time.strftime("%Y%m%d_%H%M%S_",
                                                        time.localtime()))
            oai_out = os.fdopen(fd, "w")
            oai_out.write("<collection>")
            tot = 0
            task_sleep_now_if_required(can_stop_too=True)

    # Flush the (possibly partial) last batch.
    oai_out.write("</collection>")
    oai_out.close()
    write_message("Wrote to file %s" % filename)

    if tot > 0:
        if not no_upload:
            task_sleep_now_if_required(can_stop_too=True)
            if task_get_option("notimechange"):
                task_low_level_submission('bibupload', 'oairepository', '-c', filename, '-n')
            else:
                task_low_level_submission('bibupload', 'oairepository', '-c', filename)
    else:
        # The last file got no records: remove the empty temp file.
        os.remove(filename)

    return True
Example #48
0
 def add_field(self, tag, value, subfields=None):
     """Append a MARC field to this record and log the amendment.

     Underscores in *tag* stand for blank indicators and are turned
     into spaces before the field is stored.
     """
     marc_tag = tag.replace("_", " ")
     field_tag, ind1, ind2 = marc_tag[:3], marc_tag[3], marc_tag[4]
     record_add_field(self, field_tag, ind1, ind2, value, subfields)
     self.set_amended("Added field %s" % marc_tag)
Example #49
0
def perform_request_record(requestType, uid, data):
    """Handle 'major' record related requests.

    Handle retrieving, submitting or cancelling the merging session.

    :param requestType: one of 'submit', 'cancel', 'getRecordCompare',
        'recCopy', 'recMerge', 'recMergeNC'; anything else is an error
    :param uid: id of the user owning the editing cache
    :param data: request payload; always carries 'recID1', and
        depending on the request also 'recID2', 'record2Mode',
        'duplicate' and 'additional_data'
    :return: dict with at least 'resultCode' (0 on success) and
        'resultText'; compare/merge requests also set 'resultHtml'
    """
    #TODO add checks before submission and cancel, replace get_bibrecord call
    result = {'resultCode': 0, 'resultText': ''}
    recid1 = data["recID1"]
    record1 = _get_record(recid1, uid, result)
    if result[
            'resultCode'] != 0:  #if record not accessible return error information
        return result

    if requestType == 'submit':
        # Duplicate-merge submission: record2 is marked deleted as a
        # duplicate of record1, and both records are uploaded.
        if 'duplicate' in data:
            recid2 = data['duplicate']
            record2 = _get_record_slave(recid2, result, 'recid', uid)
            if result['resultCode'] != 0:  #return in case of error
                return result
            # Ensure the merge would not leave DOIs in a bad state
            # before committing anything.
            (errcode, message) = check_doi_status_after_merge(
                data["recID1"],
                data['duplicate'],
                record1,
                record2,
                record2_marked_as_duplicate_p=data.has_key('duplicate'),
                submit_confirmed_p=data.get('additional_data', {
                    'confirmed_submit': False
                }).get('confirmed_submit', False))
            if errcode:
                result['resultCode'] = errcode
                result['resultText'] = message
                return result

            # mark record2 as deleted
            record_add_field(record2, '980', ' ', ' ', '', [('c', 'DELETED')])
            # mark record2 as duplicate of record1
            record_add_field(record2, '970', ' ', ' ', '',
                             [('d', str(recid1))])
            # add recid of deleted record to master record
            record_add_field(record1, '981', ' ', ' ', '',
                             [('a', str(recid2))])

            # To ensure updates happen in order, use a seq id
            sequence_id = str(random.randrange(1, 4294967296))

            # submit record2 to be deleted
            xml_record2 = record_xml_output(record2)
            save_xml_record(recid2,
                            uid,
                            xml_record2,
                            task_name="bibmerge",
                            sequence_id=sequence_id)

            # submit record1
            xml_record1 = record_xml_output(record1)
            save_xml_record(recid1,
                            uid,
                            xml_record1,
                            task_name="bibmerge",
                            sequence_id=sequence_id)

            # Delete cache file if it exists
            if cache_exists(recid1, uid):
                delete_cache(recid1, uid)

            result['resultText'] = 'Records submitted'
            return result

        # Plain submission: only record1 (from the cache) is uploaded,
        # after the same DOI sanity check.
        (errcode, message) = check_doi_status_after_merge(
            data["recID1"],
            data["recID2"],
            record1,
            None,
            submit_confirmed_p=data.get('additional_data', {
                'confirmed_submit': False
            }).get('confirmed_submit', False))
        if errcode:
            result['resultCode'] = errcode
            result['resultText'] = message
            return result

        #submit record1 from cache
        save_xml_record(recid1, uid, task_name="bibmerge")

        # Delete cache file if it exists
        if cache_exists(recid1, uid):
            delete_cache(recid1, uid)

        result['resultText'] = 'Record submitted'
        return result

    elif requestType == 'cancel':
        # Abandon the merge session: just drop the editing cache.
        delete_cache(recid1, uid)
        result['resultText'] = 'Cancelled'
        return result

    # Remaining request types operate on a second (slave) record.
    recid2 = data["recID2"]
    mode = data['record2Mode']
    record2 = _get_record_slave(recid2, result, mode, uid)
    if result[
            'resultCode'] != 0:  #if record not accessible return error information
        return result

    if requestType == 'getRecordCompare':
        # Side-by-side diff only, no modification.
        result['resultHtml'] = bibmerge_templates.BM_html_all_diff(
            record1, record2)
        result['resultText'] = 'Records compared'

    elif requestType == 'recCopy':
        # Overwrite record1's content with record2's.
        copy_R2_to_R1(record1, record2)
        result['resultHtml'] = bibmerge_templates.BM_html_all_diff(
            record1, record2)
        result['resultText'] = 'Record copied'

    elif requestType == 'recMerge':
        # Merge, resolving conflicting fields in favour of merging.
        merge_record(record1, record2, merge_conflicting_fields=True)
        result['resultHtml'] = bibmerge_templates.BM_html_all_diff(
            record1, record2)
        result['resultText'] = 'Records merged'

    elif requestType == 'recMergeNC':
        # Merge, leaving conflicting fields untouched.
        merge_record(record1, record2, merge_conflicting_fields=False)
        result['resultHtml'] = bibmerge_templates.BM_html_all_diff(
            record1, record2)
        result['resultText'] = 'Records merged'

    else:
        result['resultCode'], result['resultText'] = 1, 'Wrong request type'

    return result
Example #50
0
def add_basic_fields(rec, form, email):
    """
    Adds the basic fields from the form. Note that these fields are mapped
    to specific MARC fields. For information on the fields see the www.loc.gov
    website. For example http://www.loc.gov/marc/bibliographic/bd260.html
    contains information on field 260 for publication data.

    :param rec: bibrecord structure the fields are added to (mutated)
    :param form: submitted form data (supports .get and .getlist)
    :param email: submitter's e-mail, stored in field 856
    :raises: re-raises any exception after logging it
    """
    # why aren't subfields a dictionary?!
    try:
        # Optional keys are read with .get() so an absent key is
        # treated as empty instead of raising a KeyError.
        # 245: title statement.
        if form.get('title'):
            record_add_field(rec, '245', subfields=[('a', remove_html_markup(form['title']))])

        # 100: one field per non-blank creator.
        if form.get('creator'):
            fields = form.getlist('creator')
            for f in fields:
                if f and not f.isspace():
                    record_add_field(rec, '100', subfields=[('a', remove_html_markup(f.strip()))])

        # 980: domain marker.
        if form.get('domain'):
            record_add_field(rec, '980', subfields=[('a', remove_html_markup(form['domain']))])

        # 260: publication data (publisher + date), only when present.
        pubfields = []
        if form.get('publisher'):
            pubfields.append(('b', remove_html_markup(form['publisher'])))
        if form.get('publication_date'):
            pubfields.append(('c', remove_html_markup(form['publication_date'])))
        if pubfields:
            record_add_field(rec, '260', subfields=pubfields)

        # 856: submitter's e-mail address.
        record_add_field(rec, '856', ind1='0', subfields=[('f', email)])

        # 542: access rights.
        if 'open_access' in form:
            record_add_field(rec, '542', subfields=[('l', 'open')])
        else:
            record_add_field(rec, '542', subfields=[('l', 'restricted')])

        # 540: licence terms.
        if form.get('licence'):
            record_add_field(rec, '540', subfields=[('a', remove_html_markup(form['licence']))])

        # 520: description (expected to always be present).
        record_add_field(rec, '520', subfields=[('a', remove_html_markup(form['description']))])

        # 270: contact e-mail.
        if form.get('contact_email'):
            record_add_field(rec, '270', subfields=[('m', remove_html_markup(form['contact_email']))])

        # 653: one field per non-blank comma-separated keyword.
        if form.get('keywords'):
            for kw in form['keywords'].split(','):
                if kw and not kw.isspace():
                    record_add_field(rec, '653',
                                 ind1='1',
                                 subfields=[('a', remove_html_markup(kw.strip()))])

        # 700: one field per non-blank contributor.
        if form.get('contributors'):
            fields = form.getlist('contributors')
            for f in fields:
                if f and not f.isspace():
                    record_add_field(rec, '700', subfields=[('a', remove_html_markup(f.strip()))])

        # 546: language (expected to always be present).
        record_add_field(rec, '546', subfields=[('a', remove_html_markup(form['language']))])

        # copying zenodo here, but I don't think 980 is the right MARC field
        # One 980 per listed resource type.
        if form.get('resource_type'):
            fields = form.getlist('resource_type')
            for f in fields:
                record_add_field(rec, '980', subfields=[('a', remove_html_markup(f))])

        # 024: alternate identifier (e.g. DOI or handle).
        if form.get('alternate_identifier'):
            record_add_field(rec, '024',
                             subfields=[('a', remove_html_markup(form['alternate_identifier']))])

        # 250: version/edition statement.
        if form.get('version'):
            record_add_field(rec, '250', subfields=[('a', remove_html_markup(form['version']))])

        # 264: production notice (site name + UTC timestamp).
        CFG_SITE_NAME = current_app.config.get("CFG_SITE_NAME")
        record_add_field(rec, '264',
                         subfields=[('b', CFG_SITE_NAME),
                                    ('c', str(datetime.utcnow()) + " UTC")])
    except Exception as e:
        current_app.logger.error(e)
        raise
Example #51
0
    def compare_records(self, record1, record2, opt_mode=None):
        """
        Compares two records to identify added/modified/deleted tags.

        The records are either the upload record or existing record or
        record archived.

        :param record1: incoming/upload record (dict: tag -> field list)
        :param record2: existing or archived record
        :param opt_mode: bibupload mode; deletions are only patched in
            'replace' or 'delete' mode
        :return: dict with any of the keys 'MOD', 'ADD', 'DEL' mapping
            to the corresponding patch dictionaries; keys are omitted
            when the patch is empty

        Returns a Tuple of Dictionaries(For modified/added/deleted tags).
        """
        def group_record_tags():
            """
            Groups all the tags in a Record as Common/Added/Deleted tags.
            Returns a Tuple of 3 lists for each category mentioned above.
            """
            rec1_keys = record1.keys()
            rec2_keys = record2.keys()

            com_tag_lst = [key for key in rec1_keys if key in rec2_keys]
            # tags in record2 not present in record1
            del_tag_lst = [key for key in rec2_keys if key not in rec1_keys]
            # additional tags in record1
            add_tag_lst = [key for key in rec1_keys if key not in rec2_keys]

            return (com_tag_lst, add_tag_lst, del_tag_lst)

        # declaring dictionaries to hold the identified patch
        mod_patch = {}
        add_patch = {}
        del_patch = {}
        result = {}

        (common_tags, added_tags, deleted_tags) = group_record_tags()
        if common_tags:
            mod_patch = self.find_modified_tags(common_tags, record1, record2)

        if added_tags:
            for tag in added_tags:
                add_patch[tag] = record1[tag]

        # if record comes with correct, it should already have fields
        # marked with '0' code.  Deletions are patched only in 'replace'
        # or 'delete' mode; parentheses via `in` make the intended
        # grouping explicit.
        if deleted_tags and opt_mode in ('replace', 'delete'):
            for tag in deleted_tags:
                del_patch[tag] = record2[tag]

        # returning back a result dictionary with all available patches
        if mod_patch:
            result['MOD'] = mod_patch

        if add_patch:
            result['ADD'] = add_patch

        if del_patch:
            # for a tag that has been deleted in the upload record in replace
            # mode, loop through all the fields of the tag and add additional
            # subfield with code '0' and value '__DELETE_FIELDS__'
            # NOTE Indicators taken into consideration while deleting fields
            for tag in del_patch:
                for data_tuple in del_patch[tag]:
                    ind1 = data_tuple[1]
                    ind2 = data_tuple[2]
                    record_delete_field(del_patch, tag, ind1, ind2)
                    record_add_field(del_patch, tag, ind1, ind2, "", [
                        (CFG_BIBUPLOAD_DELETE_CODE, CFG_BIBUPLOAD_DELETE_VALUE)
                    ])
            result['DEL'] = del_patch

        return result
Example #52
0
def _prepare_marcxml(recid_a,
                     rn_a,
                     recids_and_rns_b,
                     what_is_a_for_b,
                     what_is_b_for_a,
                     display_in_a=True,
                     display_in_b=True,
                     marc_for_a=None,
                     marc_for_b=None,
                     upload_mode='append',
                     consider_empty_p=False):
    """Build the MARCXML needed to link record A with records B.

    :param recid_a: record id of the "master" record A
    :param rn_a: report number of record A
    :param recids_and_rns_b: iterable of (recid, report-number) pairs
        for the related records B
    :param what_is_a_for_b: relationship label written into B's linking
        field ($i); None skips updating the B records
    :param what_is_b_for_a: relationship label written into A's linking
        field ($i); None skips updating record A
    :param display_in_a / display_in_b: whether the link is visible on
        each side (mapped to indicator "0" for visible, "1" for hidden)
    :param marc_for_a / marc_for_b: MARC field spec to use on each side
        (resolved through _prepare_marc)
    :param upload_mode: 'append' or 'correct'; in 'correct' mode the
        existing linking fields of B are re-emitted alongside the new one
    :param consider_empty_p: when True, also emit empty linking fields
        so that removed links are wiped on upload
    :return: MARCXML string wrapped in a <collection> element
    """
    output = '<collection>'
    record_a = {}
    record_b = {}
    # --- Side A: add one linking field per related record B.
    if what_is_b_for_a is not None:
        marc_tag_for_a, marc_ind1_for_a, marc_ind2_for_a = \
          _prepare_marc(marc_for_a, CFG_OTHER_RELATIONSHIP_ENTRY, display_in_a and "0" or "1")
        record_add_field(record_a, "001", controlfield_value=str(recid_a))
        if upload_mode == 'correct' and not recids_and_rns_b and consider_empty_p:
            # Add empty field in order to account for cases where all
            # linkings are removed by the submitter
            record_add_field(record_a,
                             marc_tag_for_a,
                             ind1=marc_ind1_for_a,
                             ind2=marc_ind2_for_a)
        for recid_b, rn_b in recids_and_rns_b:
            # $i relationship label, $r report number, $w record id.
            record_add_field(record_a,
                             marc_tag_for_a,
                             ind1=marc_ind1_for_a,
                             ind2=marc_ind2_for_a,
                             subfields=[('i', what_is_b_for_a), ('r', rn_b),
                                        ('w', str(recid_b))])
        output += record_xml_output(record_a)

    # --- Side B: add the reciprocal link to each related record.
    if what_is_a_for_b is not None:
        marc_tag_for_b, marc_ind1_for_b, marc_ind2_for_b = \
          _prepare_marc(marc_for_b, CFG_OTHER_RELATIONSHIP_ENTRY, display_in_b and "0" or "1")
        for recid_b, rn_b in recids_and_rns_b:
            record_b = {}
            record_add_field(record_b, "001", controlfield_value=str(recid_b))
            if upload_mode == 'correct':
                # Preserve B's pre-existing linking fields, since a
                # 'correct' upload replaces the whole tag.
                original_linking_fields = _get_record_linking_fields(
                    recid_b, recid_a, marc_tag_for_b, marc_ind1_for_b,
                    marc_ind2_for_b)
                record_add_fields(record_b, marc_tag_for_b,
                                  original_linking_fields)
            record_add_field(record_b,
                             marc_tag_for_b,
                             ind1=marc_ind1_for_b,
                             ind2=marc_ind2_for_b,
                             subfields=[('i', what_is_a_for_b), ('r', rn_a),
                                        ('w', str(recid_a))])
            output += record_xml_output(record_b)
        # Remove linking in remote records where adequate
        if consider_empty_p:
            unlinked_recids = get_unlinked_records(recid_a, marc_for_b,
                                                   display_in_b, upload_mode,
                                                   recids_and_rns_b)
            for recid_b in unlinked_recids:
                record_b = {}
                record_add_field(record_b,
                                 "001",
                                 controlfield_value=str(recid_b))
                original_linking_fields = _get_record_linking_fields(
                    recid_b, recid_a, marc_tag_for_b, marc_ind1_for_b,
                    marc_ind2_for_b)
                if not original_linking_fields:
                    # Add empty field in order to account for cases where all
                    # linkings are removed by the submitter
                    record_add_field(record_b,
                                     marc_tag_for_b,
                                     ind1=marc_ind1_for_b,
                                     ind2=marc_ind2_for_b)
                record_add_fields(record_b, marc_tag_for_b,
                                  original_linking_fields)
                output += record_xml_output(record_b)
    output += '</collection>'
    return output
Example #53
0
def add_basic_fields(rec, form, meta):
    """
    Map the basic submission-form values onto MARC fields of *rec*.

    Each form entry goes to a specific MARC tag; see the www.loc.gov
    MARC documentation, e.g.
    http://www.loc.gov/marc/bibliographic/bd260.html for field 260
    (publication data).  Any exception raised while building the record
    is logged and re-raised.
    """
    # Subfields are passed as lists of (code, value) tuples.
    clean = remove_html_markup
    try:
        # 245: title statement.
        if form.get('title'):
            record_add_field(rec, '245', subfields=[('a', clean(form['title']))])

        # 100: one field per non-blank creator.
        if form.get('creator'):
            for creator in form.getlist('creator'):
                if creator and not creator.isspace():
                    record_add_field(rec, '100', subfields=[('a', clean(creator.strip()))])

        # 980: domain marker.
        if form.get('domain'):
            record_add_field(rec, '980', subfields=[('a', clean(form['domain']))])

        # 260: publication data; publisher falls back to the default
        # from *meta*, so the field is always written.
        publication = [('b', clean(form.get('publisher', meta.publisher_default)))]
        if form.get('publication_date'):
            publication.append(('c', clean(form['publication_date'])))
        record_add_field(rec, '260', subfields=publication)

        # 542: access rights.
        access = 'open' if 'open_access' in form else 'restricted'
        record_add_field(rec, '542', subfields=[('l', access)])

        # 540: licence terms.
        if form.get('licence'):
            record_add_field(rec, '540', subfields=[('a', clean(form['licence']))])
        # 520: description.
        record_add_field(rec, '520', subfields=[('a', clean(form['description']))])

        # 270: contact e-mail.
        if form.get('contact_email'):
            record_add_field(rec, '270', subfields=[('m', clean(form['contact_email']))])

        # 653: one field per non-blank keyword; each form entry may
        # itself be a comma-separated list.
        if form.get('keywords'):
            for chunk in form.getlist('keywords'):
                for keyword in chunk.split(','):
                    if keyword and not keyword.isspace():
                        record_add_field(rec, '653', ind1='1', subfields=[('a', clean(keyword.strip()))])

        # 700: one field per non-blank contributor.
        if form.get('contributors'):
            for contributor in form.getlist('contributors'):
                if contributor and not contributor.isspace():
                    record_add_field(rec, '700', subfields=[('a', clean(contributor.strip()))])

        # 546: language, with a default from *meta*.
        record_add_field(rec, '546', subfields=[('a', clean(
                            form.get('language', meta.language_default)))])

        # 337: resource types; for the 'Linguistics' domain all
        # ling_resource_type values are resource types too.
        if form.get('resource_type'):
            for rtype in form.getlist('resource_type'):
                record_add_field(rec, '337', subfields=[('a', clean(rtype))])
        if form.get('ling_resource_type'):
            for rtype in form.getlist('ling_resource_type'):
                record_add_field(rec, '337', subfields=[('a', clean(rtype))])

        # 024: alternate identifier (e.g. DOI or handle).
        if form.get('alternate_identifier'):
            record_add_field(rec, '024', subfields=[('a', clean(form['alternate_identifier']))])

        # 250: version/edition statement.
        if form.get('version'):
            record_add_field(rec, '250', subfields=[('a', clean(form['version']))])

        # 526: one field per discipline.
        if form.get('discipline'):
            for subject in form.getlist('discipline'):
                record_add_field(rec, '526', subfields=[('a', clean(subject))])

        # 264: production notice (site name + UTC timestamp).
        CFG_SITE_NAME = current_app.config.get("CFG_SITE_NAME")
        record_add_field(rec, '264',
                         subfields=[('b', CFG_SITE_NAME),
                                    ('c', str(datetime.utcnow()) + " UTC")])
    except Exception as e:
        current_app.logger.error(e)
        raise
Example #54
0
def main():
    """Migrate resource-type values into MARC field 337, record by record.

    Walks record ids 1, 2, 3, ... until a missing record stops the loop.
    For each record it collects resource types from 980 and 690 into a
    new 337 list and, when 337 would change, rebuilds 337/980/690,
    prints an old/new diff and asks interactively (Python 2 `raw_input`)
    before uploading the modified record in 'replace' mode.

    Relies on a module-level `TYPES` collection of recognized resource
    types and on `itertools`/`pprint` imported elsewhere in this file.
    """
    # Imported for side effects (editor model/view registration).
    import invenio.modules.editor.models
    import invenio.modules.editor.views

    from invenio.legacy.search_engine import get_record
    from invenio.legacy.bibrecord import (
        record_delete_field,
        record_add_field,
    )
    from invenio.legacy.bibupload.engine import (
        bibupload, )

    for a in itertools.count(1):
        # Two independent copies: `old_rec` stays untouched for the
        # before/after diff below, `rec` is mutated in place.
        old_rec = get_record(a)
        rec = get_record(a)

        # A falsy result means the id does not exist; stop there.
        # NOTE(review): assumes record ids are contiguous from 1 — any
        # gap in the id sequence ends the run early. Confirm.
        if not rec:
            break

        print('Processing record: {0}'.format(a))

        # Each field instance tuple's first item is its subfield list;
        # keep just those lists for comparison/rebuilding.
        old_337 = [f[0] for f in rec.get('337', [])]
        new_337 = old_337[:]
        new_690 = []
        new_980 = []
        # Split 980 subfields: 'a' subfields holding a recognized type
        # move to 337 (as single-subfield fields); everything else is
        # preserved for the rebuilt 980.
        for f in rec.get('980', []):
            for sf in f[0]:
                if sf[0] == 'a' and sf[1] in TYPES:
                    # NOTE(review): `[sf]` (one-subfield list) is
                    # compared against entries of new_337 that may hold
                    # multiple subfields — duplicates are only detected
                    # for single-subfield 337 fields. Presumed intended.
                    if [sf] not in new_337:
                        new_337.append([sf])
                else:
                    if [sf] not in new_980:
                        new_980.append([sf])

        # 690 fields shaped ('a', 'ling_resource_type') carry the actual
        # value in their second subfield; promote recognized values to
        # 337 and keep unrelated 690 fields unchanged.
        for f in rec.get('690', []):
            sfs = f[0]
            if sfs[0][0] == 'a' and sfs[0][1] == 'ling_resource_type':
                res_type = sfs[1][1]
                if res_type in TYPES:
                    if [('a', res_type)] not in new_337:
                        new_337.append([('a', res_type)])
                else:
                    print("Unrecognized 'ling_resource_type' value! '{0}'".
                          format(res_type))
            else:
                if sfs not in new_690:
                    new_690.append(sfs)

        # NOTE(review): only a difference in 337 triggers the rewrite;
        # a record whose 690 alone would change is left untouched —
        # verify this is intentional.
        if not new_337 == old_337:
            # Drop the old fields entirely, then re-add from the
            # collected lists (order: 337, 980, 690).
            record_delete_field(rec, '337')
            record_delete_field(rec, '980')
            record_delete_field(rec, '690')
            for f in new_337:
                record_add_field(rec, '337', subfields=f)
            for f in new_980:
                record_add_field(rec, '980', subfields=f)
            for f in new_690:
                record_add_field(rec, '690', subfields=f)

            # Human-readable diff of every touched tag.
            print('\nOld 337:')
            pprint(old_rec.get('337'))
            print('New 337:')
            pprint(rec.get('337'))

            print('\nOld 690:')
            pprint(old_rec.get('690'))
            print('New 690:')
            pprint(rec.get('690'))

            print('\nOld 980:')
            pprint(old_rec.get('980'))
            print('New 980:')
            pprint(rec.get('980'))
            # Interactive gate before writing back; 'replace' overwrites
            # the stored record wholesale.
            if raw_input('Bibupload (y/n)? ') == 'y':
                bibupload(rec, 'replace')