def test_check_altered(self):
     """bibmatch - check altered match"""
     from invenio.legacy.bibrecord import record_has_field

     records = create_records(self.recxml4)
     # Before matching, the input record must not carry field 001.
     self.assertFalse(record_has_field(records[0][0], '001'))
     # Only the second returned list is inspected; the other three
     # results are unpacked to keep the arity check but are otherwise
     # unused.
     dummy1, matchedrecs, dummy3, dummy4 = match_records(records,
                                                         modify=1,
                                                         verbose=0)
     # With modify=1 the matched record is expected to have gained 001.
     self.assertTrue(record_has_field(matchedrecs[0][0], '001'))
 def test_check_altered(self):
     """bibmatch - check altered match"""
     from invenio.legacy.bibrecord import record_has_field

     records = create_records(self.recxml4)
     # The record under test starts out without a 001 field.
     self.assertFalse(record_has_field(records[0][0], '001'))
     # match_records returns four values; only the second one (the
     # matched records) is examined by this test.
     dummy1, matchedrecs, dummy3, dummy4 = match_records(records,
                                                         modify=1,
                                                         verbose=0)
     # After matching with modify=1 the 001 field must be present.
     self.assertTrue(record_has_field(matchedrecs[0][0], '001'))
Example #3
0
def _record_has_id_p(record, recid, rec_oaiid, rec_sysno):
    """Tell whether the record carries any of the supplied identifiers.

    Three lookups are tried in order and the first hit wins:
    the value of field 001 against ``str(recid)``, the value addressed by
    OAIID_TAG against ``rec_oaiid``, and the value addressed by SYSNO_TAG
    against ``rec_sysno``.  Returns True on the first match, else False.
    """
    # Field 001 is compared as a string, so recid may be an int.
    if (record_has_field(record, "001")
            and record_get_field_value(record, "001", "%", "%") == str(recid)):
        return True

    # OAIID_TAG / SYSNO_TAG are six-character specs: the first three
    # characters are the tag, the remaining three are passed on one by one
    # (presumably ind1, ind2 and subfield code -- confirm against
    # record_get_field_value).
    oai_tag = OAIID_TAG[0:3]
    if record_has_field(record, oai_tag):
        oai_value = record_get_field_value(
            record, oai_tag, OAIID_TAG[3], OAIID_TAG[4], OAIID_TAG[5])
        if oai_value == rec_oaiid:
            return True

    sysno_tag = SYSNO_TAG[0:3]
    if record_has_field(record, sysno_tag):
        sysno_value = record_get_field_value(
            record, sysno_tag, SYSNO_TAG[3], SYSNO_TAG[4], SYSNO_TAG[5])
        if sysno_value == rec_sysno:
            return True

    return False
Example #4
0
def _record_has_id_p(record, recid, rec_oaiid, rec_sysno):
    """Return True if the record matches at least one of the given IDs.

    The checks run in this order and stop at the first success:
    field 001 equals ``str(recid)``; the value located through OAIID_TAG
    equals ``rec_oaiid``; the value located through SYSNO_TAG equals
    ``rec_sysno``.  Each value is only fetched when the corresponding
    field is present in the record.
    """
    # One short-circuiting expression: every `and` guards the value lookup
    # behind the presence test, every `or` falls through to the next ID.
    return bool(
        (record_has_field(record, '001')
         and record_get_field_value(record, '001', '%', '%') == str(recid))
        or
        (record_has_field(record, OAIID_TAG[0:3])
         and record_get_field_value(record, OAIID_TAG[0:3], OAIID_TAG[3],
                                    OAIID_TAG[4], OAIID_TAG[5]) == rec_oaiid)
        or
        (record_has_field(record, SYSNO_TAG[0:3])
         and record_get_field_value(record, SYSNO_TAG[0:3], SYSNO_TAG[3],
                                    SYSNO_TAG[4], SYSNO_TAG[5]) == rec_sysno)
    )
Example #5
0
def update_references(recid, overwrite=True):
    """Update references for a record

    First, we extract references from a record.
    Then, we are not updating the record directly but adding a bibupload
    task in -c mode which takes care of updating the record.

    Parameters:
    * recid: the id of the record
    * overwrite: when false, a record that already has a 999 field is
                 left alone and RecordHasReferences is raised instead

    Raises:
    * RecordHasReferences: if overwrite is disabled and the record already
                           has a 999 field, or (regardless of overwrite)
                           if the record has any 999C59 value, which is
                           reported as "curated"
    """

    if not overwrite:
        # Check for references in record
        record = get_record(recid)
        if record and record_has_field(record, '999'):
            raise RecordHasReferences('Record has references and overwrite '
                                      'mode is disabled: %s' % recid)

    if get_fieldvalues(recid, '999C59'):
        raise RecordHasReferences('Record has been curated: %s' % recid)

    # Parse references
    references_xml = extract_references_from_record_xml(recid)

    # Save new record to file.  The file is intentionally not deleted
    # here: its path is handed to the bibupload task below.
    temp_fd, temp_path = mkstemp(prefix=CFG_REFEXTRACT_FILENAME,
                                 dir=CFG_TMPSHAREDDIR)
    # The with-block guarantees the descriptor is closed even when the
    # write fails.
    with os.fdopen(temp_fd, 'w') as temp_file:
        temp_file.write(references_xml)

    # Update record
    task_low_level_submission('bibupload', 'refextract', '-P', '4',
                              '-c', temp_path)
Example #6
0
def merge_record_with_template(rec, template_name, is_hp_record=False):
    """Fill the record rec with content taken from a named template.

    Tags missing from rec are copied over from the template in full.  For
    tags present in both, every field of rec receives the subfield codes
    it lacks, using the first template value for each code.  Subfields of
    rec are reordered at the end.

    Returns rec (modified in place), or None when no template is found
    under template_name.
    """
    template_xml = get_record_template(template_name)
    if not template_xml:
        return None
    template_rec = create_record(template_xml)[0]

    # Holding pen records get an all-volatile template.
    if is_hp_record:
        record_make_all_subfields_volatile(template_rec)

    for tag in template_rec:
        if not record_has_field(rec, tag):
            # Tag unknown to rec: take every template instance as is.
            for tmpl_field in template_rec[tag]:
                record_add_field(rec, tag, tmpl_field[1], tmpl_field[2],
                                 subfields=tmpl_field[0])
            continue

        # Tag known to rec: only top up the subfield codes that are absent.
        for tmpl_field in template_rec[tag]:
            tmpl_codes = field_get_subfield_codes(tmpl_field)
            for rec_field in rec[tag]:
                present_codes = field_get_subfield_codes(rec_field)
                for code in tmpl_codes:
                    if code in present_codes:
                        continue
                    first_value = field_get_subfield_values(tmpl_field,
                                                            code)[0]
                    field_add_subfield(rec_field, code, first_value)

    record_order_subfields(rec)
    return rec
Example #7
0
def merge_record_with_template(rec, template_name, is_hp_record=False):
    """Extend rec with whatever the named template has and rec lacks.

    A tag that rec does not have is added with all of the template's
    field instances.  A tag that rec already has is completed subfield by
    subfield: each of its fields gets the template's subfield codes it is
    missing.  Finally the subfields of rec are put in order.

    Returns the (in place modified) rec; returns None if the template
    lookup yields nothing.
    """
    def _fill_missing_subfields(target_field, source_field, source_codes):
        # Add to target_field every code of source_codes it does not have
        # yet, taking the first value the source field holds for it.
        existing_codes = field_get_subfield_codes(target_field)
        for code in source_codes:
            if code not in existing_codes:
                field_add_subfield(
                    target_field, code,
                    field_get_subfield_values(source_field, code)[0])

    template = get_record_template(template_name)
    if not template:
        return
    template_bibrec = create_record(template)[0]

    # For a holding pen record all template subfields are made volatile.
    if is_hp_record:
        record_make_all_subfields_volatile(template_bibrec)

    for field_tag in template_bibrec:
        if record_has_field(rec, field_tag):
            for source_field in template_bibrec[field_tag]:
                source_codes = field_get_subfield_codes(source_field)
                for target_field in rec[field_tag]:
                    _fill_missing_subfields(target_field, source_field,
                                            source_codes)
        else:
            for source_field in template_bibrec[field_tag]:
                record_add_field(rec, field_tag, source_field[1],
                                 source_field[2], subfields=source_field[0])

    record_order_subfields(rec)
    return rec
Example #8
0
def merge_record_with_template(rec, template_name):
    """Complete the record rec from a template and hand it back.

    Tags present only in the template are copied into rec.  For tags
    present in both, each field of rec gains the subfield codes that the
    template field has and it does not, with the template's first value
    for that code.

    Returns rec (changed in place), or None when get_record_template
    returns nothing for template_name.
    """
    raw_template = get_record_template(template_name)
    if not raw_template:
        return None
    parsed_template = create_record(raw_template)[0]

    for tag in parsed_template:
        if record_has_field(rec, tag):
            # Shared tag: add only what each existing field is missing.
            for donor in parsed_template[tag]:
                donor_codes = field_get_subfield_codes(donor)
                for receiver in rec[tag]:
                    receiver_codes = field_get_subfield_codes(receiver)
                    for code in donor_codes:
                        if code in receiver_codes:
                            continue
                        donor_value = field_get_subfield_values(donor,
                                                                code)[0]
                        field_add_subfield(receiver, code, donor_value)
        else:
            # New tag: bring over every instance from the template.
            for donor in parsed_template[tag]:
                record_add_field(rec, tag, donor[1], donor[2],
                                 subfields=donor[0])
    return rec
Example #9
0
def add_field(rec1, rec2, fnum, findex1, findex2):
    """Copy one field of rec2 into rec1 at a position derived from the
    diff between the two records.
    @param rec1: First record (a record dictionary structure)
    @param rec2: Second record (a record dictionary structure)
    @param fnum: a 3 characters long string indicating field tag number
    @param findex1: the rec1 field position in the group of fields it
        belongs, or None when there is no counterpart in rec1
    @param findex2: the rec2 field position in the group of fields it belongs
    """
    new_field = rec2[fnum][findex2]

    if findex1 is not None:
        # An explicit rec1 position is given: pass it on as the insert
        # position.
        record_add_fields(rec1, fnum, [new_field], findex1)
    elif not record_has_field(rec1, fnum):
        # rec1 has no field with this tag yet, so no position is needed.
        record_add_fields(rec1, fnum, [new_field])
    else:
        # No position given but the tag exists: derive the position from
        # the diff.  Element 1 of the diff result is the per-indicator list
        # of diffs, which is flattened into a single list of matches.
        indicator_diffs = record_field_diff(rec1[fnum], rec2[fnum], fnum,
                                            match_subfields)[1]
        matches = _combine_diffs(indicator_diffs)

        # Walk the matches up to the one for findex2, remembering the last
        # rec1 index seen on the way; the new field goes right after it.
        last_rec1_index = -1
        for match in matches:
            if match[1] == findex2:
                break
            if match[0] is not None:
                last_rec1_index = match[0]
        record_add_fields(rec1, fnum, [new_field], last_rec1_index + 1)
Example #10
0
def add_field(rec1, rec2, fnum, findex1, findex2):
    """Insert the rec2 field at findex2 into rec1, choosing the position
    from the diff of rec1 against rec2.
    @param rec1: First record (a record dictionary structure)
    @param rec2: Second record (a record dictionary structure)
    @param fnum: a 3 characters long string indicating field tag number
    @param findex1: the rec1 field position in the group of fields it
        belongs (None if rec1 has no matching field)
    @param findex2: the rec2 field position in the group of fields it belongs
    """
    incoming = rec2[fnum][findex2]

    # Case 1: the caller names a rec1 position; use it as insert position.
    if findex1 is not None:
        record_add_fields(rec1, fnum, [incoming], findex1)
        return

    # Case 2: the tag is absent from rec1; just add the field.
    if not record_has_field(rec1, fnum):
        record_add_fields(rec1, fnum, [incoming])
        return

    # Case 3: the tag exists but no position was given.  Take the list of
    # diffs by indicators (element 1 of the result) and merge it into one
    # list of matches.
    diff_result = record_field_diff(rec1[fnum], rec2[fnum], fnum,
                                    match_subfields)
    merged_diff = _combine_diffs(diff_result[1])

    # The insert position is one past the last rec1 index that occurs
    # before the match of findex2; 0 when there is none.
    position = 0
    for entry in merged_diff:
        if entry[1] == findex2:
            break
        if entry[0] is not None:
            position = entry[0] + 1
    record_add_fields(rec1, fnum, [incoming], position)
Example #11
0
def merge_field_group(rec1,
                      rec2,
                      fnum,
                      ind1='',
                      ind2='',
                      merge_conflicting_fields=False):
    """Merges the fields of 'rec2' into 'rec1' for a specific tag.

    rec1 is modified in place; nothing is returned. When both indicators
    are empty the merge covers every field of the tag, otherwise only the
    fields with the (ind1, ind2) indicator pair.

    @param rec1: First record (a record dictionary structure)
    @param rec2: Second record (a record dictionary structure)
    @param fnum: a 3 characters long string indicating field tag number
    @param ind1: a 1 character long string
    @param ind2: a 1 character long string
    @param merge_conflicting_fields: if false, a field of rec2 is only taken
        where the diff pairs it with no field of rec1; if true, every field
        of rec2 is taken and the paired rec1 field is kept next to it when
        the two differ
    @raise Exception: if a specific indicator pair is requested but does
        not appear in the diff result
    """
    ### Check if merging goes for all indicators and set a boolean
    merging_all_indicators = not ind1 and not ind2

    ### check if there is no field in rec2 to be merged in rec1
    if not record_has_field(rec2, fnum):
        return

    ### get fields of rec2
    if merging_all_indicators:
        fields2 = record_get_field_instances(rec2, fnum, '%', '%')
    else:
        fields2 = record_get_field_instances(rec2, fnum, ind1, ind2)
    if not fields2:
        return

    ### check if field in rec1 doesn't even exist
    if not record_has_field(rec1, fnum):
        record_add_fields(rec1, fnum, fields2)
        return

    ### compare the fields, get diffs for given indicators
    alldiffs = record_field_diff(rec1[fnum], rec2[fnum], fnum,
                                 match_subfields, ind1, ind2)

    ### check if fields are the same
    if alldiffs is None:
        return  # nothing to merge

    ### find the diffing for the fields of the given indicators
    # keep only the list of diffs by indicators (without the 'c')
    diffs_by_indicators = alldiffs[1]

    if merging_all_indicators:
        # combine the diffs for each indicator to one list
        diff = _combine_diffs(diffs_by_indicators)
    else:
        # look for the requested indicator pair in the diff result and
        # keep only its list of diffs (without the indicator tuple)
        for indicator_diff in diffs_by_indicators:
            if indicator_diff[0] == (ind1, ind2):
                diff = indicator_diff[1]
                break
        else:
            raise Exception("Indicators not in diff result.")

    ### proceed to merging fields in a new field list
    fields1, fields2 = rec1[fnum], rec2[fnum]
    new_fields = []
    if not merge_conflicting_fields:  # merge non-conflicting fields
        for m in diff:  # for every match of fields in the diff
            if m[0] is not None:  # if rec1 has a field in the diff, keep it
                new_fields.append(deepcopy(fields1[m[0]]))
            else:  # else take the field from rec2
                new_fields.append(deepcopy(fields2[m[1]]))
    else:  # merge all fields
        for m in diff:  # for every match of fields in the diff
            if m[1] is not None:  # if rec2 has a field, add it
                new_fields.append(deepcopy(fields2[m[1]]))
                # element 0 of each field is compared; if it differs, the
                # field of rec1 is kept as well, right after the rec2 one
                if m[0] is not None and fields1[m[0]][0] != fields2[m[1]][0]:
                    new_fields.append(deepcopy(fields1[m[0]]))
            else:
                new_fields.append(deepcopy(fields1[m[0]]))

    ### delete existing fields
    record_delete_field(rec1, fnum, ind1, ind2)

    ### find where the new_fields should be inserted in rec1 (insert_index)
    if merging_all_indicators:
        insert_index = 0
    else:
        ind_pair = (ind1, ind2)
        # the tag may be gone from rec1 after the deletion, hence .get()
        first_last_dict = _first_and_last_index_for_each_indicator(
            rec1.get(fnum, []))
        # find the indicator pair which is just before the one which will
        # be inserted; sorted() works on both lists and dict views
        ind_pair_before = None
        for pair in sorted(first_last_dict):
            if pair > ind_pair:
                break
            ind_pair_before = pair
        if ind_pair_before is None:
            # no smaller indicator pair exists: insert at the beginning
            insert_index = 0
        else:
            # insert after the last field index of the previous pair
            insert_index = first_last_dict[ind_pair_before][1] + 1

    ### add the new (merged) fields in correct 'in_field_index' position
    record_add_fields(rec1, fnum, new_fields, insert_index)
Example #12
0
def merge_field_group(rec1, rec2, fnum, ind1="", ind2="", merge_conflicting_fields=False):
    """Merges the fields of 'rec2' into 'rec1' for a specific tag.

    The merge happens in place on rec1 and the function returns nothing.
    With both indicators empty, all fields of the tag take part; otherwise
    only those carrying the (ind1, ind2) indicator pair.

    @param rec1: First record (a record dictionary structure)
    @param rec2: Second record (a record dictionary structure)
    @param fnum: a 3 characters long string indicating field tag number
    @param ind1: a 1 character long string
    @param ind2: a 1 character long string
    @param merge_conflicting_fields: if false, rec2 contributes a field only
        where the diff has no rec1 field to pair it with; if true, all rec2
        fields are taken and a paired rec1 field is retained after it when
        the two are not the same
    @raise Exception: if the requested indicator pair is not part of the
        diff result
    """
    ### Check if merging goes for all indicators and set a boolean
    merging_all_indicators = not ind1 and not ind2

    ### check if there is no field in rec2 to be merged in rec1
    if not record_has_field(rec2, fnum):
        return

    ### get fields of rec2
    if merging_all_indicators:
        fields2 = record_get_field_instances(rec2, fnum, "%", "%")
    else:
        fields2 = record_get_field_instances(rec2, fnum, ind1, ind2)
    if not fields2:
        return

    ### check if field in rec1 doesn't even exist
    if not record_has_field(rec1, fnum):
        record_add_fields(rec1, fnum, fields2)
        return

    ### compare the fields, get diffs for given indicators
    alldiffs = record_field_diff(rec1[fnum], rec2[fnum], fnum, match_subfields, ind1, ind2)

    ### check if fields are the same
    if alldiffs is None:
        return  # nothing to merge

    ### find the diffing for the fields of the given indicators
    # keep only the list of diffs by indicators (without the 'c')
    per_indicator_diffs = alldiffs[1]

    if merging_all_indicators:
        # combine the diffs for each indicator to one list
        diff = _combine_diffs(per_indicator_diffs)
    else:
        # diffing for one indicator: pick the entry of the requested pair
        # and keep only its list of diffs (without the indicator tuple)
        for entry in per_indicator_diffs:
            if entry[0] == (ind1, ind2):
                diff = entry[1]
                break
        else:
            raise Exception("Indicators not in diff result.")

    ### proceed to merging fields in a new field list
    fields1, fields2 = rec1[fnum], rec2[fnum]
    new_fields = []
    if not merge_conflicting_fields:  # merge non-conflicting fields
        for m in diff:  # for every match of fields in the diff
            if m[0] is not None:  # if rec1 has a field in the diff, keep it
                new_fields.append(deepcopy(fields1[m[0]]))
            else:  # else take the field from rec2
                new_fields.append(deepcopy(fields2[m[1]]))
    else:  # merge all fields
        for m in diff:  # for every match of fields in the diff
            if m[1] is not None:  # if rec2 has a field, add it
                new_fields.append(deepcopy(fields2[m[1]]))
                # the fields are compared on their element 0; when that
                # differs the rec1 field is added too, after the rec2 one
                if m[0] is not None and fields1[m[0]][0] != fields2[m[1]][0]:
                    new_fields.append(deepcopy(fields1[m[0]]))
            else:
                new_fields.append(deepcopy(fields1[m[0]]))

    ### delete existing fields
    record_delete_field(rec1, fnum, ind1, ind2)

    ### find where the new_fields should be inserted in rec1 (insert_index)
    if merging_all_indicators:
        insert_index = 0
    else:
        ind_pair = (ind1, ind2)
        # rec1 may have lost the tag entirely in the deletion above
        first_last_dict = _first_and_last_index_for_each_indicator(rec1.get(fnum, []))
        # find the indicator pair which is just before the one which will
        # be inserted; sorted() avoids calling .sort() on a dict view
        ind_pair_before = None
        for pair in sorted(first_last_dict):
            if pair > ind_pair:
                break
            ind_pair_before = pair
        if ind_pair_before is None:
            # no smaller indicator pair exists: insert at the beginning
            insert_index = 0
        else:
            # insert after the last field index of the previous pair
            insert_index = first_last_dict[ind_pair_before][1] + 1

    ### add the new (merged) fields in correct 'in_field_index' position
    record_add_fields(rec1, fnum, new_fields, insert_index)