def delete_field(rec, fnum, findex):
    """Delete one field instance of a record.

    Forwards to record_delete_field, handing 'findex' over as the
    'field_position_local' keyword argument.

    @param rec: a record dictionary structure
    @param fnum: a 3 characters long string indicating field tag number
    @param findex: the rec field position in the group of fields it belongs
    """
    local_position = findex
    record_delete_field(rec, fnum, field_position_local=local_position)
def process_record(self, record):
    """@see: BaseFieldCommand.process_record

    Without a condition, every field matching the command's tag and
    indicators is deleted and one modification is counted. With a
    condition, the work (including the modification counting) is left to
    _delete_field_condition.
    """
    if not self._condition:
        bibrecord.record_delete_field(record, self._tag,
                                      self._ind1, self._ind2)
        self._modifications += 1
        return
    self._delete_field_condition(record)
def compare_references(test, a, b):
    """Assert that two MARCXML records are equal once 'a' is normalized.

    Both inputs are parsed with create_record; the 999 C 6 fields are
    removed from 'a' only (the Invenio refextract signature), then both
    records are serialized back to XML and compared.

    @param test: test case object providing assertEqual
    @param a: MARCXML of the first record (gets the 999 C 6 fields removed)
    @param b: MARCXML of the second record (compared as parsed)
    """
    from invenio.legacy.bibrecord import (
        create_record,
        record_delete_field,
        record_xml_output,
    )

    parsed_a = create_record(a)[0]
    parsed_b = create_record(b)[0]

    # Normalize: drop the Invenio refextract signature from the first record
    record_delete_field(parsed_a, '999', 'C', '6')

    xml_a = record_xml_output(parsed_a)
    xml_b = record_xml_output(parsed_b)
    test.assertEqual(xml_a, xml_b)
def _filter_fields(self, record, output_fields): """Removes from the record all the fields that are not output_fields. @param record: record structure (@see: bibrecord.py for details) @param output_fields: list of fields that should remain in the record @return: record containing only fields among output_fields """ # Tibor's new implementation: for tag in record.keys(): if tag not in output_fields: bibrecord.record_delete_fields(record, tag) return record # Rado's old implementation that leads to bibrecord-related # bug, see <https://savannah.cern.ch/task/?10267>: record_keys = record.keys() # Check if any of the tags, fields or subfields match # any value in output_fields. In case of match we leave # the element and its children in the record. # # If the element and all its children are not among the # output fields, it is deleted for tag in record_keys: tag = tag.lower() if tag not in output_fields: for (subfields, ind1, ind2, value, field_number) in record[tag]: current_field = tag + ind1.strip() + ind2.strip() current_field = current_field.lower() if current_field not in output_fields: delete_parents = True for (code, value) in subfields: current_subfield = current_field + code current_subfield = current_subfield.lower() if current_subfield not in output_fields: bibrecord.record_delete_subfield( record, tag, code, ind1, ind2) else: delete_parents = False if delete_parents: bibrecord.record_delete_field( record, tag, ind1, ind2) return record
def main():
    """Interactively rewrite the 'language_code' value of 690 fields.

    For every record id in RECORDS, each 690 field whose first subfield is
    ('a', 'language_code') and whose second subfield has code 'b' gets the
    value of that second subfield replaced by VALUES.get(<record id>),
    provided such a replacement value exists. As in the original script,
    a rewritten field keeps only its first two subfields.

    When at least one 690 field changed, the old and new 690 fields are
    printed and the operator is asked whether to upload; on 'y' the record
    is uploaded in 'delete' mode and, five seconds later, in 'replace'
    mode. Records without changes are skipped without a prompt.

    Processing stops at the first record id for which get_record returns
    nothing.
    """
    from invenio.legacy.search_engine import get_record
    from invenio.legacy.bibupload.engine import (
        bibupload,
    )
    from invenio.legacy.bibrecord import (
        record_add_field,
        record_delete_field,
    )

    # Loop through list of records
    for recid in RECORDS:
        # Fetched twice so that 'old_rec' stays untouched for the report
        old_rec = get_record(recid)
        rec = get_record(recid)

        if not rec:
            break

        print('Processing record: {0}'.format(recid))

        replacement = VALUES.get(recid)
        old_690 = [field[0] for field in rec.get('690', [])]
        new_690 = []
        for subfields in old_690:
            # Fields with fewer than two subfields cannot hold the
            # (a=language_code, b=<value>) pair: keep them unchanged
            # instead of failing with IndexError.
            is_language_code = (
                replacement
                and len(subfields) >= 2
                and subfields[0][0] == 'a'
                and subfields[0][1] == 'language_code'
                and subfields[1][0] == 'b'
            )
            if is_language_code:
                new_690.append(
                    [subfields[0], (subfields[1][0], replacement)])
            else:
                new_690.append(subfields)

        if new_690 == old_690:
            # Nothing to change for this record
            continue

        record_delete_field(rec, '690')
        for subfields in new_690:
            record_add_field(rec, '690', subfields=subfields)

        print('\nOld 690:')
        pprint(old_rec.get('690'))
        print('\nNew 690:')
        pprint(rec.get('690'))

        if raw_input('Bibupload (y/n)? ') == 'y':
            bibupload(rec, 'delete')
            sleep(5)
            bibupload(rec, 'replace')
def _filter_fields(self, record, output_fields): """Removes from the record all the fields that are not output_fields. @param record: record structure (@see: bibrecord.py for details) @param output_fields: list of fields that should remain in the record @return: record containing only fields among output_fields """ # Tibor's new implementation: for tag in record.keys(): if tag not in output_fields: bibrecord.record_delete_fields(record, tag) return record # Rado's old implementation that leads to bibrecord-related # bug, see <https://savannah.cern.ch/task/?10267>: record_keys = record.keys() # Check if any of the tags, fields or subfields match # any value in output_fields. In case of match we leave # the element and its children in the record. # # If the element and all its children are not among the # output fields, it is deleted for tag in record_keys: tag = tag.lower() if tag not in output_fields: for (subfields, ind1, ind2, value, field_number) in record[tag]: current_field = tag + ind1.strip() + ind2.strip() current_field = current_field.lower() if current_field not in output_fields: delete_parents = True for (code, value) in subfields: current_subfield = current_field + code current_subfield = current_subfield.lower() if current_subfield not in output_fields: bibrecord.record_delete_subfield(record, tag, code, ind1, ind2) else: delete_parents = False if delete_parents: bibrecord.record_delete_field(record, tag, ind1, ind2) return record
def _delete_field_condition(self, record): """Checks if a subfield meets the condition for the field to be deleted """ try: for field in record[self._tag]: subfield_exists = False for subfield in field[0]: if subfield[0] == self._conditionSubfield: subfield_exists = True if self._condition_does_not_exist == True: break if self._condition_exact_match: if self._condition == subfield[1]: bibrecord.record_delete_field(record, self._tag, self._ind1, self._ind2, field_position_global=field[4]) self._modifications += 1 break else: if self._condition in subfield[1]: bibrecord.record_delete_field(record, self._tag, self._ind1, self._ind2, field_position_global=field[4]) self._modifications += 1 break if subfield_exists == False and self._condition_does_not_exist: bibrecord.record_delete_field(record, self._tag, self._ind1, self._ind2, field_position_global=field[4]) self._modifications += 1 except KeyError: pass
def compare_records(self, record1, record2, opt_mode=None):
    """
    Compares two records to identify added/modified/deleted tags.
    The records are either the upload record or existing record or
    record archived.

    @param record1: first record; tags found only here are 'added'
    @param record2: second record; tags found only here are 'deleted'
    @param opt_mode: upload mode; deleted tags are only reported for
        'replace' and 'delete'

    @return: dictionary holding the non-empty patches under the keys
        'MOD' (result of self.find_modified_tags on the common tags),
        'ADD' (tag -> fields of record1) and 'DEL' (tag -> delete
        marker fields)
    """
    # Group the tags of both records as common / added / deleted
    common_tags = [tag for tag in record1 if tag in record2]
    added_tags = [tag for tag in record1 if tag not in record2]
    deleted_tags = [tag for tag in record2 if tag not in record1]

    mod_patch = {}
    if common_tags:
        mod_patch = self.find_modified_tags(common_tags, record1, record2)

    add_patch = dict((tag, record1[tag]) for tag in added_tags)

    # Tags missing from record1 only count as deletions in 'replace' or
    # 'delete' mode. (Original note: a record uploaded in 'correct' mode
    # should already carry its fields marked with the '0' code.)
    del_patch = {}
    if opt_mode in ('replace', 'delete'):
        del_patch = dict((tag, record2[tag]) for tag in deleted_tags)

    # returning back a result dictionary with all available patches
    result = {}
    if mod_patch:
        result['MOD'] = mod_patch
    if add_patch:
        result['ADD'] = add_patch
    if del_patch:
        # For every field of a deleted tag, replace the field by a marker
        # field carrying the same indicators and the single subfield
        # (CFG_BIBUPLOAD_DELETE_CODE, CFG_BIBUPLOAD_DELETE_VALUE).
        # NOTE Indicators taken into consideration while deleting fields
        # Snapshots are iterated because the patch is modified on the way.
        for tag in list(del_patch):
            for data_tuple in list(del_patch[tag]):
                ind1 = data_tuple[1]
                ind2 = data_tuple[2]
                record_delete_field(del_patch, tag, ind1, ind2)
                record_add_field(
                    del_patch, tag, ind1, ind2, "",
                    [(CFG_BIBUPLOAD_DELETE_CODE,
                      CFG_BIBUPLOAD_DELETE_VALUE)])
        result['DEL'] = del_patch

    return result
def merge_field_group(rec1, rec2, fnum, ind1='', ind2='', merge_conflicting_fields=False):
    """Merges the fields of a specific tag from 'rec2' into 'rec1'.

    'rec1' is modified in place. For every pair of fields matched by
    record_field_diff:
      - by default the field of rec1 is kept when there is one, otherwise
        the field of rec2 is taken;
      - with merge_conflicting_fields the field of rec2 is taken when
        there is one, followed by the field of rec1 if its subfields
        differ; otherwise the field of rec1 is kept.

    @param rec1: First record (a record dictionary structure)
    @param rec2: Second record (a record dictionary structure)
    @param fnum: a 3 characters long string indicating field tag number
    @param ind1: a 1 character long string
    @param ind2: a 1 character long string
    @param merge_conflicting_fields: whether to merge conflicting fields or not
    @raise Exception: if the given indicator pair is missing from the
        diff result
    """
    ### Check if merging goes for all indicators and set a boolean
    merging_all_indicators = not ind1 and not ind2

    ### check if there is no field in rec2 to be merged in rec1
    if not record_has_field(rec2, fnum):
        return

    ### get fields of rec2
    if merging_all_indicators:
        fields2 = record_get_field_instances(rec2, fnum, '%', '%')
    else:
        fields2 = record_get_field_instances(rec2, fnum, ind1, ind2)
    if not fields2:
        return

    ### check if field in rec1 doesn't even exist
    if not record_has_field(rec1, fnum):
        record_add_fields(rec1, fnum, fields2)
        return

    ### compare the fields, get diffs for given indicators
    alldiffs = record_field_diff(rec1[fnum], rec2[fnum], fnum,
                                 match_subfields, ind1, ind2)

    ### check if fields are the same
    if alldiffs is None:
        return  # nothing to merge

    ### find the diffing for the fields of the given indicators
    # keep only the list of diffs by indicators (without the 'c')
    alldiffs = alldiffs[1]
    if merging_all_indicators:
        # combine the diffs for each indicator to one list
        diff = _combine_diffs(alldiffs)
    else:
        # diffing for one indicator: look for the indicator pair
        for entry in alldiffs:
            if entry[0] == (ind1, ind2):
                # keep only the list of diffs (without the indicator tuple)
                diff = entry[1]
                break
        else:
            raise Exception("Indicators not in diff result.")

    ### proceed to merging fields in a new field list
    fields1, fields2 = rec1[fnum], rec2[fnum]
    new_fields = []
    if not merge_conflicting_fields:
        # merge non-conflicting fields
        for m in diff:
            if m[0] is not None:
                # rec1 has a field in this match, keep it
                new_fields.append(deepcopy(fields1[m[0]]))
            else:
                # else take the field from rec2
                new_fields.append(deepcopy(fields2[m[1]]))
    else:
        # merge all fields
        for m in diff:
            if m[1] is not None:
                # rec2 has a field, add it
                new_fields.append(deepcopy(fields2[m[1]]))
                if m[0] is not None and fields1[m[0]][0] != fields2[m[1]][0]:
                    # the subfields differ: add the field of rec1 as well
                    new_fields.append(deepcopy(fields1[m[0]]))
            else:
                new_fields.append(deepcopy(fields1[m[0]]))

    ### delete existing fields
    record_delete_field(rec1, fnum, ind1, ind2)

    ## find where the new_fields should be inserted in rec1 (insert_index)
    if merging_all_indicators:
        insert_index = 0
    else:
        ind_pair = (ind1, ind2)
        first_last_dict = _first_and_last_index_for_each_indicator(
            rec1.get(fnum, []))
        # find the indicator pair which is just before the one which will
        # be inserted
        ind_pair_before = None
        for pair in sorted(first_last_dict):
            if pair > ind_pair:
                break
            ind_pair_before = pair
        if ind_pair_before is None:
            # no smaller indicator pair exists: insert at the beginning
            insert_index = 0
        else:
            # insert after the last field index of the previous pair
            insert_index = first_last_dict[ind_pair_before][1] + 1

    ### add the new (merged) fields in correct 'in_field_index' position
    record_add_fields(rec1, fnum, new_fields, insert_index)
def merge_field_group(rec1, rec2, fnum, ind1="", ind2="", merge_conflicting_fields=False):
    """Merges the fields of a specific tag from 'rec2' into 'rec1'.

    'rec1' is modified in place. For every pair of fields matched by
    record_field_diff:
      - by default the field of rec1 is kept when there is one, otherwise
        the field of rec2 is taken;
      - with merge_conflicting_fields the field of rec2 is taken when
        there is one, followed by the field of rec1 if its subfields
        differ; otherwise the field of rec1 is kept.

    @param rec1: First record (a record dictionary structure)
    @param rec2: Second record (a record dictionary structure)
    @param fnum: a 3 characters long string indicating field tag number
    @param ind1: a 1 character long string
    @param ind2: a 1 character long string
    @param merge_conflicting_fields: whether to merge conflicting fields or not
    @raise Exception: if the given indicator pair is missing from the
        diff result
    """
    ### Check if merging goes for all indicators and set a boolean
    merging_all_indicators = not ind1 and not ind2

    ### check if there is no field in rec2 to be merged in rec1
    if not record_has_field(rec2, fnum):
        return

    ### get fields of rec2
    if merging_all_indicators:
        fields2 = record_get_field_instances(rec2, fnum, "%", "%")
    else:
        fields2 = record_get_field_instances(rec2, fnum, ind1, ind2)
    if not fields2:
        return

    ### check if field in rec1 doesn't even exist
    if not record_has_field(rec1, fnum):
        record_add_fields(rec1, fnum, fields2)
        return

    ### compare the fields, get diffs for given indicators
    alldiffs = record_field_diff(rec1[fnum], rec2[fnum], fnum,
                                 match_subfields, ind1, ind2)

    ### check if fields are the same
    if alldiffs is None:
        return  # nothing to merge

    ### find the diffing for the fields of the given indicators
    # keep only the list of diffs by indicators (without the 'c')
    alldiffs = alldiffs[1]
    if merging_all_indicators:
        # combine the diffs for each indicator to one list
        diff = _combine_diffs(alldiffs)
    else:
        # diffing for one indicator: look for the indicator pair
        wanted_pair = (ind1, ind2)
        for entry in alldiffs:
            if entry[0] == wanted_pair:
                # keep only the list of diffs (without the indicator tuple)
                diff = entry[1]
                break
        else:
            raise Exception("Indicators not in diff result.")

    ### proceed to merging fields in a new field list
    fields1, fields2 = rec1[fnum], rec2[fnum]
    new_fields = []
    if not merge_conflicting_fields:
        # merge non-conflicting fields
        for m in diff:
            if m[0] is not None:
                # rec1 has a field in this match, keep it
                new_fields.append(deepcopy(fields1[m[0]]))
            else:
                # else take the field from rec2
                new_fields.append(deepcopy(fields2[m[1]]))
    else:
        # merge all fields
        for m in diff:
            if m[1] is not None:
                # rec2 has a field, add it
                new_fields.append(deepcopy(fields2[m[1]]))
                if m[0] is not None and fields1[m[0]][0] != fields2[m[1]][0]:
                    # the subfields differ: add the field of rec1 as well
                    new_fields.append(deepcopy(fields1[m[0]]))
            else:
                new_fields.append(deepcopy(fields1[m[0]]))

    ### delete existing fields
    record_delete_field(rec1, fnum, ind1, ind2)

    ## find where the new_fields should be inserted in rec1 (insert_index)
    if merging_all_indicators:
        insert_index = 0
    else:
        ind_pair = (ind1, ind2)
        first_last_dict = _first_and_last_index_for_each_indicator(
            rec1.get(fnum, []))
        # find the indicator pair which is just before the one which will
        # be inserted
        ind_pair_before = None
        for pair in sorted(first_last_dict):
            if pair > ind_pair:
                break
            ind_pair_before = pair
        if ind_pair_before is None:
            # no smaller indicator pair exists: insert at the beginning
            insert_index = 0
        else:
            # insert after the last field index of the previous pair
            insert_index = first_last_dict[ind_pair_before][1] + 1

    ### add the new (merged) fields in correct 'in_field_index' position
    record_add_fields(rec1, fnum, new_fields, insert_index)
def compare_records(self, record1, record2, opt_mode=None):
    """
    Compares two records to identify added/modified/deleted tags.
    The records are either the upload record or existing record or
    record archived.

    @param record1: first record; tags found only here are 'added'
    @param record2: second record; tags found only here are 'deleted'
    @param opt_mode: upload mode; deleted tags are only reported for
        'replace' and 'delete'

    @return: dictionary holding the non-empty patches under the keys
        'MOD' (result of self.find_modified_tags on the common tags),
        'ADD' (tag -> fields of record1) and 'DEL' (tag -> delete
        marker fields)
    """
    # Group the tags of both records as common / added / deleted
    common_tags = [tag for tag in record1 if tag in record2]
    added_tags = [tag for tag in record1 if tag not in record2]
    deleted_tags = [tag for tag in record2 if tag not in record1]

    mod_patch = {}
    if common_tags:
        mod_patch = self.find_modified_tags(common_tags, record1, record2)

    add_patch = dict((tag, record1[tag]) for tag in added_tags)

    # Tags missing from record1 only count as deletions in 'replace' or
    # 'delete' mode. (Original note: a record uploaded in 'correct' mode
    # should already carry its fields marked with the '0' code.)
    del_patch = {}
    if opt_mode in ('replace', 'delete'):
        del_patch = dict((tag, record2[tag]) for tag in deleted_tags)

    # returning back a result dictionary with all available patches
    result = {}
    if mod_patch:
        result['MOD'] = mod_patch
    if add_patch:
        result['ADD'] = add_patch
    if del_patch:
        # For every field of a deleted tag, replace the field by a marker
        # field carrying the same indicators and the single subfield
        # (CFG_BIBUPLOAD_DELETE_CODE, CFG_BIBUPLOAD_DELETE_VALUE).
        # NOTE Indicators taken into consideration while deleting fields
        # Snapshots are iterated because the patch is modified on the way.
        for tag in list(del_patch):
            for data_tuple in list(del_patch[tag]):
                ind1 = data_tuple[1]
                ind2 = data_tuple[2]
                record_delete_field(del_patch, tag, ind1, ind2)
                record_add_field(
                    del_patch, tag, ind1, ind2, "",
                    [(CFG_BIBUPLOAD_DELETE_CODE,
                      CFG_BIBUPLOAD_DELETE_VALUE)])
        result['DEL'] = del_patch

    return result
def main():
    """Interactively move resource type information into field 337.

    Walks the record ids 1, 2, 3, ... until get_record returns nothing.
    For every record:
      - each subfield of the 980 fields with code 'a' and a value in
        TYPES becomes a one-subfield 337 field; any other 980 subfield
        becomes a one-subfield 980 field (duplicates are skipped);
      - each 690 field whose first subfield is
        ('a', 'ling_resource_type') contributes the value of its second
        subfield as a 337 field when that value is in TYPES, and is
        reported as unrecognized otherwise; any other 690 field is kept
        (duplicates are skipped).

    When the list of 337 fields changed, fields 337, 980 and 690 are
    rebuilt from the collected values, the old and new content is
    printed and the operator is asked whether to upload the record in
    'replace' mode. Records whose 337 fields did not change are skipped
    without a prompt.
    """
    # NOTE(review): these two modules are not used by name below;
    # presumably imported for their side effects - confirm before removing.
    import invenio.modules.editor.models
    import invenio.modules.editor.views
    from invenio.legacy.search_engine import get_record
    from invenio.legacy.bibrecord import (
        record_delete_field,
        record_add_field,
    )
    from invenio.legacy.bibupload.engine import (
        bibupload,
    )

    for recid in itertools.count(1):
        # Fetched twice so that 'old_rec' stays untouched for the report
        old_rec = get_record(recid)
        rec = get_record(recid)

        if not rec:
            break

        print('Processing record: {0}'.format(recid))

        old_337 = [field[0] for field in rec.get('337', [])]
        new_337 = old_337[:]
        new_690 = []
        new_980 = []

        for field in rec.get('980', []):
            for sf in field[0]:
                if sf[0] == 'a' and sf[1] in TYPES:
                    target = new_337
                else:
                    target = new_980
                if [sf] not in target:
                    target.append([sf])

        for field in rec.get('690', []):
            sfs = field[0]
            # A resource type field needs its marker subfield plus the
            # subfield holding the value; shorter fields are kept as
            # ordinary 690 fields instead of failing with IndexError.
            is_resource_type = (
                len(sfs) >= 2
                and sfs[0][0] == 'a'
                and sfs[0][1] == 'ling_resource_type'
            )
            if not is_resource_type:
                if sfs not in new_690:
                    new_690.append(sfs)
                continue
            res_type = sfs[1][1]
            if res_type not in TYPES:
                print("Unrecognized 'ling_resource_type' value! '{0}'".
                      format(res_type))
            elif [('a', res_type)] not in new_337:
                new_337.append([('a', res_type)])

        if new_337 == old_337:
            # Nothing to change for this record
            continue

        for tag in ('337', '980', '690'):
            record_delete_field(rec, tag)
        rebuilt = (('337', new_337), ('980', new_980), ('690', new_690))
        for tag, fields in rebuilt:
            for subfields in fields:
                record_add_field(rec, tag, subfields=subfields)

        for tag in ('337', '690', '980'):
            print('\nOld {0}:'.format(tag))
            pprint(old_rec.get(tag))
            print('New {0}:'.format(tag))
            pprint(rec.get(tag))

        if raw_input('Bibupload (y/n)? ') == 'y':
            bibupload(rec, 'replace')