def replace_references(recid):
    """Build the MARC XML of a record with re-extracted references.

    The record itself is not updated; the marc xml of the document with
    updated references is returned.

    Parameters:
    * recid: the id of the record

    Returns the output of record_xml_output for the modified record, or
    None when the parsed references (first element of the create_record
    result) are empty.
    """
    # Parse references
    parsed = create_record(
        extract_references_from_record_xml(recid).encode("utf-8"))

    # Record marc xml (fetched before the emptiness check, as originally)
    record = get_record(recid)

    if not parsed[0]:
        return None

    new_fields = record_get_field_instances(parsed[0],
                                            tag="999",
                                            ind1="%",
                                            ind2="%")
    # Replace 999 fields
    record_delete_fields(record, "999")
    record_add_fields(record, "999", new_fields)
    # Serialise the record carrying the new references
    return record_xml_output(record)
def replace_references(recid):
    """Return the record's MARC XML with its 999 fields re-extracted.

    The record itself is not updated, the marc xml of the document with
    updated references is returned.

    Parameters:
    * recid: the id of the record

    The result is None when create_record yields an empty first element
    for the extracted references.
    """
    # Parse references
    encoded_xml = extract_references_from_record_xml(recid).encode('utf-8')
    references = create_record(encoded_xml)

    # Record marc xml
    record = get_record(recid)

    out_xml = None
    reference_record = references[0]
    if reference_record:
        replacement_fields = record_get_field_instances(
            reference_record, tag='999', ind1='%', ind2='%')
        # Replace 999 fields
        record_delete_fields(record, '999')
        record_add_fields(record, '999', replacement_fields)
        # Update record references
        out_xml = record_xml_output(record)
    return out_xml
def create_xml(recid, arxiv_ids):
    """Build a minimal record holding the 856 fields accepted by check_arxiv_url.

    Parameters:
    * recid: the id of the record to read; also written as controlfield 001
    * arxiv_ids: passed unchanged to check_arxiv_url for every candidate field

    Returns whatever print_rec produces for the new record.
    """
    source_record = get_record(recid)

    # Keep only the 856 (ind1='4') instances that check_arxiv_url accepts
    kept_fields = []
    for candidate in record_get_field_instances(source_record,
                                                tag='856',
                                                ind1='4'):
        if check_arxiv_url(candidate, arxiv_ids):
            kept_fields.append(candidate)

    new_record = {}
    record_add_field(new_record, '001', controlfield_value=str(recid))
    record_add_fields(new_record, '856', kept_fields)
    return print_rec(new_record)
def create_our_record(recid):
    """Build a minimal record with the de-duplicated 980 fields of `recid`.

    Fields whose subfield instances are exactly [('a', 'unknown')] are
    dropped. The remaining ones are wrapped in OurInstance and collected in
    a set, so duplicates collapse according to OurInstance's equality and
    hashing (defined outside this block).

    Parameters:
    * recid: the id of the record to read; also written as controlfield 001

    Returns whatever print_rec produces for the new record.
    """
    source_record = get_record(recid)
    fields_980 = record_get_field_instances(source_record, '980')

    unique_wrappers = set(
        OurInstance(field)
        for field in fields_980
        if field_get_subfield_instances(field) != [('a', 'unknown')])
    # Unwrap; ordering follows set iteration order, as in the original
    deduplicated = [wrapper.field for wrapper in unique_wrappers]

    new_record = {}
    record_add_field(new_record, '001', controlfield_value=str(recid))
    record_add_fields(new_record, '980', deduplicated)
    return print_rec(new_record)
def _prepare_marcxml(recid_a, rn_a, recids_and_rns_b, what_is_a_for_b,
                     what_is_b_for_a, display_in_a=True, display_in_b=True,
                     marc_for_a=None, marc_for_b=None, upload_mode='append',
                     consider_empty_p=False):
    """Build the '<collection>' MARCXML linking record A with records B.

    Parameters:
    * recid_a, rn_a: id and 'r' subfield value describing record A
    * recids_and_rns_b: iterable of (recid_b, rn_b) pairs for the B records
    * what_is_a_for_b: 'i' subfield written into each B record; None skips
      the whole B side
    * what_is_b_for_a: 'i' subfield written into record A; None skips the
      A side
    * display_in_a, display_in_b: select "0" (truthy) or "1" (falsy) as the
      third argument of _prepare_marc
    * marc_for_a, marc_for_b: passed to _prepare_marc to obtain tag and
      indicators
    * upload_mode: when 'correct', existing linking fields of each B record
      are re-added before the new link
    * consider_empty_p: when true, emit empty linking fields for records
      whose links were all removed

    Returns the concatenation of '<collection>', the record_xml_output of
    every record built, and '</collection>'.
    """
    chunks = ['<collection>']

    if what_is_b_for_a is not None:
        tag_a, ind1_a, ind2_a = _prepare_marc(
            marc_for_a, CFG_OTHER_RELATIONSHIP_ENTRY,
            "0" if display_in_a else "1")
        record_a = {}
        record_add_field(record_a, "001", controlfield_value=str(recid_a))
        if (upload_mode == 'correct' and not recids_and_rns_b
                and consider_empty_p):
            # Add empty field in order to account for cases where all
            # linkings are removed by the submitter
            record_add_field(record_a, tag_a, ind1=ind1_a, ind2=ind2_a)
        for recid_b, rn_b in recids_and_rns_b:
            link_to_b = [('i', what_is_b_for_a),
                         ('r', rn_b),
                         ('w', str(recid_b))]
            record_add_field(record_a, tag_a, ind1=ind1_a, ind2=ind2_a,
                             subfields=link_to_b)
        chunks.append(record_xml_output(record_a))

    if what_is_a_for_b is not None:
        tag_b, ind1_b, ind2_b = _prepare_marc(
            marc_for_b, CFG_OTHER_RELATIONSHIP_ENTRY,
            "0" if display_in_b else "1")
        for recid_b, rn_b in recids_and_rns_b:
            record_b = {}
            record_add_field(record_b, "001", controlfield_value=str(recid_b))
            if upload_mode == 'correct':
                # Re-add the linking fields returned for this B record
                # before appending the new link
                existing_links = _get_record_linking_fields(
                    recid_b, recid_a, tag_b, ind1_b, ind2_b)
                record_add_fields(record_b, tag_b, existing_links)
            link_to_a = [('i', what_is_a_for_b),
                         ('r', rn_a),
                         ('w', str(recid_a))]
            record_add_field(record_b, tag_b, ind1=ind1_b, ind2=ind2_b,
                             subfields=link_to_a)
            chunks.append(record_xml_output(record_b))

        # Remove linking in remote records where adequate
        if consider_empty_p:
            for recid_b in get_unlinked_records(recid_a, marc_for_b,
                                                display_in_b, upload_mode,
                                                recids_and_rns_b):
                record_b = {}
                record_add_field(record_b, "001",
                                 controlfield_value=str(recid_b))
                existing_links = _get_record_linking_fields(
                    recid_b, recid_a, tag_b, ind1_b, ind2_b)
                if not existing_links:
                    # Add empty field in order to account for cases where
                    # all linkings are removed by the submitter
                    record_add_field(record_b, tag_b,
                                     ind1=ind1_b, ind2=ind2_b)
                record_add_fields(record_b, tag_b, existing_links)
                chunks.append(record_xml_output(record_b))

    chunks.append('</collection>')
    return ''.join(chunks)
def replace_references(recid, uid=None, txt=None, url=None):
    """Replace references for a record

    The record itself is not updated, the marc xml of the document with
    updated references is returned

    Parameters:
    * recid: the id of the record
    * uid: passed with recid to get_cache_file_contents to fetch the record
    * txt: references in text mode; used in preference to url when given
    * url: handed to extract_references_from_url_xml when txt is None

    Returns the output of record_xml_output for the modified record, or
    None when no 999 C5 fields were extracted.
    """
    # Parse references: explicit text first, then URL, else the record itself
    if txt is None and url is None:
        references_xml = extract_references_from_record_xml(recid)
    elif txt is None:
        references_xml = extract_references_from_url_xml(url)
    else:
        references_xml = extract_references_from_string_xml(
            txt, is_only_references=True)
    parsed = create_record(references_xml.encode('utf-8'))

    # Only the third element of the cache contents (the record) is used
    (_unused1, _unused2, record, _unused3, _unused4, _unused5,
     _unused6) = get_cache_file_contents(recid, uid)

    extracted = parsed[0]
    new_references = record_get_field_instances(extracted,
                                                tag='999',
                                                ind1='C',
                                                ind2='5')
    status_fields = record_get_field_instances(extracted,
                                               tag='999',
                                               ind1='C',
                                               ind2='6')
    if not new_references:
        return None

    # Replace 999 fields
    record_delete_fields(record, '999')
    record_add_fields(record, '999', new_references)
    record_add_fields(record, '999', status_fields)
    # Update record references
    return record_xml_output(record)
def replace_references(recid, uid=None, txt=None, url=None):
    """Replace references for a record

    The record itself is not updated, the marc xml of the document with
    updated references is returned

    Parameters:
    * recid: the id of the record
    * uid: passed with recid to get_cache_contents to fetch the record
    * txt: references in text mode; checked before url
    * url: handed to extract_references_from_url_xml when txt is None

    The result is None unless at least one 999 C5 field was extracted.
    """
    # Parse references
    if txt is not None:
        references_xml = extract_references_from_string_xml(
            txt, is_only_references=True)
    elif url is not None:
        references_xml = extract_references_from_url_xml(url)
    else:
        references_xml = extract_references_from_record_xml(recid)
    reference_record = create_record(references_xml)[0]

    # The cached record is the third of the seven values returned
    (_skip_a, _skip_b, record, _skip_c, _skip_d, _skip_e,
     _skip_f) = get_cache_contents(recid, uid)

    c5_fields = record_get_field_instances(reference_record,
                                           tag='999', ind1='C', ind2='5')
    c6_fields = record_get_field_instances(reference_record,
                                           tag='999', ind1='C', ind2='6')

    out_xml = None
    if c5_fields:
        # Replace 999 fields
        record_delete_fields(record, '999')
        for replacement in (c5_fields, c6_fields):
            record_add_fields(record, '999', replacement)
        # Update record references
        out_xml = record_xml_output(record)
    return out_xml
def add_field(rec1, rec2, fnum, findex1, findex2):
    """Adds one field of rec2 into rec1, at a position derived from the
    diffing of rec1 with rec2.

    @param rec1: First record (a record dictionary structure)
    @param rec2: Second record (a record dictionary structure)
    @param fnum: a 3 characters long string indicating field tag number
    @param findex1: the rec1 field position in the group of fields it
        belongs, or None when rec1 has no corresponding field
    @param findex2: the rec2 field position in the group of fields it belongs
    """
    incoming = rec2[fnum][findex2]

    if findex1 is not None:
        ### findex1 indicates an existing field in rec1: insert the field
        ### of rec2 before the field of rec1
        record_add_fields(rec1, fnum, [incoming], findex1)
    elif not record_has_field(rec1, fnum):
        ### the field tag does not exist in rec1: add without a position
        record_add_fields(rec1, fnum, [incoming])
    else:
        ### findex1 is None and the field tag already exists:
        ### diff all indicators of the field, keep only the per-indicator
        ### list (index 1 of the result) and flatten it into one list
        per_indicator = record_field_diff(rec1[fnum], rec2[fnum], fnum,
                                          match_subfields)[1]
        matches = _combine_diffs(per_indicator)

        # Track the last rec1 index seen before the match holding findex2
        last_rec1_index = -1
        for match in matches:
            if match[1] == findex2:
                break
            if match[0] is not None:
                last_rec1_index = match[0]

        # finally add the field (one position after)
        record_add_fields(rec1, fnum, [incoming], last_rec1_index + 1)
def add_field(rec1, rec2, fnum, findex1, findex2):
    """Copies the field rec2[fnum][findex2] into rec1; the insertion point
    depends on the diffing of rec1 with rec2.

    @param rec1: First record (a record dictionary structure)
    @param rec2: Second record (a record dictionary structure)
    @param fnum: a 3 characters long string indicating field tag number
    @param findex1: the rec1 field position in the group of fields it belongs
    @param findex2: the rec2 field position in the group of fields it belongs
    """
    new_fields = [rec2[fnum][findex2]]

    # Case 1: an existing rec1 field was designated; insert before it
    if findex1 is not None:
        record_add_fields(rec1, fnum, new_fields, findex1)
        return

    # Case 2: rec1 has no field with this tag at all
    if not record_has_field(rec1, fnum):
        record_add_fields(rec1, fnum, new_fields)
        return

    # Case 3: the tag exists in rec1 but no position was given.
    # Get diffs for all indicators of the field, keeping only the list of
    # diffs by indicators (element 1), then combine them in one list.
    diff_result = record_field_diff(rec1[fnum], rec2[fnum], fnum,
                                    match_subfields)
    combined = _combine_diffs(diff_result[1])

    # Find the position of the field after which the insertion takes place
    preceding = -1
    for entry in combined:
        if entry[1] == findex2:
            break
        if entry[0] is not None:
            preceding = entry[0]

    record_add_fields(rec1, fnum, new_fields, preceding + 1)
def merge_field_group(rec1, rec2, fnum, ind1='', ind2='', merge_conflicting_fields=False):
    """Merges the fields of one tag from 'rec2' into 'rec1'.

    With merge_conflicting_fields false, each diff match keeps rec1's field
    when rec1 has one and takes rec2's otherwise. With it true, rec2's field
    is added first and rec1's is added as well when its first element
    differs from that of rec2's field.

    @param rec1: First record (a record dictionary structure); modified
        in place
    @param rec2: Second record (a record dictionary structure)
    @param fnum: a 3 characters long string indicating field tag number
    @param ind1: a 1 character long string
    @param ind2: a 1 character long string
    @param merge_conflicting_fields: whether to merge conflicting fields or not
    @raise Exception: if (ind1, ind2) is not present in the diff result
    """
    ### Merging goes for all indicators when both are empty
    merging_all_indicators = not ind1 and not ind2

    ### check if there is no field in rec2 to be merged in rec1
    if not record_has_field(rec2, fnum):
        return

    ### get fields of rec2
    if merging_all_indicators:
        fields2 = record_get_field_instances(rec2, fnum, '%', '%')
    else:
        fields2 = record_get_field_instances(rec2, fnum, ind1, ind2)
    if len(fields2) == 0:
        return

    ### check if field in rec1 doesn't even exist
    if not record_has_field(rec1, fnum):
        record_add_fields(rec1, fnum, fields2)
        return

    ### compare the fields, get diffs for given indicators
    alldiffs = record_field_diff(rec1[fnum], rec2[fnum], fnum,
                                 match_subfields, ind1, ind2)
    ### check if fields are the same
    if alldiffs is None:
        return  # nothing to merge

    ### keep only the list of diffs by indicators (without the 'c')
    alldiffs = alldiffs[1]
    if merging_all_indicators:
        # combine the diffs for each indicator to one list
        diff = _combine_diffs(alldiffs)
    else:
        # diffing for one indicator: look for the pair in the diff result
        for diff in alldiffs:
            if diff[0] == (ind1, ind2):
                break
        else:
            # Call form of raise: valid on both Python 2 and Python 3
            raise Exception("Indicators not in diff result.")
        diff = diff[1]  # keep only the list of diffs (without the pair)

    ### proceed to merging fields in a new field list
    fields1, fields2 = rec1[fnum], rec2[fnum]
    new_fields = []
    # '== False' is kept on purpose: it preserves how non-boolean values
    # such as None were treated before (they select the merge-all branch).
    if merge_conflicting_fields == False:
        # merge non-conflicting fields
        for m in diff:
            if m[0] is not None:
                # rec1 has a field in the diff, keep it
                new_fields.append(deepcopy(fields1[m[0]]))
            else:
                # else take the field from rec2
                new_fields.append(deepcopy(fields2[m[1]]))
    else:
        # merge all fields
        for m in diff:
            if m[1] is not None:
                # rec2 has a field, add it
                new_fields.append(deepcopy(fields2[m[1]]))
                if m[0] is not None and fields1[m[0]][0] != fields2[m[1]][0]:
                    # the fields are not the same: add the field of rec1 too
                    new_fields.append(deepcopy(fields1[m[0]]))
            else:
                new_fields.append(deepcopy(fields1[m[0]]))

    ### delete existing fields
    record_delete_field(rec1, fnum, ind1, ind2)

    ### find where the new_fields should be inserted in rec1
    if merging_all_indicators:
        insert_index = 0
    else:
        ind_pair = (ind1, ind2)
        first_last_dict = _first_and_last_index_for_each_indicator(
            rec1.get(fnum, []))
        # find the indicator pair just before the one being inserted;
        # sorted() works on Python 2 lists and Python 3 dict views alike
        ind_pair_before = None
        for pair in sorted(first_last_dict):
            if pair > ind_pair:
                break
            ind_pair_before = pair
        if ind_pair_before is None:
            # no smaller indicator pair exists: insert at the beginning
            insert_index = 0
        else:
            # insert after the last field index of the previous pair
            insert_index = first_last_dict[ind_pair_before][1] + 1

    ### add the new (merged) fields in correct 'in_field_index' position
    record_add_fields(rec1, fnum, new_fields, insert_index)
def create_our_record(recid, refs):
    """Build a record with controlfield 001 and the given 999 fields.

    Parameters:
    * recid: record id, written as the 001 controlfield value via str()
    * refs: field instances passed to record_add_fields under tag '999'

    Returns whatever print_rec produces for the new record.
    """
    new_record = {}
    record_id = str(recid)
    record_add_field(new_record, '001', controlfield_value=record_id)
    record_add_fields(new_record, '999', refs)
    return print_rec(new_record)
def _prepare_marcxml(recid_a, rn_a, recids_and_rns_b, what_is_a_for_b,
                     what_is_b_for_a, display_in_a=True, display_in_b=True,
                     marc_for_a=None, marc_for_b=None, upload_mode='append',
                     consider_empty_p=False):
    """Return a '<collection>' of MARCXML records linking A and the B records.

    Record A receives one linking field per (recid_b, rn_b) pair when
    what_is_b_for_a is not None. Each B record receives a linking field
    pointing back to A when what_is_a_for_b is not None. In 'correct' upload
    mode the fields returned by _get_record_linking_fields are re-added to
    each B record first. With consider_empty_p, empty linking fields are
    emitted for records left without links.

    Tag and indicators come from _prepare_marc, whose third argument is "0"
    when the matching display_in_* flag is truthy and "1" otherwise.
    """
    correcting = upload_mode == 'correct'
    output = '<collection>'

    # --- Side A: one record listing every B it is linked to ---
    if what_is_b_for_a is not None:
        display_code_a = "0" if display_in_a else "1"
        tag_a, ind1_a, ind2_a = _prepare_marc(
            marc_for_a, CFG_OTHER_RELATIONSHIP_ENTRY, display_code_a)
        rec_a = {}
        record_add_field(rec_a, "001", controlfield_value=str(recid_a))
        if correcting and not recids_and_rns_b and consider_empty_p:
            # Add empty field in order to account for cases where all
            # linkings are removed by the submitter
            record_add_field(rec_a, tag_a, ind1=ind1_a, ind2=ind2_a)
        for linked_recid, linked_rn in recids_and_rns_b:
            record_add_field(rec_a, tag_a, ind1=ind1_a, ind2=ind2_a,
                             subfields=[('i', what_is_b_for_a),
                                        ('r', linked_rn),
                                        ('w', str(linked_recid))])
        output += record_xml_output(rec_a)

    # --- Side B: one record per B pointing back to A ---
    if what_is_a_for_b is not None:
        display_code_b = "0" if display_in_b else "1"
        tag_b, ind1_b, ind2_b = _prepare_marc(
            marc_for_b, CFG_OTHER_RELATIONSHIP_ENTRY, display_code_b)

        for linked_recid, linked_rn in recids_and_rns_b:
            rec_b = {}
            record_add_field(rec_b, "001",
                             controlfield_value=str(linked_recid))
            if correcting:
                previous_links = _get_record_linking_fields(
                    linked_recid, recid_a, tag_b, ind1_b, ind2_b)
                record_add_fields(rec_b, tag_b, previous_links)
            record_add_field(rec_b, tag_b, ind1=ind1_b, ind2=ind2_b,
                             subfields=[('i', what_is_a_for_b),
                                        ('r', rn_a),
                                        ('w', str(recid_a))])
            output += record_xml_output(rec_b)

        # Remove linking in remote records where adequate
        if consider_empty_p:
            unlinked = get_unlinked_records(recid_a, marc_for_b, display_in_b,
                                            upload_mode, recids_and_rns_b)
            for unlinked_recid in unlinked:
                rec_b = {}
                record_add_field(rec_b, "001",
                                 controlfield_value=str(unlinked_recid))
                previous_links = _get_record_linking_fields(
                    unlinked_recid, recid_a, tag_b, ind1_b, ind2_b)
                if not previous_links:
                    # Add empty field in order to account for cases where
                    # all linkings are removed by the submitter
                    record_add_field(rec_b, tag_b, ind1=ind1_b, ind2=ind2_b)
                record_add_fields(rec_b, tag_b, previous_links)
                output += record_xml_output(rec_b)

    output += '</collection>'
    return output
def create_our_record(recid, abstracts):
    """Build a record with controlfield 001 and at most one 520 field.

    Parameters:
    * recid: record id, written as the 001 controlfield value via str()
    * abstracts: sequence of field instances; only the slice abstracts[:1]
      (the first element, if any) is added under tag '520'

    Returns whatever print_rec produces for the new record.
    """
    new_record = {}
    first_abstract_only = abstracts[:1]
    record_add_field(new_record, '001', controlfield_value=str(recid))
    record_add_fields(new_record, '520', first_abstract_only)
    return print_rec(new_record)
def _merge_fields_by_diff(fields1, fields2, diff, merge_conflicting_fields):
    """Return the merged field list (deep copies) described by `diff`."""
    merged = []
    # '== False' is kept on purpose so that non-boolean values such as None
    # keep selecting the merge-all branch, exactly as before.
    if merge_conflicting_fields == False:
        # merge non-conflicting fields
        for index1, index2 in ((m[0], m[1]) for m in diff):
            if index1 is not None:
                # rec1 has a field in the diff, keep it
                merged.append(deepcopy(fields1[index1]))
            else:
                # else take the field from rec2
                merged.append(deepcopy(fields2[index2]))
        return merged

    # merge all fields
    for index1, index2 in ((m[0], m[1]) for m in diff):
        if index2 is None:
            merged.append(deepcopy(fields1[index1]))
            continue
        # rec2 has a field, add it
        merged.append(deepcopy(fields2[index2]))
        if index1 is not None and fields1[index1][0] != fields2[index2][0]:
            # the fields are not the same: add the field of rec1 as well
            merged.append(deepcopy(fields1[index1]))
    return merged


def _insert_index_after_smaller_indicators(fields, ind_pair):
    """Return the index following the last field whose indicator pair sorts at or before `ind_pair`."""
    first_last_dict = _first_and_last_index_for_each_indicator(fields)
    ind_pair_before = None
    # sorted() works on Python 2 lists and Python 3 dict views alike
    for pair in sorted(first_last_dict):
        if pair > ind_pair:
            break
        ind_pair_before = pair
    if ind_pair_before is None:
        # no smaller indicator pair exists: insert at the beginning
        return 0
    # insert after the last field index of the previous indicator pair
    return first_last_dict[ind_pair_before][1] + 1


def merge_field_group(rec1, rec2, fnum, ind1='', ind2='', merge_conflicting_fields=False):
    """Merges the fields of a specific tag from 'rec2' into 'rec1'.

    Non-conflicting merge (the default) keeps rec1's field for every diff
    match that has one and takes rec2's field otherwise. When
    merge_conflicting_fields is set, rec2's fields are added too, next to
    the differing fields of rec1.

    @param rec1: First record (a record dictionary structure); modified
        in place
    @param rec2: Second record (a record dictionary structure)
    @param fnum: a 3 characters long string indicating field tag number
    @param ind1: a 1 character long string
    @param ind2: a 1 character long string
    @param merge_conflicting_fields: whether to merge conflicting fields or not
    @raise Exception: if (ind1, ind2) is not present in the diff result
    """
    ### Merging goes for all indicators when both are empty
    merging_all_indicators = not ind1 and not ind2

    ### check if there is no field in rec2 to be merged in rec1
    if not record_has_field(rec2, fnum):
        return

    ### get fields of rec2
    if merging_all_indicators:
        fields2 = record_get_field_instances(rec2, fnum, '%', '%')
    else:
        fields2 = record_get_field_instances(rec2, fnum, ind1, ind2)
    if len(fields2) == 0:
        return

    ### check if field in rec1 doesn't even exist
    if not record_has_field(rec1, fnum):
        record_add_fields(rec1, fnum, fields2)
        return

    ### compare the fields, get diffs for given indicators
    alldiffs = record_field_diff(rec1[fnum], rec2[fnum], fnum,
                                 match_subfields, ind1, ind2)
    ### check if fields are the same
    if alldiffs is None:
        return  # nothing to merge

    ### keep only the list of diffs by indicators (without the 'c')
    alldiffs = alldiffs[1]
    if merging_all_indicators:
        # combine the diffs for each indicator to one list
        diff = _combine_diffs(alldiffs)
    else:
        # diffing for one indicator: look for the pair in the diff result
        for indicator_diff in alldiffs:
            if indicator_diff[0] == (ind1, ind2):
                break
        else:
            # Call form of raise: valid on both Python 2 and Python 3
            raise Exception("Indicators not in diff result.")
        # keep only the list of diffs (without the indicator tuple)
        diff = indicator_diff[1]

    ### proceed to merging fields in a new field list
    new_fields = _merge_fields_by_diff(rec1[fnum], rec2[fnum], diff,
                                       merge_conflicting_fields)

    ### delete existing fields
    record_delete_field(rec1, fnum, ind1, ind2)

    ### find where the new_fields should be inserted in rec1; this must run
    ### after the deletion above because it inspects what is left in rec1
    if merging_all_indicators:
        insert_index = 0
    else:
        insert_index = _insert_index_after_smaller_indicators(
            rec1.get(fnum, []), (ind1, ind2))

    ### add the new (merged) fields in correct 'in_field_index' position
    record_add_fields(rec1, fnum, new_fields, insert_index)