def test_check_altered(self):
    """bibmatch - check altered match

    The first record built from self.recxml3 must not carry an 001 field
    before matching; after match_records() runs with modify=1 the first
    matched record must carry one.
    """
    source_records = create_records(self.recxml3)
    unmatched_record = source_records[0][0]
    self.assertTrue(not record_has_field(unmatched_record, '001'))
    # The result is unpacked into four values; only the second one (the
    # matched records) is inspected here.
    outcome = match_records(source_records, modify=1)
    [dummy_first, matchedrecs, dummy_third, dummy_fourth] = outcome
    altered_record = matchedrecs[0][0]
    self.assertTrue(record_has_field(altered_record, '001'))
def test_check_altered(self):
    """bibmatch - check altered match

    Verifies that a record created from self.recxml4 has no 001 field at
    first, and that the first matched record returned by match_records()
    (called with modify=1, verbose=0) does have one.
    """
    input_records = create_records(self.recxml4)
    has_recid_before = record_has_field(input_records[0][0], '001')
    self.assertTrue(not has_recid_before)
    # Only the second of the four returned values is of interest.
    match_result = match_records(input_records, modify=1, verbose=0)
    [dummy1, matchedrecs, dummy3, dummy4] = match_result
    has_recid_after = record_has_field(matchedrecs[0][0], '001')
    self.assertTrue(has_recid_after)
def test_check_altered(self):
    """bibmatch - check altered match

    A record created from self.recxml4 starts without an 001 field; the
    first matched record returned by match_records() with modify=1 is
    expected to have one.
    """
    from invenio.bibrecord import record_has_field

    candidates = create_records(self.recxml4)
    self.assertTrue(not record_has_field(candidates[0][0], '001'))

    # Four values come back; the second holds the matched records.
    [dummy_a, matchedrecs, dummy_c, dummy_d] = match_records(
        candidates, modify=1, verbose=0)
    first_matched = matchedrecs[0][0]
    self.assertTrue(record_has_field(first_matched, '001'))
def _record_has_id_p(record, recid, rec_oaiid, rec_sysno):
    """Tell whether the record carries one of the given identifiers.

    Three comparisons are tried in order, stopping at the first hit:
    the 001 value against str(recid), the value found at OAIID_TAG against
    rec_oaiid, and the value found at SYSNO_TAG against rec_sysno.

    @param record: record to inspect
    @param recid: record ID; compared as a string with field 001
    @param rec_oaiid: value compared with the field addressed by OAIID_TAG
    @param rec_sysno: value compared with the field addressed by SYSNO_TAG
    @return: True if any of the comparisons succeeds, False otherwise
    """
    if (record_has_field(record, "001")
            and record_get_field_value(record, "001", "%", "%") == str(recid)):
        return True

    # OAIID_TAG / SYSNO_TAG are sliced as: tag (first three characters),
    # then one character each at positions 3, 4 and 5.
    oai_tag = OAIID_TAG[0:3]
    if record_has_field(record, oai_tag):
        oai_value = record_get_field_value(record, oai_tag, OAIID_TAG[3],
                                           OAIID_TAG[4], OAIID_TAG[5])
        if oai_value == rec_oaiid:
            return True

    sysno_tag = SYSNO_TAG[0:3]
    if record_has_field(record, sysno_tag):
        sysno_value = record_get_field_value(record, sysno_tag, SYSNO_TAG[3],
                                             SYSNO_TAG[4], SYSNO_TAG[5])
        if sysno_value == rec_sysno:
            return True

    return False
def _record_has_id_p(record, recid, rec_oaiid, rec_sysno):
    """Report whether any of the supplied IDs is found in the record.

    @param record: record to inspect
    @param recid: record ID, matched (as a string) against field 001
    @param rec_oaiid: value matched against the field named by OAIID_TAG
    @param rec_sysno: value matched against the field named by SYSNO_TAG
    @return: True on the first successful match (001, then OAIID_TAG, then
        SYSNO_TAG), False if none matches
    """
    def _value_at_equals(full_tag, expected):
        # full_tag is split into its first three characters plus the single
        # characters at positions 3, 4 and 5; the latter are only read when
        # the record actually has the field.
        tag = full_tag[0:3]
        if not record_has_field(record, tag):
            return False
        found = record_get_field_value(record, tag, full_tag[3],
                                       full_tag[4], full_tag[5])
        return found == expected

    if record_has_field(record, '001'):
        controlfield_value = record_get_field_value(record, '001', '%', '%')
        if controlfield_value == str(recid):
            return True
    if _value_at_equals(OAIID_TAG, rec_oaiid):
        return True
    if _value_at_equals(SYSNO_TAG, rec_sysno):
        return True
    return False
def merge_record_with_template(rec, template_name):
    """Complete the record with content taken from the named template.

    Tags that the record lacks are copied over from the template. For tags
    present in both, every field instance of the record receives each
    template subfield code it does not have yet, using the first template
    value for that code.

    @param rec: record to extend (modified in place)
    @param template_name: name handed to get_record_template()
    @return: the extended record, or None when no template is found
    """
    template_source = get_record_template(template_name)
    if not template_source:
        return

    template_rec = create_record(template_source)[0]
    for tag in template_rec:
        if not record_has_field(rec, tag):
            # Tag is missing from the record: copy all template instances.
            for instance in template_rec[tag]:
                record_add_field(rec, tag, instance[1], instance[2],
                                 subfields=instance[0])
            continue

        # Tag exists on both sides: only add the subfield codes that an
        # existing instance does not carry.
        for template_instance in template_rec[tag]:
            template_codes = field_get_subfield_codes(template_instance)
            for existing_instance in rec[tag]:
                existing_codes = field_get_subfield_codes(existing_instance)
                for code in template_codes:
                    if code in existing_codes:
                        continue
                    first_value = field_get_subfield_values(
                        template_instance, code)[0]
                    field_add_subfield(existing_instance, code, first_value)
    return rec
def merge_record_with_template(rec, template_name, is_hp_record=False):
    """Extend the record with the content of a template and return it.

    Tags absent from the record are copied from the template; for shared
    tags, each field instance of the record gets the template subfield
    codes it is missing (first template value per code). Finally the
    subfields of the record are ordered with record_order_subfields().

    @param rec: record to extend (modified in place)
    @param template_name: name handed to get_record_template()
    @param is_hp_record: when true (holding pen record), all template
        subfields are made volatile before merging
    @return: the extended record, or None when no template is found
    """
    raw_template = get_record_template(template_name)
    if not raw_template:
        return
    template_rec = create_record(raw_template)[0]

    if is_hp_record:
        # Holding pen records get an all-volatile template.
        record_make_all_subfields_volatile(template_rec)

    for tag in template_rec:
        tag_missing = not record_has_field(rec, tag)
        if tag_missing:
            for new_instance in template_rec[tag]:
                record_add_field(rec, tag, new_instance[1], new_instance[2],
                                 subfields=new_instance[0])
        else:
            for source_instance in template_rec[tag]:
                source_codes = field_get_subfield_codes(source_instance)
                for target_instance in rec[tag]:
                    target_codes = field_get_subfield_codes(target_instance)
                    for code in source_codes:
                        if code not in target_codes:
                            value = field_get_subfield_values(
                                source_instance, code)[0]
                            field_add_subfield(target_instance, code, value)

    record_order_subfields(rec)
    return rec
def merge_record_with_template(rec, template_name, is_hp_record=False):
    """Merge the named template into the record and return the record.

    @param rec: record to extend (modified in place)
    @param template_name: name handed to get_record_template()
    @param is_hp_record: if true (holding pen record), every subfield of
        the template is made volatile before it is merged
    @return: the extended record, or None when no template is found
    """
    template_text = get_record_template(template_name)
    if not template_text:
        return

    template_rec = create_record(template_text)[0]
    if is_hp_record:
        record_make_all_subfields_volatile(template_rec)

    for tag in template_rec:
        template_instances = template_rec[tag]

        if not record_has_field(rec, tag):
            # Whole tag is new to the record: add each template instance.
            for template_instance in template_instances:
                record_add_field(rec, tag, template_instance[1],
                                 template_instance[2],
                                 subfields=template_instance[0])
            continue

        # Shared tag: top up each record instance with the subfield codes
        # it lacks, taking the first template value for the code.
        for template_instance in template_instances:
            wanted_codes = field_get_subfield_codes(template_instance)
            for record_instance in rec[tag]:
                present_codes = field_get_subfield_codes(record_instance)
                for code in wanted_codes:
                    if code in present_codes:
                        continue
                    field_add_subfield(
                        record_instance, code,
                        field_get_subfield_values(template_instance, code)[0])
    return rec
def update_references(recid, overwrite=True):
    """Update references for a record

    First, we extract references from a record.
    Then, we are not updating the record directly but adding a bibupload
    task in -c mode which takes care of updating the record.

    @param recid: the id of the record
    @param overwrite: when false, refuse to touch a record that already
        has a 999 field
    @raise RecordHasReferences: if overwrite is disabled and the record
        has a 999 field, or if get_fieldvalues() finds a 999C59 value
        (the record has been curated)
    """
    if not overwrite:
        # Check for references in record
        record = get_record(recid)
        if record and record_has_field(record, '999'):
            raise RecordHasReferences('Record has references and overwrite '
                                      'mode is disabled: %s' % recid)

    if get_fieldvalues(recid, '999C59'):
        raise RecordHasReferences('Record has been curated: %s' % recid)

    # Parse references; encode before creating the temporary file so that
    # an encoding failure does not leave an empty file behind.
    references_xml = extract_references_from_record_xml(recid)
    encoded_xml = references_xml.encode('utf-8')

    # Save new record to file
    (temp_fd, temp_path) = mkstemp(prefix=CFG_REFEXTRACT_FILENAME,
                                   dir=CFG_TMPSHAREDDIR)
    temp_file = os.fdopen(temp_fd, 'w')
    try:
        temp_file.write(encoded_xml)
    finally:
        # Always release the descriptor, even if the write fails.
        temp_file.close()

    # Update record
    task_low_level_submission('bibupload', 'refextract', '-P', '5',
                              '-c', temp_path)
def update_references(recid, overwrite=True):
    """Update references for a record

    First, we extract references from a record.
    Then, we are not updating the record directly but adding a bibupload
    task in -c mode which takes care of updating the record.

    @param recid: the id of the record
    @param overwrite: when false, a record that already has a 999 field
        is rejected instead of being updated
    @raise RecordHasReferences: if overwrite is disabled and the record
        has a 999 field, or if a 999C59 value exists for the record
        (the record has been curated)
    """
    if not overwrite:
        # Check for references in record
        record = get_record(recid)
        if record and record_has_field(record, "999"):
            raise RecordHasReferences("Record has references and overwrite "
                                      "mode is disabled: %s" % recid)

    if get_fieldvalues(recid, "999C59"):
        raise RecordHasReferences("Record has been curated: %s" % recid)

    # Parse references and prepare the bytes to store before any file is
    # created, so a failure here leaves nothing on disk.
    references_xml = extract_references_from_record_xml(recid)
    payload = references_xml.encode("utf-8")

    # Save new record to file
    (temp_fd, temp_path) = mkstemp(prefix=CFG_REFEXTRACT_FILENAME,
                                   dir=CFG_TMPSHAREDDIR)
    temp_file = os.fdopen(temp_fd, "w")
    try:
        temp_file.write(payload)
    finally:
        # Close the handle whether or not the write succeeded.
        temp_file.close()

    # Update record
    task_low_level_submission("bibupload", "refextract", "-P", "5",
                              "-c", temp_path)
def inject_recid(data):
    """Build a record from each match text and make sure it has an 001.

    For every item of data the first record produced by create_records()
    is taken. If it has no 001 field, one is added whose value is element
    [0][1] of re_matched_recid.findall() applied to the same text.

    @param data: iterable of texts accepted by create_records()
    @return: list of the resulting records, in input order
    """
    # NOTE(review): the nesting was reconstructed from whitespace-collapsed
    # source; it is assumed that every record is returned, not only those
    # that received a new 001 field - confirm.
    updated_records = []
    for match_text in data:
        bibrec = create_records(match_text)[0][0]
        if not record_has_field(bibrec, '001'):
            matched_recid = re_matched_recid.findall(match_text)[0][1]
            record_add_field(bibrec, tag='001',
                             controlfield_value=matched_recid)
        updated_records.append(bibrec)
    return updated_records
def record_get_recid(record):
    """
    Returns the recid (tag 001) of the given record, if found in the
    database.

    If the record has no 001 field, an arXiv OAI ID (a value starting with
    "oai:arXiv.org:") is looked up in the record, first under
    CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG and then under the old tag 035__z.
    That ID is then searched for with several patterns; the first pattern
    yielding exactly one hit gives the recid.

    @param record: bibrecord structure

    @return: recid (as a string) if found, otherwise 0 on missing OAI,
             -1 on OAI tag error, or None if no recid found.
    """
    if record_has_field(record, "001"):
        return str(record_get_field_value(record, tag="001"))

    # FIXME: CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG is not set correctly for inspire
    # When OAI config is OK, use bibrecord.record_get_oaiid
    old_oaiid_tag = "035__z"
    try:
        tag = CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[:3]
        ind1 = CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[3]
        ind2 = CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[4]
        code = CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[5]
    except IndexError:
        sys.stderr.write("Invalid CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG")
        return -1

    # Try the configured tag first and fall back to the old one only when
    # the configured tag holds no arXiv OAI ID.
    lookups = (
        (tag, ind1, ind2, code),
        (old_oaiid_tag[:3], old_oaiid_tag[3],
         old_oaiid_tag[4], old_oaiid_tag[5]),
    )
    oai_id = None
    for lookup in lookups:
        for fieldvalue in record_get_field_values(record, *lookup):
            if fieldvalue.startswith("oai:arXiv.org:"):
                oai_id = fieldvalue
                break
        if oai_id is not None:
            break

    if oai_id is None:
        sys.stderr.write("No oai id found for record")
        return 0

    queries = [
        "%s__%s:%s" % (tag, code, oai_id),
        "%s__%s:%s" % (old_oaiid_tag[:3], old_oaiid_tag[5], oai_id),
        "reportnumber:arXiv:%s" % (oai_id.split(":")[-1],),
    ]
    for query in queries:
        hits = search_pattern(p=query).tolist()
        # Try different patterns; only an unambiguous single hit counts.
        if len(hits) == 1:
            return str(hits[0])
    return None
def _first_arxiv_oai_id(record, tag, ind1, ind2, code):
    """Return the first value at tag/ind1/ind2/code starting with
    "oai:arXiv.org:", or None if there is none."""
    for fieldvalue in record_get_field_values(record, tag, ind1, ind2, code):
        if fieldvalue.startswith("oai:arXiv.org:"):
            return fieldvalue
    return None


def record_get_recid(record):
    """
    Returns the recid (tag 001) of the given record, if found in the
    database.

    When the record has no 001 field, an arXiv OAI ID is extracted from
    the record (from CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG, else from the old
    tag 035__z) and searched for with several patterns; the first pattern
    that gives exactly one hit provides the recid.

    @param record: bibrecord structure

    @return: recid (as a string) if found, otherwise 0 on missing OAI,
             -1 on OAI tag error, or None if no recid found.
    """
    if record_has_field(record, "001"):
        return str(record_get_field_value(record, tag="001"))

    # FIXME: CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG is not set correctly for inspire
    # When OAI config is OK, use bibrecord.record_get_oaiid
    old_oaiid_tag = "035__z"
    try:
        tag = CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[:3]
        ind1 = CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[3]
        ind2 = CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[4]
        code = CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[5]
    except IndexError:
        sys.stderr.write("Invalid CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG")
        return -1

    oai_id = _first_arxiv_oai_id(record, tag, ind1, ind2, code)
    if oai_id is None:
        # Nothing under the configured tag: fall back to the old tag.
        oai_id = _first_arxiv_oai_id(record, old_oaiid_tag[:3],
                                     old_oaiid_tag[3], old_oaiid_tag[4],
                                     old_oaiid_tag[5])
    if oai_id is None:
        sys.stderr.write("No oai id found for record")
        return 0

    queries = ["%s__%s:%s" % (tag, code, oai_id)]
    queries.append("%s__%s:%s" % (old_oaiid_tag[:3], old_oaiid_tag[5], oai_id))
    queries.append("reportnumber:arXiv:%s" % (oai_id.split(":")[-1], ))
    for query in queries:
        hits = search_pattern(p=query).tolist()
        # Try different patterns; only an unambiguous single hit counts.
        if len(hits) == 1:
            return str(hits[0])
    return None
def add_recid(record, recid):
    """
    Store the given record-id in the record as its 001 controlfield.
    An already existing 001 field is replaced.

    @param record: the record to store the record-id in
    @type record: a bibrecord instance

    @param recid: record-id to be added
    @type recid: int
    """
    recid_value = str(recid)
    if not record_has_field(record, '001'):
        record_add_field(record, '001', controlfield_value=recid_value)
        return
    # An 001 is already there: overwrite its value in place.
    record_modify_controlfield(record, '001',
                               controlfield_value=recid_value,
                               field_position_global=1)
def merge_record_with_template(rec, template_name):
    """Extend the record rec with the contents of the template and return it.

    Tags missing from the record are copied from the template. For tags
    present in both, each field instance of the record receives the
    template subfield codes it does not have yet (first template value for
    the code).

    @param rec: record to extend (modified in place)
    @param template_name: name handed to get_record_template()
    @return: the extended record, or None when no template is found
    """
    template = get_record_template(template_name)
    if not template:
        # Nothing to merge; avoid handing an empty template to the parser.
        return

    template_bibrec = create_record(template)[0]
    for field_tag in template_bibrec:
        if not record_has_field(rec, field_tag):
            for field_instance in template_bibrec[field_tag]:
                record_add_field(rec, field_tag, field_instance[1],
                                 field_instance[2],
                                 subfields=field_instance[0])
            continue

        for template_field_instance in template_bibrec[field_tag]:
            subfield_codes_template = field_get_subfield_codes(
                template_field_instance)
            for field_instance in rec[field_tag]:
                subfield_codes = field_get_subfield_codes(field_instance)
                for code in subfield_codes_template:
                    if code not in subfield_codes:
                        field_add_subfield(
                            field_instance, code,
                            field_get_subfield_values(
                                template_field_instance, code)[0])
    return rec
def parse_resultfile(data, recid_patterns=(re_original_id,), recids=None,
                     sysno_patterns=None, preserved_tags=None):
    """
    This function will look for the original recid and any matching recids
    in a BibMatch result file containing references to matching records in
    comments before every record in MARCXML format.

    @param data: iterable of result texts, one per record
    @param recid_patterns: compiled patterns tried in order against the
        MARC rendering of a record that has no 001 field; the first match
        of the first matching pattern becomes the original recid
    @param recids: optional sequence of matching recids, indexed like
        data; when given it replaces the recids found in the comments
    @param sysno_patterns: not used by this function
    @param preserved_tags: tags whose fields are copied from the original
        record into the result, when the record has them

    @return: list of tuples (original recid, matching recids,
        matching sysnos, preserved fields); the sysno list is always empty
    """
    if preserved_tags is None:
        preserved_tags = ()

    record_pairs = []
    sysno_gen = get_sysno_generator()
    options = {'text-marc': 1, 'aleph-marc': 0}
    for index, match in enumerate(data):
        original_record_bibrec = create_records(match)[0][0]
        if record_has_field(original_record_bibrec, '001'):
            rec_id = record_get_field_value(original_record_bibrec, '001')
        else:
            # No 001: render the record as MARC text and look for the
            # original recid with the supplied patterns.
            sysno = next(sysno_gen)
            original_record_marc = create_marc_record(original_record_bibrec,
                                                      sysno, options)
            rec_id = ""
            for pattern in recid_patterns:
                matches = pattern.findall(original_record_marc)
                if matches:
                    rec_id = matches[0]
                    break

        if recids:
            matching_result_recids = [recids[index]]
        else:
            matching_result_recids = re_matched_recid.findall(match)
        matching_result_sysnos = []

        preserved_fields = {}
        for tag in preserved_tags:
            try:
                preserved_fields[tag] = original_record_bibrec[tag]
            except KeyError:
                # The record simply has no such tag; nothing to preserve.
                pass

        record_pairs.append((rec_id, matching_result_recids,
                             matching_result_sysnos, preserved_fields))
    return record_pairs
def add_field(rec1, rec2, fnum, findex1, findex2):
    """Insert one field of rec2 into rec1, at a position derived from the
    diff between the two records.

    @param rec1: First record (a record dictionary structure)
    @param rec2: Second record (a record dictionary structure)
    @param fnum: a 3 characters long string indicating field tag number
    @param findex1: the rec1 field position in the group of fields it
        belongs, or None when the position has to be worked out
    @param findex2: the rec2 field position in the group of fields it belongs
    """
    new_field = rec2[fnum][findex2]

    # A position in rec1 was given: the rec2 field goes right before it.
    if findex1 is not None:
        record_add_fields(rec1, fnum, [new_field], findex1)
        return

    # rec1 has no field with this tag at all: insert at the beginning.
    if not record_has_field(rec1, fnum):
        record_add_fields(rec1, fnum, [new_field])
        return

    # No position given and the tag exists: diff the two field groups and
    # keep only the per-indicator diff list (element 1 of the result).
    diffs_by_indicator = record_field_diff(rec1[fnum], rec2[fnum], fnum,
                                           match_subfields)[1]
    combined_diff = _combine_diffs(diffs_by_indicator)

    # Walk the diff up to the entry of the rec2 field, remembering the last
    # rec1 position seen on the way; the new field goes right after it.
    preceding_index = -1
    for entry in combined_diff:
        if entry[1] == findex2:
            break
        if entry[0] is not None:
            preceding_index = entry[0]

    record_add_fields(rec1, fnum, [new_field], preceding_index + 1)
def add_field(rec1, rec2, fnum, findex1, findex2):
    """Add the field of rec2 to rec1; where it lands depends on the diff
    of rec1 with rec2.

    @param rec1: First record (a record dictionary structure)
    @param rec2: Second record (a record dictionary structure)
    @param fnum: a 3 characters long string indicating field tag number
    @param findex1: the rec1 field position in the group of fields it
        belongs (None if unknown)
    @param findex2: the rec2 field position in the group of fields it belongs
    """
    candidate = [rec2[fnum][findex2]]

    if findex1 is not None:
        # Known rec1 position: insert before the field found there.
        record_add_fields(rec1, fnum, candidate, findex1)
    elif not record_has_field(rec1, fnum):
        # Tag unknown to rec1: insert at the beginning.
        record_add_fields(rec1, fnum, candidate)
    else:
        # Tag exists but no position was given: derive it from the diff.
        full_diff = record_field_diff(rec1[fnum], rec2[fnum], fnum,
                                      match_subfields)
        # Element 1 is the list of diffs by indicators (without the 'c').
        merged_diff = _combine_diffs(full_diff[1])

        # Remember the last rec1 index met before the rec2 field shows up.
        anchor = -1
        for pairing in merged_diff:
            if pairing[1] == findex2:
                break
            if pairing[0] is not None:
                anchor = pairing[0]

        # Insert one position after that field.
        record_add_fields(rec1, fnum, candidate, anchor + 1)
def merge_field_group(rec1, rec2, fnum, ind1='', ind2='',
                      merge_conflicting_fields=False):
    """Merges non-conflicting fields from 'rec2' to 'rec1' for a specific tag.

    With merge_conflicting_fields set, fields of both records are kept
    even when they conflict. rec1 is modified in place.

    @param rec1: First record (a record dictionary structure)
    @param rec2: Second record (a record dictionary structure)
    @param fnum: a 3 characters long string indicating field tag number
    @param ind1: a 1 character long string
    @param ind2: a 1 character long string
    @param merge_conflicting_fields: whether to merge conflicting fields or not
    @raise Exception: if the (ind1, ind2) pair is absent from the diff
    """
    ### Check if merging goes for all indicators and set a boolean
    merging_all_indicators = not ind1 and not ind2

    ### check if there is no field in rec2 to be merged in rec1
    if not record_has_field(rec2, fnum):
        return

    ### get fields of rec2
    if merging_all_indicators:
        fields2 = record_get_field_instances(rec2, fnum, '%', '%')
    else:
        fields2 = record_get_field_instances(rec2, fnum, ind1, ind2)
    if len(fields2) == 0:
        return

    ### check if field in rec1 doesn't even exist
    if not record_has_field(rec1, fnum):
        record_add_fields(rec1, fnum, fields2)
        return

    ### compare the fields, get diffs for given indicators
    alldiffs = record_field_diff(rec1[fnum], rec2[fnum], fnum,
                                 match_subfields, ind1, ind2)

    ### check if fields are the same
    if alldiffs is None:
        return  # nothing to merge

    ### find the diffing for the fields of the given indicators
    alldiffs = alldiffs[1]  # keep only the list of diffs by indicators (without the 'c')

    if merging_all_indicators:
        # combine the diffs for each indicator to one list
        diff = _combine_diffs(alldiffs)
    else:
        # diffing for one indicator: look for the indicator pair
        for indicator_diff in alldiffs:
            if indicator_diff[0] == (ind1, ind2):
                break
        else:
            raise Exception("Indicators not in diff result.")
        diff = indicator_diff[1]  # keep only the list of diffs (without the indicator tuple)

    ### proceed to merging fields in a new field list
    fields1, fields2 = rec1[fnum], rec2[fnum]
    new_fields = []
    if not merge_conflicting_fields:
        # merge non-conflicting fields
        for m in diff:  # for every match of fields in the diff
            if m[0] is not None:  # if rec1 has a field in the diff, keep it
                new_fields.append(deepcopy(fields1[m[0]]))
            else:  # else take the field from rec2
                new_fields.append(deepcopy(fields2[m[1]]))
    else:
        # merge all fields
        for m in diff:  # for every match of fields in the diff
            if m[1] is not None:  # if rec2 has a field, add it
                new_fields.append(deepcopy(fields2[m[1]]))
                if m[0] is not None and fields1[m[0]][0] != fields2[m[1]][0]:
                    # if the fields are not the same then add the field of rec1
                    new_fields.append(deepcopy(fields1[m[0]]))
            else:
                new_fields.append(deepcopy(fields1[m[0]]))

    ### delete existing fields
    record_delete_field(rec1, fnum, ind1, ind2)

    ## find where the new_fields should be inserted in rec1 (insert_index)
    if merging_all_indicators:
        insert_index = 0
    else:
        ind_pair = (ind1, ind2)
        first_last_dict = _first_and_last_index_for_each_indicator(
            rec1.get(fnum, []))
        # find the indicator pair which is just before the one which will
        # be inserted
        ind_pair_before = None
        for pair in sorted(first_last_dict.keys()):
            if pair > ind_pair:
                break
            ind_pair_before = pair
        if ind_pair_before is None:
            # no smaller indicator pair exists: insert at the beginning
            insert_index = 0
        else:
            # insert after the last field index of the previous indicator pair
            insert_index = first_last_dict[ind_pair_before][1] + 1

    ### add the new (merged) fields in correct 'in_field_index' position
    record_add_fields(rec1, fnum, new_fields, insert_index)
    return
def _match_records_set_recid(record, recid):
    """Store recid as the 001 controlfield of record, replacing any
    existing 001 value."""
    if record_has_field(record, '001'):
        record_modify_controlfield(record, '001',
                                   controlfield_value=str(recid),
                                   field_position_global=1)
    else:
        record_add_field(record, '001', controlfield_value=str(recid))


def match_records(records, qrystrs=None, perform_request_search_mode="eee",
                  operator="a", verbose=1, server_url=CFG_SITE_URL, modify=0):
    """ Match passed records with existing records on a local or remote
    Invenio installation. Returns which records are new (no match), which
    are matched, which are ambiguous and which are fuzzy-matched. A
    formatted result of each records matching are appended to each record
    tuple: (record, status_code, list_of_errors, result)

    @param records: records to analyze
    @type records: list of records

    @param qrystrs: Querystrings; when None or empty, one default
        querystring is used
    @type qrystrs: list of object

    @param server_url: which server to search on. Local installation by default
    @type server_url: str

    @param perform_request_search_mode: run the query in this mode
    @type perform_request_search_mode: string

    @param operator: "o" "a"
    @type operator: str

    @param verbose: be loud
    @type verbose: int

    @param modify: output modified records of matches
    @type modify: int

    @rtype: list of lists
    @return an array of arrays of records, like this [newrecs,matchedrecs,
            ambiguousrecs,fuzzyrecs]
    """
    server = InvenioConnector(server_url)

    newrecs = []
    matchedrecs = []
    ambiguousrecs = []
    fuzzyrecs = []

    # Fall back to a single default (empty) querystring. A new list is
    # bound so that a list passed in by the caller is never modified.
    if not qrystrs:
        qrystrs = [""]

    record_counter = 0
    for rec in records:
        record_counter += 1
        if verbose > 1:
            sys.stderr.write("\n Processing record: #%d .." % record_counter)

        for qrystr in qrystrs:
            querystring = Querystring()
            querystring.default()

            if qrystr != "":
                querystring.from_qrystr(qrystr, perform_request_search_mode,
                                        operator)
            else:
                querystring.default()

            querystring.search_engine_encode()

            ### get field values for record instance
            inst = []

            ### get appropriate fields from database
            for field in querystring.field:
                tags = get_field_tags(field)
                if len(tags) > 0:
                    # Fetch value from input record of first tag only
                    # FIXME: Extracting more then first tag, evaluating each
                    field = tags[0]

                ### use expanded tags
                tag = field[0:3]
                ind1 = field[3:4]
                ind2 = field[4:5]
                code = field[5:6]

                if ind1 in ("_", "%"):
                    ind1 = ""
                if ind2 in ("_", "%"):
                    ind2 = ""
                if code in ("_", "%"):
                    code = "a"

                if field != "001":
                    finsts = record_get_field_instances(rec[0], tag,
                                                        ind1, ind2)
                    inst.append(get_subfield(finsts, code))
                else:
                    # controlfield: no indicators, no subfield code
                    inst.append(record_get_field_values(rec[0], field,
                                                        ind1="", ind2="",
                                                        code=""))

            ### format acquired field values
            for i, value in enumerate(inst):
                for fmt in querystring.format[i]:
                    value = bibconvert.FormatField(value, fmt)
                    inst[i] = value

            ### perform the search
            if inst[0] != "":
                p1 = inst[0]
                f1 = querystring.field[0]
                m1 = querystring.mode[0]
                op1 = querystring.operator[0]

                p2 = inst[1]
                f2 = querystring.field[1]
                m2 = querystring.mode[1]
                op2 = querystring.operator[1]

                p3 = inst[2]
                f3 = querystring.field[2]
                m3 = querystring.mode[2]

                #1st run the basic perform_req_search
                recID_list = server.search(
                    p1=p1, f1=f1, m1=m1, op1=op1,
                    p2=p2, f2=f2, m2=m2, op2=op2,
                    p3=p3, f3=f3, m3=m3, of='id')

                if verbose > 8:
                    sys.stderr.write("\nperform_request_search with values" +
                        " p1=" + str(p1) + " f1=" + str(f1) + " m1=" + str(m1) + " op1=" + str(op1) +
                        " p2=" + str(p2) + " f2=" + str(f2) + " m2=" + str(m2) + " op2=" + str(op2) +
                        " p3=" + str(p3) + " f3=" + str(f3) + " m3=" + str(m3) +
                        " result=" + str(recID_list) + "\n")

                hit_count = len(recID_list)
                if hit_count > 1:
                    # ambiguous match
                    ambiguousrecs.append(rec + (match_result_output(
                        recID_list, server_url, querystring,
                        "ambiguous-matched"), ))
                    if verbose > 8:
                        sys.stderr.write("ambiguous\n")
                elif hit_count == 1:
                    # exact match
                    if modify:
                        _match_records_set_recid(rec[0], recID_list[0])
                    matchedrecs.append(rec + (match_result_output(
                        recID_list, server_url, querystring,
                        "exact-matched"), ))
                    if verbose > 8:
                        sys.stderr.write("match\n")
                else:
                    # no match.. try fuzzy matching: check if all the words
                    # appear in the field of interest
                    intersected = None
                    word_groups = (
                        (main_words_list(p1), f1),
                        (main_words_list(p2), f2),
                        (main_words_list(p3), f3),
                    )
                    for words, search_field in word_groups:
                        for word in words:
                            word = "'" + word + "'"
                            ilist = server.search(p=word, f=search_field,
                                                  of="id")
                            if verbose > 8:
                                sys.stderr.write("fuzzy perform_request_search with values" +
                                    " p=" + str(word) + " f=" + str(search_field) + " res " + str(ilist) + "\n")
                            if intersected is None:
                                intersected = ilist
                            intersected = list(set(ilist) & set(intersected))

                    if intersected:
                        # this was a fuzzy match
                        if modify:
                            _match_records_set_recid(rec[0], intersected[0])
                        fuzzyrecs.append(rec + (match_result_output(
                            intersected, server_url, querystring,
                            "fuzzy-matched"), ))
                        if verbose > 8:
                            sys.stderr.write("fuzzy\n")
                    else:
                        # no match
                        newrecs.append(rec + (match_result_output(
                            recID_list, server_url, querystring), ))
                        if verbose > 8:
                            sys.stderr.write("new\n")

    #return results
    return [newrecs, matchedrecs, ambiguousrecs, fuzzyrecs]
def merge_field_group(rec1, rec2, fnum, ind1='', ind2='',
                      merge_conflicting_fields=False):
    """Merges non-conflicting fields from 'rec2' to 'rec1' for a specific tag.

    When merge_conflicting_fields is set, the fields of both records are
    kept even if they conflict. rec1 is modified in place.

    @param rec1: First record (a record dictionary structure)
    @param rec2: Second record (a record dictionary structure)
    @param fnum: a 3 characters long string indicating field tag number
    @param ind1: a 1 character long string
    @param ind2: a 1 character long string
    @param merge_conflicting_fields: whether to merge conflicting fields or not
    @raise Exception: if the (ind1, ind2) pair is absent from the diff
    """
    ### Check if merging goes for all indicators and set a boolean
    merging_all_indicators = not ind1 and not ind2

    ### check if there is no field in rec2 to be merged in rec1
    if not record_has_field(rec2, fnum):
        return

    ### get fields of rec2
    if merging_all_indicators:
        fields2 = record_get_field_instances(rec2, fnum, '%', '%')
    else:
        fields2 = record_get_field_instances(rec2, fnum, ind1, ind2)
    if len(fields2) == 0:
        return

    ### check if field in rec1 doesn't even exist
    if not record_has_field(rec1, fnum):
        record_add_fields(rec1, fnum, fields2)
        return

    ### compare the fields, get diffs for given indicators
    alldiffs = record_field_diff(rec1[fnum], rec2[fnum], fnum,
                                 match_subfields, ind1, ind2)

    ### check if fields are the same
    if alldiffs is None:
        return  # nothing to merge

    ### find the diffing for the fields of the given indicators
    # keep only the list of diffs by indicators (without the 'c')
    diffs_by_indicator = alldiffs[1]

    if merging_all_indicators:
        # combine the diffs for each indicator to one list
        diff = _combine_diffs(diffs_by_indicator)
    else:
        # diffing for one indicator: look for the indicator pair
        for candidate in diffs_by_indicator:
            if candidate[0] == (ind1, ind2):
                break
        else:
            raise Exception("Indicators not in diff result.")
        # keep only the list of diffs (without the indicator tuple)
        diff = candidate[1]

    ### proceed to merging fields in a new field list
    fields1, fields2 = rec1[fnum], rec2[fnum]
    new_fields = []
    if not merge_conflicting_fields:
        # merge non-conflicting fields: keep the rec1 field when the diff
        # entry has one, otherwise take the field from rec2
        for m in diff:
            if m[0] is not None:
                new_fields.append(deepcopy(fields1[m[0]]))
            else:
                new_fields.append(deepcopy(fields2[m[1]]))
    else:
        # merge all fields
        for m in diff:
            if m[1] is None:
                new_fields.append(deepcopy(fields1[m[0]]))
                continue
            # rec2 has a field: add it
            new_fields.append(deepcopy(fields2[m[1]]))
            if m[0] is not None and fields1[m[0]][0] != fields2[m[1]][0]:
                # the fields are not the same: add the field of rec1 too
                new_fields.append(deepcopy(fields1[m[0]]))

    ### delete existing fields
    record_delete_field(rec1, fnum, ind1, ind2)

    ## find where the new_fields should be inserted in rec1 (insert_index)
    if merging_all_indicators:
        insert_index = 0
    else:
        ind_pair = (ind1, ind2)
        first_last_dict = _first_and_last_index_for_each_indicator(
            rec1.get(fnum, []))
        # find the indicator pair which is just before the one which will
        # be inserted
        ind_pair_before = None
        for pair in sorted(first_last_dict.keys()):
            if pair > ind_pair:
                break
            ind_pair_before = pair
        if ind_pair_before is None:
            # no smaller indicator pair exists: insert at the beginning
            insert_index = 0
        else:
            # insert after the last field index of the previous indicator pair
            insert_index = first_last_dict[ind_pair_before][1] + 1

    ### add the new (merged) fields in correct 'in_field_index' position
    record_add_fields(rec1, fnum, new_fields, insert_index)
    return