def add_citation_identity(self, ci, only_fast_match_methods=True):
    """Try to assign an identity to a citation.

    The identity is stored under the key zbl_io.ZBL_ID_FIELD (the <an>
    field). Matching happens in up to two stages:

    1. Fast: self.__match_identity_on_id__(ci) is called (per the original
       documentation this relies on the id-maps self.mr_to_id_map and
       self.zbl_to_id_map). If ci contains zbl_io.ZBL_ID_FIELD afterwards,
       the citation counts as matched.
    2. Slow (only when only_fast_match_methods is false): every record read
       from the ZBL file at self.main_zbl_path is compared with the citation
       using self.similarity_operator(record, ci). Records whose "py" value
       differs from the citation's are skipped when both carry "py". The
       best of the similar records is chosen with
       zbl_similarity.select_best_fitting_record and its identity is copied
       into the citation.

    Parameters:
        ci: citation given as a dictionary; it is modified in place.
        only_fast_match_methods: when true (default) the scan of the ZBL
            file is skipped.

    Returns:
        The same ci object, with zbl_io.ZBL_ID_FIELD set if a match was found.

    Side effects:
        Increments exactly one of self.matched / self.missed per call.
    """
    id_field = zbl_io.ZBL_ID_FIELD

    self.__match_identity_on_id__(ci)
    if id_field in ci:
        #print "Assigning to citation [ID/ZBL/MR]:", ci[zbl_io.ZBL_ID_FIELD]
        self.matched += 1
        return ci
    if only_fast_match_methods:
        self.missed += 1
        return ci

    candidates = []
    # The with-block guarantees the file is closed even if reading or the
    # similarity operator raises.
    with open(self.main_zbl_path, 'r') as f:
        for record in zbl_io.read_zbl_records(f):
            # The year only filters when both sides have one.
            if "py" in ci and "py" in record and ci["py"] != record["py"]:
                continue
            if self.similarity_operator(record, ci):
                # Collect the record that was just compared (the previous
                # code referenced an undefined name here).
                candidates.append(record)

    if not candidates:
        self.missed += 1
        return ci

    matching_record = zbl_similarity.select_best_fitting_record(
        ci, candidates, self.selection_fields)
    ci[id_field] = matching_record[id_field]
    #print "Assigning to citation [SIMILARITY]:", ci[zbl_io.ZBL_ID_FIELD]
    self.matched += 1
    return ci
def find_most_similar_zbl_record(self, main_zbl_record):
    """Find the loaded (aux) record that best matches main_zbl_record.

    A loaded record is a candidate when
    self.similarity_operator(main_zbl_record, aux_record) is true. Which
    loaded records are examined depends on the publication year:

    * main_zbl_record has a "py" entry: the records stored under that year
      in self.aux_zbl_recs_dict_py, followed by the records without a year
      (self.aux_zbl_recs_list_no_py);
    * otherwise: every record in self.aux_zbl_recs_list.

    When candidates exist, the choice among them is delegated to
    zbl_similarity.select_best_fitting_record with self.selection_fields
    (per the original documentation: the candidate with the smallest edit
    distance calculated on those fields).

    Parameters:
        main_zbl_record: record (dictionary) to find a counterpart for.

    Returns:
        The selected record, or None when no loaded record is similar.
    """
    if "py" in main_zbl_record:
        # Same-year publications first, then the ones with no year at all.
        pools = (
            self.aux_zbl_recs_dict_py.get(main_zbl_record["py"], []),
            self.aux_zbl_recs_list_no_py,
        )
    else:
        # No year to narrow the search: check all the publications.
        pools = (self.aux_zbl_recs_list,)

    candidates = []
    for pool in pools:
        for aux_zbl_record in pool:
            if self.similarity_operator(main_zbl_record, aux_zbl_record):
                candidates.append(aux_zbl_record)

    if not candidates:
        return None

    matching_record = zbl_similarity.select_best_fitting_record(
        main_zbl_record, candidates, self.selection_fields)
    #print "[find_most_similar_zbl_record] matching:", main_zbl_record[zbl_io.ZBL_ID_FIELD], "&", matching_record[zbl_io.ZBL_ID_FIELD], "out of", [r[zbl_io.ZBL_ID_FIELD] for r in candidates]
    return matching_record
def add_citation_identity(self, ci, only_fast_match_methods=True):
    """Try to assign an identity to a citation.

    The identity is stored under the key zbl_io.ZBL_ID_FIELD (the <an>
    field). Matching happens in up to two stages:

    1. Fast: self.__match_identity_on_id__(ci) is called (per the original
       documentation this relies on the id-maps self.mr_to_id_map and
       self.zbl_to_id_map). If ci contains zbl_io.ZBL_ID_FIELD afterwards,
       the citation counts as matched.
    2. Slow (only when only_fast_match_methods is false): every record read
       from the ZBL file at self.main_zbl_path is compared with the citation
       using self.similarity_operator(record, ci). Records whose "py" value
       differs from the citation's are skipped when both carry "py". The
       best of the similar records is chosen with
       zbl_similarity.select_best_fitting_record and its identity is copied
       into the citation.

    Parameters:
        ci: citation given as a dictionary; it is modified in place.
        only_fast_match_methods: when true (default) the scan of the ZBL
            file is skipped.

    Returns:
        The same ci object, with zbl_io.ZBL_ID_FIELD set if a match was found.

    Side effects:
        Increments exactly one of self.matched / self.missed per call.
    """
    id_field = zbl_io.ZBL_ID_FIELD

    self.__match_identity_on_id__(ci)
    if id_field in ci:
        #print "Assigning to citation [ID/ZBL/MR]:", ci[zbl_io.ZBL_ID_FIELD]
        self.matched += 1
        return ci
    if only_fast_match_methods:
        self.missed += 1
        return ci

    candidates = []
    # The with-block guarantees the file is closed even if reading or the
    # similarity operator raises.
    with open(self.main_zbl_path, 'r') as f:
        for record in zbl_io.read_zbl_records(f):
            # The year only filters when both sides have one.
            if "py" in ci and "py" in record and ci["py"] != record["py"]:
                continue
            if self.similarity_operator(record, ci):
                # Collect the record that was just compared (the previous
                # code referenced an undefined name here).
                candidates.append(record)

    if not candidates:
        self.missed += 1
        return ci

    matching_record = zbl_similarity.select_best_fitting_record(
        ci, candidates, self.selection_fields)
    ci[id_field] = matching_record[id_field]
    #print "Assigning to citation [SIMILARITY]:", ci[zbl_io.ZBL_ID_FIELD]
    self.matched += 1
    return ci
def find_most_similar_zbl_record(self, main_zbl_record):
    """Find the loaded (aux) record that best matches main_zbl_record.

    A loaded record is a candidate when
    self.similarity_operator(main_zbl_record, aux_record) is true. Which
    loaded records are examined depends on the publication year:

    * main_zbl_record has a "py" entry: the records stored under that year
      in self.aux_zbl_recs_dict_py, followed by the records without a year
      (self.aux_zbl_recs_list_no_py);
    * otherwise: every record in self.aux_zbl_recs_list.

    When candidates exist, the choice among them is delegated to
    zbl_similarity.select_best_fitting_record with self.selection_fields
    (per the original documentation: the candidate with the smallest edit
    distance calculated on those fields).

    Parameters:
        main_zbl_record: record (dictionary) to find a counterpart for.

    Returns:
        The selected record, or None when no loaded record is similar.
    """
    if "py" in main_zbl_record:
        # Same-year publications first, then the ones with no year at all.
        pools = (
            self.aux_zbl_recs_dict_py.get(main_zbl_record["py"], []),
            self.aux_zbl_recs_list_no_py,
        )
    else:
        # No year to narrow the search: check all the publications.
        pools = (self.aux_zbl_recs_list,)

    candidates = []
    for pool in pools:
        for aux_zbl_record in pool:
            if self.similarity_operator(main_zbl_record, aux_zbl_record):
                candidates.append(aux_zbl_record)

    if not candidates:
        return None

    matching_record = zbl_similarity.select_best_fitting_record(
        main_zbl_record, candidates, self.selection_fields)
    #print "[find_most_similar_zbl_record] matching:", main_zbl_record[zbl_io.ZBL_ID_FIELD], "&", matching_record[zbl_io.ZBL_ID_FIELD], "out of", [r[zbl_io.ZBL_ID_FIELD] for r in candidates]
    return matching_record