def add_citation_identity(self, ci, only_fast_match_methods=True):
    """Try to assign an identity (the zbl_io.ZBL_ID_FIELD entry) to citation `ci`.

    The citation is first handed to self.__match_identity_on_id__(ci). If it
    carries zbl_io.ZBL_ID_FIELD afterwards it is counted as matched. Otherwise,
    unless `only_fast_match_methods` is true, every record read from
    self.main_zbl_path is compared against the citation with
    self.similarity_operator, and the ID of the record picked by
    zbl_similarity.select_best_fitting_record (using self.selection_fields)
    is copied into the citation.

    Args:
        ci: citation given as a dictionary; it is modified in place.
        only_fast_match_methods: when true, the full scan of the ZBL file
            is skipped and an unresolved citation is counted as missed.

    Returns:
        The same `ci` object, with the ID field set if a match was found.

    Side effects:
        Increments exactly one of self.matched / self.missed.
    """
    self.__match_identity_on_id__(ci)

    if zbl_io.ZBL_ID_FIELD in ci:
        self.matched += 1
        return ci
    if only_fast_match_methods:
        self.missed += 1
        return ci

    candidates = []
    # The context manager closes the file even if reading or comparing raises.
    with open(self.main_zbl_path, 'r') as zbl_file:
        for record in zbl_io.read_zbl_records(zbl_file):
            # When both sides know the publication year, a mismatch rules
            # the record out without running the similarity operator.
            if "py" in ci and "py" in record and ci["py"] != record["py"]:
                continue
            if self.similarity_operator(record, ci):
                # Collect the record just compared (the original appended
                # an undefined name here and raised NameError).
                candidates.append(record)

    if not candidates:
        self.missed += 1
        return ci

    matching_record = zbl_similarity.select_best_fitting_record(
        ci, candidates, self.selection_fields)
    ci[zbl_io.ZBL_ID_FIELD] = matching_record[zbl_io.ZBL_ID_FIELD]
    self.matched += 1
    return ci
 def find_most_similar_zbl_record(self, main_zbl_record):
     """Return the loaded (aux) record most similar to `main_zbl_record`.

     Only records that self.similarity_operator(main_zbl_record, aux) accepts
     are considered. If the main record has a "py" entry, the search covers
     self.aux_zbl_recs_dict_py[py] (if present) followed by
     self.aux_zbl_recs_list_no_py; otherwise all of self.aux_zbl_recs_list
     is scanned.

     Args:
         main_zbl_record: record (dictionary) to find a counterpart for.

     Returns:
         None when no aux record is accepted by the similarity operator,
         otherwise the record chosen by
         zbl_similarity.select_best_fitting_record over self.selection_fields.
     """
     if "py" in main_zbl_record:
         # Publications of the same year first, then those with no year at all.
         pools = (
             self.aux_zbl_recs_dict_py.get(main_zbl_record["py"], []),
             self.aux_zbl_recs_list_no_py,
         )
     else:
         # No year to narrow the search: check all the publications.
         pools = (self.aux_zbl_recs_list,)

     candidates = []
     for pool in pools:
         for aux_zbl_record in pool:
             if self.similarity_operator(main_zbl_record, aux_zbl_record):
                 candidates.append(aux_zbl_record)

     if not candidates:
         return None

     return zbl_similarity.select_best_fitting_record(
         main_zbl_record, candidates, self.selection_fields)
Exemple #3
0
    def add_citation_identity(self, ci, only_fast_match_methods=True):
        """Try to assign an identity (the zbl_io.ZBL_ID_FIELD entry) to citation `ci`.

        Matching proceeds in two stages:

        1. self.__match_identity_on_id__(ci) is called; if `ci` contains
           zbl_io.ZBL_ID_FIELD afterwards, the citation counts as matched.
        2. Only when `only_fast_match_methods` is false: all records read from
           self.main_zbl_path are tested with self.similarity_operator, and
           zbl_similarity.select_best_fitting_record (on
           self.selection_fields) picks the record whose ID is assigned.

        Args:
            ci: citation given as a dictionary; it is modified in place.
            only_fast_match_methods: when true, stage 2 is skipped and an
                unresolved citation is counted as missed.

        Returns:
            The same `ci` object, with the ID field set if a match was found.

        Side effects:
            Increments exactly one of self.matched / self.missed.
        """
        self.__match_identity_on_id__(ci)

        id_field = zbl_io.ZBL_ID_FIELD
        if id_field in ci:
            self.matched += 1
            return ci
        if only_fast_match_methods:
            self.missed += 1
            return ci

        candidates = []
        # `with` guarantees the file is closed even when an exception escapes.
        with open(self.main_zbl_path, 'r') as f:
            for record in zbl_io.read_zbl_records(f):
                years_known = "py" in ci and "py" in record
                if years_known and ci["py"] != record["py"]:
                    # Different publication years: cannot be the same work.
                    continue
                if self.similarity_operator(record, ci):
                    # Append the record under test; the original referenced
                    # an undefined name here, raising NameError.
                    candidates.append(record)

        if not candidates:
            self.missed += 1
            return ci

        matching_record = zbl_similarity.select_best_fitting_record(
            ci, candidates, self.selection_fields)
        ci[id_field] = matching_record[id_field]
        self.matched += 1
        return ci
Exemple #4
0
    def find_most_similar_zbl_record(self, main_zbl_record):
        """Return the loaded (aux) record most similar to `main_zbl_record`.

        An aux record is a candidate when
        self.similarity_operator(main_zbl_record, aux) is true. Which aux
        records are examined depends on the main record:

        * with a "py" entry: self.aux_zbl_recs_dict_py[py] (if that key
          exists), then self.aux_zbl_recs_list_no_py;
        * without one: the whole self.aux_zbl_recs_list.

        Args:
            main_zbl_record: record (dictionary) to find a counterpart for.

        Returns:
            None if there is no candidate; otherwise the result of
            zbl_similarity.select_best_fitting_record applied to the
            candidates with self.selection_fields.
        """
        if "py" in main_zbl_record:
            # Same-year publications, followed by those lacking a year.
            year = main_zbl_record["py"]
            search_space = [
                self.aux_zbl_recs_dict_py.get(year, []),
                self.aux_zbl_recs_list_no_py,
            ]
        else:
            # Nothing to filter on: check all the publications.
            search_space = [self.aux_zbl_recs_list]

        candidates = []
        for records in search_space:
            candidates.extend(
                aux_zbl_record for aux_zbl_record in records
                if self.similarity_operator(main_zbl_record, aux_zbl_record))

        if not candidates:
            return None

        return zbl_similarity.select_best_fitting_record(
            main_zbl_record, candidates, self.selection_fields)