def compare_va_to_ra(va_id, ra_id):
    '''
    Compares the origin names of a virtual author against the name list of
    a real author.

    The first 'orig_authorname_id' record of the virtual author is looked up
    via get_name_and_db_name_strings(), and its "name" entry is scored against
    every name of the real author with compare_names(). The result is the
    arithmetic mean of those scores.

    @param va_id: ID of the virtual author
    @type va_id: int
    @param ra_id: ID of the real author
    @type ra_id: int

    @return: The probability resulting from the name comparison. 0.0 is
        returned when the virtual author has no name, when the real author
        has no names to compare against, or when the mean score is
        below 0.1.
    @rtype: float
    '''
    bconfig.LOGGER.info("|-> Start of name comparison (va %s : ra %s)"
                        % (va_id, ra_id))

    ra_names = get_realauthor_names_from_set(ra_id)
    va_nameid_recs = get_virtualauthor_records(va_id,
                                               tag='orig_authorname_id')

    # -1 is handed to the name lookup when the virtual author carries no
    # 'orig_authorname_id' record; only the first record is considered.
    authorname_id = -1

    if va_nameid_recs:
        authorname_id = va_nameid_recs[0]['value']

    authorname_strings = get_name_and_db_name_strings(authorname_id)
    va_name = authorname_strings["name"]

    if not va_name:
        return 0.0

    comparisons = []

    for ra_name in ra_names:
        comparison = compare_names(va_name, ra_name)
        bconfig.LOGGER.info("|-> %s & %s -> %s"
                            % (va_name, ra_name, comparison))
        comparisons.append(comparison)

    bconfig.LOGGER.debug("|--> Name comparisons: %s" % (comparisons))
    bconfig.LOGGER.info("|-> End of name comparison")

    if comparisons:
        ret = float(sum(comparisons)) / len(comparisons)
    else:
        # A real author without any names yields no evidence at all; avoid
        # dividing by zero and report "no match" instead.
        ret = 0.0

    # Mean scores below 0.1 are clamped to zero (kept as a float so the
    # return type is consistent).
    if ret < .1:
        ret = 0.0

    bconfig.LOGGER.info("|--> Resulting name probability: %s" % (ret))

    return ret
def compare_va_to_ra(va_id, ra_id):
    """
    Compares the origin names of a virtual author against the name list of
    a real author.

    The first "orig_authorname_id" record of the virtual author is looked up
    via get_name_and_db_name_strings(), and its "name" entry is scored against
    every name of the real author with compare_names(). The result is the
    arithmetic mean of those scores.

    @param va_id: ID of the virtual author
    @type va_id: int
    @param ra_id: ID of the real author
    @type ra_id: int

    @return: The probability resulting from the name comparison. 0.0 is
        returned when the virtual author has no name, when the real author
        has no names to compare against, or when the mean score is
        below 0.1.
    @rtype: float
    """
    bconfig.LOGGER.info(
        "|-> Start of name comparison (va %s : ra %s)" % (va_id, ra_id)
    )

    ra_names = get_realauthor_names_from_set(ra_id)
    va_nameid_recs = get_virtualauthor_records(va_id, tag="orig_authorname_id")

    # -1 is handed to the name lookup when the virtual author carries no
    # "orig_authorname_id" record; only the first record is considered.
    authorname_id = -1

    if va_nameid_recs:
        authorname_id = va_nameid_recs[0]["value"]

    authorname_strings = get_name_and_db_name_strings(authorname_id)
    va_name = authorname_strings["name"]

    if not va_name:
        return 0.0

    comparisons = []

    for ra_name in ra_names:
        comparison = compare_names(va_name, ra_name)
        bconfig.LOGGER.info(
            "|-> %s & %s -> %s" % (va_name, ra_name, comparison)
        )
        comparisons.append(comparison)

    bconfig.LOGGER.debug("|--> Name comparisons: %s" % (comparisons))
    bconfig.LOGGER.info("|-> End of name comparison")

    if comparisons:
        ret = float(sum(comparisons)) / len(comparisons)
    else:
        # A real author without any names yields no evidence at all; avoid
        # dividing by zero and report "no match" instead.
        ret = 0.0

    # Mean scores below 0.1 are clamped to zero (kept as a float so the
    # return type is consistent).
    if ret < 0.1:
        ret = 0.0

    bconfig.LOGGER.info("|--> Resulting name probability: %s" % (ret))

    return ret
def test_compare_names(self):
    """bibauthorid - test names comparison functions"""
    # (expected score, first name, second name), checked in this order.
    expectations = (
        (0.95, 'Ellis, j.', 'Ellis, j.'),
        (1.0, 'Ellis, john', 'Ellis, john'),
        (1.0, 'John Ellis', 'John Ellis'),
        # Disabled case, kept for reference:
        # (0.95, 'J. Ellis', 'J. Ellis'),
        (0.0, 'John Ellis', 'John Mark'),
        (0.0, 'Ellis, John', 'Mark, John'),
        (0.0, '', ''),
    )

    for expected_score, first_name, second_name in expectations:
        actual_score = bau.compare_names(first_name, second_name)
        self.assertEqual(expected_score, actual_score)