def set_authors(connector, authors):
    """Update the author statistics tables for one list of authors.

    Three counters are incremented through ``connector.execute``:

    * ``number_author`` - keyed by the number of authors (sent as a string),
    * ``authors`` - keyed by every author name exactly as given,
    * ``popular_names`` - keyed by every space-separated token of the
      normalized author name.

    Tokens shorter than two characters (initials and the empty strings that
    ``split(" ")`` produces for stray spaces) and tokens that parse as
    integers are not counted as popular names.

    Args:
        connector: object exposing ``execute(query, params)``. Presumably a
            MySQL cursor, given the ``ON DUPLICATE KEY UPDATE`` syntax -
            confirm against the caller.
        authors: sized iterable of author name strings.
    """
    # Count how often a publication has this many authors.
    connector.execute(
        ("INSERT INTO `number_author`(number,counter) VALUES(%s,1)"
         "ON DUPLICATE KEY UPDATE counter= counter+1"),
        (str(len(authors)),))

    for name in authors:
        # Count the author name as it was given.
        connector.execute(
            ("INSERT INTO `authors`(name,counter) VALUES(%s,1)"
             "ON DUPLICATE KEY UPDATE counter= counter+1"),
            (name,))

        for token in normalize_authors(name).split(" "):
            # split(" ") yields "" for an empty normalized name or repeated
            # spaces; those must not end up as a "popular name" either.
            if len(token) < 2 or _is_integer_token(token):
                continue
            connector.execute(
                ("INSERT INTO `popular_names`(name,counter) VALUES(%s,1)"
                 "ON DUPLICATE KEY UPDATE counter= counter+1"),
                (token,))


def _is_integer_token(token):
    """Return True when ``int(token)`` succeeds, i.e. the token is a number."""
    try:
        int(token)
    except ValueError:
        return False
    return True
index_col="Id") read_connector = pymysql.connect(user="******", password="******", host="localhost", charset="utf8mb4") counter = 0 setup() with read_connector.cursor() as cursor: for key, value in authors.iterrows(): name = str(value['Name']) main_name = name if name == '' or pandas.isnull(name): print(key, "empty name") continue normal_name = normalize_authors(name) metaphone_name = metaphone(normal_name) cursor.execute( "INSERT INTO names.authors(Id,main_name,normal_name,metaphone_name) VALUES (%s,%s,%s,%s)", (int(key), main_name, normal_name, metaphone_name)) if counter % 50 == 0: read_connector.commit() counter += 1 if counter % 10000 == 0: print(counter) read_connector.commit() read_connector.close()
def test_normalize_authors3(self):
    """normalize_authors("C.B. Lee") is expected to give "c b lee"."""
    expected = "c b lee"
    normalized = normalize_authors("C.B. Lee")
    self.assertEqual(normalized, expected)
def test_normalize_authors(self):
    """Expect "! Kim lu Yee " to be normalized to "kim lux yee".

    The expected value has no leading "!" or surrounding blanks, is
    lower-case, and shows the two-letter token "lu" as "lux".
    """
    raw_name = "! Kim lu Yee "
    self.assertEqual(normalize_authors(raw_name), "kim lux yee")
def test_search_query_3(self):
    """Search query for "kim lee lu Meyers A. Bueno" must be "+meyers +bueno +lux"."""
    normalized = normalize_authors("kim lee lu Meyers A. Bueno")
    query = get_author_search_query(normalized)
    self.assertEqual(query, "+meyers +bueno +lux")
def test_search_query_2(self):
    """Search query for "Richard Dawson A. St. Louis" must be "+richard +dawson +louis"."""
    normalized = normalize_authors("Richard Dawson A. St. Louis")
    expected_query = "+richard +dawson +louis"
    self.assertEqual(get_author_search_query(normalized), expected_query)
def test_search_query(self):
    """Search query for "Fang a Yang Su" must be "+yang +fang +sux"."""
    normalized = normalize_authors("Fang a Yang Su")
    query = get_author_search_query(normalized)
    self.assertEqual(query, "+yang +fang +sux")
def test_relevant_names_2(self):
    """Relevant names of "Kim Li Suu" must be ["kim", "lix", "suu"]."""
    normalized = normalize_authors("Kim Li Suu")
    relevant = get_author_relevant_names(normalized)
    self.assertEqual(relevant, ["kim", "lix", "suu"])
def test_relevant_names(self):
    """Relevant names of "Martin S. Müller" must be ["martin", "muller"]."""
    expected_names = ["martin", "muller"]
    normalized = normalize_authors("Martin S. Müller")
    self.assertEqual(get_author_relevant_names(normalized), expected_names)
def test_normalize_authors_2(self):
    """Expect "Martin S. Müller " to be normalized to "martin s muller"."""
    raw_name = "Martin S. Müller "
    expected = "martin s muller"
    self.assertEqual(normalize_authors(raw_name), expected)