예제 #1
0
def set_authors(connector, authors):
    """Update the author statistics tables for one list of author names.

    Three kinds of upserts are sent through ``connector.execute``:

    * ``number_author`` -- keyed by the number of authors given (as a string).
    * ``authors`` -- keyed by each author name exactly as given.
    * ``popular_names`` -- keyed by each token of the normalized author name,
      skipping tokens shorter than two characters and tokens that ``int()``
      can parse.

    Every statement inserts the key with ``counter`` 1 or, on a duplicate
    key, increments the existing counter.

    Args:
        connector: object exposing ``execute(sql, params)``.
        authors: sized iterable of author names; each name is passed to
            ``normalize_authors``.
    """
    count_sql = ("INSERT INTO `number_author`(number,counter) VALUES(%s,1)"
                 "ON DUPLICATE KEY UPDATE counter= counter+1")
    author_sql = ("INSERT INTO `authors`(name,counter) VALUES(%s,1)"
                  "ON DUPLICATE KEY UPDATE counter= counter+1")
    popular_sql = ("INSERT INTO `popular_names`(name,counter) VALUES(%s,1)"
                   "ON DUPLICATE KEY UPDATE counter= counter+1")

    # set number of authors
    connector.execute(count_sql, (str(len(authors)),))

    for name in authors:
        # increment author name
        connector.execute(author_sql, (name,))

        # split() without a separator never yields empty tokens, so a name
        # that normalizes to "" (or contains doubled spaces) no longer adds
        # an empty entry to popular_names.
        for token in normalize_authors(name).split():
            if len(token) < 2:
                continue
            try:
                # do not accept numbers as names
                int(token)
            except ValueError:
                connector.execute(popular_sql, (token,))
예제 #2
0
                          index_col="Id")
# Copy every row of the `authors` frame into names.authors, storing the
# original name together with its normalized and metaphone forms.
read_connector = pymysql.connect(user="******",
                                 password="******",
                                 host="localhost",
                                 charset="utf8mb4")
counter = 0
setup()
try:
    with read_connector.cursor() as cursor:
        for key, value in authors.iterrows():
            raw_name = value['Name']

            # Test for a missing value BEFORE converting to str: str() turns
            # NaN/None into the non-empty text "nan"/"None", which would pass
            # both checks and be inserted as a real author name.
            if pandas.isnull(raw_name) or str(raw_name) == '':
                print(key, "empty name")
                continue

            name = str(raw_name)
            main_name = name
            normal_name = normalize_authors(name)
            metaphone_name = metaphone(normal_name)

            cursor.execute(
                "INSERT INTO names.authors(Id,main_name,normal_name,metaphone_name) VALUES (%s,%s,%s,%s)",
                (int(key), main_name, normal_name, metaphone_name))

            # Commit in batches of 50 inserted rows (the check runs before
            # the increment, so the very first row is committed as well).
            if counter % 50 == 0:
                read_connector.commit()
            counter += 1
            # Progress output every 10000 inserted rows.
            if counter % 10000 == 0:
                print(counter)
        # Flush whatever is left of the last partial batch.
        read_connector.commit()
finally:
    # Release the connection even when an insert fails part-way through.
    read_connector.close()
예제 #3
0
 def test_normalize_authors3(self):
     # "C.B. Lee" is expected to normalize to "c b lee".
     normalized = normalize_authors("C.B. Lee")
     expected = "c b lee"
     self.assertEqual(normalized, expected)
예제 #4
0
 def test_normalize_authors(self):
     # Input with a leading "!" and a trailing space; the expected
     # normalized form is "kim lux yee".
     raw_name = "! Kim lu Yee "
     self.assertEqual(normalize_authors(raw_name), "kim lux yee")
예제 #5
0
 def test_search_query_3(self):
     # The search query is built from the normalized name; for this input
     # the expected query is "+meyers +bueno +lux".
     normalized = normalize_authors("kim lee  lu Meyers A. Bueno")
     query = get_author_search_query(normalized)
     self.assertEqual(query, "+meyers +bueno +lux")
예제 #6
0
 def test_search_query_2(self):
     # Normalize first, then build the query; the expected query for this
     # name is "+richard +dawson +louis".
     normalized = normalize_authors("Richard Dawson A. St. Louis")
     expected = "+richard +dawson +louis"
     self.assertEqual(get_author_search_query(normalized), expected)
예제 #7
0
 def test_search_query(self):
     # Expected search query for "Fang a Yang Su" is "+yang +fang +sux".
     normalized = normalize_authors("Fang a Yang Su")
     query = get_author_search_query(normalized)
     self.assertEqual(query, "+yang +fang +sux")
예제 #8
0
 def test_relevant_names_2(self):
     # The relevant names of normalized "Kim Li Suu" are expected to be the
     # list ["kim", "lix", "suu"], in that order.
     normalized = normalize_authors("Kim Li Suu")
     expected = ["kim", "lix", "suu"]
     self.assertEqual(get_author_relevant_names(normalized), expected)
예제 #9
0
 def test_relevant_names(self):
     # For "Martin S. Müller" the expected relevant names are
     # ["martin", "muller"].
     normalized = normalize_authors("Martin S. Müller")
     names = get_author_relevant_names(normalized)
     self.assertEqual(names, ["martin", "muller"])
예제 #10
0
 def test_normalize_authors_2(self):
     # Input carries a trailing space and an umlaut; the expected
     # normalized form is "martin s muller".
     raw_name = "Martin S. Müller "
     expected = "martin s muller"
     self.assertEqual(normalize_authors(raw_name), expected)