def test_similarity_regression_6(self):
     """Names that share only their first token are expected not to match."""
     name_pairs = (
         ("pedro bernaola galva n", "pedro neto"),
         ("ahmed metwally", "ahmed hassan m h ali"),
     )
     for left, right in name_pairs:
         outcome = calculate_author_similarity(left, right)
         self.assertEqual(outcome, False)
    def test_similarity_regression_9(self):
        """Expect "zhang xiu" to match itself, its reversed form and the
        variants where "xiu" is shortened to the initial "x"."""
        reference = "zhang xiu"
        variants = ("zhang xiu", "xiu zhang", "zhang x", "x zhang")
        for variant in variants:
            outcome = calculate_author_similarity(reference, variant)
            self.assertEqual(outcome, True)
 def test_regression_10(self):
     """Check one expected match followed by three expected non-matches."""
     # Each entry: (first name, second name, expected similarity verdict).
     expectations = (
         ("John Michael", "John Meyer Michael", True),
         ("Mohn Michael", "John Michael", False),
         ("rhys hill", "donna r hill", False),
         ("jose pereira", "jose m g torres pereira", False),
     )
     for left, right, expected in expectations:
         self.assertEqual(calculate_author_similarity(left, right), expected)
Exemple #4
0
def start(attribute, transformation_function=default_func):
    """Search the database for authors similar to each row of ``Author.csv``.

    For every author in ``Author.csv`` (indexed by ``Id``) the ``Name`` value
    is passed through ``transformation_function``, turned into a search
    expression by ``get_search_query`` and matched against the ``attribute``
    column of ``names.authorspapers`` with a boolean-mode ``MATCH ... AGAINST``
    query. Returned rows that ``calculate_author_similarity`` accepts are
    kept, and their ids (excluding the author's own id) are written next to
    the author's id into ``test_id15.csv`` under ``file_path``.

    Args:
        attribute: Name of the column that is both selected and searched.
            It is formatted directly into the SQL statement because column
            names cannot be bound as parameters, so it must come from
            trusted code and never from user input.
        transformation_function: Callable applied to each author name before
            searching and comparing.

    Returns:
        None. The result is written to ``test_id15.csv``.
    """
    authors = pandas.read_csv(os.path.join(file_path, "Author.csv"), index_col="Id")

    # The statement does not depend on the current author, so build it once
    # instead of re-formatting it on every iteration.
    search_query = (
        'SELECT Id,{} FROM names.authorspapers '
        'WHERE MATCH({}) AGAINST (%s IN BOOLEAN MODE)'
    ).format(attribute, attribute)

    read_connector = pymysql.connect(user="******",
                                     password="******",
                                     host="localhost",
                                     charset="utf8mb4")

    # Collect rows in a dict and build the Series once at the end; enlarging
    # a Series one element at a time reallocates on every insertion.
    id_rows = {}
    try:
        with read_connector.cursor() as cursor:
            for counter, (key, value) in enumerate(authors.iterrows(), start=1):
                transformed_name = transformation_function(str(value['Name']))
                cursor.execute(search_query, (get_search_query(transformed_name),))

                # Row layout follows the SELECT: (Id, <attribute>).
                # The similarity check runs first so it is evaluated for
                # every returned row; the author's own id is then dropped.
                similar_ids = [
                    str(row[0])
                    for row in cursor
                    if calculate_author_similarity(transformed_name, row[1])
                    and row[0] != key
                ]

                # Stored as "<own id> <similar id> <similar id> ...".
                id_rows[key] = str(key) + " " + ' '.join(similar_ids)

                # Lightweight progress indicator for long runs.
                if counter % 5000 == 0:
                    print(counter)
    finally:
        # Release the connection even when a query or the comparison fails.
        read_connector.close()

    result_id = pandas.Series(id_rows, dtype=object)
    result_id.to_csv(os.path.join(file_path, "test_id15.csv"))
 def test_similarity_regression_4(self):
     """Expect "p d h hill" and "hillary d protas" not to match."""
     first, second = "p d h hill", "hillary d protas"
     self.assertEqual(calculate_author_similarity(first, second), False)
 def test_similarity_regression_3(self):
     """Expect "ernest j" and "john e" not to match."""
     first, second = "ernest j", "john e"
     self.assertEqual(calculate_author_similarity(first, second), False)
 def test_similarity_regression_2(self):
     """Expect the single letter "x" not to match "xin xin"."""
     first, second = "x", "xin xin"
     self.assertEqual(calculate_author_similarity(first, second), False)
 def test_similarity_regression_1(self):
     """Expect "yufeng xin" and "xinzhi xing" not to match."""
     first, second = "yufeng xin", "xinzhi xing"
     self.assertEqual(calculate_author_similarity(first, second), False)
 def test_similarity_6(self):
     """Expect "chin a bing jin" and "anton j lin chin" not to match."""
     first = "chin a bing jin"
     second = "anton j lin chin"
     self.assertEqual(calculate_author_similarity(first, second), False)
 def test_similarity_regression_7(self):
     """Expect "j p olivier dex sardan" and "olivier peulen" not to match."""
     first = "j p olivier dex sardan"
     second = "olivier peulen"
     self.assertEqual(calculate_author_similarity(first, second), False)
 def test_similarity_4(self):
     """Expect "chin anton jin" and "anton j chin" to match."""
     first, second = "chin anton jin", "anton j chin"
     self.assertEqual(calculate_author_similarity(first, second), True)
 def test_similarity_3(self):
     """Expect "chin jen lin" and "lin j chin" not to match."""
     first, second = "chin jen lin", "lin j chin"
     self.assertEqual(calculate_author_similarity(first, second), False)
 def test_similarity_2(self):
     """Expect "chin jen lin" and "chin j lin" to match."""
     first, second = "chin jen lin", "chin j lin"
     self.assertEqual(calculate_author_similarity(first, second), True)
 def test_similarity(self):
     """Expect "Chin Jen Lin" and "Chin A lin" not to match."""
     first, second = "Chin Jen Lin", "Chin A lin"
     self.assertEqual(calculate_author_similarity(first, second), False)
 def test_similarity_regression_5(self):
     """Expect "dah ming chiu" to match both a variant with an extra
     initial and a variant with the tokens reordered."""
     reference = "dah ming chiu"
     for variant in ("dah ming w chiu", "chiu dah ming"):
         outcome = calculate_author_similarity(reference, variant)
         self.assertEqual(outcome, True)
 def test_similarity_regression_8(self):
     """Expect "howard ottensen" not to match "howard o meyer" but to
     match "h ottensen"."""
     reference = "howard ottensen"

     against_meyer = calculate_author_similarity(reference, "howard o meyer")
     self.assertEqual(against_meyer, False)

     against_initial = calculate_author_similarity(reference, "h ottensen")
     self.assertEqual(against_initial, True)
 def test_similarity_5(self):
     """Expect "chin a j" and "anton j b chin" not to match."""
     first, second = "chin a j", "anton j b chin"
     self.assertEqual(calculate_author_similarity(first, second), False)