Esempio n. 1
0
    def testIssueSeven(self):
        """Check that partial_ratio rates three variants of s1 above 75.

        NOTE(review): the name suggests a regression test for a tracker
        issue numbered seven -- confirm against the project's history.
        """
        # The unusual spellings below are the test data; keep them verbatim.
        s1 = "HSINCHUANG"
        s2 = "SINJHUAN"
        s3 = "LSINJHUANG DISTRIC"
        s4 = "SINJHUANG DISTRICT"

        # assertGreater reports the actual score when the check fails, while
        # assertTrue(score > 75) would only say "False is not true".
        self.assertGreater(fuzzywuzzy.partial_ratio(s1, s2), 75)
        self.assertGreater(fuzzywuzzy.partial_ratio(s1, s3), 75)
        self.assertGreater(fuzzywuzzy.partial_ratio(s1, s4), 75)
Esempio n. 2
0
def extract_features(q1, q2):
    """Build the feature list for a pair of questions.

    Both questions are first passed through ``preprocess``. The result holds
    the items produced by ``get_token_features`` followed by five similarity
    scores, in this order: ``fuzz.token_set_ratio``,
    ``fuzz.token_sort_ratio``, ``fuzz.QRatio``, ``fuzz.partial_ratio`` and
    ``get_longest_substr_ratio``.

    Args:
        q1: First question.
        q2: Second question.

    Returns:
        A new list: the token features followed by the five scores above.
    """
    # Per the original author's note, preprocess removes HTML tags and
    # punctuation, handles stemming, stopwords and contractions, and returns
    # the question text. The helper lives elsewhere -- verify there.
    clean_q1 = preprocess(q1)
    clean_q2 = preprocess(q2)

    # Original note on the token features: cwc_min, cwc_min, csc_min,
    # csc_max, ctc_min, ctc_max, last_word_eq, first_word_eq, abs_len_diff,
    # mean_len. NOTE(review): the second entry is presumably cwc_max --
    # confirm against get_token_features.
    features = list(get_token_features(clean_q1, clean_q2))

    # Fuzzy-matching scores, appended in a fixed order after the token
    # features.
    features += [
        fuzz.token_set_ratio(clean_q1, clean_q2),
        fuzz.token_sort_ratio(clean_q1, clean_q2),
        fuzz.QRatio(clean_q1, clean_q2),
        fuzz.partial_ratio(clean_q1, clean_q2),
        get_longest_substr_ratio(clean_q1, clean_q2),
    ]

    return features
Esempio n. 3
0
 def testPartialRatioUnicodeString(self):
     """partial_ratio of a lone U+00C1 escape string against "ABCD" is 0."""
     accented = "\u00C1"
     plain = "ABCD"
     # The expected value stays first, as in the original assertion.
     self.assertEqual(0, fuzzywuzzy.partial_ratio(accented, plain))
Esempio n. 4
0
 def testEmptyStringsScore0(self):
     """Two empty strings must score 0 under ratio and partial_ratio."""
     empty = ""

     full_score = fuzzywuzzy.ratio(empty, empty)
     self.assertEqual(full_score, 0)

     partial_score = fuzzywuzzy.partial_ratio(empty, empty)
     self.assertEqual(partial_score, 0)
Esempio n. 5
0
 def testPartialRatio(self):
     """partial_ratio of the fixture strings s1 and s3 must be 100."""
     # s1 and s3 are instance attributes set up outside this method.
     score = fuzzywuzzy.partial_ratio(self.s1, self.s3)
     self.assertEqual(score, 100)