Ejemplo n.º 1
0
    def testQratioForceAscii(self):
        # Checks how the ``force_ascii`` flag changes the QRatio score of two
        # strings that differ only by one trailing non-ASCII character (U+00C1).
        with_accent = "ABCD\u00C1"
        plain = "ABCD"

        # With force_ascii=True the pair is expected to be a perfect match
        # (presumably the non-ASCII character is dropped before comparing --
        # confirm against the QRatio implementation).
        ascii_score = fuzzywuzzy.QRatio(with_accent, plain, force_ascii=True)
        self.assertEqual(ascii_score, 100)

        # With force_ascii=False the extra character must cost some score.
        unicode_score = fuzzywuzzy.QRatio(with_accent, plain, force_ascii=False)
        self.assertLess(unicode_score, 100)
Ejemplo n.º 2
0
    def testQRatioUnicodeString(self):
        # A lone non-ASCII character compared with an ASCII string, using the
        # default force_ascii setting, is expected to score exactly 0.
        lone_score = fuzzywuzzy.QRatio("\u00C1", "ABCD")
        self.assertEqual(0, lone_score)

        # With force_ascii=False, non-ASCII strings that share some characters
        # are expected to produce a non-zero score.
        non_ascii_pairs = (
            # Cyrillic.
            (
                "\u043f\u0441\u0438\u0445\u043e\u043b\u043e\u0433",
                "\u043f\u0441\u0438\u0445\u043e\u0442\u0435\u0440\u0430\u043f\u0435\u0432\u0442",
            ),
            # Chinese.
            (
                "\u6211\u4e86\u89e3\u6570\u5b66",
                "\u6211\u5b66\u6570\u5b66",
            ),
        )
        for left, right in non_ascii_pairs:
            pair_score = fuzzywuzzy.QRatio(left, right, force_ascii=False)
            self.assertNotEqual(0, pair_score)
Ejemplo n.º 3
0
def extract_features(q1, q2):
    """Build the feature list for a pair of questions.

    Both questions are first passed through ``preprocess``; every feature
    below is computed on the preprocessed text.

    NOTE(review): ``preprocess`` is expected to strip HTML tags and
    punctuation and to handle stemming, stopwords and contractions -- that
    helper is defined elsewhere, confirm there.

    Args:
        q1: First question.
        q2: Second question.

    Returns:
        A new list holding, in order:
          * every value produced by ``get_token_features(q1, q2)``
            (presumably cwc_min, cwc_max, csc_min, csc_max, ctc_min,
            ctc_max, last_word_eq, first_word_eq, abs_len_diff, mean_len --
            verify against that helper),
          * ``fuzz.token_set_ratio``   (token_set_ratio),
          * ``fuzz.token_sort_ratio``  (token_sort_ratio),
          * ``fuzz.QRatio``            (fuzz_ratio),
          * ``fuzz.partial_ratio``     (fuzz_partial_ratio),
          * ``get_longest_substr_ratio`` (longest_substr_ratio).
    """
    q1, q2 = preprocess(q1), preprocess(q2)

    # Token-level features come first, copied into a fresh list.
    features = list(get_token_features(q1, q2))

    # Fuzzy-matching features plus the longest-common-substring ratio; the
    # literal is evaluated left to right, so the call order is fixed.
    features += [
        fuzz.token_set_ratio(q1, q2),
        fuzz.token_sort_ratio(q1, q2),
        fuzz.QRatio(q1, q2),
        fuzz.partial_ratio(q1, q2),
        get_longest_substr_ratio(q1, q2),
    ]
    return features
Ejemplo n.º 4
0
 def testQuickRatioNotEqual(self):
     # The fixture strings s1 and s3 must not be scored as a perfect match.
     score = fuzzywuzzy.QRatio(self.s1, self.s3)
     self.assertNotEqual(score, 100)
Ejemplo n.º 5
0
 def testQuickRatioCaseInsensitive(self):
     # The fixture strings s1 and s2 must score a perfect 100 (presumably
     # they differ only in letter case -- verify against the fixture setup).
     score = fuzzywuzzy.QRatio(self.s1, self.s2)
     self.assertEqual(score, 100)
Ejemplo n.º 6
0
 def testQuickRatioEqual(self):
     # The fixture strings s1 and s1a must score a perfect 100.
     score = fuzzywuzzy.QRatio(self.s1, self.s1a)
     self.assertEqual(score, 100)