Python ratio Examples, fuzzywuzzy.ratio Python Examples

Example #1

0

Show file

File: preprocessing.py Project: ccaprile/absa_product_reviews

def fuzzy_match(words, assoc_dict, assoc_nouns):
    """Attach each word to the category holding its closest known token.

    For every entry of ``words`` the best ``fuzz.ratio`` score against all
    tokens in ``assoc_dict`` is determined (the first token reaching the top
    score wins).  If that score is above 84 the word is appended to the
    winning category's token list and to ``assoc_nouns``.

    Both containers are modified in place and returned as a tuple
    ``(assoc_dict, assoc_nouns)``.  Words accepted earlier in the loop are
    part of the token lists seen by later words.
    """
    for word in words:
        best_score, best_cat = 0, ''

        # Flat stream of (score, category) pairs in dictionary/token order.
        scored = (
            (fuzz.ratio(word, token), cat)
            for cat, tokens in assoc_dict.items()
            for token in tokens
        )
        for score, cat in scored:
            # Strict comparison keeps the earliest best-scoring category.
            if score > best_score:
                best_score, best_cat = score, cat

        if best_cat != '' and best_score > 84:
            assoc_dict[best_cat].append(word)
            assoc_nouns.append(word)

    return assoc_dict, assoc_nouns

Example #2

0

Show file

File: process.py Project: mtpain/contracts

def normalize_company_list(company_list):
    """
    Map company names that closely resemble each other to a common name.

    Every name is stripped and lower-cased, then compared with all names
    using ``fuzz.ratio``.  The entries scoring above 75 against the current
    name are passed to ``make_normalized_name`` and overwritten in place
    with whatever it returns.

    NOTE(review): ``company_list`` is indexed with a list of positions, so
    it presumably is a NumPy array (or a similar container) rather than a
    plain Python list -- confirm against the caller.

    Returns the same ``company_list`` object.  An empty input is returned
    unchanged.
    """
    strip_names = [n.strip().lower() for n in company_list]

    # Compare against the name of the current iteration; the earlier code
    # only ever compared against the first entry.
    for cur_name in strip_names:

        # strip_names is already stripped and lower-cased.
        match_idxs = [
            j for j, next_name in enumerate(strip_names)
            if fuzz.ratio(cur_name, next_name) > 75
        ]

        # Nothing to rewrite when no name cleared the threshold.
        if not match_idxs:
            continue

        # can improve the make_normalized_name as necessary
        company_list[match_idxs] = make_normalized_name(
            company_list[match_idxs])

    return company_list

Example #3

0

Show file

File: process.py Project: only1dallas/contracts

def normalize_company_list(company_list):
    """
    Map company names that closely resemble each other to a common name.

    Every name is stripped and lower-cased, then compared with all names
    using ``fuzz.ratio``.  The entries scoring above 75 against the current
    name are passed to ``make_normalized_name`` and overwritten in place
    with whatever it returns.

    NOTE(review): ``company_list`` is indexed with a list of positions, so
    it presumably is a NumPy array (or a similar container) rather than a
    plain Python list -- confirm against the caller.

    Returns the same ``company_list`` object.  An empty input is returned
    unchanged.
    """
    strip_names = [n.strip().lower() for n in company_list]

    # Compare against the name of the current iteration; the earlier code
    # only ever compared against the first entry.
    for cur_name in strip_names:

        # strip_names is already stripped and lower-cased.
        match_idxs = [
            j for j, next_name in enumerate(strip_names)
            if fuzz.ratio(cur_name, next_name) > 75
        ]

        # Nothing to rewrite when no name cleared the threshold.
        if not match_idxs:
            continue

        # can improve the make_normalized_name as necessary
        company_list[match_idxs] = make_normalized_name(
            company_list[match_idxs])

    return company_list

Example #4

0

Show file

File: analyze_new_words2.py Project: gpfvic/NewWordDector

                where freq > 10 and pmi > 20 and entropy >0.1
                order by freq desc
                """);
# Fetch all rows from the cursor, then close the cursor and the connection.
rows = cursor.fetchall()
# for d in rows:  FINAL_WORDS.add(d['word'])
cursor.close()
conn.close()


## 2. Identify similar strings: equal frequency, or within 5% of each other;
##    keep the one whose pmi is more than 5 times larger.
# Copy of word_set that serves as the pool of comparison candidates.
choices = word_set.copy()

for w in tqdm(word_set):
    score_list = []
    for b in choices:
        ratio = fuzzywuzzy.ratio(w, b)
        if ratio > 0.6:
            score_list.append((b, ratio))
    score_list.sort(key=lambda i:i[1], reverse=True)
    
    if len(score_list)<1: continue
    word = wordbook[w]
    
    likely_word = {}
    for sw, _ in score_list:
        sword = wordbook[sw]
        dratio = sword['pmi'] / float(word['pmi']) 
        likely_word[dratio] = sw
    if len(likely_word)==0:
        FINAL_WORDS.add(w)
    else:

Example #5

0

Show file

File: ProcessorParser.py Project: zcnmashleu95/code-contribution-assessment

 def need_to_change_author(self, first, second, setting_ratio):
     """Tell whether ``first`` and ``second`` are similar enough.

     Scores the pair with ``fuzz.ratio`` and returns ``True`` when the score
     is greater than or equal to ``setting_ratio``, otherwise ``False``.
     """
     return bool(fuzz.ratio(first, second) >= setting_ratio)

Example #6

0

Show file

File: publictest-full.py Project: sourcery-ai-bot/481hw3

 def testRatioUnicodeString(self):
     """A lone non-ASCII letter compared with "ABCD" must score 0."""
     self.assertEqual(0, fuzzywuzzy.ratio("\u00C1", "ABCD"))

Example #7

0

Show file

File: publictest-full.py Project: sourcery-ai-bot/481hw3

 def testEmptyStringsScore0(self):
     """Two empty strings score 0 for both ratio and partial_ratio."""
     plain_score = fuzzywuzzy.ratio("", "")
     self.assertEqual(plain_score, 0)

     partial_score = fuzzywuzzy.partial_ratio("", "")
     self.assertEqual(partial_score, 0)

Example #8

0

Show file

File: publictest-full.py Project: sourcery-ai-bot/481hw3

 def testCaseInsensitive(self):
     """Raw s1/s2 must not score 100, but their full_process forms must."""
     raw_score = fuzzywuzzy.ratio(self.s1, self.s2)
     self.assertNotEqual(raw_score, 100)

     processed_first = fuzzywuzzy.full_process(self.s1)
     processed_second = fuzzywuzzy.full_process(self.s2)
     processed_score = fuzzywuzzy.ratio(processed_first, processed_second)
     self.assertEqual(processed_score, 100)

Example #9

0

Show file

File: publictest-full.py Project: sourcery-ai-bot/481hw3

 def testEqual(self):
     """The fixtures s1 and s1a must score exactly 100."""
     score = fuzzywuzzy.ratio(self.s1, self.s1a)
     self.assertEqual(score, 100)

Example #10

0

Show file

# Rebuild the index of each data set, discarding the old one (drop=True).
# NOTE(review): presumably these are pandas objects -- confirm.
dtset2 = dtset2.reset_index(drop=True)
dtset1_test = dtset1_test.reset_index(drop=True)
dtset2_test = dtset2_test.reset_index(drop=True)
while flag:
    F_threshold += 0.5
    F_count = 0
    F_dobW = F_threshold
    F_uniqueNms = []
    F_labels = {}
    for i in range(len(dtset1)):
        F_labels[i] = -1
    for i in range(len(dtset1)):
        for j in range(len(dtset1)):
            F_fn_C = (max(
                (dtset1[i]['fn'].str.len()), (dtset1[j]['fn'].str.len()))) * (
                    1 - fuzz.ratio(dtset1[i]['fn'], dtset1[j]['fn']) / 100)
            F_ln_C = (max(
                (dtset1[i]['ln'].str.len()), (dtset1[j]['ln'].str.len()))) * (
                    1 - fuzz.ratio(dtset1[i]['ln'], dtset1[j]['ln']) / 100)
            F_dobC = (dtset1[i]['dob'] != dtset1[j]['dob']) * dobW
            if (F_fn_C + F_ln_C + F_dobC < threshold):
                if F_labels[j] == -1 and F_labels[i] == -1:
                    F_labels[j] = F_count
                    F_count += 1
                    F_labels[i] = F_labels[j]
                else:
                    if F_labels[j] == -1:
                        F_labels[j] = F_labels[i]
                    elif F_labels[i] == -1:
                        F_labels[i] = F_labels[j]
                    else: