Python match_rating_codexの例、jellyfish.match_rating_codex Pythonの例

コード例 #1

0

ファイルを表示

ファイル: collate_ms.py プロジェクト: brianhie/variant.ms

def token_similarity(a, b):
    """Score how well two tokens line up, using only their ``word`` text.

    Returns 1.0 when the words are equal ignoring case and surrounding
    whitespace, -1.0 when a whitespace or punctuation token is paired with
    a token of a different kind, 0.9 for punctuation/punctuation pairs and
    for alphabetic words sharing any phonetic code, and otherwise whatever
    ``jf.jaro_winkler`` returns for the two words.
    """
    left, right = a.word, b.word

    # Equal once case and outer whitespace are dropped; whitespace-only
    # tokens all strip to the empty string, so they match each other here.
    if left.lower().strip() == right.lower().strip():
        return 1.

    # Exactly one side is whitespace: never pair a word with whitespace.
    if bool(isspace(left)) != bool(isspace(right)):
        return -1.

    # Punctuation may pair with punctuation, but never with a word.
    left_is_punc = bool(ispunc(left))
    right_is_punc = bool(ispunc(right))
    if left_is_punc and right_is_punc:
        return 0.9
    if left_is_punc != right_is_punc:
        return -1.

    # Approximate phonetic match: any one shared code is enough.
    if left.isalpha() and right.isalpha():
        for encode in (jf.metaphone, jf.soundex, jf.nysiis,
                       jf.match_rating_codex):
            if encode(left) == encode(right):
                return 0.9

    # Fall back to the Jaro-Winkler score of the raw words.
    return jf.jaro_winkler(left, right)

コード例 #2

0

ファイルを表示

ファイル: utils.py プロジェクト: activewizardslab/datascience_python

def compare_for_seniority_finding(s1, s2):
    """Tell whether ``s2`` is similar enough to ``s1`` on every measure.

    s1 - main string, s2 - string from list for comparison.

    Returns the result of the combined threshold check: the fuzzy partial
    ratio must be at least 50, the soundex-code overlap above 70, and the
    metaphone-code and match-rating-codex overlaps above 65 each.
    """
    def _code_overlap(encode):
        # Jaccard overlap (scaled to 0..100) of the two lower-cased codes.
        main_code = encode(unicode(s1)).lower()
        other_code = encode(unicode(s2)).lower()
        return (1 - distance.jaccard(main_code, other_code)) * 100

    fpr = fuzz.partial_ratio(s1, s2)
    jac_metaphone = _code_overlap(jellyfish.metaphone)
    jac_soundex = _code_overlap(jellyfish.soundex)
    jac_mrc = _code_overlap(jellyfish.match_rating_codex)
    return fpr >= 50 and jac_soundex > 70 and jac_metaphone > 65 and jac_mrc > 65

コード例 #3

0

ファイルを表示

def mrc():
    """Print Match Rating codes and comparisons for sample tokens.

    The same report is produced twice, first for English sample tokens and
    then for German ones. Everything goes to stdout; nothing is returned.
    """

    def _print_tokens_and_codes(samples):
        # Token row, kept on a single line.
        print('Tokens: ', end='')
        for sample in samples:
            print(sample, ' | ', end='')

        # Codex row on the following line.
        print('\n', end="")
        print('Codes: ', end='')
        for sample in samples:
            print(jellyfish.match_rating_codex(sample), ' | ', end='')

        # Open the comparison row; the individual pairs follow.
        print('\n', end="")
        print('Comparisons: ', end='')

    def _print_comparison(label, first, second):
        print(label, jellyfish.match_rating_comparison(first, second))

    # english  -----------------------------
    print('Running Match Rating Codex (EN)...')
    _print_tokens_and_codes([
        'Ball Bearing', 'bll brng', 'Centrifugal', 'centrifigal', 'PUmp', 'pmp'
    ])
    _print_comparison('Ball Bearing, bll brng: ', 'Ball Bearing', 'bll brng')
    _print_comparison('Centrifugal, centrifigal: ',
                      'Centrifugal', 'centrifigal')
    _print_comparison('PUmp, pmp: ', 'PUmp', 'pmp')

    # german  -----------------------------
    print('\n\nRunning Match Rating Codex Comparison (DE)...')
    _print_tokens_and_codes([
        'Kugellager', 'kugelagr', 'Zentrifugal', 'zentrifkl', 'PUmpe', 'pmp'
    ])
    _print_comparison('Kugellager,  kugelagr: ', 'Kugellager', 'kugelagr')
    _print_comparison('Zentrifugal, zentrifkl: ', 'Zentrifugal', 'zentrifkl')
    _print_comparison('PUmpe, pmp: ', 'PUmpe', 'pmp')

コード例 #4

0

ファイルを表示

ファイル: utils.py プロジェクト: activewizardslab/datascience_python

def compare_for_seniority_finding(s1, s2):
    """Check whether ``s2`` passes every similarity threshold against ``s1``.

    s1 - main string, s2 - string from list for comparison.

    The result is the combined threshold test: fuzzy partial ratio >= 50,
    soundex overlap > 70, metaphone overlap > 65 and match-rating-codex
    overlap > 65, where each overlap is (1 - Jaccard distance) * 100 of the
    lower-cased phonetic codes.
    """
    partial = fuzz.partial_ratio(s1, s2)

    main_text = unicode(s1)
    other_text = unicode(s2)
    encoders = (
        ('metaphone', jellyfish.metaphone),
        ('soundex', jellyfish.soundex),
        ('mrc', jellyfish.match_rating_codex),
    )
    overlap = {}
    for label, encode in encoders:
        overlap[label] = (1 - distance.jaccard(
            encode(main_text).lower(),
            encode(other_text).lower())) * 100

    return (partial >= 50 and overlap['soundex'] > 70
            and overlap['metaphone'] > 65 and overlap['mrc'] > 65)

コード例 #5

0

ファイルを表示

ファイル: views.py プロジェクト: dbarlett/namespect

def fuzzy(string):
    """Return ``jsonify`` of the four jellyfish phonetic codes of ``string``.

    The payload keys are "metaphone", "soundex", "nysiis" and
    "match_rating_codex".
    """
    encoders = (
        ("metaphone", jellyfish.metaphone),
        ("soundex", jellyfish.soundex),
        ("nysiis", jellyfish.nysiis),
        ("match_rating_codex", jellyfish.match_rating_codex),
    )
    payload = {}
    for key, encode in encoders:
        payload[key] = encode(string)
    return jsonify(payload)

コード例 #6

0

ファイルを表示

def simple_example():
    """Print every jellyfish comparison and phonetic encoding for sample words.

    Each line shows the call that was made and the value it produced.
    """

    def _report_pair(template, compare, first, second):
        # One line per comparison function: arguments, then the result.
        print(template.format(first, second, compare(first, second)))

    def _report_code(template, encode, word):
        # One line per phonetic encoder: argument, then the code.
        print(template.format(word, encode(word)))

    # String comparison.
    str1, str2 = u'jellyfish', u'smellyfish'

    _report_pair("jellyfish.levenshtein_distance({}, {}) = {}.",
                 jellyfish.levenshtein_distance, str1, str2)
    _report_pair("jellyfish.damerau_levenshtein_distance({}, {}) = {}.",
                 jellyfish.damerau_levenshtein_distance, str1, str2)
    _report_pair("jellyfish.hamming_distance({}, {}) = {}.",
                 jellyfish.hamming_distance, str1, str2)
    _report_pair("jellyfish.jaro_distance({}, {}) = {}.",
                 jellyfish.jaro_distance, str1, str2)
    _report_pair("jellyfish.jaro_similarity({}, {}) = {}.",
                 jellyfish.jaro_similarity, str1, str2)
    _report_pair("jellyfish.jaro_winkler({}, {}) = {}.",
                 jellyfish.jaro_winkler, str1, str2)
    _report_pair("jellyfish.jaro_winkler_similarity({}, {}) = {}.",
                 jellyfish.jaro_winkler_similarity, str1, str2)
    _report_pair("jellyfish.match_rating_comparison({}, {}) = {}.",
                 jellyfish.match_rating_comparison, str1, str2)

    #--------------------
    # Phonetic encoding.
    ss = u'Jellyfish'

    _report_code("jellyfish.metaphone({}) = {}.", jellyfish.metaphone, ss)
    _report_code("jellyfish.soundex({}) = {}.", jellyfish.soundex, ss)
    _report_code("jellyfish.nysiis({}) = {}.", jellyfish.nysiis, ss)
    _report_code("jellyfish.match_rating_codex({}) = {}.",
                 jellyfish.match_rating_codex, ss)

コード例 #7

0

ファイルを表示

def compare_context(phraselist_nst, ngramlist):
    """Check that every phrase word sounds like some non-stop-word n-gram.

    Each entry of both inputs is decoded as UTF-8 (undecodable bytes are
    ignored) and turned into its match rating codex. A phrase word is
    accepted when its codex is within Levenshtein distance 1 of the codex
    of at least one entry of ``ngramlist`` that is not in ``stwords``.

    Returns True when every word in ``phraselist_nst`` is accepted (also
    when it is empty), otherwise False.
    """
    reference_codes = []
    for gram in ngramlist:
        if gram in stwords:
            continue
        reference_codes.append(
            jf.match_rating_codex(gram.decode('utf-8', 'ignore')))

    def _has_close_reference(code):
        return any(
            jf.levenshtein_distance(code, ref) <= 1
            for ref in reference_codes)

    return all(
        _has_close_reference(
            jf.match_rating_codex(word.decode('utf-8', 'ignore')))
        for word in phraselist_nst)

コード例 #8

0

ファイルを表示

    def test_match_rating_codex(self):
        """match_rating_codex yields the expected code for each sample name."""
        expected_codes = (
            ("Byrne", "BYRN"),
            ("Boern", "BRN"),
            ("Smith", "SMTH"),
            ("Smyth", "SMYTH"),
            ("Catherine", "CTHRN"),
            ("Kathryn", "KTHRYN"),
        )

        for name, code in expected_codes:
            self.assertEqual(jellyfish.match_rating_codex(name), code)

コード例 #9

0

ファイルを表示

def measure_string_distance(s1, s2, method):
    '''
    Score two strings with one of five methods, selected by method code.

    Two methods measure string similarity and three compare phonetic
    encodings.

    Method code to method name:
        1. jaro-winkler distance
        2. damerau-levenshtein distance (rescaled, see below)
        3. Metaphone
        4. NYSIIS
        5. match_rating_codex

    note:
        methods 3, 4 and 5 only return 1 (codes match) or 0 (no match)
        method 1 returns whatever jellyfish.jaro_winkler returns
        method 2 returns 1 - distance / length of the longer string

    Returns 0 when either string is empty, when the method code is not one
    of 1-5, or when the damerau-levenshtein computation fails.
    '''
    result = 0

    # Nothing to compare; this also keeps the division below away from zero.
    if s1 == '' or s2 == '':
        return result

    if method == 1:
        result = jellyfish.jaro_winkler(s1, s2)
    elif method == 2:
        try:
            diff = jellyfish.damerau_levenshtein_distance(s1, s2)
            # float() keeps this a true division on Python 2 as well.
            result = 1 - (diff / float(max(len(s1), len(s2))))
        except Exception:
            # Best effort: inputs jellyfish rejects score as "no match".
            # Unlike a bare except, KeyboardInterrupt/SystemExit propagate.
            result = 0
    elif method == 3:
        result = 1 if jellyfish.metaphone(s1) == jellyfish.metaphone(s2) else 0
    elif method == 4:
        result = 1 if jellyfish.nysiis(s1) == jellyfish.nysiis(s2) else 0
    elif method == 5:
        result = 1 if jellyfish.match_rating_codex(
            s1) == jellyfish.match_rating_codex(s2) else 0
    # Any other method code deliberately falls through and returns 0.

    return result

コード例 #10

0

ファイルを表示

def featurize(df):
    """Add string-similarity feature columns for the text pair in ``df``.

    The first two columns are named 'a' and 'b' (a third, when there are
    exactly three, becomes 'target'). Both texts are transliterated with
    unidecode, lower-cased and stripped of everything but ASCII letters
    into 'TM_A' / 'TM_B'; every feature is computed from those two columns.

    With exactly two or three columns the passed frame is modified in
    place; otherwise the features are added to the frame returned by
    ``rename``. The frame carrying the features is returned.
    """
    column_count = len(df.columns)
    if column_count == 3:
        df.columns = ['a', 'b', 'target']
    elif column_count == 2:
        df.columns = ['a', 'b']
    else:
        df = df.rename(columns={df.columns[0]: 'a', df.columns[1]: 'b'})

    def _letters_only(text):
        # Transliterate, lower-case, then drop every non-letter run.
        return re.sub('[^a-zA-Z]+', '', unidecode.unidecode(text).lower())

    def _pairwise(score):
        # Apply a two-argument scorer to the cleaned pair of every row.
        return df.apply(lambda row: score(row.TM_A, row.TM_B), axis=1)

    def _same_code(encode):
        # 1 when both cleaned texts share the encoder's code, else 0.
        return _pairwise(
            lambda left, right: 1 if encode(left) == encode(right) else 0)

    df['TM_A'] = df.apply(lambda row: _letters_only(row['a']), axis=1)
    df['TM_B'] = df.apply(lambda row: _letters_only(row['b']), axis=1)

    # Fuzzy ratios.
    df['partial'] = _pairwise(fuzz.partial_ratio)
    df['tkn_sort'] = _pairwise(fuzz.token_sort_ratio)
    df['tkn_set'] = _pairwise(fuzz.token_set_ratio)

    df['sum_ipa'] = _pairwise(sum_ipa)

    # Levenshtein distance, rescaled with a MinMaxScaler fitted on this frame.
    df['levenshtein'] = _pairwise(jellyfish.levenshtein_distance)
    scaler = MinMaxScaler()
    df['levenshtein'] = scaler.fit_transform(
        df['levenshtein'].values.reshape(-1, 1))

    # Phonetic code agreement flags.
    df['metaphone'] = _same_code(jellyfish.metaphone)
    df['nysiis'] = _same_code(jellyfish.nysiis)
    df['mtch_rtng_cdx'] = _same_code(jellyfish.match_rating_codex)

    df['pshp_soundex_first'] = _same_code(pshp_soundex_first.encode)

    # One extra column per configured algorithm, named by algo_names.
    for i, algo in enumerate(algos):
        df[algo_names[i]] = _pairwise(algo.sim)

    return df

コード例 #11

0

ファイルを表示

def get_hash(word, hash_type):
    """Return the phonetic code of ``word`` for the requested algorithm.

    ``hash_type`` must be one of "SOUNDEX", "NYSIIS", "MRA" (match rating
    codex) or "METAPHONE".

    Raises:
        NotImplementedError: for any other ``hash_type``.
    """
    if hash_type == "SOUNDEX":
        return jellyfish.soundex(word)
    if hash_type == "NYSIIS":
        return jellyfish.nysiis(word)
    if hash_type == "MRA":
        return jellyfish.match_rating_codex(word)
    if hash_type == "METAPHONE":
        return jellyfish.metaphone(word)
    raise NotImplementedError(
        "approach '{}' not implemented".format(hash_type))

コード例 #12

0

ファイルを表示

ファイル: spellChecker.py プロジェクト: asidharth019/NLP_spell_checker

    def correct(self, wrongWord):
        """Return scored correction candidates for ``wrongWord``, sorted.

        Candidates are the dictionary words listed in
        ``SpellChecker.invertTriMap`` under any gram of ``wrongWord``.
        Words whose length differs by more than 2, and the word itself, are
        skipped. Each remaining candidate becomes a ``ScoreRcd`` holding
        the candidate, its ``compED`` value and its score; the list is
        sorted with the records' own ordering before it is returned.
        """
        pool = []
        for gram in self.getGrams(wrongWord, SpellChecker.invertMapGram):
            if gram in SpellChecker.invertTriMap:
                pool = pool + SpellChecker.invertTriMap[gram]

        ranked = []
        # De-duplicate before scoring.
        for candidate in list(set(pool)):
            too_different = abs(len(candidate) - len(wrongWord)) > 2
            if too_different or wrongWord == candidate:
                continue

            ed = self.compED(candidate, wrongWord)
            jd = jellyfish.jaro_distance(wrongWord, candidate)
            gd = self.getJackSim(
                self.getGrams(candidate, SpellChecker.jackardGram),
                self.getGrams(wrongWord, SpellChecker.jackardGram))

            # Gram similarity weighted by the candidate's relative count,
            # damped by the compED and jaro_distance values.
            score = gd * SpellChecker.dictCountMap[
                candidate] / SpellChecker.totalCount * (1 / (ed + 1)) * (
                    1 / (jd + 1))

            # Each phonetic code shared with the misspelling adds 0.1.
            for encode in (jellyfish.metaphone, jellyfish.soundex,
                           jellyfish.nysiis, jellyfish.match_rating_codex):
                if encode(wrongWord) == encode(candidate):
                    score = score + 0.1

            ranked.append(ScoreRcd(candidate, ed, score))

        ranked.sort()
        return ranked

コード例 #13

0

ファイルを表示

ファイル: Soundex.py プロジェクト: fakegit/WintersWrath

def compare(word1, dictionary):
    """Return ``(score, word)`` for the highest-scoring dictionary entry.

    For every entry, the soundex, metaphone, NYSIIS and match rating codes
    of both words are compared with ``levenshtein`` and the four values
    are blended with weights 0.2 / 0.3 / 0.3 / 0.2. The entry with the
    strictly largest blend wins; ``(0, None)`` is returned when no entry
    scores above 0.

    NOTE(review): ``levenshtein`` is defined outside this block. If it
    returns a distance rather than a similarity, the largest blend is the
    least similar word -- confirm which one it is.
    """
    encoders = (jellyfish.soundex, jellyfish.metaphone, jellyfish.nysiis,
                jellyfish.match_rating_codex)
    target_codes = [encode(word1) for encode in encoders]

    best_score, best_word = 0, None

    for word2 in dictionary:
        entry_codes = [encode(word2) for encode in encoders]
        parts = [
            levenshtein(target, entry)
            for target, entry in zip(target_codes, entry_codes)
        ]

        sim = parts[0] * 0.2 + parts[1] * 0.3 + parts[2] * 0.3 + parts[3] * 0.2

        if sim > best_score:
            best_score, best_word = sim, word2

    return (best_score, best_word)

コード例 #14

0

ファイルを表示

def phonetic_similarity(word1, word2):
    """Return the weighted sum of distances between the words' phonetic codes.

    Both words are encoded with metaphone, NYSIIS, soundex and match rating
    codex. For each algorithm the Levenshtein distance between the two
    codes is multiplied by ``weightage[algorithm]`` and the products are
    added up. Because the terms are distances, identical codes contribute
    zero.
    """
    algorithms = ('metaphone', 'nysiis', 'soundex', 'match_rating_codex')

    codes_1 = {name: getattr(jellyfish, name)(word1) for name in algorithms}
    codes_2 = {name: getattr(jellyfish, name)(word2) for name in algorithms}

    total = 0
    for name in codes_1:
        total += jellyfish.levenshtein_distance(
            codes_1[name], codes_2[name]) * weightage[name]

    return total

コード例 #15

0

ファイルを表示

ファイル: jellyFishTest.py プロジェクト: stcybrdgs/NLP-Matching

def main():
    """Print jellyfish phonetic codes, stems and comparison scores.

    Demonstrates the phonetic encoders, the Porter stemmer and the string
    comparison functions on fixed sample words. Output goes to stdout.
    """
    # declare test strings
    # rem: u prefix is required jellyfish convention
    str1 = u'Jellyfish'
    str2 = u'Smellyfish'

    # test Phonetic Encoding
    print('\nPhonetic Encoding ----------------------------')

    # Metaphone
    r1 = jellyfish.metaphone(str1)
    r2 = jellyfish.metaphone(str2)
    print('Metaphone: ', r1, ", ", r2)

    # American Soundex
    r1 = jellyfish.soundex(str1)
    r2 = jellyfish.soundex(str2)
    print('Soundex: ', r1, ", ", r2)

    # NYSIIS
    r1 = jellyfish.nysiis(str1)
    r2 = jellyfish.nysiis(str2)
    print('NYSIIS: ', r1, ", ", r2)

    # Match Rating Codex
    r1 = jellyfish.match_rating_codex(str1)
    r2 = jellyfish.match_rating_codex(str2)
    print('Match Rating Codex: ', r1, ", ", r2)

    # test Stemming
    print('\nStemming -------------------------------------')
    # Inflected forms, so the stemmer has a suffix to strip.
    pStr1 = u'Jellyfished'
    pStr2 = u'Smellyfishing'
    r1 = jellyfish.porter_stem(pStr1)
    r2 = jellyfish.porter_stem(pStr2)
    print('Porter Stemmer: ', r1, ", ", r2)

    # test String Comparison
    print('\nString Comparisons ---------------------------')

    # Levenshtein Distance
    r = jellyfish.levenshtein_distance(str1, str2)
    print('Levenshtein Distance: ', r)

    # Damerau-Levenshtein Distance
    r = jellyfish.damerau_levenshtein_distance(str1, str2)
    print('Damerau-Levenshtein Distance: ', r)

    # Hamming Distance
    r = jellyfish.hamming_distance(str1, str2)
    print('Hamming Distance: ', r)

    # Jaro Distance
    r = jellyfish.jaro_distance(str1, str2)
    print('Jaro Distance: ', r)

    # Jaro-Winkler Distance
    r = jellyfish.jaro_winkler(str1, str2)
    print('Jaro-Winkler Distance: ', r)

    # Match Rating Approach (comparison)
    r = jellyfish.match_rating_comparison(str1, str2)
    print('Match Rating Comparison: ', r)

    # end program
    print('Done.')

コード例 #16

0

ファイルを表示

ファイル: l7_jellyfish_levenshtein.py プロジェクト: coder352/shellscript

#     Jaro Distance
#     Jaro-Winkler Distance
#     Match Rating Approach Comparison
#     Hamming Distance

# Phonetic encoding:
#     American Soundex
#     Metaphone
#     NYSIIS (New York State Identification and Intelligence System)
#     Match Rating Codex
import jellyfish
print(jellyfish.levenshtein_distance('jellyfish', 'smellyfish'))  # 2; edit distance
print(jellyfish.jaro_distance('jellyfish', 'smellyfish'))  # 0.89629629629629637
print(jellyfish.damerau_levenshtein_distance('jellyfish', 'jellyfihs'))  # 1; edit distance that also counts transpositions
print(jellyfish.metaphone('Jellyfish'))  # 'JLFX'
print(jellyfish.soundex('Jellyfish'))  # 'J412'
print(jellyfish.nysiis('Jellyfish'))  # 'JALYF'
print(jellyfish.match_rating_codex('Jellyfish'))  # 'JLLFSH'

##################################################################
## Levenshtein
import Levenshtein
print(Levenshtein.hamming('hello', 'helol'))  # 2; Hamming distance; str1 and str2 must have the same length; counts the positions at which two equal-length strings differ
print(Levenshtein.distance('hello', 'helol'))  # 2; edit distance (also called Levenshtein distance); the minimum number of operations (insert, delete, substitute) needed to turn one string into the other
print(Levenshtein.distance('hello world asdf', 'helolaaaa world asdf'))  # 5
print(Levenshtein.ratio('hello', 'helol'))  # 0.8; Levenshtein ratio; r = (sum - ldist) / sum, where sum is the combined length of str1 and str2 and ldist is a modified edit distance
# Note: this modified edit distance is not the plain edit distance above: there each of the three operations costs 1, whereas here deletion and insertion still cost 1 but substitution costs 2
# Rationale: for ratio('a', 'c'), sum=2; with the plain edit distance the result would be (2-1)/2 = 0.5 even though 'a' and 'c' share nothing, which is clearly unreasonable; charging 2 for a substitution fixes this
print(Levenshtein.jaro('hello', 'helol'))  # 0.9333333333333332; Jaro distance; used in health census work
print(Levenshtein.jaro_winkler('hello', 'helol'))  # 0.9533333333333333; Jaro-Winkler distance

コード例 #17

0

ファイルを表示

ファイル: spellCheckNew.py プロジェクト: asidharth019/NLP_spell_checker

			continue
		#if ed ==0:
		#	ed =1
		jd=jellyfish.jaro_distance(wrongWord,candidate)
		#if jd==0:
		#	jd =1
		gd = getJackSim(getGrams(candidate,jackardGram),getGrams(wrongWord,jackardGram))
		score = gd * dictCountMap[candidate]/totalCount * (1/(ed+1)) * (1/(jd+1))
		#New Code
		if jellyfish.metaphone(wrongWord) == jellyfish.metaphone(candidate):
			score = score+0.1
		if jellyfish.soundex(wrongWord) == jellyfish.soundex(candidate):
			score = score+0.1
		if jellyfish.nysiis(wrongWord) == jellyfish.nysiis(candidate):
			score = score+0.1
		if jellyfish.match_rating_codex(wrongWord) == jellyfish.match_rating_codex(candidate):
			score = score+0.1
		
		tmpCandidate = ScoreRcd(candidate,ed, score) ;
		candidateDistList.append(tmpCandidate)
	candidateDistList.sort()

	maxIter = 10
	if len(candidateDistList) < maxIter:
		maxIter = len(candidateDistList)

	for i in range(0,maxIter):
		out =  out + candidateDistList[i].getScore() + ' '
	print (out)

コード例 #18

0

ファイルを表示

ファイル: sample.py プロジェクト: sahitilucky/spellbee

import os
import spellcheck
import jellyfish

# Python 2 script: compare the match rating codes of two sample words.

# Not used by the statements below.
s = u'piece'
# Lower-cased match rating codes of 'place' and 'plaid'; .decode() is then
# applied to the lower-cased code with errors ignored.
s1 = jellyfish.match_rating_codex(u'place').lower().decode('utf-8', 'ignore')
s2 = jellyfish.match_rating_codex(u'plaid').lower().decode('utf-8', 'ignore')
# Show both codes, then the Levenshtein distance between them.
print s1, s2
print jellyfish.levenshtein_distance(s1, s2)

#print jellyfish.levenshtein_distance(s,u'thruout')

コード例 #19

0

ファイルを表示

ファイル: transformations.py プロジェクト: francescoinfante/identity

 def transform(self, data):
     """Return the match rating codex of ``data`` when it is a string.

     Any value that is not a ``basestring`` yields None.
     """
     if not isinstance(data, basestring):
         return None
     return match_rating_codex(unicode(data))

コード例 #20

0

ファイルを表示

ファイル: phonetic_encoder.py プロジェクト: stcybrdgs/wxMatchingEngine

def match_rating_codex(s):
    """Return ``jellyfish.match_rating_codex(s)`` unchanged."""
    codex = jellyfish.match_rating_codex(s)
    return codex

コード例 #21

0

ファイルを表示

from jellyfish import soundex, metaphone, match_rating_codex

# Print every concept from senticnet5.txt next to the Metaphone codes of its
# words. The concept is the first tab-separated field of a line and its words
# are joined by underscores.
# The with-block closes the file even if an encoder raises part-way through.
with open("senticnet5.txt", "r") as sn:
    for line in sn:
        concept = line.split('\t')[0]
        words = concept.split('_')
        # Soundex and match rating codes are computed alongside but are not
        # part of the printed output.
        sndx = [soundex(word) for word in words]
        metaphn = [metaphone(word) for word in words]
        codex = [match_rating_codex(word) for word in words]
        print(concept, '\t', '_'.join(metaphn))

コード例 #22

0

ファイルを表示

import re
from jellyfish import soundex,metaphone,match_rating_codex

# Load the three lookup tables up front; each with-block closes its file as
# soon as the contents have been read.
with open("codex.txt", "r") as handle:
    sentic = handle.read()
with open("concepts+soundex.txt", "r") as handle:
    sentic1 = handle.read()
with open("concepts+metaphone.txt", "r") as handle:
    sentic2 = handle.read()

# Encode the user's input with each phonetic algorithm.
string = input("Enter a string: ")
cdx = match_rating_codex(string)
print(cdx)
sdx = soundex(string)
meta = metaphone(string)

# A table line matches when one of its " \t "-separated fields equals the
# code exactly.
print("Codex Results\n")
for line in sentic.split("\n"):
    if cdx in line.split(" \t "):
        print(line)

print("Soundex Results\n")

for line in sentic1.split("\n"):
    if sdx in line.split(" \t "):
        print(line)

print("Metaphone Results\n")

for line in sentic2.split("\n"):
    if meta in line.split(" \t "):
        print(line)

コード例 #23

0

ファイルを表示

ファイル: wine_df.py プロジェクト: aixpact/NLP

# nx.draw(G, pos, with_labels=True, node_size=0)

# ---------------------------------------------> jellyfish <-------------------------------------------- #

# String comparison
# NOTE: the results of the calls below are not assigned or printed, so they
# are only visible when the lines are run interactively.
grape_1 = 'Ma'
grape_2 = 'Mariette'
jf.levenshtein_distance(grape_1, grape_2)
jf.jaro_distance(grape_1, grape_2)
jf.damerau_levenshtein_distance(grape_1, grape_2)

# Phonetic encoding
# The first three encoders are applied to grape_1 only; the match rating
# codex is computed for both names.
jf.metaphone(grape_1)
jf.soundex(grape_1)
jf.nysiis(grape_1)
jf.match_rating_codex(grape_1)
jf.match_rating_codex(grape_2)

# ---------------------------------------------> Udacity <-------------------------------------------- #

# Sample 1-D score list.
scores = [3.0, 1.0, 0.2]

# Sample 2-D score array with 3 rows and 4 columns.
scores2 = np.array([[1, 2, 3, 6], [2, 4, 5, 6], [3, 8, 7, 6]])


def softmax(x):
    """Compute softmax values for each set of scores in x.

    The normalisation runs along axis 0, so a 1-D input is one set of
    scores and every column of a 2-D input is its own set.

    Args:
        x: array-like of scores.

    Returns:
        numpy array with the shape of ``x`` whose entries along axis 0
        sum to 1.
    """
    x = np.asarray(x)
    if x.size == 0:
        # Nothing to normalise, and np.max cannot reduce an empty axis.
        return np.exp(x)
    # Softmax is unchanged by subtracting a constant from a set of scores.
    # Removing the per-set maximum keeps np.exp from overflowing to inf,
    # which would turn the result into NaN for large scores.
    shifted = x - np.max(x, axis=0)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=0)


# Demo: print the softmax of the 1-D `scores` list.
print(softmax(scores))

コード例 #24

0

ファイルを表示

ファイル: functions.py プロジェクト: zhuohuwu0603/ceja

def match_rating_codex(s):
    """Return the match rating codex of ``s``, passing None through.

    Args:
        s: the text to encode, or None.

    Returns:
        None when ``s`` is None, otherwise ``J.match_rating_codex(s)``.
    """
    # Identity check: `== None` would defer to a custom __eq__ on `s`.
    return None if s is None else J.match_rating_codex(s)

コード例 #25

0

ファイルを表示

ファイル: jellyfish_example.py プロジェクト: yuandra/scraperwiki-scraper-vault

# Python 2 demo (print statements) of jellyfish string comparison and
# phonetic encoding. The comment under each call is the value shown for it.
import jellyfish
# String comparison.
print jellyfish.levenshtein_distance('jellyfish', 'smellyfish')
#2
print jellyfish.jaro_distance('jellyfish', 'smellyfish')
#0.89629629629629637
print jellyfish.damerau_levenshtein_distance('jellyfish', 'jellyfihs')
#1

# Phonetic encoding.
print jellyfish.metaphone('Jellyfish')
#'JLFX'
print jellyfish.soundex('Jellyfish')
#'J412'
print jellyfish.nysiis('Jellyfish')
#'JALYF'
print jellyfish.match_rating_codex('Jellyfish')
#'JLLFSH'
# The lines below repeat the same demo a second time, import included.
import jellyfish
print jellyfish.levenshtein_distance('jellyfish', 'smellyfish')
#2
print jellyfish.jaro_distance('jellyfish', 'smellyfish')
#0.89629629629629637
print jellyfish.damerau_levenshtein_distance('jellyfish', 'jellyfihs')
#1

print jellyfish.metaphone('Jellyfish')
#'JLFX'
print jellyfish.soundex('Jellyfish')
#'J412'
print jellyfish.nysiis('Jellyfish')
#'JALYF'
print jellyfish.match_rating_codex('Jellyfish')