Exemples Python de Soundex : exemples d'utilisation de pyphonetics.Soundex

Exemple #1

0

Afficher le fichier

def test_soundex():
    """Check that Soundex encodes each sample name to its expected code."""
    # Maps every input name to the Soundex code it must produce.
    expected_codes = {
        'Rupert': 'R163',
        'Robert': 'R163',
        'Rubin': 'R150',
        'Ashcroft': 'A261',
        'Ashcraft': 'A261',
        'Tymczak': 'T522',
        'Pfister': 'P123',
        'Andrew': 'A536',
        'Wozniak': 'W252',
        'Callister': 'C423',
        'Hello': 'H400',
        'Martin': 'M635',
        'Bernard': 'B656',
        'Faure': 'F600',
        'Perez': 'P620',
        'Gros': 'G620',
        'Chapuis': 'C120',
        'Boyer': 'B600',
        'Gauthier': 'G360',
        'Rey': 'R000',
        'Barthélémy': 'B634',
        'Henry': 'H560',
        'Moulin': 'M450',
        'Rousseau': 'R200',
    }

    encoder = Soundex()
    for name, code in expected_codes.items():
        assert encoder.phonetics(name) == code

Exemple #2

0

Afficher le fichier

def test_soundex():
    """Check single-letter encodings and that an empty string is rejected."""
    encoder = Soundex()

    # Single letters are expected to come back as the letter plus three zeros.
    for letter, code in (('h', 'H000'), ('d', 'D000')):
        assert encoder.phonetics(letter) == code

    # Encoding an empty string must raise EmptyStringError.
    with pytest.raises(EmptyStringError):
        encoder.phonetics('')

Exemple #3

0

Afficher le fichier

Fichier : PhoneticModule.py Projet : rizveeredwan/UID-Generation-Based-On-Polynomial-Hashing

class PhoneticModule:
    """Combine five phonetic encoders into weighted scores and layered codes."""

    def __init__(self):
        """Create one instance of each encoder and weight each of them 0.2."""
        self.soundex = Soundex()
        self.metaphone = Metaphone()
        self.fuzzySoundex = FuzzySoundex()
        self.lein = Lein()
        self.refinedSoundex = RefinedSoundex()

        self.phoneticSimilarityWeight = {
            'soundex': 0.2,
            'metaphone': 0.2,
            'fuzzySoundex': 0.2,
            'lein': 0.2,
            'refinedSoundex': 0.2,
        }

    @staticmethod
    def _levenshtein_distance(encoder, word1, word2):
        """Return ``encoder.distance`` for the two words using the levenshtein metric."""
        return encoder.distance(word1, word2, metric='levenshtein')

    def Calculation(self, word1, word2):
        """Return the weighted sum of the five per-encoder distances.

        Each encoder's distance between ``word1`` and ``word2`` is multiplied
        by its entry in ``phoneticSimilarityWeight``; the terms are added in
        the order soundex, metaphone, fuzzySoundex, lein, refinedSoundex.
        The total is printed before it is returned.
        """
        weights = self.phoneticSimilarityWeight
        res = (
            self.SoundexMethod(word1, word2) * weights['soundex'] * 1.0
            + self.MetaphoneMethod(word1, word2) * weights['metaphone'] * 1.0
            + self.FuzzySoundexMethod(word1, word2) * weights['fuzzySoundex'] * 1.0
            + self.LeinMethod(word1, word2) * weights['lein'] * 1.0
            + self.RefinedSoundexMethod(word1, word2) * weights['refinedSoundex'] * 1.0
        )
        print(res)
        return res

    def PhoneticLayerCreation(self, word):
        """Return the five phonetic codes of ``word`` joined by underscores."""
        encoders = (
            self.soundex,
            self.metaphone,
            self.fuzzySoundex,
            self.lein,
            self.refinedSoundex,
        )
        return '_'.join(encoder.phonetics(word) for encoder in encoders)

    def SoundexMethod(self, word1, word2):
        """Return the Soundex-based levenshtein distance of the two words."""
        return self._levenshtein_distance(self.soundex, word1, word2)

    def MetaphoneMethod(self, word1, word2):
        """Return the Metaphone-based levenshtein distance of the two words."""
        return self._levenshtein_distance(self.metaphone, word1, word2)

    def FuzzySoundexMethod(self, word1, word2):
        """Return the FuzzySoundex-based levenshtein distance of the two words."""
        return self._levenshtein_distance(self.fuzzySoundex, word1, word2)

    def LeinMethod(self, word1, word2):
        """Return the Lein-based levenshtein distance of the two words."""
        return self._levenshtein_distance(self.lein, word1, word2)

    def RefinedSoundexMethod(self, word1, word2):
        """Return the RefinedSoundex-based levenshtein distance of the two words."""
        return self._levenshtein_distance(self.refinedSoundex, word1, word2)

Exemple #4

0

Afficher le fichier

Fichier : PhoneticModule.py Projet : rizveeredwan/UID-Generation-Based-On-Polynomial-Hashing

    def __init__(self):
        """Create one instance of each phonetic encoder and weight each 0.2."""
        self.soundex = Soundex()
        self.metaphone = Metaphone()
        self.fuzzySoundex = FuzzySoundex()
        self.lein = Lein()
        self.refinedSoundex = RefinedSoundex()

        # Weight applied to each encoder, keyed by encoder name.
        self.phoneticSimilarityWeight = {
            'soundex': 0.2,
            'metaphone': 0.2,
            'fuzzySoundex': 0.2,
            'lein': 0.2,
            'refinedSoundex': 0.2,
        }

Exemple #5

0

Afficher le fichier

def test_soundex_homophones():
    """Check that all names inside each homophone group share one Soundex code."""
    homophone_groups = [
        ('Braz', 'Broz'),
        (
            'Caren', 'Caron', 'Carren', 'Charon', 'Corain', 'Coram', 'Corran',
            'Corrin', 'Corwin', 'Curran', 'Curreen', 'Currin', 'Currom',
            'Currum', 'Curwen',
        ),
        ('Hairs', 'Hark', 'Hars', 'Hayers', 'Heers', 'Hiers'),
        (
            'Lambard', 'Lambart', 'Lambert', 'Lambird', 'Lampaert', 'Lampard',
            'Lampart', 'Lamperd', 'Lampert', 'Lamport', 'Limbert', 'Lombard',
        ),
        ('Nolton', 'Noulton'),
    ]

    encoder = Soundex()
    for group in homophone_groups:
        distinct_codes = {encoder.phonetics(name) for name in group}
        # Exactly one distinct code means every name in the group encodes alike.
        assert len(distinct_codes) == 1

Exemple #6

0

Afficher le fichier

Fichier : SpeechRecognition.py Projet : markgwaps04/digos-ccts-v2

    def get_info(self):
        """Build the attribute dict describing this recognition result.

        Reads ``recognize`` (joined into one string), ``base`` and ``index``
        from ``self.__params__``. The returned dict carries ``index``,
        ``miscue_base``, ``miscue_result`` and a ``type`` that is:

        * ``0`` when either string is numeric;
        * ``0`` when the RefinedSoundex distance is below 1 or Soundex reports
          that the two strings sound alike;
        * ``2`` otherwise.
        """
        params = self.__params__
        spoken = "".join(params["recognize"])
        expected = params["base"]

        info = {
            "index": params["index"],
            "miscue_base": expected,
            "miscue_result": spoken,
        }

        refined = RefinedSoundex()
        plain = Soundex()

        # Numeric values are not compared phonetically.
        if str(expected).isnumeric() or str(spoken).isnumeric():
            info['type'] = 0
            return info

        gap = refined.distance(expected, spoken)
        alike = plain.sounds_like(expected, spoken)

        info['type'] = 0 if (gap < 1 or alike) else 2
        return info

Exemple #7

0

Afficher le fichier

import markovify
import string
import nltk
from nltk.tokenize import RegexpTokenizer
from Phyme import Phyme
from Phyme.util import flatten
from pyphonetics import Soundex
from subprocess import Popen
# Download the 'cmudict' resource through NLTK at import time.
nltk.download('cmudict')
# Module-level Phyme and Soundex instances.
ph = Phyme()
soundex = Soundex()

from twilio.rest import Client

# Your Account Sid and Auth Token from twilio.com/console
# DANGER! This is insecure. See http://twil.io/secure
# NOTE(review): both credentials are single-space placeholders here; real
# values presumably have to be supplied before the client is usable - confirm.
account_sid = ' '
auth_token = ' '
client = Client(account_sid, auth_token)

print("Loading model... ")
# Read the serialized model file as raw bytes and rebuild the model from it
# with markovify's NewlineText.from_json.
with open('small_markov_model_state_3.json', 'rb') as f:
    text = f.read()
    markov_model = markovify.NewlineText.from_json(text)


#returns list of words that rhyme
def find_rhymes(word):
    rhyme_list = []
    try:
        rhyme_list.append(

Exemple #8

0

Afficher le fichier

Fichier : Phonetic Hashing.py Projet : Keerthana2701/Natural-Language-Processing-NLTK-and-Gensim

# -*- coding: utf-8 -*-
"""
Created on Sat Oct 24 16:16:46 2020

@author: Vikee
"""


from pyphonetics import Soundex

# Small demonstration of Soundex on the word pair 'Allowed' / 'Aloud'.
soundex = Soundex()
# Phonetic code of each word.
a=soundex.phonetics('Allowed')
b=soundex.phonetics('Aloud')
# Result of asking the encoder whether the two words sound alike.
c=soundex.sounds_like('Allowed', 'Aloud')

Exemple #9

0

Afficher le fichier

def phonetic(data, text):
    """Replace phrases of ``text`` that sound like an entry of ``data``.

    The text is reduced to letters, whitespace and colons, tokenized, and
    filtered of English stop words. Every word n-gram of the remaining
    tokens (sizes 1 up to the number of tokens) is compared with every entry
    of ``data`` using Soundex, Metaphone, RefinedSoundex and FuzzySoundex.
    When at least three of the four report that the two sound alike, the
    n-gram is mapped to that entry. The mapping is finally applied to the
    original text after punctuation other than commas has been removed.

    Args:
        data: Reference words or phrases to match against; iterated once per
            candidate n-gram, so it must be re-iterable (e.g. a list).
        text: The raw input text.

    Returns:
        The text with every matched phrase replaced by its reference entry.
        The mapping and the result are also printed.
    """

    def clean_text(raw_text):
        """Keep letters, whitespace and colons; collapse whitespace; strip ends."""
        # Raw strings: '\s' inside a plain literal is an invalid escape sequence.
        filtered_text = re.sub(r'[^a-zA-Z\s:]', '', raw_text)
        filtered_text = re.sub(r'\s+', ' ', filtered_text)
        return filtered_text.strip()

    def multiple_replace(sentence, replacements):
        """Remove punctuation except commas, then substitute each key by its value."""
        removable = "".join(ch for ch in string.punctuation if ch != ",")
        # Build the translation table once instead of once per token.
        table = str.maketrans('', '', removable)
        sentence = ' '.join(
            token.translate(table) for token in sentence.split())
        for key, value in replacements.items():
            sentence = sentence.replace(key, value)
        return sentence

    tokens = word_tokenize(clean_text(text))
    stop_words = set(stopwords.words('english'))
    tokens = [word for word in tokens if word not in stop_words]

    algorithms = [Soundex(), Metaphone(), RefinedSoundex(), FuzzySoundex()]

    # Candidate phrases ordered by increasing n-gram size. The upper bound is
    # inclusive so a one-token text still yields its single unigram (the former
    # range(1, n) produced no candidates at all in that case).
    candidates = []
    for size in range(1, len(tokens) + 1):
        candidates.extend(
            " ".join(gram) for gram in nltk.ngrams(tokens, size))
    candidates = [item for item in candidates if not item.isdigit()]

    matches = {}
    for candidate in candidates:
        for word in data:
            votes = sum(
                1 for algorithm in algorithms
                if algorithm.sounds_like(candidate, word))
            # Accept the pair when at least three of the four algorithms agree.
            if votes >= 3:
                matches[candidate] = word

    # Reverse the insertion order so that longer n-grams are substituted
    # before the shorter ones they contain.
    matches = dict(reversed(list(matches.items())))
    print(matches)

    result = multiple_replace(text, matches)
    print(result)

    return result

Exemple #10

0

Afficher le fichier

Fichier : Phonetic_embed.py Projet : rizveeredwan/UID-Generation-Based-On-Polynomial-Hashing

# -- weight
weight = {
    "soundex": 0.2,
    "caverphone": 0.2,
    "metaphone": 0.5,
    "nysiis": 0.1
}

# -- algorithms
algorithms = ["soundex", "caverphone", "metaphone", "nysiis"]

# -- total
total = 0.0
for entry in algorithms:
    code1 = codeList1[entry]
    code2 = codeList2[entry]
    lev = levenshtein (code1, code2)
    currentWeight = weight[entry]
    print ("comparing %s with %s for %s (%0.2f: weight %0.2f)" % (code1, code2, entry, lev, currentWeight))
    subtotal = lev * currentWeight
    total += subtotal

print ("total: %0.2f" % total)
"""

from pyphonetics import Soundex
# Small demonstration of the Soundex encoder.
soundex = Soundex()
# Print the phonetic codes of two spellings of the same name.
print(soundex.phonetics('Lucky'))
print(soundex.phonetics('Lucki'))
# Print whether the encoder reports that the two spellings sound alike.
print(soundex.sounds_like('Rizvee', 'Rizvi'))

Exemple #11

0

Afficher le fichier

def insert_drug_database(drug_name):
    """Match ``drug_name`` against the bundled drug list and insert the result.

    The lines of ``utils//DRUGS_ALL_EDITTED.csv`` are scanned in order and the
    first rule that applies decides which line is stored via ``insert_drug``:

    1. A line whose ``distance_words`` to the translated name is 0 or 1 is
       used immediately.
    2. Otherwise, among the lines that Soundex reports as sounding like the
       normalized or the translated name, the first one whose Metaphone code
       equals the code of either name is used.
    3. Otherwise the sound-alike line with the smallest RefinedSoundex
       distance to the normalized name is used.
    4. Otherwise the line with the smallest RefinedSoundex distance to the
       translated name is used.

    Args:
        drug_name: The drug name as entered. A falsy value inserts a
            placeholder row instead.

    Returns:
        A ``(drug_id, drug_name)`` tuple, where ``drug_id`` is whatever
        ``insert_drug`` returned. For a falsy ``drug_name`` the returned name
        is ``"default"``.
    """
    if not drug_name:
        drug_id = insert_drug("----------", "----------", "----------")
        return drug_id, "default"

    soundex = Soundex()
    metaphone = Metaphone()
    rs = RefinedSoundex()

    file_path = "utils//DRUGS_ALL_EDITTED.csv"
    file_path = pkg_resources.resource_filename(__name__, file_path)
    # Context manager so the file handle is closed (it used to be leaked).
    with open(file_path, "r") as file:
        section = file.read()
    parts = re.split('[\n]', section)

    new_name = re.sub("چ", "غ", drug_name)
    new_name = re.sub("ﻏ", "غ", new_name)
    new_name = normalize_arabic(new_name)
    name_en = translate_drug_name(drug_name)

    # Best sound-alike candidate (distance measured against new_name).
    min_dist = 100
    min_index = -1
    # Best candidate over all lines (distance measured against name_en).
    min_dist_all = 100
    min_index_all = -1
    # (line, metaphone code) for every sound-alike line.
    equals = []

    for index, part in enumerate(parts):
        if distance_words(name_en, part) in (0, 1):
            print(" Matched To ->", part)
            drug_id = insert_drug(drug_name, normalize_arabic(drug_name), part)
            return drug_id, drug_name

        dist = rs.distance(name_en, part)
        if dist < min_dist_all:
            min_dist_all = dist
            min_index_all = index

        if soundex.sounds_like(new_name, part) or soundex.sounds_like(name_en, part):
            dist_new = rs.distance(new_name, part)
            if dist_new < min_dist:
                min_dist = dist_new
                min_index = index
            equals.append((part, metaphone.phonetics(part)))

    if min_index != -1:
        name_en_code = metaphone.phonetics(name_en)
        for matched_part, code in equals:
            # Compare the stored code (not the whole tuple) with both names;
            # the tuple-vs-string comparison used before could never be true.
            if code == name_en_code or code == metaphone.phonetics(new_name):
                drug_id = insert_drug(drug_name, normalize_arabic(drug_name), matched_part)
                return drug_id, drug_name

        drug_id = insert_drug(drug_name, normalize_arabic(drug_name), parts[min_index])
        return drug_id, drug_name

    # NOTE(review): when no line has a distance below 100, min_index_all is
    # still -1 and this selects the last line of the file - confirm intended.
    drug_id = insert_drug(drug_name, normalize_arabic(drug_name), parts[min_index_all])
    return drug_id, drug_name