Exemple #1
0
def test_soundex():
    """Check Soundex codes for a table of (expected code, name) pairs."""
    cases = (
        ('R163', 'Rupert'),
        ('R163', 'Robert'),
        ('R150', 'Rubin'),
        ('A261', 'Ashcroft'),
        ('A261', 'Ashcraft'),
        ('T522', 'Tymczak'),
        ('P123', 'Pfister'),
        ('A536', 'Andrew'),
        ('W252', 'Wozniak'),
        ('C423', 'Callister'),
        ('H400', 'Hello'),
        ('M635', 'Martin'),
        ('B656', 'Bernard'),
        ('F600', 'Faure'),
        ('P620', 'Perez'),
        ('G620', 'Gros'),
        ('C120', 'Chapuis'),
        ('B600', 'Boyer'),
        ('G360', 'Gauthier'),
        ('R000', 'Rey'),
        ('B634', 'Barthélémy'),
        ('H560', 'Henry'),
        ('M450', 'Moulin'),
        ('R200', 'Rousseau'),
    )

    encoder = Soundex()
    # Each row is (expected code, input name); unpack instead of indexing.
    for expected_code, name in cases:
        assert encoder.phonetics(name) == expected_code
Exemple #2
0
def test_soundex():
    """Check single-letter inputs and the empty-string error path."""
    encoder = Soundex()

    # A lone letter yields that letter upper-cased, followed by three zeros.
    single_letter_codes = (('h', 'H000'), ('d', 'D000'))
    for letter, expected_code in single_letter_codes:
        assert encoder.phonetics(letter) == expected_code

    # An empty input must be rejected rather than encoded.
    with pytest.raises(EmptyStringError):
        encoder.phonetics('')
class PhoneticModule:
    """Combine five phonetic encoders into one weighted score and one key.

    The encoders (Soundex, Metaphone, FuzzySoundex, Lein, RefinedSoundex) are
    created once per instance.  ``phoneticSimilarityWeight`` maps each
    encoder's key to the weight its distance receives in ``Calculation``.
    """

    def __init__(self):
        """Create the five encoders and give each a weight of 0.2."""
        self.soundex = Soundex()
        self.metaphone = Metaphone()
        self.fuzzySoundex = FuzzySoundex()
        self.lein = Lein()
        self.refinedSoundex = RefinedSoundex()

        # Keys match the names used by Calculation() to look weights up.
        self.phoneticSimilarityWeight = {
            'soundex': 0.2,
            'metaphone': 0.2,
            'fuzzySoundex': 0.2,
            'lein': 0.2,
            'refinedSoundex': 0.2,
        }

    def Calculation(self, word1, word2):
        """Return the weighted sum of the five per-encoder distances.

        Each ``*Method`` result is multiplied by the matching entry of
        ``phoneticSimilarityWeight`` and the products are added in the order
        soundex, metaphone, fuzzySoundex, lein, refinedSoundex.  The total is
        also printed before being returned.
        """
        weights = self.phoneticSimilarityWeight
        # Go through the public *Method wrappers so subclass overrides apply.
        weighted_methods = (
            ('soundex', self.SoundexMethod),
            ('metaphone', self.MetaphoneMethod),
            ('fuzzySoundex', self.FuzzySoundexMethod),
            ('lein', self.LeinMethod),
            ('refinedSoundex', self.RefinedSoundexMethod),
        )
        res = 0.0
        for key, method in weighted_methods:
            res += method(word1, word2) * weights[key]
        # Kept from the original implementation: callers may rely on the output.
        print(res)
        return res

    def PhoneticLayerCreation(self, word):
        """Return the five phonetic codes of *word* joined with underscores.

        The order is soundex, metaphone, fuzzySoundex, lein, refinedSoundex.
        """
        encoders = (
            self.soundex,
            self.metaphone,
            self.fuzzySoundex,
            self.lein,
            self.refinedSoundex,
        )
        return '_'.join(encoder.phonetics(word) for encoder in encoders)

    def SoundexMethod(self, word1, word2):
        """Return the Soundex distance of the two words (levenshtein metric)."""
        return self.soundex.distance(word1, word2, metric='levenshtein')

    def MetaphoneMethod(self, word1, word2):
        """Return the Metaphone distance of the two words (levenshtein metric)."""
        return self.metaphone.distance(word1, word2, metric='levenshtein')

    def FuzzySoundexMethod(self, word1, word2):
        """Return the FuzzySoundex distance of the two words (levenshtein metric)."""
        return self.fuzzySoundex.distance(word1, word2, metric='levenshtein')

    def LeinMethod(self, word1, word2):
        """Return the Lein distance of the two words (levenshtein metric)."""
        return self.lein.distance(word1, word2, metric='levenshtein')

    def RefinedSoundexMethod(self, word1, word2):
        """Return the RefinedSoundex distance of the two words (levenshtein metric)."""
        return self.refinedSoundex.distance(word1, word2, metric='levenshtein')
Exemple #5
0
def test_soundex_homophones():
    """Every name within a group must map to one and the same Soundex code."""
    homophone_groups = [
        ('Braz', 'Broz'),
        ('Caren', 'Caron', 'Carren', 'Charon', 'Corain', 'Coram', 'Corran',
         'Corrin', 'Corwin', 'Curran', 'Curreen', 'Currin', 'Currom', 'Currum', 'Curwen'),
        ('Hairs', 'Hark', 'Hars', 'Hayers', 'Heers', 'Hiers'),
        ('Lambard', 'Lambart', 'Lambert', 'Lambird', 'Lampaert', 'Lampard',
         'Lampart', 'Lamperd', 'Lampert', 'Lamport', 'Limbert', 'Lombard'),
        ('Nolton', 'Noulton'),
    ]

    encoder = Soundex()
    for group in homophone_groups:
        # Collapsing the codes into a set leaves one element when they all agree.
        distinct_codes = {encoder.phonetics(name) for name in group}
        assert len(distinct_codes) == 1
    def get_info(self):
        """Build the attribute dict for this miscue and classify it.

        Reads ``recognize``, ``base`` and ``index`` from ``self.__params__``.
        The returned dict holds ``index``, ``miscue_base``, ``miscue_result``
        and ``type``.  ``type`` is 0 when either value is numeric, when the
        RefinedSoundex distance is below 1, or when Soundex reports that the
        two sound alike; otherwise it is 2.
        """
        # "recognize" is joined into a single string before any comparison.
        recognized_text = "".join(self.__params__["recognize"])
        expected = self.__params__["base"]

        info = {
            "index": self.__params__["index"],
            "miscue_base": expected,
            "miscue_result": recognized_text,
        }

        refined = RefinedSoundex()
        plain = Soundex()

        # Numeric values are not compared phonetically.
        if str(expected).isnumeric() or str(recognized_text).isnumeric():
            info['type'] = 0
            return info

        # Both measures are computed up front, before the decision is taken.
        refined_distance = refined.distance(expected, recognized_text)
        sounds_alike = plain.sounds_like(expected, recognized_text)

        info['type'] = 0 if (refined_distance < 1 or sounds_alike) else 2
        return info
Exemple #7
0
import markovify
import string
import nltk
from nltk.tokenize import RegexpTokenizer
from Phyme import Phyme
from Phyme.util import flatten
from pyphonetics import Soundex
from subprocess import Popen
# Import-time setup: everything below runs once when the module is loaded.

# Fetch the NLTK 'cmudict' resource.
nltk.download('cmudict')
# Module-level Phyme and Soundex instances shared by the code below.
ph = Phyme()
soundex = Soundex()

from twilio.rest import Client

# Your Account Sid and Auth Token from twilio.com/console
# DANGER! This is insecure. See http://twil.io/secure
# NOTE(review): both values are single-space placeholders here; real
# credentials presumably have to be supplied before the client is usable.
account_sid = ' '
auth_token = ' '
client = Client(account_sid, auth_token)

print("Loading model... ")
# Read the serialized model as bytes and rebuild a markovify.NewlineText
# from it; `text` and `markov_model` stay available as module globals.
with open('small_markov_model_state_3.json', 'rb') as f:
    text = f.read()
    markov_model = markovify.NewlineText.from_json(text)


#returns list of words that rhyme
def find_rhymes(word):
    rhyme_list = []
    try:
        rhyme_list.append(
# -*- coding: utf-8 -*-
"""
Created on Sat Oct 24 16:16:46 2020

@author: Vikee
"""


from pyphonetics import Soundex

# Small demo: encode two similar-sounding words and compare them.
soundex = Soundex()
a=soundex.phonetics('Allowed')  # phonetic code of 'Allowed'
b=soundex.phonetics('Aloud')  # phonetic code of 'Aloud'
# NOTE(review): presumably True when both words encode to the same code —
# confirm against the pyphonetics documentation.
c=soundex.sounds_like('Allowed', 'Aloud')
Exemple #9
0
def phonetic(data, text):
    """Replace phrases of *text* that sound like an entry of *data*.

    The text is reduced to ASCII letters, whitespace and colons, tokenized,
    and stripped of English stop words.  Every n-gram of the remaining
    tokens (sizes 1 to ``len(tokens) - 1``) is compared with every entry of
    *data* using Soundex, Metaphone, RefinedSoundex and FuzzySoundex.  When
    at least three of the four encoders report ``sounds_like``, the n-gram
    is mapped to that entry (the last matching entry wins).  The mapping is
    then applied to the original text, after removing all punctuation except
    commas, by plain substring replacement.

    Args:
        data: Iterable of reference words/phrases to substitute in.
        text: The raw input sentence.

    Returns:
        The rewritten sentence.  The mapping and the result are also printed.
    """

    def clean_text(raw_text):
        """Keep only letters, whitespace and colons; collapse whitespace."""
        # Raw strings: '\s' in a plain literal is an invalid escape sequence.
        filtered_text = re.sub(r'[^a-zA-Z\s:]', '', raw_text)
        # Eliminate multiple spaces
        filtered_text = re.sub(r'[\s]+', ' ', filtered_text)
        # Strip out terminal and leading spaces
        return filtered_text.strip()

    def multipleReplace(sentence, replacements):
        """Strip punctuation (except commas) and apply *replacements*.

        Takes a text and replaces every occurrence of a key of
        *replacements* with the associated value, returning the changed
        text.  Matching is by substring, not by whole word.
        """
        removable = "".join(ch for ch in string.punctuation if ch != ",")
        # Build the deletion table once instead of once per word.
        table = str.maketrans('', '', removable)
        sentence = ' '.join(word.translate(table) for word in sentence.split())

        for key in replacements:
            sentence = sentence.replace(key, replacements[key])
        return sentence

    words = data
    sanitized_text = clean_text(text)

    # A set makes the stop-word membership test O(1) per token.
    stop_words = set(stopwords.words('english'))
    tokens = [word for word in word_tokenize(sanitized_text)
              if word not in stop_words]
    n = len(tokens)

    algorithms = [Soundex(), Metaphone(), RefinedSoundex(), FuzzySoundex()]

    # NOTE(review): range(1, n) never yields the n-gram spanning all tokens,
    # so a single-token text produces no candidates at all. Kept as in the
    # original; confirm whether range(1, n + 1) was intended.
    ngrams = []
    for size in range(1, n):
        ngrams.extend(" ".join(gram) for gram in nltk.ngrams(tokens, size))
    ngrams = [item for item in ngrams if not item.isdigit()]

    matches = {}
    for gram in ngrams:
        for word in words:
            # sounds_like() does its own encoding of both inputs, so the
            # separate phonetics() calls of the original were redundant.
            votes = sum(
                1 for algorithm in algorithms
                if algorithm.sounds_like(gram, word)
            )
            if votes >= 3:
                matches[gram] = word

    # Reverse insertion order so n-grams found later (the longer ones) are
    # replaced before the shorter ones they may contain.
    matches = dict(reversed(list(matches.items())))
    print(matches)

    result = multipleReplace(text, matches)
    print(result)

    return result
# -- weight
weight = {
    "soundex": 0.2,
    "caverphone": 0.2,
    "metaphone": 0.5,
    "nysiis": 0.1
}

# -- algorithms
algorithms = ["soundex", "caverphone", "metaphone", "nysiis"]

# -- total
total = 0.0
for entry in algorithms:
    code1 = codeList1[entry]
    code2 = codeList2[entry]
    lev = levenshtein (code1, code2)
    currentWeight = weight[entry]
    print ("comparing %s with %s for %s (%0.2f: weight %0.2f)" % (code1, code2, entry, lev, currentWeight))
    subtotal = lev * currentWeight
    total += subtotal

print ("total: %0.2f" % total)
"""

from pyphonetics import Soundex
# Small demo: print the phonetic codes of two spellings, then the result of
# comparing another pair of spellings with sounds_like().
soundex = Soundex()
print(soundex.phonetics('Lucky'))
print(soundex.phonetics('Lucki'))
print(soundex.sounds_like('Rizvee', 'Rizvi'))
Exemple #11
0
def insert_drug_database(drug_name):
    """Match *drug_name* against the bundled drug list and insert the result.

    Each line of ``utils//DRUGS_ALL_EDITTED.csv`` is treated as one candidate
    name.  Candidates are tried in this order of preference:

    1. The first line whose ``distance_words`` to the translated name is 0
       or 1 (returned immediately).
    2. Among lines that Soundex says sound like the normalized or the
       translated name: the first one whose Metaphone code equals the code
       of the translated or the normalized name.
    3. Among those same lines: the one with the smallest RefinedSoundex
       distance to the normalized name.
    4. The line with the smallest RefinedSoundex distance to the translated
       name (the last line when no distance is below 100).

    A falsy *drug_name* inserts a placeholder row instead.

    Args:
        drug_name: The drug name as entered.

    Returns:
        A ``(drug_id, drug_name)`` tuple; ``drug_name`` is ``"default"`` when
        the input was falsy.
    """
    if not drug_name:
        drug_id = insert_drug("----------", "----------", "----------")
        return drug_id, "default"

    def _insert(matched_part):
        """Insert the drug under *matched_part* and build the return value."""
        drug_id = insert_drug(drug_name, normalize_arabic(drug_name), matched_part)
        return drug_id, drug_name

    soundex = Soundex()
    metaphone = Metaphone()
    rs = RefinedSoundex()

    file_path = pkg_resources.resource_filename(
        __name__, "utils//DRUGS_ALL_EDITTED.csv")
    # Context manager: the original never closed the file handle.
    with open(file_path, "r") as file:
        parts = file.read().split('\n')

    # Map look-alike characters onto "غ" before normalizing.
    new_name = re.sub("چ", "غ", drug_name)
    new_name = re.sub("ﻏ", "غ", new_name)
    new_name = normalize_arabic(new_name)
    name_en = translate_drug_name(drug_name)

    equals = []          # (part, metaphone code) of every sound-alike line
    min_dist = 100       # best RefinedSoundex distance among sound-alikes
    min_index = -1
    min_dist_all = 100   # best RefinedSoundex distance among all lines
    min_index_all = -1

    for index, part in enumerate(parts):
        if distance_words(name_en, part) in (0, 1):
            print(" Matched To ->", part)
            return _insert(part)

        dist = rs.distance(name_en, part)
        if dist < min_dist_all:
            min_dist_all = dist
            min_index_all = index

        if soundex.sounds_like(new_name, part) or soundex.sounds_like(name_en, part):
            sound_dist = rs.distance(new_name, part)
            if sound_dist < min_dist:
                min_dist = sound_dist
                min_index = index
            equals.append((part, metaphone.phonetics(part)))

    if min_index != -1:
        name_en_code = metaphone.phonetics(name_en)
        for part, code in equals:
            # The original compared the whole (part, code) tuple with the
            # code of new_name, which could never be equal; compare codes.
            if code == name_en_code or code == metaphone.phonetics(new_name):
                return _insert(part)
        return _insert(parts[min_index])

    # No sound-alike at all: fall back to the globally closest line.
    return _insert(parts[min_index_all])