def compare(self, string1, string2):
    """Score how similar two strings are, preferring a phonetic match.

    Identical strings score 1.0 without consulting soundex. Otherwise the
    result code of ``soundex.getInstance().compare`` is mapped to a fixed
    score (0 -> 1.0, 1 -> 0.9, 2 -> 0.8). Any other result code falls
    back to ``self.bigram_average``.

    :param string1: first string to compare.
    :type string1: str
    :param string2: second string to compare.
    :type string2: str
    :returns: similarity score; 1.0 means the strings are considered equal.
    :rtype: float
    """
    if string1 == string2:
        return 1.0

    # NOTE(review): the soundex library is understood to return 0 for equal
    # strings, 1 for same-language sound-alikes and 2 for cross-language
    # sound-alikes -- confirm against the installed version.
    soundex_weights = {0: 1.0, 1: 0.9, 2: 0.8}
    soundex_match = soundex.getInstance().compare(string1, string2)
    weight = soundex_weights.get(soundex_match)
    if weight is None:
        # No usable phonetic verdict: fall back to the bigram measure.
        return self.bigram_average(string1, string2)
    return weight
def compare(self, string1, string2):
    """Return a similarity score for ``string1`` and ``string2``.

    Equal strings yield 1 straight away. Otherwise the soundex comparison
    code decides: 0 gives 1.0, 1 gives 0.9 and 2 gives 0.8. For any other
    code the score is delegated to ``self.bigram_average``.
    """
    if string1 == string2:
        return 1

    match_code = soundex.getInstance().compare(string1, string2)
    if match_code == 0:
        return 1.0
    if match_code == 1:
        return 0.9
    if match_code == 2:
        return 0.8

    # Soundex gave no usable verdict; use the bigram measure instead.
    return self.bigram_average(string1, string2)
def compare(self, string1, string2):
    """Compare two strings and return a similarity score.

    Identical inputs score 1. Otherwise the soundex comparison code is
    looked up in a small code/score table. A code that is not in the
    table sends the comparison to ``self.bigram_average``.
    """
    if string1 == string2:
        return 1

    outcome = soundex.getInstance().compare(string1, string2)
    # (soundex result code, score) pairs, checked in order.
    for code, score in ((0, 1.0), (1, 0.9), (2, 0.8)):
        if outcome == code:
            return score
    return self.bigram_average(string1, string2)
class Resident:
    """A household member who is told about visitors at the door.

    When the resident is at home they are asked to answer the door.
    Otherwise a notification with the visitor's name, message and an
    optional image is sent through the injected messaging object.
    """

    # Class-level defaults; ``set_resident_at_home`` overrides the flag
    # per instance.
    is_at_home = False
    s = soundex.getInstance()

    def __init__(self, text_name, registered_names, twitter_impl,
                 log='../logging/smart_doorbell.full.log'):
        """Set up the resident.

        :param text_name: the resident's name, used in log output.
        :param registered_names: name variants this resident answers to
            (e.g. 'Matthew', 'Matt', 'Matty', 'Mr. Smith'); they are
            compared as text in
            ``requested_name_matches_this_resident``.
        :param twitter_impl: object providing ``post_direct_message`` and
            ``post_direct_message_with_image``.
        :param log: log file path handed to the speech recogniser and to
            ``logging.basicConfig``.
        """
        self.text_name = text_name
        self.registered_names = registered_names
        self.t = twitter_impl
        self.dictophone = SpeechRecogniser(log=log)
        logging.basicConfig(filename=log, level=logging.DEBUG)

    def alert_visitor_at_door(self, visitor_name_audio):
        """Notify the resident of a visitor, locally or remotely."""
        if not self.is_at_home:
            self.send_remote_notification(visitor_name_audio)
            return
        self.request_answer_door()

    def request_answer_door(self):
        """Send a direct message asking the resident to answer the door."""
        self.t.post_direct_message("Please answer the door")

    def send_remote_notification(self, visitor_name_audio_text='Somebody',
                                 recorded_message_audio_text='(blank)',
                                 image_file_path=None):
        """Send the visit summary, with optional image, as a direct message.

        A visitor name that is the recogniser's ``UNRECOGNISED`` marker is
        reported as "Somebody".
        """
        visitor = visitor_name_audio_text
        if visitor is self.dictophone.UNRECOGNISED:
            visitor = "Somebody"
        summary = "{} visited the house and left a message: {}".format(
            visitor, recorded_message_audio_text)
        self.t.post_direct_message_with_image(summary, image_file_path)

    def requested_name_matches_this_resident(self, requested_name_text):
        """Return True if the requested name equals a registered name.

        The comparison ignores letter case and spaces.
        """
        logging.info("Trying to match audio against resident %s",
                     self.text_name)
        wanted = requested_name_text.lower().replace(" ", "")
        return any(
            candidate.lower().replace(" ", "") == wanted
            for candidate in self.registered_names
        )

    def set_resident_at_home(self, at_home):
        """Record whether the resident is currently at home."""
        self.is_at_home = at_home
def evaluateField(self, logging, orgText, findText, threshold,
                  max_edit_distance=8):
    """Return ``orgText`` if it is judged to match ``findText``, else ``""``.

    The checks run from cheapest to most expensive and stop at the first
    decisive one:

    1. ``orgText`` occurs inside ``findText``;
    2. both texts have the same soundex code;
    3. all of: ``self.cosdis`` of the two ``self.word2vec`` vectors is above
       ``threshold``, the ``SequenceMatcher`` ratio is above ``threshold``,
       and the Levenshtein distance is at most ``max_edit_distance``.

    :param logging: unused by this method; kept for caller compatibility.
    :param orgText: the expected text.
    :param findText: the text that was actually found.
    :param threshold: lower bound for the similarity scores; anything
        ``float()`` accepts.
    :param max_edit_distance: largest Levenshtein distance still accepted
        (defaults to 8, the value that was previously hard-coded).
    :returns: ``orgText`` on a match, otherwise the empty string.
    """
    # Literal containment settles the question without any further work.
    if orgText in findText:
        return orgText

    encoder = soundex.getInstance()
    if encoder.soundex(orgText) == encoder.soundex(findText):
        return orgText

    limit = float(threshold)
    org_vector = self.word2vec(str(orgText))
    find_vector = self.word2vec(str(findText))
    is_similar = (
        self.cosdis(org_vector, find_vector) > limit
        and SequenceMatcher(a=orgText, b=findText).ratio() > limit
        and distance.levenshtein(orgText, findText) <= max_edit_distance
    )
    return orgText if is_similar else ""
def __init__(self):
    """Store the object returned by ``soundex.getInstance()`` as ``self.sx``."""
    self.sx = soundex.getInstance()
def setUp(self):
    """Bind the object returned by ``getInstance()`` to ``self.s``.

    NOTE(review): the name suggests a unittest fixture hook that runs
    before each test -- confirm against the enclosing class.
    """
    self.s = getInstance()
def checkIfAvailable(self, logging, field_Name, single_word, single_line_cpy,
                     detail_dict, config):
    """Locate ``field_Name`` relative to the field it depends on.

    A primary field (``self.getPrimaryORSecondary`` returns ``'Y'``) is
    accepted as-is. A secondary field is looked for on the line that lies
    ``how_many_section`` lines before or after the line where its
    dependency field was extracted. That line matches if it contains
    ``field_Name`` or has the same soundex code. If it does not match,
    every line is scanned for a literal occurrence of ``field_Name``.

    :param logging: unused by this method; kept for caller compatibility.
    :param field_Name: name of the field being looked for.
    :param single_word: value returned unchanged for a primary field.
    :param single_line_cpy: indexable sequence of text lines.
    :param detail_dict: iterable of dicts carrying ``'field_Name'`` and
        ``'postion_extracted'`` for the fields gathered so far.
    :param config: passed through to ``self.getPrimaryORSecondary`` and
        ``self.getSencondaryDetails``.
    :returns: ``(single_word, None)`` for a primary field,
        ``(field_Name, line_index)`` when a secondary field is found,
        otherwise ``(None, None)``.
    """
    if self.getPrimaryORSecondary(field_Name, config) == 'Y':
        return single_word, None

    secondary = self.getSencondaryDetails(field_Name, config)
    dependency_name = secondary['dependency_field_Name']

    # Line index of the dependency field; the last matching entry wins.
    # None (not "") marks "not gathered" so no arithmetic is attempted on it.
    anchor = None
    for entry in detail_dict:
        if entry['field_Name'] == dependency_name:
            anchor = entry['postion_extracted']
    sections = secondary['how_many_section']
    if anchor is None:
        return None, None

    offset = int(sections)
    access_type = secondary['access_type']
    if access_type == 'before':
        target = anchor - offset
    elif access_type == 'after':
        target = anchor + offset
    else:
        return None, None

    # Guard the index that is actually used, in both directions. A negative
    # index would silently wrap around and a large one would raise.
    if not 0 <= target < len(single_line_cpy):
        return None, None

    candidate = single_line_cpy[target]
    if field_Name in candidate:
        return field_Name, target
    encoder = soundex.getInstance()
    if encoder.soundex(field_Name) == encoder.soundex(candidate):
        return field_Name, target

    # Expected line did not match: take the first line mentioning the field.
    for index, line in enumerate(single_line_cpy):
        if field_Name in line:
            return field_Name, index
    return None, None
import re
import codecs
import csv
import numpy
import fuzzy
import abydos
import chardet
import sys
import os
import os.path
from abydos.phonetic import bmpm, russell_index, dm_soundex, metaphone
from soundex import getInstance
from metaphone.metaphone import doublemetaphone
from collections import Counter
import shutil

# Shared soundex engine and module-level state.
sdx = getInstance()
double_metaphone = False
dict_replaceby = {}
dict_phonemes = {}


#-------------------------------------------------------------------------------
def carga_tabla_phon(filename):
    """Load a phonetic lookup table from a comma-separated file.

    Every non-empty row adds two entries keyed off its first column:
    ``table[col0] = col1`` and ``table[col0 + "_phon"] = col2``.

    :param filename: path of the CSV file to read.
    :returns: dict built from the rows as described above.
    :raises IndexError: if a non-empty row has fewer than three columns.
    """
    tablename = {}
    # newline='' is the form the csv module documents for reading. The
    # former 'rU' mode is rejected by Python 3.11 and later.
    with open(filename, newline='') as lineas:
        for row in csv.reader(lineas, delimiter=','):
            if not row:
                # csv.reader yields [] for a blank line; nothing to store.
                continue
            key = row[0]
            tablename[key] = row[1]
            tablename[key + "_phon"] = row[2]
    return tablename
def setUp(self):
    """Create the two objects stored on the instance.

    ``self.ies`` receives the result of the bare ``getInstance()`` call and
    ``self.sndx`` the result of ``soundex.getInstance()``.

    NOTE(review): the bare ``getInstance`` presumably comes from a module
    other than ``soundex`` -- confirm against the file's imports.
    """
    self.ies = getInstance()
    self.sndx = soundex.getInstance()
def setUp(self):
    """Run the parent ``setUp``, then create the objects under test.

    ``self.ies`` is a fresh ``InexactSearch`` and ``self.sndx`` is the
    object returned by ``soundex.getInstance()``.
    """
    # Parent set-up runs first so this class builds on its state.
    super(InexactSearchTest, self).setUp()
    self.ies = InexactSearch()
    self.sndx = soundex.getInstance()
import codecs
import csv
import numpy
import fuzzy
import abydos
import chardet
import sys
import os
import os.path
from abydos.phonetic import bmpm, russell_index, dm_soundex, metaphone
from soundex import getInstance
from metaphone.metaphone import doublemetaphone
from collections import Counter
import shutil

# Shared soundex engine and module-level state.
sdx = getInstance()
double_metaphone = False
dict_replaceby = {}
dict_phonemes = {}


#-------------------------------------------------------------------------------
def carga_tabla_phon(filename):
    """Load a phonetic lookup table from a comma-separated file.

    Every non-empty row adds two entries keyed off its first column:
    ``table[col0] = col1`` and ``table[col0 + "_phon"] = col2``.

    :param filename: path of the CSV file to read.
    :returns: dict built from the rows as described above.
    :raises IndexError: if a non-empty row has fewer than three columns.
    """
    tablename = {}
    # newline='' is the form the csv module documents for reading. The
    # former 'rU' mode is rejected by Python 3.11 and later.
    with open(filename, newline='') as lineas:
        for row in csv.reader(lineas, delimiter=','):
            if not row:
                # csv.reader yields [] for a blank line; nothing to store.
                continue
            key = row[0]
            tablename[key] = row[1]
            tablename[key + "_phon"] = row[2]
    return tablename
#-------------------------------------------------------------------------------
# End of Function
# Demo script: prints soundex codes and similarity scores for a few
# Telugu / Kannada word pairs.
#
# An earlier revision appended
# '/Library/Python/2.7/site-packages/soundex' to sys.path before importing.
import soundex
import unittest
from soundex import getInstance

a = 'ఆదియందు'
b = 'ಆದಿಯಲ್ಲಿ'

silpaService = getInstance()

# Soundex code of each sample word, one per line.
for _word in (u'ఆదియందు', u'ಆದಿಯಲ್ಲಿ', u'దేవుడు', u'ದೇವರು'):
    print(silpaService.soundex(_word))

# Soundex comparison code for the first cross-language pair.
print(silpaService.compare(u'ఆదియందు', u'ಆದಿಯಲ್ಲಿ'))

# N-gram similarity for the same pairs and for an ASCII control pair.
# NOTE(review): ``ngram`` is not imported anywhere in the visible part of
# this script -- confirm it is imported elsewhere, or these calls raise
# NameError.
print(ngram.NGram.compare(u'ఆదియందు', u'ಆದಿಯಲ್ಲಿ'))
print(ngram.NGram.compare(u'దేవుడు', u'ದೇವರು', N=1))
print(ngram.NGram.compare('span', 'spam'))
from ..request.crud.base import CRUDElement
from ..request.crud.create import Create
from ..request.crud.delete import Delete
from ..request.crud.read import Read
from ..request.crud.update import Update
from ..request.expr import Expression, Function

# Names exported by ``from <module> import *``.
__all__ = [
    'PyDriver', 'processcrud', 'processquery',
    'processcreate', 'processread', 'processupdate', 'processdelete',
    'FunctionChooser'
]

VERSION = '0.1'  #: python driver version.

# ``soundex`` attribute of the object returned by ``getInstance()``.
# NOTE(review): ``getInstance`` is not imported in the lines visible here --
# confirm it is imported earlier in the file.
soundex = getInstance().soundex

DTF = '%Y-%m-%d %H:%M:%S'  #: date time format


def processcreate(items, create, ctx=None, **kwargs):
    """Apply input Create element to items.

    Thin module-level wrapper: every argument is forwarded by keyword to
    ``_GLOBALPYDRIVER.processcreate`` and that call's result is returned
    unchanged. ``_GLOBALPYDRIVER`` is defined outside the lines visible
    here.

    :param list items: items to process with input Create.
    :param Create create: data to add to input items.
    :param ctx: forwarded unchanged to the driver; defaults to None.
    :param kwargs: additional keyword arguments forwarded to the driver.
    :return: created item.
    :rtype: list
    """

    return _GLOBALPYDRIVER.processcreate(
        create=create, ctx=ctx, items=items, **kwargs
    )