def _to_ascii_text(name):
    """Return *name* as unicode text with every non-ASCII character dropped.

    Byte strings are decoded as UTF-8 first; unicode input is used as-is.
    """
    if not isinstance(name, unicode):
        name = unicode(name, 'utf-8')
    # NFKD decomposes accented characters into base letter + combining mark,
    # so encoding to ASCII with 'ignore' keeps the base letter and drops the
    # mark instead of losing the whole character.
    ascii_bytes = unicodedata.normalize('NFKD', name).encode('ascii', 'ignore')
    return unicode(ascii_bytes, 'utf-8')


def seq_matcher(name1, name2):
    """Compare two names by the distance between their Soundex codes.

    Both names are folded to ASCII, encoded with ``fuzzy.Soundex(4)``, and the
    two resulting codes are compared with ``StringMatcher.distance()``.

    Args:
        name1: First name, as unicode text or a UTF-8 encoded byte string.
        name2: Second name, as unicode text or a UTF-8 encoded byte string.

    Returns:
        Whatever ``StringMatcher.distance()`` yields for the two Soundex codes
        (presumably the Levenshtein edit distance, if StringMatcher is the
        python-Levenshtein class -- confirm against the file's imports).
    """
    # NOTE: fuzzy.DMetaphone and fuzzy.nysiis were previously tried here as
    # alternative phonetic encodings; Soundex is the one in use.
    soundex = fuzzy.Soundex(4)
    code1 = soundex(_to_ascii_text(name1))
    code2 = soundex(_to_ascii_text(name2))
    return StringMatcher(seq1=code1, seq2=code2).distance()
def processText(self, text1, text2):
    """Return the edit distance between two texts scaled by the longer length.

    Args:
        text1: First sequence to compare.
        text2: Second sequence to compare.

    Returns:
        ``0.`` when the inputs compare equal; otherwise the value of
        ``StringMatcher.distance()`` divided by ``max(len(text1), len(text2))``
        as a float.
    """
    # Equal inputs (two empty strings included) short-circuit here, so for
    # strings the division below never sees a zero denominator.
    if text1 == text2:
        return 0.

    matcher = StringMatcher()
    matcher.set_seqs(text1, text2)
    raw_distance = matcher.distance()
    longest = max(len(text1), len(text2))
    return float(raw_distance) / longest
def processText(self, text1, text2):
    """Compute a length-normalised distance between ``text1`` and ``text2``.

    Inputs that compare equal score ``0.0``. Any other pair is scored as
    ``StringMatcher.distance()`` divided by the length of the longer input,
    returned as a float.
    """
    identical = (text1 == text2)
    if identical:
        # Nothing to measure; this also covers the case of two empty inputs.
        return 0.0

    comparer = StringMatcher()
    comparer.set_seqs(text1, text2)
    edits = float(comparer.distance())
    # Normalise by the longer input's length.
    return edits / max(len(text1), len(text2))
def editdist(self, string1, string2):
    """Return the case-insensitive distance between two strings.

    Both arguments are lower-cased and handed to ``StringMatcher``; the result
    of its ``distance()`` method is returned unchanged.

    Args:
        string1: First string; must provide ``.lower()``.
        string2: Second string; must provide ``.lower()``.
    """
    lowered_first = string1.lower()
    lowered_second = string2.lower()
    # The leading None fills StringMatcher's first positional parameter
    # (presumably ``isjunk``, as in difflib.SequenceMatcher -- confirm).
    matcher = StringMatcher(None, lowered_first, lowered_second)
    return matcher.distance()