import numpy
import Levenshtein


# Version using a normalized Levenshtein distance and greedy one-to-one matching.
# `phrases` and `to_romaji` are helpers defined elsewhere in the project.
def findMatch(line):
    # Split the line into words, then separate the English phrases from the
    # katakana phrases.
    words = phrases.splitWord(line)
    alphabets = phrases.get_english_phrase(words)
    nonalphabets = phrases.generate_katakana_phrase(words)

    # Romanize each katakana phrase so it can be compared with the English text.
    romanized = [to_romaji(nonalphabet) for nonalphabet in nonalphabets]

    # Levenshtein distance between every (English, romanized katakana) pair,
    # normalized by the length of the shorter string (+1 avoids division by zero).
    dim = (len(alphabets), len(romanized))
    similarity = numpy.zeros(dim)
    for i in range(len(alphabets)):
        for j in range(len(romanized)):
            similarity[i][j] = Levenshtein.distance(alphabets[i], romanized[j]) / (
                float(min(len(alphabets[i]), len(romanized[j]))) + 1)

    ans = []
    # Earlier approach: match each English phrase to its nearest romanization.
    # if dim[1] > 0:
    #     for i in range(dim[0]):
    #         # if min(similarity[i, :]) < 0.5:
    #         j = numpy.argmin(similarity[i, :])
    #         ans.append((alphabets[i], nonalphabets[j], similarity[i][j], line))
    #     return ans

    # Greedy one-to-one matching: repeatedly take the closest remaining pair,
    # then drop its row and column before looking for the next pair.
    for _ in range(min(dim[0], dim[1])):
        row_index, col_index = numpy.unravel_index(similarity.argmin(),
                                                   similarity.shape)
        ans.append((alphabets[row_index], nonalphabets[col_index],
                    similarity[row_index, col_index], line))
        del alphabets[row_index]
        del nonalphabets[col_index]
        similarity = numpy.delete(similarity, row_index, 0)
        similarity = numpy.delete(similarity, col_index, 1)
    return ans
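# The scoring step above can be exercised on its own.  A minimal, hypothetical
# sketch, assuming the python-Levenshtein and romkan packages; this helper and
# the example words are illustrative, not part of the project code.
import Levenshtein
import romkan


def normalized_distance(english, katakana):
    # Levenshtein distance between the English phrase and the romanized
    # katakana, normalized by the shorter length (+1 avoids division by zero).
    romaji = romkan.to_roma(katakana)
    return Levenshtein.distance(english, romaji) / (
        float(min(len(english), len(romaji))) + 1)


# Usage (illustrative): lower scores indicate closer matches, e.g.
#   normalized_distance(u"google", u"グーグル")
#   normalized_distance(u"table", u"グーグル")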
import numpy
import romkan
from scipy.spatial import distance


# Version using the Euclidean distance between vectorized phrases.
# `phrases` and `vectorize` are helpers defined elsewhere in the project.
def findMatch(line):
    # Split the line into words, then separate the English phrases from the
    # katakana phrases.
    words = phrases.splitWord(line)
    alphabets = phrases.get_english_phrase(words)
    nonalphabets = phrases.generate_katakana_phrase(words)

    # Romanize each katakana phrase so it can be compared with the English text.
    romanized = [romkan.to_roma(nonalphabet) for nonalphabet in nonalphabets]

    # Euclidean distance between the vector representations of every
    # (English, romanized katakana) pair.
    dim = (len(alphabets), len(romanized))
    similarity = numpy.zeros(dim)
    for i in range(len(alphabets)):
        for j in range(len(romanized)):
            similarity[i][j] = distance.euclidean(vectorize(alphabets[i]),
                                                  vectorize(romanized[j]))

    # Keep, for each English phrase, the closest katakana phrase whose
    # distance falls below the 0.5 threshold.
    ans = []
    if dim[1] > 0:
        for i in range(dim[0]):
            if min(similarity[i, :]) < 0.5:
                j = numpy.argmin(similarity[i, :])
                ans.append((alphabets[i], nonalphabets[j], line))
    return ans
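# `vectorize` is not shown above.  A purely hypothetical stand-in, for
# illustration only: a relative letter-frequency vector over a-z, which yields
# fixed-length vectors that scipy's distance.euclidean can compare.
import string

import numpy
from scipy.spatial import distance


def vectorize(text):
    # Hypothetical sketch: 26-dimensional relative letter-frequency vector.
    counts = numpy.array([text.lower().count(c) for c in string.ascii_lowercase],
                         dtype=float)
    total = counts.sum()
    return counts / total if total > 0 else counts


# Usage (illustrative):
#   distance.euclidean(vectorize("google"), vectorize("guguru"))
#   distance.euclidean(vectorize("google"), vectorize("keyboard"))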