def main():
    """Run the distance function selected by the parsed arguments.

    The vocabulary dictionary is built from the text file named by
    ``args['vocabulary']``. The value returned by ``get_misspelling()`` is
    then handed to the implementation chosen by ``args['distance']``; each
    implementation module is imported only when its branch is taken.

    Raises:
        Exception: if ``args['distance']`` is not one of ``'levenshtein'``,
            ``'levenshtein2'``, ``'hamming'`` or ``'jarowinkler'``.
    """
    args = get_args()

    # Build the dictionary from the vocabulary text file.
    vocabulary = create_dictionary(args['vocabulary'])

    metric = args['distance']

    if metric == 'levenshtein':
        # Levenshtein distance
        from levenshtein import levenshtein_distance
        # NOTE(review): this branch passes the literal 'lexique.json' instead
        # of the vocabulary built above -- confirm that this is intended.
        levenshtein_distance(get_misspelling(), 'lexique.json')
        return

    if metric == 'levenshtein2':
        # Levenshtein distance using another library
        from levenshtein_v2 import levenshtein_distance2
        levenshtein_distance2(get_misspelling(), vocabulary)
        return

    if metric == 'hamming':
        # Hamming distance
        from hamming import hamming_distance
        hamming_distance(get_misspelling(), vocabulary)
        return

    if metric == 'jarowinkler':
        # Jaro-Winkler
        from jarowinkler import jarowinkler_distance
        jarowinkler_distance(get_misspelling(), vocabulary)
        return

    raise Exception("Unknown distance function : {}".format(metric))
def calc_levenshtein_distance(self) -> float:
    """Return the mean Levenshtein distance between paired entries.

    Every entry of ``self.lang1_list`` is compared with the entry stored
    under the same key in ``self.lang2_list``. ``self.interchangeables`` is
    forwarded unchanged as the third argument of
    ``levenshtein.levenshtein_distance``.

    Returns:
        The ``numpy.average`` of the per-key distances. This is a
        floating-point value, which is why the annotation is ``float``
        rather than ``int``.
    """
    distances = [
        levenshtein.levenshtein_distance(
            entry, self.lang2_list[key], self.interchangeables)
        for key, entry in self.lang1_list.items()
    ]
    return numpy.average(distances)
def load_err_statistics(self, err_filename):
    """Load misspelling statistics and convert them to relative frequencies.

    Each non-blank line of the UTF-8 file ``err_filename`` is split on
    ``';'`` into two fields, which are passed to ``levenshtein_distance``.
    The occurrences of every distance are counted in ``self.p_wc`` under the
    key ``dist * 4`` and then divided by the number of loaded pairs. Finally
    ``self.p_wc[0]`` is forced to 1.

    Args:
        err_filename: path of the ``;``-separated error file.

    Raises:
        ValueError: if a non-blank line does not split into exactly two
            fields.
    """
    with codecs.open(err_filename, encoding='utf-8') as errors_file:
        # Blank lines (e.g. a trailing newline at the end of the file) carry
        # no pair and would break the two-value unpacking below.
        errors = [
            line.strip().split(';')
            for line in errors_file
            if line.strip()
        ]

    for w, c in errors:
        dist = levenshtein_distance(w, c)
        # NOTE(review): counts are keyed by four times the distance; confirm
        # that the readers of self.p_wc use the same scaling.
        self.p_wc[dist * 4] += 1

    total = len(errors)
    # The keys are already scaled, so each count is normalised in place under
    # its own key. Iterating over a snapshot keeps the mapping from being
    # modified while it is traversed.
    for key, count in list(self.p_wc.items()):
        self.p_wc[key] = float(count) / total

    self.p_wc[0] = 1
def correct_error(self, w):
    """Rank the known words that lie within one or two edits of ``w``.

    The candidates are the results of ``self.edits(w)``; for inputs longer
    than four characters the edits of those edits are considered as well.
    Only candidates contained in ``self.words`` are scored.

    Args:
        w: the word to correct.

    Returns:
        A list of ``(candidate, score)`` tuples sorted by descending score,
        where the score is ``self.calc_p_wc(distance) * self.p_c[candidate]``
        and ``distance`` is ``levenshtein_distance(w, candidate)``.
    """
    near = self.edits(w)
    if len(w) > 4:
        # Second-order edits are only generated for longer words.
        far = {second for first in near for second in self.edits(first)}
        pool = near | far
    else:
        pool = near

    scored = []
    for word in pool:
        if word not in self.words:
            continue
        distance = levenshtein_distance(w, word)
        scored.append((word, self.calc_p_wc(distance) * self.p_c[word]))

    scored.sort(key=lambda pair: pair[1], reverse=True)
    return scored
def _exit_with_error(*parts):
    """Write ``parts`` to stderr separated by single spaces, then exit with -1."""
    sys.stderr.write(" ".join(str(part) for part in parts) + "\n")
    sys.exit(-1)


def _size_or_exit(file_name):
    """Return the size of ``file_name`` in bytes, or report the failure and exit."""
    try:
        return os.path.getsize(file_name)
    except OSError:
        _exit_with_error("ERROR: Unable to access ", file_name)


def _read_or_exit(file_name):
    """Return the contents of ``file_name``, or report the failure and exit."""
    try:
        # The context manager closes the file on every path and never touches
        # an unbound name when open() itself fails.
        with open(file_name, "r") as handle:
            return handle.read()
    except IOError:
        _exit_with_error("ERROR: Unable to open ", file_name)


def imagediff(method, file_name1, file_name2):
    """Return a normalised difference between two files.

    Args:
        method: ``"file size"``, ``"levenshtein"`` or ``"hamming"``.
        file_name1: path of the first file.
        file_name2: path of the second file.

    Returns:
        * ``"file size"``: absolute size difference divided by the larger
          size.
        * ``"levenshtein"``: ``levenshtein_distance`` of the two contents
          divided by the longer length.
        * ``"hamming"``: ``hamming_distance`` of the two contents divided by
          the shorter length.

        Whenever the divisor is zero the result is 1 for every method.

    Raises:
        SystemExit: with status -1 (after a message on stderr) if either
            file cannot be accessed or opened, or if ``method`` is unknown.
    """
    if method == "file size":
        # Both lookups exit on failure, so neither size can be left unbound.
        size1 = _size_or_exit(file_name1)
        size2 = _size_or_exit(file_name2)
        try:
            return float(abs(size1 - size2)) / max(size1, size2)
        except ZeroDivisionError:
            # Two empty files: same convention as the content-based methods.
            return 1

    string1 = _read_or_exit(file_name1)
    string2 = _read_or_exit(file_name2)

    if method == "levenshtein":
        try:
            return (float(levenshtein_distance(string1, string2))
                    / max(len(string1), len(string2)))
        except ZeroDivisionError:
            return 1
    elif method == "hamming":
        try:
            return (float(hamming_distance(string1, string2))
                    / min(len(string1), len(string2)))
        except ZeroDivisionError:
            return 1
    else:
        _exit_with_error("ERROR: Invalid method.")
def main():
    """Run the demo calls of this module one after another.

    Covers histogram normalisation, the discrete Fourier transform, a
    convolution filter, a comparison of Levenshtein distances on a set of
    sample sentences, and the rotation-matrix / quaternion helpers. Results
    of the last two groups are printed.

    Every print uses the parenthesised single-argument form so the function
    behaves the same under Python 2 and Python 3.
    """
    # Histogram normalization
    stretch_histogram("imgs/family-bad_contrast.jpg")
    stretch_histogram_quantile("imgs/family-bad_contrast.jpg")
    equalize_histogram("imgs/family-bad_contrast.jpg")

    # Fourier transformation
    discrete_fourier_transform("imgs/waterfall_jam.jpg")

    # Apply filters
    convolve("imgs/waterfall_jam.jpg", laplace_alt())

    # Levenshtein distance
    sentences = [
        'if there is no rain in April you will have a great summer',
        'no rain in april then great summer come',
        'there is rain in April you have summer',
        'in April no rain you have summer great',
        'there is no rain in apple a great summer comes',
        'you have a great summer comes if there is no rain in April',
    ]
    tokenized = [sentence.split() for sentence in sentences]

    # The first sentence is the reference; the others are compared to it as
    # word lists, as raw strings, and as word lists with a third argument of
    # 2 (labelled "double-punishment for substitutions" in the message).
    word_distances = [
        levenshtein_distance(tokenized[0], words) for words in tokenized[1:]
    ]
    char_distances = [
        levenshtein_distance(sentences[0], text) for text in sentences[1:]
    ]
    double_substitution_distances = [
        levenshtein_distance(tokenized[0], words, 2)
        for words in tokenized[1:]
    ]
    print(
        "The word edit distances are %s ,the character distances are %s "
        "and the word distances having double-punishment for substitutions "
        "are %s" % (
            word_distances,
            char_distances,
            double_substitution_distances,
        )
    )

    # Rotation matrix around x with 180 degrees
    mat = rotation_matrix(180, 0, 0)
    print("Rotation matrix: %s" % mat)

    # Get quaternion from existing rotation matrix
    m = np.array([[0, 0.5 * np.sqrt(3), 0.5],
                  [0, -0.5, 0.5 * np.sqrt(3)],
                  [1, 0, 0]])  # matrix of kogsys exam SS2012 task 2
    quaternion = rotation_matrix_to_quaternion(m)
    print("Quaternion: %s" % (quaternion, ))
def test_levenshtein_distance(self):
    """Check ``levenshtein.levenshtein_distance`` against every fixture triple.

    Each item of ``self.lev_strings_to_test`` unpacks into the two inputs
    and the distance expected for them.
    """
    for first, second, expected in self.lev_strings_to_test:
        actual = levenshtein.levenshtein_distance(first, second)
        self.assertEqual(actual, expected)
#!/usr/bin/python
# Smoke tests for the Levenshtein implementations of this project.
# Every check prints True when the implementation returns the expected
# distance and False otherwise.
from levenshtein import levenshtein_distance
from levenshtein_reduced_memory import levenshtein_distance as l_distance
from damerau_levenshtein import damerau_levenshtein_distance
from damerau_levenshtein_reduced_memory import damerau_levenshtein_distance as dl_distance
from reduced_levenshtein import levenshtein
from dl import levenshtein as l_dist

print("Testing Levenshtein distance")
print(levenshtein_distance("hello", "hell") == 1)
print(levenshtein_distance("aie", "aei") == 2)
print(levenshtein_distance("aie", "aei") == 2)

#print("Testing Damerau-Levenshtein distance")
#print(damerau_levenshtein_distance("hello", "hell") == 1)
#print(damerau_levenshtein_distance("aie", "aei") == 1) # Testing transposition working

# This section exercises reduced_levenshtein.levenshtein, the one imported
# implementation no other section covers; it used to re-run the checks of
# the first section against levenshtein_distance.
print("Testing Levenshtein distance, pythonic reduced memory algorithm")
print(levenshtein("hello", "hell") == 1)
print(levenshtein("aie", "aei") == 2)
print(levenshtein("aie", "aei") == 2)

print("Testing Levenshtein distance, pythonic reduced memory algorithm other")
print(l_dist("hello", "hell") == 1)
print(l_dist("aie", "aei") == 2)
print(l_dist("aie", "aei") == 2)

print("Testing Levenshtein distance, reduced memory algorithm")
print(l_distance("hello", "hell") == 1)
print(l_distance("aie", "aei") == 2)
def p_wc(word, correct):
    """Score how close ``word`` is to ``correct``.

    The score is ``1 - distance / len(word)``, where ``distance`` is
    ``levenshtein_distance(word, correct)``. It is 1 for identical inputs
    and drops below 0 when the distance exceeds the length of ``word``.

    An empty ``word`` has no length to normalise by, so it is handled up
    front: the score is 1.0 when ``correct`` is empty as well and 0.0
    otherwise.

    Args:
        word: the observed (possibly misspelled) word.
        correct: the candidate correction.

    Returns:
        The score as a float.
    """
    if not word:
        # Avoids the division by len(word) == 0 below.
        return 1.0 if not correct else 0.0
    dist = levenshtein_distance(word, correct)
    return 1 - float(dist) / len(word)