Example #1
0
def main():
    """Dispatch to the distance routine named by ``args['distance']``.

    The arguments come from ``get_args()``. A vocabulary is built with
    ``create_dictionary`` from the path in ``args['vocabulary']``, and the
    value returned by ``get_misspelling()`` is handed to the selected
    routine.

    Raises:
        Exception: if ``args['distance']`` is not one of ``levenshtein``,
            ``levenshtein2``, ``hamming`` or ``jarowinkler``.
    """
    args = get_args()

    # Build the dictionary from the vocabulary text file.
    vocabulary = create_dictionary(args['vocabulary'])

    # Each distance lives in its own module and is imported only when it is
    # actually selected; the branch picks the routine and its second argument.
    selected = args['distance']
    if selected == 'levenshtein':
        from levenshtein import levenshtein_distance as compute
        # NOTE(review): this branch passes the literal file name rather than
        # the vocabulary built above, unlike the other branches -- confirm
        # that this is intended.
        reference = 'lexique.json'
    elif selected == 'levenshtein2':
        # Levenshtein distance using another library
        from levenshtein_v2 import levenshtein_distance2 as compute
        reference = vocabulary
    elif selected == 'hamming':
        from hamming import hamming_distance as compute
        reference = vocabulary
    elif selected == 'jarowinkler':
        from jarowinkler import jarowinkler_distance as compute
        reference = vocabulary
    else:
        raise Exception("Unknown distance function : {}".format(selected))

    compute(get_misspelling(), reference)
 def calc_levenshtein_distance(self) -> float:
     """Return the average Levenshtein distance over the keys of ``lang1_list``.

     For every key of ``self.lang1_list`` the entries stored under that key
     in ``self.lang1_list`` and ``self.lang2_list`` are compared with
     ``levenshtein.levenshtein_distance``, which also receives
     ``self.interchangeables``.

     Returns:
         The result of ``numpy.average`` over the per-key distances. This is
         a floating-point value, not an ``int``.

     Raises:
         KeyError: presumably, if a key of ``lang1_list`` is missing from
             ``lang2_list`` (depends on the container type -- confirm).
     """
     # ``.keys()`` is kept on purpose: the container type is not visible
     # here, and plain iteration is not guaranteed to yield keys.
     distances = [
         levenshtein.levenshtein_distance(self.lang1_list[key],
                                          self.lang2_list[key],
                                          self.interchangeables)
         for key in self.lang1_list.keys()
     ]
     return numpy.average(distances)
Example #3
0
    def load_err_statistics(self, err_filename):
        """Fill ``self.p_wc`` with relative frequencies read from an error file.

        The file is read as UTF-8. Every non-blank line is stripped and split
        on ``';'`` into exactly two fields, which are passed to
        ``levenshtein_distance``. The bucket ``distance * 4`` of ``self.p_wc``
        is incremented for each pair, then every bucket is divided by the
        number of pairs. Finally bucket ``0`` is set to ``1``.

        Args:
            err_filename: path of the ``;``-separated error file.

        Raises:
            ValueError: if a non-blank line does not split into exactly two
                fields.
        """
        with codecs.open(err_filename, encoding='utf-8') as errors_file:
            # Blank lines (e.g. a trailing newline at the end of the file)
            # carry no pair and would otherwise break the unpacking below.
            errors = [
                line.strip().split(';') for line in errors_file.readlines()
                if line.strip()
            ]

        # Assumes self.p_wc is counter-like (``+= 1`` on unseen keys), e.g. a
        # defaultdict(int) -- TODO confirm against the constructor.
        # NOTE(review): the bucket index is the distance scaled by 4; verify
        # that readers of p_wc use the same scaling.
        for w, c in errors:
            dist = levenshtein_distance(w, c)
            self.p_wc[dist * 4] += 1

        # Normalise every bucket in place. Iterating over a snapshot of the
        # keys and writing back to the same key keeps the mapping's size
        # stable and turns each count into a relative frequency.
        for bucket in list(self.p_wc):
            self.p_wc[bucket] = float(self.p_wc[bucket]) / len(errors)

        self.p_wc[0] = 1
Example #4
0
    def correct_error(self, w):
        """Return ``(candidate, score)`` pairs for ``w``, highest score first.

        Candidates are the results of ``self.edits(w)`` and, when ``w`` is
        longer than four characters, also the edits of those edits. Only
        candidates contained in ``self.words`` are kept. Each one is scored
        as ``self.calc_p_wc(distance) * self.p_c[candidate]``, where
        ``distance`` is ``levenshtein_distance(w, candidate)``.

        Assumes ``self.edits`` returns a set, since ``|`` is applied to its
        result -- confirm against its definition.
        """
        one_away = self.edits(w)
        if len(w) > 4:
            # Longer words are also allowed a second round of edits.
            two_away = {far for near in one_away for far in self.edits(near)}
            pool = one_away | two_away
        else:
            pool = one_away

        scored = []
        for candidate in pool:
            if candidate not in self.words:
                continue
            distance = levenshtein_distance(w, candidate)
            score = self.calc_p_wc(distance) * self.p_c[candidate]
            scored.append((candidate, score))

        scored.sort(key=lambda pair: pair[1], reverse=True)
        return scored
def _file_size_or_exit(path):
    """Return the size of *path* in bytes; report the failure and exit otherwise."""
    try:
        return os.path.getsize(path)
    except OSError:
        # The double space reproduces the output of the former two-argument
        # ``print`` statement.
        sys.stderr.write("ERROR: Unable to access  %s\n" % (path,))
        sys.exit(-1)


def _read_text_or_exit(path):
    """Return the whole content of *path*; report the failure and exit otherwise."""
    try:
        # The context manager closes the file even when ``read`` fails, and
        # never touches an unbound handle when ``open`` itself fails.
        with open(path, "r") as handle:
            return handle.read()
    except IOError:
        sys.stderr.write("ERROR: Unable to open  %s\n" % (path,))
        sys.exit(-1)


def imagediff(method, file_name1, file_name2):
    """Return a normalised difference between two files.

    Args:
        method: ``"file size"``, ``"levenshtein"`` or ``"hamming"``.
        file_name1: path of the first file.
        file_name2: path of the second file.

    Returns:
        * ``"file size"``: absolute size difference divided by the larger
          size.
        * ``"levenshtein"``: ``levenshtein_distance`` of the two file
          contents divided by the longer length.
        * ``"hamming"``: ``hamming_distance`` of the two file contents
          divided by the shorter length.

        Whenever the divisor is zero the function returns ``1``.

    Raises:
        SystemExit: with status ``-1`` (after a message on stderr) when a
            file cannot be accessed or opened, or when *method* is not one
            of the three supported names.
    """
    if method == "file size":
        size1 = _file_size_or_exit(file_name1)
        size2 = _file_size_or_exit(file_name2)
        try:
            return float(abs(size1 - size2)) / max(size1, size2)
        except ZeroDivisionError:
            # Two empty files: same fallback as the other methods below.
            return 1

    string1 = _read_text_or_exit(file_name1)
    string2 = _read_text_or_exit(file_name2)

    if method == "levenshtein":
        try:
            return (float(levenshtein_distance(string1, string2))
                    / max(len(string1), len(string2)))
        except ZeroDivisionError:
            return 1
    elif method == "hamming":
        try:
            return (float(hamming_distance(string1, string2))
                    / min(len(string1), len(string2)))
        except ZeroDivisionError:
            return 1
    else:
        sys.stderr.write("ERROR: Invalid method.\n")
        sys.exit(-1)
Example #6
0
def main():
    """Run the demo steps in order and print their results.

    The steps are: histogram normalisation, a discrete Fourier transform, a
    convolution with the ``laplace_alt()`` kernel, several Levenshtein
    distances between example sentences, and a rotation-matrix /
    quaternion conversion.
    """
    # Histogram normalization
    stretch_histogram("imgs/family-bad_contrast.jpg")
    stretch_histogram_quantile("imgs/family-bad_contrast.jpg")
    equalize_histogram("imgs/family-bad_contrast.jpg")

    # Fourier transformation
    discrete_fourier_transform("imgs/waterfall_jam.jpg")

    # Apply filters
    convolve("imgs/waterfall_jam.jpg", laplace_alt())

    # Levenshtein distance: the first sentence is the reference, every other
    # sentence is compared against it.
    s = [
        'if there is no rain in April you will have a great summer',
        'no rain in april then great summer come',
        'there is rain in April you have summer',
        'in April no rain you have summer great',
        'there is no rain in apple a great summer comes',
        'you have a great summer comes if there is no rain in April'
    ]
    t = [sentence.split() for sentence in s]

    # Token lists give word-level distances, raw strings give
    # character-level distances; the extra argument 2 is used for the
    # "double-punishment for substitutions" variant named in the message.
    word_distances = [levenshtein_distance(t[0], words) for words in t[1:]]
    char_distances = [levenshtein_distance(s[0], text) for text in s[1:]]
    double_sub_distances = [
        levenshtein_distance(t[0], words, 2) for words in t[1:]
    ]
    # print() with one parenthesised argument behaves the same on Python 2
    # and 3 and matches the other print calls in this function.
    print("The word edit distances are %s ,the character distances are %s "
          "and the word distances having double-punishment for "
          "substitutions are %s" % (word_distances, char_distances,
                                    double_sub_distances))

    # Rotation matrix around x with 180 degrees
    mat = rotation_matrix(180, 0, 0)
    print("Rotation matrix: %s" % mat)

    # Get quaternion from existing rotation matrix
    m = np.array([[0, 0.5 * np.sqrt(3), 0.5], [0, -0.5, 0.5 * np.sqrt(3)],
                  [1, 0, 0]])  # matrix of kogsys exam SS2012 task 2
    quaternion = rotation_matrix_to_quaternion(m)
    print("Quaternion: %s" % (quaternion, ))
 def test_levenshtein_distance(self):
     """Check ``levenshtein.levenshtein_distance`` against every stored case.

     Each entry of ``self.lev_strings_to_test`` is a triple of two inputs
     and the distance expected for them.
     """
     for first, second, expected in self.lev_strings_to_test:
         actual = levenshtein.levenshtein_distance(first, second)
         self.assertEqual(actual, expected)
#!/usr/bin/python

from levenshtein import levenshtein_distance
from levenshtein_reduced_memory import levenshtein_distance as l_distance
from damerau_levenshtein import damerau_levenshtein_distance
from damerau_levenshtein_reduced_memory import damerau_levenshtein_distance as dl_distance
from reduced_levenshtein import levenshtein
from dl import levenshtein as l_dist

# Smoke tests: each section prints a title followed by one True/False line
# per check, exercising one of the implementations imported above.

# levenshtein.levenshtein_distance
print("Testing Levenshtein distance")
print(levenshtein_distance("hello", "hell") == 1)
print(levenshtein_distance("aie", "aei") == 2)
print(levenshtein_distance("aie", "aei") == 2)

# Damerau-Levenshtein checks are currently disabled.
#print("Testing Damerau-Levenshtein distance")
#print(damerau_levenshtein_distance("hello", "hell") == 1)
#print(damerau_levenshtein_distance("aie", "aei") == 1)  # Testing transposition working

# reduced_levenshtein.levenshtein -- this section must call the function
# imported from reduced_levenshtein, not the reference implementation again.
print("Testing Levenshtein distance, pythonic reduced memory algorithm")
print(levenshtein("hello", "hell") == 1)
print(levenshtein("aie", "aei") == 2)
print(levenshtein("aie", "aei") == 2)

# dl.levenshtein
print("Testing Levenshtein distance, pythonic reduced memory algorithm other")
print(l_dist("hello", "hell") == 1)
print(l_dist("aie", "aei") == 2)
print(l_dist("aie", "aei") == 2)

# levenshtein_reduced_memory.levenshtein_distance
print("Testing Levenshtein distance, reduced memory algorithm")
print(l_distance("hello", "hell") == 1)
print(l_distance("aie", "aei") == 2)
Example #9
0
def p_wc(word, correct):
    """Score ``word`` against ``correct`` as one minus a normalised distance.

    The value is ``1 - levenshtein_distance(word, correct) / len(word)``,
    computed in floating point.

    Raises:
        ZeroDivisionError: if ``word`` is empty.
    """
    edit_cost = float(levenshtein_distance(word, correct))
    return 1 - edit_cost / len(word)