Python edit_distance Examples, edit_distance.edit_distance Python Examples

Example #1

0

Show file

 def test_edit_distance4(self):
     """Check distances when one or both sequences are empty."""
     empty, two_items = [], ['a', 'c']
     # Each case is (first sequence, second sequence, expected result).
     cases = (
         (empty, two_items, (2, 0)),
         (two_items, empty, (2, 0)),
         (empty, empty, (0, 0)),
     )
     for first, second, expected in cases:
         self.assertTrue(edit_distance(first, second) == expected)

Example #2

0

Show file

File: expandedannotation.py Project: machacek/segranks

    def better_worse_without_fuzzy(self, system):
        """Re-rank ``system`` with the rank of its closest other system.

        The closest system is the entry of ``self.system_indexed`` (other
        than ``system`` itself) whose segment gives the smallest
        ``edit_distance`` to the segment of ``system``.  In a copy of the
        index, ``system`` keeps its own segment but takes over that rank.

        Returns a tuple ``(rank_cmp, distance, comparisons)``:

        * ``rank_cmp`` -- -1, 0 or 1, the sign of comparing the closest
          system's rank with the original rank of ``system``.
        * ``distance`` -- what ``edit_distance`` returned for the segment of
          ``system`` and the closest segment.
        * ``comparisons`` -- every ``(system1, system2)`` pair of the
          modified index where ``system1`` has a strictly lower rank.

        A ``KeyError`` or ``ValueError`` raised along the way (for example
        ``system`` is not indexed, or there is no other system to pick
        from) is swallowed on purpose; the values computed so far are
        returned, which initially are ``(0, 0, [])``.
        """
        from edit_distance import edit_distance

        distance = 0
        rank_cmp = 0
        comparisons = []

        try:
            original = self.system_indexed[system]
            original_rank = original.rank
            system_segment = original.segment

            # Look for the nearest neighbour among all *other* systems.
            others = dict(self.system_indexed)
            del others[system]
            closest_system = min(
                others,
                key=lambda name: edit_distance(system_segment, others[name].segment))
            closest_rank = others[closest_system].rank
            closest_segment = others[closest_system].segment

            # Put the system back, now carrying its neighbour's rank.
            others[system] = SegmentRank(segment=system_segment, rank=closest_rank)

            distance = edit_distance(system_segment, closest_segment)

            # Portable replacement for cmp(), which no longer exists in
            # Python 3; int() keeps the subtraction valid for bool-like results.
            rank_cmp = (int(closest_rank > original_rank)
                        - int(closest_rank < original_rank))

            comparisons = [
                (system1, system2)
                for system1, segment_rank1 in others.items()
                for system2, segment_rank2 in others.items()
                if segment_rank1.rank < segment_rank2.rank
            ]
        except (KeyError, ValueError):
            # Best effort: fall through with whatever has been computed.
            pass

        return rank_cmp, distance, comparisons

Example #3

0

Show file

File: evaluation_util.py Project: belambert/asr-tools

def evaluate(ref_table, s):
    """Score sentence ``s`` against its reference and attach the result.

    The reference is looked up in ``ref_table`` by ``s.id_``.  The edit
    distance between the reference words and the sentence words is used
    to build an ``Evaluation``, which is stored on ``s.eval_`` and
    returned.

    Raises ``Exception`` if ``ref_table`` has no entry for ``s.id_``.
    """
    reference = ref_table.get(s.id_)
    if reference is not None:
        distance, matches = edit_distance(reference.words, s.words)
        result = Evaluation(len(reference.words), matches, distance)
        s.eval_ = result
        return result
    raise Exception('No reference loaded for ID: {}'.format(s.id_))

Example #4

0

Show file

 def test_edit_distance1(self):
     """Compare 'ab' with 'acdab': three insertions, then two matches."""
     short = ['a', 'b']
     longer = ['a', 'c', 'd', 'a', 'b']
     expected_opcodes = [
         ['insert', 0, 0, 0, 1],
         ['insert', 0, 0, 1, 2],
         ['insert', 0, 0, 2, 3],
         ['equal', 0, 1, 3, 4],
         ['equal', 1, 2, 4, 5],
     ]
     self.assertTrue(edit_distance(short, longer) == (3, 2))
     self.assertTrue(
         edit_distance_backpointer(short, longer) == (3, 2, expected_opcodes))

Example #5

0

Show file

 def test_edit_distance2(self):
     """Check the minimum-edit alignment of two five-word sentences."""
     first = ['hi', 'my', 'name', 'is', 'andy']
     second = ['hi', "i'm", 'my', "name's", 'sandy']
     expected_opcodes = [
         ['equal', 0, 1, 0, 1],
         ['replace', 1, 2, 1, 2],
         ['replace', 2, 3, 2, 3],
         ['replace', 3, 4, 3, 4],
         ['replace', 4, 5, 4, 5],
     ]
     self.assertTrue(edit_distance(first, second) == (4, 1))
     self.assertTrue(
         edit_distance_backpointer(first, second) == (4, 1, expected_opcodes))

Example #6

0

Show file

 def test_edit_distance_highest_match(self):
     """Check the alignment of two five-word sentences when matches are
     maximised instead of edits being minimised."""
     first = ['hi', 'my', 'name', 'is', 'andy']
     second = ['hi', "i'm", 'my', "name's", 'sandy']
     expected_opcodes = [
         ['equal', 0, 1, 0, 1],
         ['insert', 0, 0, 1, 2],
         ['equal', 1, 2, 2, 3],
         ['delete', 2, 3, 2, 2],
         ['replace', 3, 4, 3, 4],
         ['replace', 4, 5, 4, 5],
     ]
     plain = edit_distance(first, second, action_function=highest_match_action)
     self.assertTrue(plain == (4, 2))
     traced = edit_distance_backpointer(
         first, second, action_function=highest_match_action)
     self.assertTrue(traced == (4, 2, expected_opcodes))

Example #7

0

Show file

File: class_labels.py Project: AkshitaJha/git-store

def extract_labels(fp):
    """Return one class label per non-blank line of ``fp``.

    Every line that is not a bare newline is split on tabs; field 1 is
    the token and field 2 its lemma.  The label appended for the line is
    ``edit_distance(token, lemma)``.
    """
    labels = []
    for line in fp:
        if line == '\n':
            continue
        fields = line.split('\t')
        token, lemma = fields[1], fields[2]
        labels.append(edit_distance(token, lemma))
    return labels

Example #8

0

Show file

File: evaluator.py Project: AIRob/calamari

    def evaluate_single(_sentinel=None, gt='', pred=''):
        """ Compare one ground-truth / prediction pair

        Parameters
        ----------
        _sentinel : must be left as None; rejects positional calls
        gt : ground truth
        pred : prediction

        Returns
        -------
        int
            length of ground truth
        int
            number of errors (first value returned by edit_distance)
        int
            number of synchronisation errors
        dict
            confusions: (gt text, pred text) pair -> occurrence count

        """
        if _sentinel is not None:
            raise Exception('Call this function by specifying gt and pred explicitly')

        errs, _ = edit_distance(gt, pred)

        confusion = {}
        total_sync_errs = 0
        for sync in synchronize([gt, pred]):
            gt_str, pred_str = sync.get_text()
            if gt_str == pred_str:
                continue
            # A differing span costs as much as its longer side.
            total_sync_errs += max(len(gt_str), len(pred_str))
            key = (gt_str, pred_str)
            confusion[key] = confusion.get(key, 0) + 1

        return len(gt), errs, total_sync_errs, confusion

Example #9

0

Show file

File: test.py Project: dmitri-mamrukov/coursera-data-structures-and-algorithms

 def test_with_empty_and_one_char_word(self):
     # An empty word against a single character.
     expected = 1
     self.assertEqual(expected, edit_distance('', 'a'))

Example #10

0

Show file

File: hiragana_distance.py Project: HSunnyKim/NLP

def hiragana_distance(target, source):
    """Return ``edit_distance`` of both strings after ``hira_toroma`` conversion."""
    converted = [hira_toroma(text) for text in (target, source)]
    return edit_distance(*converted)

Example #11

0

Show file

File: test.py Project: dmitri-mamrukov/coursera-data-structures-and-algorithms

 def test_with_same_length_words_with_one_same_char_in_same_position(self):
     # Only the leading 'a' is shared, and it sits at the same index.
     expected = 2
     self.assertEqual(expected, edit_distance('abc', 'ayz'))

Example #12

0

Show file

File: test.py Project: dmitri-mamrukov/coursera-data-structures-and-algorithms

 def test_with_exponential_and_polynomial(self):
     # Two longer upper-case words of different length.
     expected = 6
     self.assertEqual(expected, edit_distance('EXPONENTIAL', 'POLYNOMIAL'))

Example #13

0

Show file

File: test.py Project: dmitri-mamrukov/coursera-data-structures-and-algorithms

 def test_with_empty_and_two_char_word(self):
     # An empty word against a two-character word.
     expected = 2
     self.assertEqual(expected, edit_distance('', 'ab'))

Example #14

0

Show file

File: test.py Project: dmitri-mamrukov/coursera-data-structures-and-algorithms

 def test_with_same_three_char_words(self):
     # Identical words need no edits.
     word = 'abc'
     self.assertEqual(0, edit_distance(word, 'abc'))

Example #15

0

Show file

def test_information_applications():
    """Distance between "information" and "applications" must be 7."""
    result = edit_distance("information", "applications")
    assert result == 7

Example #16

0

Show file

File: test_edit_distance.py Project: arthur-flam/edit-distance

 def test_change(self):
     """Finds a single substituted character."""
     distance = edit_distance("hat", "cat")
     self.assertEqual(distance, 1)

Example #17

0

Show file

File: tests.py Project: abhishekanand10/dataStructurePy3

    f = [[10**9] * (tn + 2) for _ in range(sn + 2)]
    f[0][0] = 0

    def relax(p, q, x):
        f[p][q] = min(f[p][q], x)

    for i in range(sn + 1):
        for j in range(tn + 1):
            if i < sn and j < tn:
                relax(i + 1, j + 1, f[i][j] + (1 if s[i] != t[j] else 0))
            relax(i + 1, j, f[i][j] + 1)
            relax(i, j + 1, f[i][j] + 1)
    return f[sn][tn]


if __name__ == '__main__':
    # Shared suites first, then a comparison against the reference solution.
    run_common_tests()
    check_tests_pass("edit_distance_unit_tests.py")

    all_tests_passed = True

    for first, second in [("abacabadabacabaeabacab", "aeabacabad")]:
        if edit_distance(first, second) == reference(first, second):
            continue
        # Report the first mismatch only and stop checking.
        all_tests_passed = False
        failed("Wrong answer for {} and {}".format(first, second))
        break

    if all_tests_passed:
        passed()

Example #18

0

Show file

def _run_timed_case(label, string_1, string_2):
    """Print both words, their edit distance and the time the call took.

    ``label`` names the run in the timing line (for example ``"test 2"``).
    """
    print("The two words being compared are '", string_1, "' and '", string_2,
          "'.")
    started = time.time()
    print(ed.edit_distance(string_1, string_2, len(string_1), len(string_2)))
    elapsed = time.time() - started
    print('Running time for {} was: '.format(label), elapsed, 'seconds.')
    print("")


def main():
    """Run three fixed edit-distance demos, then optionally one on user input.

    Each demo prints the two words, the value returned by
    ``ed.edit_distance`` and the wall-clock time of that call.  Afterwards
    the user is asked whether to try two words of their own; only the
    exact answers yes/Yes/YES and no/No/NO are recognised, anything else
    prints an error message.
    """
    print("Welcome to the Edit Distance Program!")
    print("")
    print(
        "This program will perform three different tests. Let's get started!")
    print("")

    fixed_cases = (("brand", "random"), ("any", "any"), ("magnificus", ""))
    for number, (string_1, string_2) in enumerate(fixed_cases, start=1):
        print("Test {}: ".format(number))
        # Each run is labelled with its own number in the timing line.
        _run_timed_case("test {}".format(number), string_1, string_2)

    print("Tests completed!")
    print("")
    print("Would you like to try two words for yourself? Yes or No")
    user_selection = input()
    if user_selection in ('yes', 'Yes', 'YES'):
        print("Excellent! Now, which two words would you like to try?")
        print("Word 1: ")
        string_1 = input()
        print("Word 2: ")
        string_2 = input()
        _run_timed_case("your words", string_1, string_2)
        print("Program complete! See you again soon!")
    elif user_selection in ('no', 'No', 'NO'):
        print("Very well.")
        print("Program complete! See you again soon!")
    else:
        print("ERROR! Input invalid!")
        print("Goodbye!")

Example #19

0

Show file

File: test_edit_distance.py Project: arthur-flam/edit-distance

 def test_delete(self):
     """Finds a single removed character."""
     distance = edit_distance("aaab", "aaa")
     self.assertEqual(distance, 1)

Example #20

0

Show file

File: test_edit_distance.py Project: arthur-flam/edit-distance

 def test_equal(self):
     """Identical strings are at distance zero."""
     text = "azerty"
     self.assertEqual(edit_distance(text, "azerty"), 0)

Example #21

0

Show file

File: test_edit_distance.py Project: arthur-flam/edit-distance

 def test_empty(self):
     """Copes with one or both strings being empty."""
     cases = (("", "a", 1), ("a", "", 1), ("", "", 0))
     for first, second, expected in cases:
         self.assertEqual(edit_distance(first, second), expected)

Example #22

0

Show file

File: test.py Project: hokix/jadesoulpp

import edit_distance2
import nltk
from string import ascii_letters

# Benchmark script (Python 2 syntax): times three edit-distance
# implementations on the same pair of random strings.
# NOTE(review): shuffle, edit_distance, time_init and time_gap are not
# imported in the visible part of this file -- confirm where they come from.
if __name__=='__main__':
    # Build the inputs from repeated shuffles of the first ten ASCII letters:
    # s is made of 200 shuffles (2000 chars), t of 100 shuffles (1000 chars).
    l=list(ascii_letters[:10])
    s=''
    for i in xrange(200):
        shuffle(l)
        s+=''.join(l)
    t=''
    for i in xrange(100):
        shuffle(l)
        t+=''.join(l)
    
    # Number of repetitions per implementation.
    n=20
    print len(s), len(t)
    
    # presumably time_gap reports the time since the previous
    # time_init/time_gap call -- TODO confirm against its definition
    time_init()
    for i in xrange(n):
        b=edit_distance.edit_distance(s, t)
    print time_gap('edit_distance.edit_distance')
    
    for i in xrange(n):
        a=edit_distance2.edit_distance(s, t)
    print time_gap('edit_distance2.edit_distance')
    
    for i in xrange(n):
        a=nltk.edit_distance(s, t)
    print time_gap('nltk.edit_distance')

Example #23

0

Show file

def test_editing_distance_1_1_100():
    """With the extra arguments 1, 1, 100 the expected result is 7."""
    costs = (1, 1, 100)
    assert edit_distance("editing", "distance", *costs) == 7

Example #24

0

Show file

 def test_edit_dist_empty_str(self):
     # An empty string against a three-character string.
     distance = edit_distance("", "abc")
     self.assertEqual(distance, 3)

Example #25

0

Show file

def test_editing_distance_1_100_1():
    """With the extra arguments 1, 100, 1 the expected result is 6."""
    costs = (1, 100, 1)
    assert edit_distance("editing", "distance", *costs) == 6

Example #26

0

Show file

 def test_edit_dist_2(self):
     # "editing" versus "distance".
     distance = edit_distance("editing", "distance")
     self.assertEqual(distance, 5)

Example #27

0

Show file

def test_a_b_100_1_1():
    """With the extra arguments 100, 1, 2 the expected result is 2."""
    # NOTE(review): the function name suggests 100, 1, 1 but the call
    # passes 100, 1, 2 -- confirm which of the two was intended.
    costs = (100, 1, 2)
    assert edit_distance("a", "b", *costs) == 2

Example #28

0

Show file

File: test.py Project: dmitri-mamrukov/coursera-data-structures-and-algorithms

 def test_with_three_char_word_and_empty(self):
     # A three-character word against an empty word.
     expected = 3
     self.assertEqual(expected, edit_distance('abc', ''))

Example #29

0

Show file

def test_edit_redit():
    """Distance between "edit" and "redit" must be 1."""
    result = edit_distance("edit", "redit")
    assert result == 1

Example #30

0

Show file

File: test.py Project: dmitri-mamrukov/coursera-data-structures-and-algorithms

 def test_with_words_of_different_chars(self):
     # Same length, no character in common.
     expected = 3
     self.assertEqual(expected, edit_distance('abc', 'xyz'))

Example #31

0

Show file

def test_a_bb_100_1_1():
    """With the extra arguments 100, 1, 1 the expected result is 101."""
    costs = (100, 1, 1)
    assert edit_distance("a", "bb", *costs) == 101

Example #32

0

Show file

File: test.py Project: dmitri-mamrukov/coursera-data-structures-and-algorithms

 def test_with_same_length_words_with_one_same_char_away_by_two(self):
     # The shared 'a' is first in one word and last in the other.
     expected = 3
     self.assertEqual(expected, edit_distance('abc', 'xya'))

Example #33

0

Show file

File: evaluation_util.py Project: belambert/asr-tools

def sentence_editdistance(s1, s2):
    """Return only the distance part of ``edit_distance`` applied to the
    ``words`` of two sentence objects."""
    distance, _unused = edit_distance(s1.words, s2.words)
    return distance

Example #34

0

Show file

File: test.py Project: dmitri-mamrukov/coursera-data-structures-and-algorithms

 def test_with_snowy_and_sunny(self):
     # Two five-letter upper-case words.
     expected = 3
     self.assertEqual(expected, edit_distance('SNOWY', 'SUNNY'))

Example #35

0

Show file

File: test_edit_distance.py Project: mmweber2/adm

def test_edit_one_swap():
    """One differing character ("cat" vs "car") gives distance 1."""
    distance = edit_distance("cat", "car")
    assert_equals(distance, 1)

Example #36

0

Show file

File: test.py Project: dmitri-mamrukov/coursera-data-structures-and-algorithms

 def test_with_atgttata_and_atcgtcc(self):
     # Two upper-case sequences of length 8 and 7.
     expected = 5
     self.assertEqual(expected, edit_distance('ATGTTATA', 'ATCGTCC'))

Example #37

0

Show file

File: test_edit_distance.py Project: mmweber2/adm

def test_edit_one_add():
    """One extra character ("char" vs "chair") gives distance 1."""
    distance = edit_distance("char", "chair")
    assert_equals(distance, 1)

Example #38

0

Show file

    y_pred = np.zeros((t50_num[i]), dtype=np.int32)
    y_v = np.zeros((t50_num[i]), dtype=np.float32)
    for ii in idx:
        y_true[ii] = Y_valid[count]
        y_pred[ii] = Y[count]
        y_v[ii] = Y_value[count]
        count += 1
    acc += sum(y_true == y_pred)
    cc += len(y_true)

    s0 = ""
    for k in range(len(y_true)):
        s0 += map_phone2alpha[map_48to39[map_num2phone[y_true[k]]]]
    s0 = string_compress(s0)

    for it, t in enumerate(threshold):
        s1 = ""
        for k in range(len(y_pred)):
            if y_v[k] > t:
                s1 += map_phone2alpha[map_48to39[map_num2phone[y_pred[k]]]]
        s1 = window_slide(s1, phone_mean_num)
        s1 = string_trim(s1)
        s1 = string_trim2(s1)
        s1 = string_compress(s1)
        dis[it] += edit_distance(s0, s1)

# Accuracy: positions where prediction equals label (acc) over all
# positions counted (cc), both accumulated in the loop above.
print("Acc:", acc / cc)
# Turn the per-threshold distance sums into means over len(t50_num).
dis = dis / len(t50_num)
# One output line per threshold: the threshold and its mean distance.
for i in range(len(threshold)):
    print(threshold[i], dis[i])

Example #39

0

Show file

File: test_edit_distance.py Project: mmweber2/adm

def test_edit_same_string():
    """Identical strings are at distance 0."""
    distance = edit_distance("one", "one")
    assert_equals(distance, 0)

Example #40

0

Show file

def main():
    """Read two whitespace-separated words from one input line and print
    their edit distance."""
    words = input().split()
    start, end = words
    print(edit_distance(start, end))

Example #41

0

Show file

File: test_edit_distance.py Project: mmweber2/adm

def test_edit_two_empty_strings():
    """Two empty strings are at distance 0."""
    distance = edit_distance("", "")
    assert_equals(distance, 0)

Example #42

0

Show file

def test_bc_bbcc_100_1_1():
    """With the extra arguments 100, 1, 1 the expected result is 200."""
    costs = (100, 1, 1)
    assert edit_distance("bc", "bbcc", *costs) == 200

Example #43

0

Show file

def test_ed(s1, s2, expected_edits):
    """Check the per-edit breakdown and the total with transpositions on.

    The total must equal the sum of the values in ``expected_edits``.
    """
    result = edit_distance(s1, s2, transpositions=True)
    ed, actual_edits = result
    assert actual_edits == expected_edits
    expected_total = sum(expected_edits.values())
    assert ed == expected_total

Example #44

0

Show file

def test_digit_redit():
    """Distance between "digit" and "redit" must be 3."""
    result = edit_distance("digit", "redit")
    assert result == 3

Example #45

0

Show file

def word_confusion(atoms_dissimilarity, max_word_len, beta=1, atoms=None):
    """
    Get word-level confusion probability matrix
    from atomic-signals-level dissimilarities
    for all words of length 1 up to max_word_len.

    Dissimilarities are assumed to be numbers between 0 and 1
    (this is asserted).

    Based on computing edit-distances using the atoms_dissimilarity matrix
    as substitution costs (insertions and deletions cost 1) and obtaining
    probabilities by taking exp(-beta*ED(w1, w2)) and renormalising each
    row to sum to 1.
    Use a larger beta to get more peaked confusion probabilities.

    Justification: for same-length words we retrieve (and it's the only way
    to retrieve?) Fletcher's sequential law. Also used in a 1999 (not PNAS)
    paper from one of the Harvard people working on evolutionary dynamics.

    Returns a list of words (built from the atoms argument if it is a list
    of strings, or from the letters 'a', 'b', ... if atoms is None) along
    with the confusion probability matrix between these words such that the
    number on row i column j corresponds to the probability of having word
    j being received when word i was intended.

    Atoms may be longer than one character: the index sequence of every
    word is tracked while the word is built instead of being decoded back
    from the concatenated string.
    """
    assert np.all(atoms_dissimilarity <= 1)
    assert np.all(atoms_dissimilarity >= 0)

    alphabet_size = atoms_dissimilarity.shape[0]  # number of atomic signals
    if atoms is None:
        atoms = [chr(97 + e) for e in range(alphabet_size)]

    insertion_costs = np.ones(alphabet_size)
    deletion_costs = np.ones(alphabet_size)
    substitution_costs = atoms_dissimilarity

    # Enumerate words by increasing length, keeping for each word the tuple
    # of atom indices it was built from (same order as the word list).
    all_words = []
    all_codes = []
    word_list = ['']
    code_list = [()]
    atom_ids = range(len(atoms))
    for _ in range(max_word_len):
        word_list = [word + atom for word in word_list for atom in atoms]
        code_list = [code + (k,) for code in code_list for k in atom_ids]
        all_words.extend(word_list)
        all_codes.extend(np.array(code) for code in code_list)

    # for each possible pair of words compute edit distance
    # there is probably a smart way to pool computations instead of
    # considering each pair of words independently, let's look at it
    # if this part of the code ever becomes a bottleneck
    n_words = len(all_words)
    word_dis = np.empty((n_words, n_words))
    for ia, a in enumerate(all_codes):
        for ib, b in enumerate(all_codes):
            word_dis[ia, ib] = ED.edit_distance(a,
                                                b,
                                                s_cost=substitution_costs,
                                                d_cost=deletion_costs,
                                                i_cost=insertion_costs)

    confusion_probas = np.exp(-beta * word_dis)
    # Divide every row by its own sum (broadcast over the columns).
    row_sums = confusion_probas.sum(axis=1, keepdims=True)
    confusion_probas = confusion_probas / row_sums
    return all_words, confusion_probas

Example #46

0

Show file

File: test_edit_distance.py Project: mmweber2/adm

def test_edit_one_del():
    """One-character length difference ("chat" vs "chaut") gives distance 1."""
    distance = edit_distance("chat", "chaut")
    assert_equals(distance, 1)

Example #47

0

Show file

def test_edit_distance(x, y, expected):
    """Check that ``edit_distance(x, y)`` equals ``expected``."""
    actual = edit_distance(x, y)
    assert actual == expected

Example #48

0

Show file

File: test_edit_distance.py Project: mmweber2/adm

def test_edit_longer_strings():
    """Four edits turn "pineapple" into "penelope"."""
    # Edit chain: pineapple, peneapple, penelpple, penelople, penelope
    distance = edit_distance("pineapple", "penelope")
    assert_equals(distance, 4)

Example #49

0

Show file

map_num2phone, map_48to39, map_phone2alpha = make_map(data_dir)
dis = 0
for k in instance_list:
    # Label sequence: map every entry through the three tables, then compress.
    s0 = string_compress("".join(
        map_phone2alpha[map_48to39[map_num2phone[n]]] for n in Y_valid_dic[k]))

    # Predicted sequence: same mapping, then both trims before compressing.
    s1 = "".join(
        map_phone2alpha[map_48to39[map_num2phone[n]]] for n in Y_dic[k])
    s1 = string_compress(string_trim2(string_trim(s1)))

    dis += edit_distance(s0, s1)

# Mean distance over all instances.
print(dis / len(instance_list))

print("No trim:")
for t in [
        0.55, 0.6, 0.61, 0.62, 0.63, 0.64, 0.65, 0.66, 0.67, 0.68, 0.69, 0.7,
        0.71, 0.73, 0.75, 0.77, 0.8
]:
    dis = 0
    for k in instance_list:
        s0 = ""
        for n in Y_valid_dic[k]:
            s0 += map_phone2alpha[map_48to39[map_num2phone[n]]]
        s0 = string_compress(s0)
        s1 = ""

Example #50

0

Show file

File: test_edit_distance.py Project: mmweber2/adm

def test_edit_one_empty_string():
    """An empty string against "one" gives distance 3."""
    distance = edit_distance("", "one")
    assert_equals(distance, 3)

Example #51

0

Show file

File: test.py Project: dmitri-mamrukov/coursera-data-structures-and-algorithms

 def test_with_short_and_ports(self):
     # Two five-letter words sharing the substring 'ort'.
     expected = 3
     self.assertEqual(expected, edit_distance('short', 'ports'))

Example #52

0

Show file

File: test.py Project: hokix/jadesoulpp

#coding:utf8
import pyximport; pyximport.install()
import edit_distance

if __name__=='__main__':
	# print edit_distance.edit_distance(1, 'abc')
	print edit_distance.edit_distance('1', 'abc')
	print edit_distance.edit_distance('a', 'abc')

Example #53

0

Show file

 def test_edit_dist_1(self):
     # "zettel" versus "yaethel".
     distance = edit_distance("zettel", "yaethel")
     self.assertEqual(distance, 3)