Example #1
0
 def test_edit_distance4(self):
     """Test edit distance against an empty list."""
     a = []
     b = ['a', 'c']
     # assertEqual reports both compared values on failure, whereas
     # assertTrue(x == y) only reports "False is not true".
     self.assertEqual(edit_distance(a, b), (2, 0))
     self.assertEqual(edit_distance(b, a), (2, 0))
     self.assertEqual(edit_distance(a, a), (0, 0))
Example #2
0
    def better_worse_without_fuzzy(self, system):
        """Re-rank ``system`` using the rank of its closest other system.

        The closest system is the other entry of ``self.system_indexed``
        whose segment minimises ``edit_distance`` to the segment of
        ``system``. A copy of the index is built in which ``system`` carries
        that closest rank, and all ordered pairs of systems with a strictly
        lower first rank are collected from the copy.

        Returns a tuple ``(rank_cmp, distance, comparisons)``:

        * ``rank_cmp`` -- -1, 0 or 1 depending on whether the closest rank is
          lower than, equal to or greater than the original rank of ``system``;
        * ``distance`` -- result of ``edit_distance`` between the segment of
          ``system`` and the closest segment;
        * ``comparisons`` -- list of ``(system1, system2)`` pairs where
          ``system1`` has a strictly lower rank than ``system2``.

        If ``system`` is unknown or there is no other system to compare with,
        the defaults ``(0, 0, [])`` are returned.
        """
        from edit_distance import edit_distance

        distance = 0
        rank_cmp = 0
        comparisons = []

        try:
            original = self.system_indexed[system]
            original_rank = original.rank
            system_segment = original.segment

            system_indexed_copy = dict(self.system_indexed)
            del system_indexed_copy[system]

            # min() raises ValueError when no other system is left.
            closest_system = min(
                system_indexed_copy,
                key=lambda x: edit_distance(system_segment,
                                            system_indexed_copy[x].segment))
            closest = system_indexed_copy[closest_system]
            closest_rank = closest.rank
            system_indexed_copy[system] = SegmentRank(segment=system_segment,
                                                      rank=closest_rank)

            distance = edit_distance(system_segment, closest.segment)

            # Three-way comparison that works on Python 2 and 3; the cmp()
            # builtin was removed in Python 3.
            rank_cmp = ((closest_rank > original_rank)
                        - (closest_rank < original_rank))

            for system1, segment_rank1 in system_indexed_copy.items():
                for system2, segment_rank2 in system_indexed_copy.items():
                    if segment_rank1.rank < segment_rank2.rank:
                        comparisons.append((system1, system2))
        except (KeyError, ValueError):
            # Deliberate best effort: an unknown system (KeyError) or an
            # empty candidate set (ValueError) leaves the defaults in place.
            pass

        return rank_cmp, distance, comparisons
Example #3
0
def evaluate(ref_table, s):
    """Build the Evaluation of sentence ``s`` against its reference.

    The reference is looked up in ``ref_table`` by ``s.id_``. The resulting
    Evaluation is stored on the sentence as ``s.eval_`` and returned.
    Raises Exception when no reference exists for that ID.
    """
    reference = ref_table.get(s.id_)
    if reference is None:
        raise Exception('No reference loaded for ID: {}'.format(s.id_))

    reference_words = reference.words
    distance, matches = edit_distance(reference_words, s.words)
    evaluation = Evaluation(len(reference_words), matches, distance)
    s.eval_ = evaluation
    return evaluation
Example #4
0
 def test_edit_distance1(self):
     """Test edit distance between 'ab' and 'acdab'."""
     a = ['a', 'b']
     b = ['a', 'c', 'd', 'a', 'b']
     # assertEqual reports both compared values on failure, whereas
     # assertTrue(x == y) only reports "False is not true".
     self.assertEqual(edit_distance(a, b), (3, 2))
     bp_expected_result = (3, 2, [['insert', 0, 0, 0, 1],
                                  ['insert', 0, 0, 1, 2],
                                  ['insert', 0, 0, 2, 3],
                                  ['equal', 0, 1, 3, 4],
                                  ['equal', 1, 2, 4, 5]])
     self.assertEqual(edit_distance_backpointer(a, b), bp_expected_result)
Example #5
0
 def test_edit_distance2(self):
     """Test edit distance for 'hi my name is andy'."""
     a = ['hi', 'my', 'name', 'is', 'andy']
     b = ['hi', "i'm", 'my', "name's", 'sandy']
     # assertEqual reports both compared values on failure, whereas
     # assertTrue(x == y) only reports "False is not true".
     self.assertEqual(edit_distance(a, b), (4, 1))
     bp_expected_result = (4, 1, [['equal', 0, 1, 0, 1],
                                  ['replace', 1, 2, 1, 2],
                                  ['replace', 2, 3, 2, 3],
                                  ['replace', 3, 4, 3, 4],
                                  ['replace', 4, 5, 4, 5]])
     self.assertEqual(edit_distance_backpointer(a, b), bp_expected_result)
Example #6
0
 def test_edit_distance_highest_match(self):
     """Test edit distance for 'hi my name is andy', maximizing matches rather than
     minimizing edits."""
     a = ['hi', 'my', 'name', 'is', 'andy']
     b = ['hi', "i'm", 'my', "name's", 'sandy']
     # assertEqual reports both compared values on failure, whereas
     # assertTrue(x == y) only reports "False is not true".
     self.assertEqual(
         edit_distance(a, b, action_function=highest_match_action), (4, 2))
     bp_expected_result = (4, 2, [['equal', 0, 1, 0, 1],
                                  ['insert', 0, 0, 1, 2],
                                  ['equal', 1, 2, 2, 3],
                                  ['delete', 2, 3, 2, 2],
                                  ['replace', 3, 4, 3, 4],
                                  ['replace', 4, 5, 4, 5]])
     self.assertEqual(
         edit_distance_backpointer(a, b, action_function=highest_match_action),
         bp_expected_result)
Example #7
0
def extract_labels(fp):
    """Return one class label per non-blank line of a tab-separated input.

    ``fp`` is any iterable of text lines. Each non-blank line is split on
    tabs; the second field is taken as the token and the third as its lemma,
    and the label is ``edit_distance(token, lemma)``.

    Blank lines are skipped, whether they are ``'\\n'``, ``'\\r\\n'`` or an
    empty string. A non-blank line with fewer than three fields raises
    IndexError.
    """
    cl_labels = []
    for line in fp:
        # Checking only for '\n' would let '\r\n' or '' through and crash on
        # the column lookup below.
        if not line.rstrip('\r\n'):
            continue

        fields = line.split('\t')
        lemma = fields[2]
        token = fields[1]

        # NOTE(review): if the lemma is the last column of a line it still
        # carries the trailing newline here -- confirm against the data format.
        cl_labels.append(edit_distance(token, lemma))

    return cl_labels
Example #8
0
    def evaluate_single(_sentinel=None, gt='', pred=''):
        """ Evaluate a single ground truth / prediction pair

        Parameters
        ----------
        _sentinel : None
            guard argument that must stay unset, forcing gt and pred to be
            passed by keyword
        gt : ground truth
        pred : prediction

        Returns
        -------
        int
            length of ground truth
        int
            number of errors
        int
            number of synchronisation errors
        dict
            confusions dictionary

        """
        if _sentinel is not None:
            raise Exception('Call this function by specifying gt and pred explicitly')

        # Only the first value returned by edit_distance is used.
        errs, _ = edit_distance(gt, pred)

        confusion = {}
        total_sync_errs = 0
        for sync in synchronize([gt, pred]):
            gt_str, pred_str = sync.get_text()
            if gt_str == pred_str:
                continue
            # Each differing pair costs the length of its longer side and is
            # counted in the confusion table.
            total_sync_errs += max(len(gt_str), len(pred_str))
            pair = (gt_str, pred_str)
            confusion[pair] = confusion.get(pair, 0) + 1

        return len(gt), errs, total_sync_errs, confusion
 def test_with_empty_and_one_char_word(self):
     # Empty word versus a one-character word: expected distance is 1.
     observed = edit_distance('', 'a')
     self.assertEqual(1, observed)
Example #10
0
def hiragana_distance(target,source):
    """Return the edit distance between the converted forms of two strings.

    Both arguments are passed through ``hira_toroma`` (presumably a
    hiragana-to-romaji conversion -- confirm) before being compared.
    """
    return edit_distance(hira_toroma(target), hira_toroma(source))
 def test_with_same_length_words_with_one_same_char_in_same_position(self):
     # 'abc' and 'ayz' share only their first character: expected distance 2.
     observed = edit_distance('abc', 'ayz')
     self.assertEqual(2, observed)
 def test_with_exponential_and_polynomial(self):
     # Expected distance between 'EXPONENTIAL' and 'POLYNOMIAL' is 6.
     observed = edit_distance('EXPONENTIAL', 'POLYNOMIAL')
     self.assertEqual(6, observed)
 def test_with_empty_and_two_char_word(self):
     # Empty word versus a two-character word: expected distance is 2.
     observed = edit_distance('', 'ab')
     self.assertEqual(2, observed)
 def test_with_same_three_char_words(self):
     # Identical words: expected distance is 0.
     word = 'abc'
     observed = edit_distance(word, 'abc')
     self.assertEqual(0, observed)
Example #15
0
def test_information_applications():
    """Expect a distance of 7 between "information" and "applications"."""
    source, target = "information", "applications"
    assert edit_distance(source, target) == 7
 def test_change(self):
     "able to find a simple change"
     # "hat" and "cat" differ only in their first letter.
     observed = edit_distance("hat", "cat")
     self.assertEqual(observed, 1)
Example #17
0
    f = [[10**9] * (tn + 2) for _ in range(sn + 2)]
    f[0][0] = 0

    def relax(p, q, x):
        f[p][q] = min(f[p][q], x)

    for i in range(sn + 1):
        for j in range(tn + 1):
            if i < sn and j < tn:
                relax(i + 1, j + 1, f[i][j] + (1 if s[i] != t[j] else 0))
            relax(i + 1, j, f[i][j] + 1)
            relax(i, j + 1, f[i][j] + 1)
    return f[sn][tn]


if __name__ == '__main__':
    # Run the shared checks and the unit-test file first.
    run_common_tests()
    check_tests_pass("edit_distance_unit_tests.py")

    # Then compare edit_distance with the `reference` function on each pair,
    # stopping at the first mismatch.
    all_tests_passed = True
    pairs = (("abacabadabacabaeabacab", "aeabacabad"), )
    for first, second in pairs:
        if edit_distance(first, second) == reference(first, second):
            continue
        all_tests_passed = False
        failed("Wrong answer for {} and {}".format(first, second))
        break

    if all_tests_passed:
        passed()
Example #18
0
def _run_timed_comparison(label, string_1, string_2):
    """Print the edit distance of two words and how long the call took.

    ``label`` names the run in the timing line (for example ``"test 2"``).
    """
    print("The two words being compared are '", string_1, "' and '", string_2,
          "'.")
    start = time.time()
    print(ed.edit_distance(string_1, string_2, len(string_1), len(string_2)))
    end = time.time()
    print('Running time for {} was: '.format(label), end - start, 'seconds.')
    print("")


def main():
    """Run three fixed edit-distance demos, then optionally a user-supplied one.

    Each run prints the two words, the value returned by
    ``ed.edit_distance`` and the elapsed wall-clock time. Afterwards the user
    is asked whether to compare two words of their own; the answer is read
    from standard input.
    """
    print("Welcome to the Edit Distance Program!")
    print("")
    print(
        "This program will perform three different tests. Let's get started!")
    print("")

    fixed_cases = (("brand", "random"), ("any", "any"), ("magnificus", ""))
    for number, (string_1, string_2) in enumerate(fixed_cases, start=1):
        print("Test {}: ".format(number))
        # Each timing line names its own test; previously all of them
        # reported "test 1".
        _run_timed_comparison("test {}".format(number), string_1, string_2)

    print("Tests completed!")
    print("")
    print("Would you like to try two words for yourself? Yes or No")
    user_selection = input()
    if user_selection in ('yes', 'Yes', 'YES'):
        print("Excellent! Now, which two words would you like to try?")
        print("Word 1: ")
        string_1 = input()
        print("Word 2: ")
        string_2 = input()
        _run_timed_comparison("your test", string_1, string_2)
        print("Program complete! See you again soon!")
    elif user_selection in ('no', 'No', 'NO'):
        print("Very well.")
        print("Program complete! See you again soon!")
    else:
        print("ERROR! Input invalid!")
        print("Goodbye!")
 def test_delete(self):
     "able to find a simple delete"
     # "aaa" is "aaab" without its final character.
     observed = edit_distance("aaab", "aaa")
     self.assertEqual(observed, 1)
 def test_equal(self):
     "handles equal strings"
     word = "azerty"
     observed = edit_distance(word, "azerty")
     self.assertEqual(observed, 0)
 def test_empty(self):
     "handles empty strings"
     # (left, right, expected distance), checked in this order.
     cases = (("", "a", 1), ("a", "", 1), ("", "", 0))
     for left, right, expected in cases:
         self.assertEqual(edit_distance(left, right), expected)
Example #22
0
import edit_distance2
import nltk
from string import ascii_letters

if __name__ == '__main__':
    # Benchmark three edit_distance implementations on two random strings.
    # range() and print(<single expression>) give the same output on
    # Python 2 and 3; the original xrange / print statement only ran on 2.
    letters = list(ascii_letters[:10])

    # s: 200 shuffled copies of the first ten letters, concatenated.
    pieces = []
    for _ in range(200):
        shuffle(letters)
        pieces.append(''.join(letters))
    s = ''.join(pieces)

    # t: 100 further shuffled copies.
    pieces = []
    for _ in range(100):
        shuffle(letters)
        pieces.append(''.join(letters))
    t = ''.join(pieces)

    n = 20  # number of repetitions per implementation
    print('%d %d' % (len(s), len(t)))

    time_init()
    for _ in range(n):
        edit_distance.edit_distance(s, t)
    print(time_gap('edit_distance.edit_distance'))

    for _ in range(n):
        edit_distance2.edit_distance(s, t)
    print(time_gap('edit_distance2.edit_distance'))

    for _ in range(n):
        nltk.edit_distance(s, t)
    print(time_gap('nltk.edit_distance'))
    
Example #23
0
def test_editing_distance_1_1_100():
    """Expect 7 for "editing" vs "distance" with extra arguments 1, 1, 100."""
    # The three extra positional arguments are presumably per-operation
    # costs -- their order is not visible here.
    extra = (1, 1, 100)
    assert edit_distance("editing", "distance", *extra) == 7
Example #24
0
 def test_edit_dist_empty_str(self):
     # Empty string versus "abc": expected distance is 3.
     observed = edit_distance("", "abc")
     self.assertEqual(observed, 3)
Example #25
0
def test_editing_distance_1_100_1():
    """Expect 6 for "editing" vs "distance" with extra arguments 1, 100, 1."""
    # The three extra positional arguments are presumably per-operation
    # costs -- their order is not visible here.
    extra = (1, 100, 1)
    assert edit_distance("editing", "distance", *extra) == 6
Example #26
0
 def test_edit_dist_2(self):
     # Expected distance between "editing" and "distance" is 5.
     observed = edit_distance("editing", "distance")
     self.assertEqual(observed, 5)
Example #27
0
def test_a_b_100_1_1():
    """Expect 2 for "a" vs "b" with extra arguments 100, 1, 2."""
    # NOTE(review): the function name says 100_1_1 but the call passes
    # 100, 1, 2 -- confirm which was intended.
    extra = (100, 1, 2)
    assert edit_distance("a", "b", *extra) == 2
 def test_with_three_char_word_and_empty(self):
     # Three-character word versus the empty word: expected distance is 3.
     observed = edit_distance('abc', '')
     self.assertEqual(3, observed)
Example #29
0
def test_edit_redit():
    """Expect a distance of 1 between "edit" and "redit"."""
    source, target = "edit", "redit"
    assert edit_distance(source, target) == 1
 def test_with_words_of_different_chars(self):
     # 'abc' and 'xyz' share no characters: expected distance is 3.
     observed = edit_distance('abc', 'xyz')
     self.assertEqual(3, observed)
Example #31
0
def test_a_bb_100_1_1():
    """Expect 101 for "a" vs "bb" with extra arguments 100, 1, 1."""
    extra = (100, 1, 1)
    assert edit_distance("a", "bb", *extra) == 101
 def test_with_same_length_words_with_one_same_char_away_by_two(self):
     # 'a' is first in 'abc' and last in 'xya': expected distance is 3.
     observed = edit_distance('abc', 'xya')
     self.assertEqual(3, observed)
Example #33
0
def sentence_editdistance(s1, s2):
    """Return the edit distance between the word lists of two sentences.

    ``edit_distance`` is called on ``s1.words`` and ``s2.words``; only the
    first element of the pair it returns is passed back.
    """
    result = edit_distance(s1.words, s2.words)
    distance, _unused = result
    return distance
 def test_with_snowy_and_sunny(self):
     # Expected distance between 'SNOWY' and 'SUNNY' is 3.
     observed = edit_distance('SNOWY', 'SUNNY')
     self.assertEqual(3, observed)
Example #35
0
def test_edit_one_swap():
    """Expect a distance of 1 when a single character is replaced."""
    observed = edit_distance("cat", "car")
    assert_equals(observed, 1)
 def test_with_atgttata_and_atcgtcc(self):
     # Expected distance between 'ATGTTATA' and 'ATCGTCC' is 5.
     observed = edit_distance('ATGTTATA', 'ATCGTCC')
     self.assertEqual(5, observed)
Example #37
0
def test_edit_one_add():
    """Expect a distance of 1 when a single character is added."""
    observed = edit_distance("char", "chair")
    assert_equals(observed, 1)
Example #38
0
    y_pred = np.zeros((t50_num[i]), dtype=np.int32)
    y_v = np.zeros((t50_num[i]), dtype=np.float32)
    for ii in idx:
        y_true[ii] = Y_valid[count]
        y_pred[ii] = Y[count]
        y_v[ii] = Y_value[count]
        count += 1
    acc += sum(y_true == y_pred)
    cc += len(y_true)

    s0 = ""
    for k in range(len(y_true)):
        s0 += map_phone2alpha[map_48to39[map_num2phone[y_true[k]]]]
    s0 = string_compress(s0)

    for it, t in enumerate(threshold):
        s1 = ""
        for k in range(len(y_pred)):
            if y_v[k] > t:
                s1 += map_phone2alpha[map_48to39[map_num2phone[y_pred[k]]]]
        s1 = window_slide(s1, phone_mean_num)
        s1 = string_trim(s1)
        s1 = string_trim2(s1)
        s1 = string_compress(s1)
        dis[it] += edit_distance(s0, s1)

# Accuracy: number of positions where y_true equals y_pred (acc) over the
# number of positions counted (cc).
print("Acc:", acc / cc)
# Turn the distances accumulated per threshold into a mean over the number
# of entries in t50_num.
dis = dis / len(t50_num)
# One line per threshold: the threshold value and its mean distance.
for i in range(len(threshold)):
    print(threshold[i], dis[i])
Example #39
0
def test_edit_same_string():
    """Expect a distance of 0 between identical strings."""
    observed = edit_distance("one", "one")
    assert_equals(observed, 0)
Example #40
0
def main():
    """Read two whitespace-separated words from one input line and print
    their edit distance."""
    words = str.split(input())
    # Exactly two words are expected; any other count fails on unpacking.
    start, end = words
    distance = edit_distance(start, end)
    print(distance)
Example #41
0
def test_edit_two_empty_strings():
    """Expect a distance of 0 between two empty strings."""
    observed = edit_distance("", "")
    assert_equals(observed, 0)
Example #42
0
def test_bc_bbcc_100_1_1():
    """Expect 200 for "bc" vs "bbcc" with extra arguments 100, 1, 1."""
    extra = (100, 1, 1)
    assert edit_distance("bc", "bbcc", *extra) == 200
Example #43
0
def test_ed(s1, s2, expected_edits):
    """Check the edits reported for ``s1`` vs ``s2`` with transpositions on.

    The reported edits must equal ``expected_edits`` and the reported
    distance must equal the sum of the expected edit counts.
    """
    total, edits = edit_distance(s1, s2, transpositions=True)
    assert edits == expected_edits
    expected_total = sum(expected_edits.values())
    assert total == expected_total
Example #44
0
def test_digit_redit():
    """Expect a distance of 3 between "digit" and "redit"."""
    source, target = "digit", "redit"
    assert edit_distance(source, target) == 3
Example #45
0
def word_confusion(atoms_dissimilarity, max_word_len, beta=1, atoms=None):
    """
    Get word-level confusion probability matrix
    from atomic-signals-level dissimilarities
    for all words of length 1 up to max_word_len.

    Dissimilarities are assumed to be numbers between 0 and 1.

    Based on computing edit-distances using the atoms_dissimilarity matrix
    as substitution costs (insertions and deletions cost 1) and obtaining
    probabilities by taking exp(-beta*ED(w1, w2)) and renormalising each row
    to sum to 1. Use a larger beta to get more peaked confusion
    probabilities.

    Justification: for same-length words we retrieve (and it's the only way
    to retrieve?) Fletcher's sequential law. Reportedly also used in a 1999
    paper (not the PNAS one) by a Harvard author working on evolutionary
    dynamics.

    Returns a list of words (based on the atoms argument if it is a list of
    strings or an arbitrary alphabetic mapping if atoms is None) along
    with the confusion probability matrix between these words such that the
    number on row i column j, corresponds to the probability of having word
    j being received when word i was intended.

    Atoms may be longer than one character: each word is encoded from the
    atoms it was built from, not from its individual characters.
    """
    assert np.all(atoms_dissimilarity <= 1)
    assert np.all(atoms_dissimilarity >= 0)

    alphabet_size = atoms_dissimilarity.shape[0]  # number of atomic signals
    if atoms is None:
        atoms = [chr(97 + e) for e in range(alphabet_size)]

    insertion_costs = np.ones(alphabet_size)
    deletion_costs = np.ones(alphabet_size)
    substitution_costs = atoms_dissimilarity

    # Cost-table index of each atom; list.index semantics (first occurrence)
    # are kept so that duplicated atoms map as before.
    atom_codes = [atoms.index(atom) for atom in atoms]

    # Build every word together with its sequence of atom indices, one word
    # length at a time. Tracking the indices alongside the text avoids
    # re-parsing words character by character, which breaks for
    # multi-character atoms.
    all_words = []
    all_codes = []
    # at the start of iteration l, current holds all words of len l-1
    current = [('', ())]
    for _ in range(max_word_len):
        current = [(word + atom, codes + (code,))
                   for word, codes in current
                   for atom, code in zip(atoms, atom_codes)]
        for word, codes in current:
            all_words.append(word)
            all_codes.append(np.array(codes))

    # for each possible pair of words compute edit distance
    # (there is probably a smart way to pool computations instead of
    # considering each pair of words independently, let's look at it
    # if this part of the code ever becomes a bottleneck)
    n_words = len(all_words)
    word_dis = np.empty((n_words, n_words))
    for ia, a in enumerate(all_codes):
        for ib, b in enumerate(all_codes):
            word_dis[ia, ib] = ED.edit_distance(a,
                                                b,
                                                s_cost=substitution_costs,
                                                d_cost=deletion_costs,
                                                i_cost=insertion_costs)

    confusion_probas = np.exp(-beta * word_dis)
    # Normalise each row to sum to 1; the column vector broadcasts across
    # the columns of the matrix.
    row_sums = np.sum(confusion_probas, axis=1)
    confusion_probas = confusion_probas / row_sums[:, np.newaxis]
    return all_words, confusion_probas
Example #46
0
def test_edit_one_del():
    """Expect a distance of 1 between "chat" and "chaut"."""
    observed = edit_distance("chat", "chaut")
    assert_equals(observed, 1)
Example #47
0
def test_edit_distance(x, y, expected):
    """Check that ``edit_distance(x, y)`` equals ``expected``."""
    actual = edit_distance(x, y)
    assert actual == expected
Example #48
0
def test_edit_longer_strings():
    """Expect a distance of 4 between "pineapple" and "penelope"."""
    # One possible path of four edits:
    # pineapple -> peneapple -> penelpple -> penelople -> penelope
    observed = edit_distance("pineapple", "penelope")
    assert_equals(observed, 4)
Example #49
0
map_num2phone, map_48to39, map_phone2alpha = make_map(data_dir)
dis = 0
for k in instance_list:
    # Reference string: map every label through the three lookup tables,
    # concatenate, then compress.
    s0 = string_compress("".join(
        map_phone2alpha[map_48to39[map_num2phone[n]]]
        for n in Y_valid_dic[k]))

    # Predicted string: same mapping, then both trims before compressing.
    s1 = "".join(
        map_phone2alpha[map_48to39[map_num2phone[n]]] for n in Y_dic[k])
    s1 = string_compress(string_trim2(string_trim(s1)))

    dis += edit_distance(s0, s1)

# Mean distance over all instances.
print(dis / len(instance_list))

print("No trim:")
for t in [
        0.55, 0.6, 0.61, 0.62, 0.63, 0.64, 0.65, 0.66, 0.67, 0.68, 0.69, 0.7,
        0.71, 0.73, 0.75, 0.77, 0.8
]:
    dis = 0
    for k in instance_list:
        s0 = ""
        for n in Y_valid_dic[k]:
            s0 += map_phone2alpha[map_48to39[map_num2phone[n]]]
        s0 = string_compress(s0)
        s1 = ""
Example #50
0
def test_edit_one_empty_string():
    """Expect a distance of 3 between the empty string and "one"."""
    observed = edit_distance("", "one")
    assert_equals(observed, 3)
 def test_with_short_and_ports(self):
     # Expected distance between 'short' and 'ports' is 3.
     observed = edit_distance('short', 'ports')
     self.assertEqual(3, observed)
Example #52
0
#coding:utf8
import pyximport; pyximport.install()
import edit_distance

if __name__=='__main__':
    # Smoke test of the compiled edit_distance module. print(<expr>) with a
    # single argument gives the same output on Python 2 and 3; the original
    # print statement only ran on Python 2.
    # NOTE(review): a call with a non-string first argument (1, 'abc') was
    # disabled here -- presumably unsupported by the extension; confirm.
    print(edit_distance.edit_distance('1', 'abc'))
    print(edit_distance.edit_distance('a', 'abc'))
    
Example #53
0
 def test_edit_dist_1(self):
     # Expected distance between "zettel" and "yaethel" is 3.
     observed = edit_distance("zettel", "yaethel")
     self.assertEqual(observed, 3)