def calculate(base, term):
    """Blend edit, Jaccard and longest-common-substring distances into one score.

    Both strings are lower-cased before any distance is computed, so the
    result does not depend on letter case.

    Args:
        base: Reference string. Its length is used as a divisor, so an empty
            ``base`` raises ``ZeroDivisionError`` (assuming the helper
            calculators return plain Python numbers -- TODO confirm).
        term: String compared against ``base``.

    Returns:
        The edit-distance-based score (selected by the Jaccard value), divided
        by the length-normalised longest-common-substring value when that
        value is positive, otherwise multiplied by the shorter input's length.
    """
    base = base.lower()
    term = term.lower()

    edits = editDistance.calculate(base, term)
    jaccard = jaccardDistance.calculate(base, term)
    common = longestCommonSubstringDistance.calculate(base, term)

    # Longest-common-substring value scaled by the longer input's length.
    common_ratio = common / max(len(base), len(term))
    # Evaluated unconditionally, so an empty base is rejected here no matter
    # which branch below ends up being taken.
    edits_per_base_char = edits / len(base)

    if jaccard == 1:
        score = edits
    elif jaccard == 0:
        score = edits_per_base_char
    else:
        score = jaccard * edits

    if common_ratio > 0:
        return score / common_ratio
    # No positive common-substring ratio: scale by the shorter input instead.
    return score * min(len(base), len(term))
def calculate(base, term):
    """Return a combined distance score for ``term`` relative to ``base``.

    The inputs are lower-cased, three helper distances are computed (edit,
    Jaccard, longest common substring), and they are merged as follows:

    * Jaccard value ``1``  -> start from the raw edit distance.
    * Jaccard value ``0``  -> start from the edit distance divided by
      ``len(base)``.
    * anything else        -> start from Jaccard value times edit distance.

    The starting value is then divided by the longest-common-substring value
    normalised by the longer input's length when that ratio is positive;
    otherwise it is multiplied by the shorter input's length.

    NOTE(review): ``len(base)`` and the longer length are used as divisors, so
    empty input presumably raises ``ZeroDivisionError`` -- confirm the helper
    calculators accept empty strings in the first place.
    """
    base = base.lower()
    term = term.lower()
    base_len = len(base)
    term_len = len(term)
    longer_len = max(base_len, term_len)
    shorter_len = min(base_len, term_len)

    edit_value = editDistance.calculate(base, term)
    jaccard_value = jaccardDistance.calculate(base, term)
    lcss_value = longestCommonSubstringDistance.calculate(base, term)

    lcss_ratio = lcss_value / longer_len
    # Computed up front (not only in the branch that uses it).
    edit_ratio = edit_value / base_len

    if jaccard_value == 1:
        combined = edit_value
    elif jaccard_value == 0:
        combined = edit_ratio
    else:
        combined = jaccard_value * edit_value

    combined = combined / lcss_ratio if lcss_ratio > 0 else combined * shorter_len
    return combined
def test_edit_distance_of_identical_words_case_senseitive(self):
    # Words that differ only in letter case ("hi" vs "Hi") are expected to
    # be exactly 1 apart.
    self.assertEqual(calculate("hi", "Hi"), 1)
def test_edit_distance_of_identical_words(self):
    # Identical words are expected to be 0 apart.
    self.assertEqual(calculate("hi", "hi"), 0)
def test_edit_distance_of_words_with_similar_letters(self):
    # "act" and "catch" share letters but in a different arrangement; the
    # expected result is 3.
    self.assertEqual(calculate("act", "catch"), 3)
def test_edit_distance_of_anagrams(self):
    # "tame" and "mate" use the same letters in a different order; the
    # expected result is 2.
    self.assertEqual(calculate("tame", "mate"), 2)
def test_edit_distance_of_subset_words(self):
    # "cat" is a prefix of "catch"; the two extra letters give an expected
    # result of 2.
    self.assertEqual(calculate("cat", "catch"), 2)
def test_edit_distance_of_similar_words(self):
    # "cat" and "bat" differ in a single letter; the expected result is 1.
    self.assertEqual(calculate("cat", "bat"), 1)
def test_edit_distance_of_identical_words_case_senseitive(self):
    # Letter case counts: "hi" against "Hi" should come out as 1.
    expected = 1
    distance = calculate("hi", "Hi")
    self.assertEqual(distance, expected)
def test_edit_distance_of_identical_words(self):
    # The same word on both sides should come out as 0.
    expected = 0
    distance = calculate("hi", "hi")
    self.assertEqual(distance, expected)
def test_edit_distance_of_words_with_similar_letters(self):
    # Overlapping letters in a different arrangement ("act" vs "catch")
    # should come out as 3.
    expected = 3
    distance = calculate("act", "catch")
    self.assertEqual(distance, expected)
def test_edit_distance_of_anagrams(self):
    # Anagrams "tame" and "mate" should come out as 2.
    expected = 2
    distance = calculate("tame", "mate")
    self.assertEqual(distance, expected)
def test_edit_distance_of_subset_words(self):
    # "catch" extends "cat" by two letters, so the result should be 2.
    expected = 2
    distance = calculate("cat", "catch")
    self.assertEqual(distance, expected)
def test_edit_distance_of_similar_words(self):
    # A one-letter difference ("cat" vs "bat") should come out as 1.
    expected = 1
    distance = calculate("cat", "bat")
    self.assertEqual(distance, expected)