def testEditDist(self):
    """Test 'EditDist' approximate string comparator

    For every pair in ``self.string_pairs`` the value returned by
    ``stringcmp.editdist`` must be a float in the range 0.0 to 1.0, must
    not change when the two strings are swapped, and must be exactly 1.0
    when both strings of the pair are equal.
    """
    for pair in self.string_pairs:
        left, right = pair[0], pair[1]

        similarity = stringcmp.editdist(left, right)

        # Result type and value range.
        assert isinstance(similarity, float), (
            '"EditDist" does not return a floating point number for: ' +
            str(pair))
        assert similarity >= 0.0, (
            '"EditDist" returns a negative number for: ' + str(pair))
        assert similarity <= 1.0, (
            '"EditDist" returns a number larger than 1.0 for: ' + str(pair))

        # Symmetry: the argument order must not influence the result.
        forward = stringcmp.editdist(left, right)
        backward = stringcmp.editdist(right, left)
        assert forward == backward, (
            '"EditDist" returns different values for pair and swapped ' +
            'pair: ' + str(pair) + ': ' + str(forward) + ', ' +
            str(backward))

        # Identical strings have to give the maximum value 1.0.
        if left == right:
            assert similarity == 1.0, (
                '"EditDist" does not return 1.0 if strings are equal: ' +
                str(pair))
def testEditDist(self):
    """Test 'EditDist' approximate string comparator

    Checks, for each entry of ``self.string_pairs``:

    * ``stringcmp.editdist`` returns a float,
    * the value is neither below 0.0 nor above 1.0,
    * swapping the two strings gives the same value,
    * two equal strings give exactly 1.0.
    """
    for pair in self.string_pairs:
        str_a = pair[0]
        str_b = pair[1]
        value = stringcmp.editdist(str_a, str_b)

        # --- type and bounds ---
        assert isinstance(value, float), \
            '"EditDist" does not return a floating point number for: ' + \
            str(pair)
        assert value >= 0.0, \
            '"EditDist" returns a negative number for: ' + str(pair)
        assert value <= 1.0, \
            '"EditDist" returns a number larger than 1.0 for: ' + str(pair)

        # --- swapped arguments must give the same value ---
        value_ab = stringcmp.editdist(str_a, str_b)
        value_ba = stringcmp.editdist(str_b, str_a)
        assert value_ab == value_ba, \
            '"EditDist" returns different values for pair and swapped ' + \
            'pair: ' + str(pair) + ': ' + str(value_ab) + ', ' + \
            str(value_ba)

        # --- equal strings must give 1.0 ---
        if str_a == str_b:
            assert value == 1.0, \
                '"EditDist" does not return 1.0 if strings are equal: ' + \
                str(pair)
def testBagDist(self):
    """Test 'BagDist' approximate string comparator

    For every pair in ``self.string_pairs`` the value returned by
    ``stringcmp.bagdist`` must be a float in the range 0.0 to 1.0, must
    not change when the two strings are swapped, must be exactly 1.0 for
    equal strings, and must not be smaller than the value that
    ``stringcmp.editdist`` returns for the same pair.
    """
    for pair in self.string_pairs:
        left, right = pair[0], pair[1]

        bag_value = stringcmp.bagdist(left, right)

        # Result type and value range.
        assert isinstance(bag_value, float), (
            '"BagDist" does not return a floating point number for: ' +
            str(pair))
        assert bag_value >= 0.0, (
            '"BagDist" returns a negative number for: ' + str(pair))
        assert bag_value <= 1.0, (
            '"BagDist" returns a number larger than 1.0 for: ' + str(pair))

        # Symmetry: the argument order must not influence the result.
        forward = stringcmp.bagdist(left, right)
        backward = stringcmp.bagdist(right, left)
        assert forward == backward, (
            '"BagDist" returns different values for pair and swapped ' +
            "pair: " + str(pair) + ": " + str(forward) + ", " +
            str(backward))

        # Identical strings have to give the maximum value 1.0.
        if left == right:
            assert bag_value == 1.0, (
                '"BagDist" does not return 1.0 if strings are equal: ' +
                str(pair))

        # The bag distance value must never fall below the edit distance
        # value computed for the same pair.
        edit_value = stringcmp.editdist(left, right)
        assert bag_value >= edit_value, (
            '"BagDist" value is smaller than "EditDist" value for: ' +
            str(pair))
def distance(ovv, cand):
    """Return ``editdist`` of the reduced form of *ovv* and *cand*.

    Only *ovv* is passed through ``get_reduced``; *cand* is compared as
    given.  Both helpers are defined outside this block.
    NOTE(review): what "reduced" means and the scale of the returned
    value depend on those helpers -- confirm against their definitions.
    """
    return editdist(get_reduced(ovv), cand)
# Each section below runs one ``stringcmp`` comparator on the two names,
# appends its value (3 decimals) to ``s`` and then appends the elapsed
# wall-clock seconds (10 decimals).  The timed region covers the
# comparator call together with formatting/appending its value.
# ``s`` and ``twoNames`` are defined outside this fragment.

# ---- posqgram (third argument 3) ----
start_time7 = time.time()
posqgram3_value = stringcmp.posqgram(twoNames[0], twoNames[1], 3)
s += ' %.3f' % posqgram3_value
time_used7 = time.time() - start_time7
s += ' %.10f' % time_used7

# ---- sgram ----
start_time8 = time.time()
sgram_value = stringcmp.sgram(twoNames[0], twoNames[1],
                              [[0], [0, 1], [1, 2]])
s += ' %.3f' % sgram_value
time_used8 = time.time() - start_time8
s += ' %.10f' % time_used8

# ---- editdist ----
start_time9 = time.time()
editdist_value = stringcmp.editdist(twoNames[0], twoNames[1])
s += ' %.3f' % editdist_value
time_used9 = time.time() - start_time9
s += ' %.10f' % time_used9

# ---- mod_editdist ----
start_time10 = time.time()
mod_editdist_value = stringcmp.mod_editdist(twoNames[0], twoNames[1])
s += ' %.3f' % mod_editdist_value
time_used10 = time.time() - start_time10
s += ' %.10f' % time_used10

# ---- bagdist ----
start_time11 = time.time()
bagdist_value = stringcmp.bagdist(twoNames[0], twoNames[1])
s += ' %.3f' % bagdist_value
time_used11 = time.time() - start_time11
s += ' %.10f' % time_used11