def testEditDist(
            self):  # - - - - - - - - - - - - - - - - - - - - - - - - -
        """Test 'EditDist' approximate string comparator"""

        for pair in self.string_pairs:

            approx_str_value = stringcmp.editdist(pair[0], pair[1])

            assert (isinstance(approx_str_value,float)), \
                   '"EditDist" does not return a floating point number for: '+ \
                   str(pair)

            assert (approx_str_value >= 0.0), \
                   '"EditDist" returns a negative number for: '+str(pair)

            assert (approx_str_value <= 1.0), \
                   '"EditDist" returns a number larger than 1.0 for: '+str(pair)

            approx_str_value_1 = stringcmp.editdist(pair[0], pair[1])
            approx_str_value_2 = stringcmp.editdist(pair[1], pair[0])

            assert (approx_str_value_1 == approx_str_value_2), \
                   '"EditDist" returns different values for pair and swapped ' + \
                   'pair: '+str(pair)+': '+str(approx_str_value_1)+', '+ \
                   str(approx_str_value_2)

            # Check for value 1.0 if the strings are the same
            #
            if (pair[0] == pair[1]):

                assert (approx_str_value == 1.0), \
                       '"EditDist" does not return 1.0 if strings are equal: '+ \
                       str(pair)
예제 #2
0
  def testEditDist(self):   # - - - - - - - - - - - - - - - - - - - - - - - - -
    """Test 'EditDist' approximate string comparator"""

    for pair in self.string_pairs:

      approx_str_value = stringcmp.editdist(pair[0],pair[1])

      assert (isinstance(approx_str_value,float)), \
             '"EditDist" does not return a floating point number for: '+ \
             str(pair)

      assert (approx_str_value >= 0.0), \
             '"EditDist" returns a negative number for: '+str(pair)

      assert (approx_str_value <= 1.0), \
             '"EditDist" returns a number larger than 1.0 for: '+str(pair)

      approx_str_value_1 = stringcmp.editdist(pair[0],pair[1])
      approx_str_value_2 = stringcmp.editdist(pair[1],pair[0])

      assert (approx_str_value_1 == approx_str_value_2), \
             '"EditDist" returns different values for pair and swapped ' + \
             'pair: '+str(pair)+': '+str(approx_str_value_1)+', '+ \
             str(approx_str_value_2)

      # Check for value 1.0 if the strings are the same
      #
      if (pair[0] == pair[1]):

        assert (approx_str_value == 1.0), \
               '"EditDist" does not return 1.0 if strings are equal: '+ \
               str(pair)
예제 #3
0
    def testBagDist(self):  # - - - - - - - - - - - - - - - - - - - - - - - - -
        """Test 'BagDist' approximate string comparator"""

        for pair in self.string_pairs:

            approx_str_value = stringcmp.bagdist(pair[0], pair[1])

            assert isinstance(
                approx_str_value, float
            ), '"BagDist" does not return a floating point number for: ' + str(
                pair)

            assert (approx_str_value >= 0.0
                    ), '"BagDist" returns a negative number for: ' + str(pair)

            assert (
                approx_str_value <= 1.0
            ), '"BagDist" returns a number larger than 1.0 for: ' + str(pair)

            approx_str_value_1 = stringcmp.bagdist(pair[0], pair[1])
            approx_str_value_2 = stringcmp.bagdist(pair[1], pair[0])

            assert approx_str_value_1 == approx_str_value_2, (
                '"BagDist" returns different values for pair and swapped ' +
                "pair: " + str(pair) + ": " + str(approx_str_value_1) + ", " +
                str(approx_str_value_2))

            # Check for value 1.0 if the strings are the same
            #
            if pair[0] == pair[1]:

                assert (
                    approx_str_value == 1.0
                ), '"BagDist" does not return 1.0 if strings are equal: ' + str(
                    pair)

            # Check if bad distance is always larger than edit distance

            editdist_str_value = stringcmp.editdist(pair[0], pair[1])
            assert (
                approx_str_value >= editdist_str_value
            ), '"BagDist" value is smaller than "EditDist" value for: ' + str(
                pair)
예제 #4
0
파일: tools.py 프로젝트: cgl/CWA-Normalizer
def distance(ovv, cand):
    """Return ``editdist`` of the reduced form of `ovv` and `cand`.

    `ovv` is first passed through ``get_reduced``; `cand` is handed to
    ``editdist`` unchanged. The result of ``editdist`` is returned as is.
    """
    return editdist(get_reduced(ovv), cand)
예제 #5
0
        # Each section below appends two fields to `s`: the comparator's
        # result for twoNames[0] / twoNames[1] formatted with ' %.3f', then
        # the difference of two time.time() readings formatted with ' %.10f'.
        # NOTE(review): each timed interval also covers the '%' formatting
        # and the `s +=` concatenation, not only the stringcmp call itself.
        ##------posqgram 3------##
        # Third argument is 3 (presumably the q-gram length -- TODO confirm)
        start_time7 = time.time()
        s += ' %.3f' % (stringcmp.posqgram(twoNames[0], twoNames[1], 3))
        time_used7 = time.time() - start_time7
        s += ' %.10f' % (time_used7)

        ##------sgram------##
        # Third argument is a list of index lists (presumably the gram
        # classes used by sgram -- TODO confirm against stringcmp)
        start_time8 = time.time()
        s += ' %.3f' % (stringcmp.sgram(twoNames[0], twoNames[1],
                                        [[0], [0, 1], [1, 2]]))
        time_used8 = time.time() - start_time8
        s += ' %.10f' % (time_used8)

        ##------editdist------##
        start_time9 = time.time()
        s += ' %.3f' % (stringcmp.editdist(twoNames[0], twoNames[1]))
        time_used9 = time.time() - start_time9
        s += ' %.10f' % (time_used9)

        ##------mod_editdist------##
        start_time10 = time.time()
        s += ' %.3f' % (stringcmp.mod_editdist(twoNames[0], twoNames[1]))
        time_used10 = time.time() - start_time10
        s += ' %.10f' % (time_used10)

        ##------bagdist------##
        start_time11 = time.time()
        s += ' %.3f' % (stringcmp.bagdist(twoNames[0], twoNames[1]))
        time_used11 = time.time() - start_time11
        s += ' %.10f' % (time_used11)