Beispiel #1
0
def extractVenueFeatures(A,B):
    """Compute four dissimilarity features for a pair of venue strings.

    Each input is first cleaned: punctuation is removed, runs of spaces
    are collapsed to one space, and a string that ends up empty is
    replaced by '-'.

    Returns a list ``[measure, levenshtein, jaccard, jaro]``:

    * ``measure`` -- summed absolute difference between the two letter
      histograms divided by the larger of the two letter totals, or 0
      when neither string contains a counted letter.  Only lowercase
      ASCII letters 'a'..'z' are counted; every other character
      (including uppercase letters) is ignored.
    * the remaining three entries are the values returned by
      ``fuzzycomp.levenshtein_distance``, ``fuzzycomp.jaccard_distance``
      and ``fuzzycomp.jaro_distance`` for the cleaned strings.
    """
    venue_a = _normalise_venue(A)
    venue_b = _normalise_venue(B)

    hist_a = _letter_histogram(venue_a)
    hist_b = _letter_histogram(venue_b)

    histogram_gap = numpy.sum(numpy.absolute(hist_a - hist_b))
    largest_total = max(numpy.sum(hist_a), numpy.sum(hist_b))
    # Guard against dividing by zero when no letters were counted at all.
    measure = 0 if largest_total == 0 else histogram_gap / float(largest_total)

    levenshtein = fuzzycomp.levenshtein_distance(venue_a, venue_b)
    jaccard = fuzzycomp.jaccard_distance(venue_a, venue_b)
    jaro = fuzzycomp.jaro_distance(venue_a, venue_b)
    return [measure, levenshtein, jaccard, jaro]


def _normalise_venue(text):
    """Strip punctuation, collapse repeated spaces, map '' to '-'."""
    without_punctuation = re.sub(r'[^\w\s]', '', text)
    collapsed = re.sub(' +', ' ', without_punctuation)
    return collapsed if collapsed != '' else '-'


def _letter_histogram(text):
    """Return a 26-slot numpy array counting the 'a'..'z' characters of text."""
    counts = numpy.zeros(26)
    for symbol in text:
        if 'a' <= symbol <= 'z':
            counts[ord(symbol) - ord('a')] += 1
    return counts
Beispiel #2
0
def extractVenueFeatures(A, B):
    """Build a feature vector describing how different two venue names are.

    The inputs are cleaned (punctuation removed, repeated spaces
    collapsed, empty result replaced by '-') before any comparison.

    Returns ``[measure, levenshtein, jaccard, jaro]`` where ``measure``
    is the sum of absolute differences between the per-letter counts of
    the two strings, divided by the larger total letter count (0 if no
    letters were counted).  Only lowercase 'a'..'z' characters take part
    in the count.  The other three values come straight from the
    matching ``fuzzycomp`` distance functions on the cleaned strings.
    """
    cleaned = []
    for raw in (A, B):
        text = re.sub(r'[^\w\s]', '', raw)  # drop punctuation
        text = re.sub(' +', ' ', text)      # squeeze runs of spaces
        if not text:
            text = '-'
        cleaned.append(text)
    A, B = cleaned

    # One 26-slot histogram per string, indexed by distance from 'a'.
    base = ord('a')
    histograms = []
    for text in cleaned:
        tally = numpy.zeros(26)
        for ch in text:
            slot = ord(ch) - base
            if 0 <= slot < 26:
                tally[slot] += 1
        histograms.append(tally)
    counts_a, counts_b = histograms

    distance = numpy.absolute(counts_a - counts_b).sum()
    max_chars = max(counts_a.sum(), counts_b.sum())
    if max_chars != 0:
        measure = distance / float(max_chars)
    else:
        # No countable letters in either string: avoid dividing by zero.
        measure = 0

    features = [measure]
    features.append(fuzzycomp.levenshtein_distance(A, B))
    features.append(fuzzycomp.jaccard_distance(A, B))
    features.append(fuzzycomp.jaro_distance(A, B))
    return features
Beispiel #3
0
 def test_iterable_input(self):
     """jaccard_distance should give the expected result for list inputs."""
     # (left, right, expected distance): same elements reordered, then
     # two lists with no element in common.
     cases = (
         ([1, 2, 3, 4, 5], [5, 4, 3, 2, 1], 0.0),
         ([1, 2, 3, 4, 5], [6, 7, 8, 9, 10], 1.0),
     )
     for left, right, expected in cases:
         self.assertEqual(fuzzycomp.jaccard_distance(left, right), expected)
Beispiel #4
0
 def test_valid_input(self):
     """jaccard_distance should give the expected result for string inputs."""
     # Identical strings are expected to be at distance 0.
     identical = fuzzycomp.jaccard_distance("Hello", "Hello")
     self.assertEqual(identical, 0.0)

     # Partially overlapping strings: compared to 7 decimal places.
     overlapping = fuzzycomp.jaccard_distance("Hello", "World")
     self.assertAlmostEqual(overlapping, 0.7142857, places=7)

     # Strings sharing no character are expected to be at distance 1.
     unrelated = fuzzycomp.jaccard_distance("foo", "bar")
     self.assertEqual(unrelated, 1.0)
Beispiel #5
0
 def test_iterable_input(self):
     """Check jaccard_distance on plain lists instead of strings."""
     # Same five elements in reverse order: expected distance is 0.
     reordered = fuzzycomp.jaccard_distance([1, 2, 3, 4, 5], [5, 4, 3, 2, 1])
     self.assertEqual(reordered, 0.0)

     # No element in common: expected distance is 1.
     unrelated = fuzzycomp.jaccard_distance([1, 2, 3, 4, 5], [6, 7, 8, 9, 10])
     self.assertEqual(unrelated, 1.0)
Beispiel #6
0
 def test_valid_input(self):
     """Check jaccard_distance on ordinary string pairs."""
     first, second = "Hello", "Hello"
     self.assertEqual(fuzzycomp.jaccard_distance(first, second), 0.0)

     # The float result is only compared to 7 decimal places.
     expected_partial = 0.7142857
     partial = fuzzycomp.jaccard_distance("Hello", "World")
     self.assertAlmostEqual(partial, expected_partial, 7)

     first, second = "foo", "bar"
     self.assertEqual(fuzzycomp.jaccard_distance(first, second), 1.0)