Ejemplo n.º 1
0
 def test_set_operations(self):
     """Test advanced set operations"""
     items1 = set(["abcde", "cdefg", "fghijk", "ijklm"])
     items2 = set(["cdefg", "lmnop"])
     idx1 = NGram(items1)
     idx2 = NGram(items2)
     results = lambda L: sorted(x[0] for x in L)
     # Item removal
     self.assertEqual(results(idx1.search('cde')), ["abcde","cdefg"])
     idx1.remove('abcde')
     self.assertEqual(results(idx1.search('cde')), ["cdefg"])
     # Set intersection operation
     items1.remove('abcde')
     idx1.intersection_update(idx2)
     self.assertEqual(idx1, items1.intersection(items2))
     self.assertEqual(results(idx1.search('lmn')), [])
     self.assertEqual(results(idx1.search('ijk')), [])
     self.assertEqual(results(idx1.search('def')), ['cdefg'])
Ejemplo n.º 2
0
 def test_set_operations(self):
     """Test advanced set operations"""
     items1 = set(["abcde", "cdefg", "fghijk", "ijklm"])
     items2 = set(["cdefg", "lmnop"])
     idx1 = NGram(items1)
     idx2 = NGram(items2)
     results = lambda L: sorted(x[0] for x in L)
     # Item removal
     self.assertEqual(results(idx1.search('cde')), ["abcde", "cdefg"])
     idx1.remove('abcde')
     self.assertEqual(results(idx1.search('cde')), ["cdefg"])
     # Set intersection operation
     items1.remove('abcde')
     idx1.intersection_update(idx2)
     self.assertEqual(idx1, items1.intersection(items2))
     self.assertEqual(results(idx1.search('lmn')), [])
     self.assertEqual(results(idx1.search('ijk')), [])
     self.assertEqual(results(idx1.search('def')), ['cdefg'])
Ejemplo n.º 3
0
def ngram(w1, w2, n):
    """
    ngram distance
    """
    pad = lambda x : "#{}#".format(x)
    w1, w2 = pad(w1), pad(w2)

    g1 = [w1[i:i+n] for i in range(len(w1)-n+1)]
    g2 = [w2[i:i+n] for i in range(len(w2)-n+1)]


    # compute ngram similarity
    # d(a, b) = |a| + |b| + 2|a intersection b|
    n = NGram(g1)
    n.intersection_update(g2)

    d = len(g1) + len(g2) - 2 * len(list(n))
    return d