Beispiel #1
0
 def similarity(self, other):
     """Return a similarity score between this object and ``other``.

     Objects with equal ``sha`` score 1.0 without inspecting their content.
     When ``self._is_binary`` is truthy, the score is 1.0 if the ``binary``
     payloads compare equal and 0.0 otherwise. In every other case the two
     payloads are handed to the ``similarity`` function found in the
     enclosing module scope, and its result is returned unchanged.
     """
     # Fast path: matching hashes need no content comparison.
     if self.sha == other.sha:
         return 1.0
     # Non-binary content is scored by the module-level helper.
     if not self._is_binary:
         return similarity(self.binary, other.binary)
     # Binary content is all-or-nothing.
     return 1.0 if self.binary == other.binary else 0.0
 def test_string_similarity(self):
     """Check the score bounds as the lines of a ten-line text are masked.

     The base text holds the digits 0-9, one per line. On step ``i`` the
     digit ``i`` is replaced by ``'*'`` (earlier replacements are kept) and
     the score against the untouched base must be strictly below
     ``(10 - i) / 10`` and, allowing for a small tolerance, at least
     ``(9 - i) / 10``.
     """
     tolerance = 0.0001
     digits = [str(n) for n in range(10)]
     base = '\n'.join(digits)
     masked = base
     for step, digit in enumerate(digits):
         masked = masked.replace(digit, '*')
         score = similarity(base, masked)
         upper_bound = (10 - step) / 10.0
         lower_bound = (9 - step) / 10.0
         self.assertGreater(upper_bound, score)
         self.assertLessEqual(lower_bound, score + tolerance)
Beispiel #3
0
 def similarity(self, other):
     """Score how alike this object and ``other`` are.

     Returns 1.0 when both carry the same ``sha``. If ``self._is_binary`` is
     truthy the ``binary`` payloads are compared for equality, giving 1.0
     or 0.0. Otherwise the payloads are passed to the ``similarity``
     function from the enclosing module scope and its result is returned.
     """
     if self.sha == other.sha:
         return 1.0

     if self._is_binary:
         mine, theirs = self.binary, other.binary
         if mine == theirs:
             return 1.0
         return 0.0

     mine, theirs = self.binary, other.binary
     return similarity(mine, theirs)
Beispiel #4
0
 def test_string_similarity(self):
     """Verify the score falls within the expected band after each masking.

     Starting from the digits 0-9 joined by newlines, each iteration
     replaces one more digit with ``'*'`` and compares the result with the
     original text. The score must stay strictly under the current upper
     bound and must not fall below the next bound by more than ``EPS``.
     """
     EPS = 0.0001
     # bounds[k] == (10 - k) / 10.0, so bounds[i] is this step's upper bound
     # and bounds[i + 1] its lower bound.
     bounds = [(10 - k) / 10.0 for k in range(11)]
     base = '\n'.join(map(str, range(10)))
     other = base
     i = 0
     while i < 10:
         other = other.replace(str(i), '*')
         sim = similarity(base, other)
         self.assertGreater(bounds[i], sim)
         self.assertLessEqual(bounds[i + 1], sim + EPS)
         i += 1
Beispiel #5
0
 def similarity(self, other):
     """Compare this blob with ``other`` and return a similarity score.

     Returns:
         1.0 when both objects have the same ``sha``.
         For binary content (``self._is_binary`` truthy): 1.0 if the
         ``bytes`` payloads are equal, 0.0 otherwise.
         Otherwise: whatever the module-level ``similarity`` function
         returns for the two ``bytes`` payloads (per the original
         documentation, a 0.0-1.0 fraction of equal lines).
     """
     # Fast path: matching hashes need no content comparison.
     if self.sha == other.sha:
         return 1.0
     # Text content is scored by the module-level helper.
     if not self._is_binary:
         return similarity(self.bytes, other.bytes)
     # Binary content is all-or-nothing.
     return 1.0 if self.bytes == other.bytes else 0.0
Beispiel #6
0
 def similarity(self, other):
     """Return how similar this blob is to ``other``.

     Returns:
         1.0 if the ``sha`` values match. If ``self._is_binary`` is truthy,
         1.0 or 0.0 depending on whether the ``bytes`` payloads are equal.
         Otherwise the result of the module-level ``similarity`` function
         applied to both ``bytes`` payloads (documented originally as a
         0.0-1.0 share of equal lines).
     """
     if self.sha == other.sha:
         return 1.0

     if self._is_binary:
         mine, theirs = self.bytes, other.bytes
         if mine == theirs:
             return 1.0
         return 0.0

     mine, theirs = self.bytes, other.bytes
     return similarity(mine, theirs)