def similarity(self, other):
    """Return a similarity score between this object and ``other``.

    returns:
        1.0 when both objects have the same ``sha``;
        for binary content (``self._is_binary`` is truthy), 1.0 if the
        ``binary`` payloads compare equal and 0.0 otherwise;
        otherwise whatever the ``similarity`` helper returns for the two
        ``binary`` payloads.
    """
    # Same sha: no need to look at the payloads at all.
    if self.sha == other.sha:
        return 1.0

    # Non-binary content is delegated to the ``similarity`` helper.
    # NOTE(review): presumably the module-level function of the same name
    # (a bare name inside a method never resolves to the method) -- confirm.
    if not self._is_binary:
        return similarity(self.binary, other.binary)

    # Binary content is all-or-nothing.
    return 1.0 if self.binary == other.binary else 0.0
def test_string_similarity(self):
    """Check the score of ``similarity`` as lines are progressively replaced.

    The base text is the digits 0-9, one per line. On each step one more
    digit is replaced by ``'*'`` in the copy, and the score of the base
    against the copy must be strictly below ``(10 - step) / 10`` and, within
    a small tolerance, at least ``(9 - step) / 10``.
    """
    digits = [str(n) for n in range(10)]
    original = '\n'.join(digits)
    mutated = original
    tolerance = 0.0001

    for step, digit in enumerate(digits):
        # Replacements accumulate: after this step, step + 1 digits are '*'.
        mutated = mutated.replace(digit, '*')
        score = similarity(original, mutated)

        upper_bound = (10 - step) / 10.0
        lower_bound = (9 - step) / 10.0
        self.assertGreater(upper_bound, score)
        self.assertLessEqual(lower_bound, score + tolerance)
def similarity(self, other):
    """
    Measure how alike this blob and ``other`` are.

    returns:
        1.0 if both blobs have the same sha
        if binary content: 1.0 when the bytes are equal, 0.0 otherwise
        if text content: a value in 0.0-1.0, the share of equal lines,
        as computed by the ``similarity`` helper
    """
    same_sha = self.sha == other.sha
    if same_sha:
        # Equal sha: skip any comparison of the contents.
        return 1.0

    if self._is_binary:
        # Binary blobs either match exactly or not at all.
        identical = self.bytes == other.bytes
        return 1.0 if identical else 0.0

    # Text content is scored by the ``similarity`` helper.
    # NOTE(review): presumably the module-level function of the same name,
    # not this method -- confirm against the rest of the file.
    return similarity(self.bytes, other.bytes)