def test_valid(self):
    """Check ``compare_single_path`` against known path pairs.

    Each case is ``(left, right, expected)``; the expected value must be
    produced for both argument orders.
    """
    inputs = [
        ((190, 1, 0), (190, 1, 0), 0),
        ((190, 1, 0), (190, 1, 1), 1 / 3),
        ((190, 0, 1), (190, 1, 0), 2 / 3),
        ((190, 0, 1), (191, 1, 0), 1),
        ((190, 0, 1), (190, 0, 1, 0), 1 / 4),
        ((190, ), (), 1),
        ((), (), 1),
    ]

    for left, right, result in inputs:
        forward_ok = float_cmp(compare_single_path(left, right), result)
        # The swapped order is only evaluated when the forward check held.
        self.assertTrue(
            forward_ok
            and float_cmp(compare_single_path(right, left), result)
        )
def test_invalid(self):
    """Test rather unusual corner cases of ``GenreTreeDistance.compute``."""
    calc = GenreTreeDistance(GenreTreeProvider())

    # (lefts, rights, expected distance)
    corner_cases = (
        ([], [], 1.0),
        ([], [(1, 0)], 1.0),
        ([], ['berta'], 1.0),
        # Funny one: strings are iterable, so they are accepted as paths.
        (['berta'], ['berta'], 0.0),
    )
    for lefts, rights, expected in corner_cases:
        self.assertTrue(float_cmp(calc.compute(lefts, rights), expected))

    # Passing non-iterable path entries must raise a TypeError.
    with self.assertRaises(TypeError):
        calc.compute([1], [2])
def test_valid(self):
    """Verify ``compare_single_path`` on a table of path pairs.

    Every row holds two paths and the expected result; the result has to
    match regardless of which path is passed first.
    """
    def matches(first, second, expected):
        # True when the computed value is float-equal to the expected one.
        return float_cmp(compare_single_path(first, second), expected)

    inputs = [
        ((190, 1, 0), (190, 1, 0), 0),
        ((190, 1, 0), (190, 1, 1), 1 / 3),
        ((190, 0, 1), (190, 1, 0), 2 / 3),
        ((190, 0, 1), (191, 1, 0), 1),
        ((190, 0, 1), (190, 0, 1, 0), 1 / 4),
        ((190, ), (), 1),
        ((), (), 1),
    ]

    for left, right, result in inputs:
        self.assertTrue(
            matches(left, right, result) and matches(right, left, result)
        )
def test_weight(self):
    """Check ``Distance.distance`` for several input mappings."""
    # Each entry: (mapping handed to ``Distance``, expected ``distance``).
    expectations = [
        ({'genre': 1.0}, 1.0),
        ({'random': 1.0}, 1.0),
        ({'genre': 1.0, 'random': 1.0}, 1.0),
        ({'genre': 1.0, 'random': 0.0}, 5 / 6),
        ({'genre': 0.0, 'random': 0.0}, 0.0),
    ]
    for mapping, expected in expectations:
        dist = Distance(self._session, mapping)
        self.assertAlmostEqual(dist.distance, expected)

    # Compute it manually:
    dist = Distance(self._session, {'genre': 0.5, 'random': 0.1})
    manual = (0.5 * 0.5 + 0.1 * 0.1) / 0.6
    self.assertTrue(float_cmp(dist.distance, manual))
def do_compute(self, lefts, rights):
    """Return the smallest pairwise distance between two lists of genre paths.

    Every path in ``lefts`` is compared with every path in ``rights`` using
    ``compare_single_path`` and the minimum is kept (single-linkage style).

    :param lefts: A list of Genre Paths.
    :param rights: A list of Genre Paths to compare with.
    :returns: The minimum distance found, never above 1.0 (max diversity);
              1.0 is also returned when there is no pair to compare.
    """
    closest = 1.0
    for pair in product(lefts, rights):
        candidate = compare_single_path(*pair)
        if candidate < closest:
            closest = candidate

        # Optimization: often a low value shows up early, so stop scanning
        # once the minimum is already (float-)equal to zero.
        if float_cmp(closest, 0.0):
            break

    return closest
def test_weight(self):
    """Verify the ``distance`` of ``Distance`` objects built from mappings."""
    def overall(mapping):
        # Build a Distance on the test session and read back its value.
        return Distance(self._session, mapping).distance

    self.assertAlmostEqual(overall({'genre': 1.0}), 1.0)
    self.assertAlmostEqual(overall({'random': 1.0}), 1.0)
    self.assertAlmostEqual(overall({'genre': 1.0, 'random': 1.0}), 1.0)
    self.assertAlmostEqual(overall({'genre': 1.0, 'random': 0.0}), 5 / 6)
    self.assertAlmostEqual(overall({'genre': 0.0, 'random': 0.0}), 0.0)

    # Compute it manually:
    self.assertTrue(
        float_cmp(
            overall({'genre': 0.5, 'random': 0.1}),
            (0.5 * 0.5 + 0.1 * 0.1) / 0.6,
        )
    )
def do_compute(self, lefts, rights):
    """Compute a distance between two ``(language, keyword sets)`` pairs.

    :param lefts: A two-element sequence ``(language, keyword_sets)`` or an
                  empty/falsy value.
    :param rights: Same layout as ``lefts``.
    :returns: 1.0 if either side is falsy or the languages differ;
              otherwise 0.67 times the smallest keyword-set distance.
    """
    if not (lefts and rights):
        return 1.0

    left_lang, lefts = lefts
    right_lang, rights = rights

    # Different languages are treated as maximally distant.
    if right_lang != left_lang:
        return 1.0

    min_distance = 1.0
    for first, second in product(lefts, rights):
        shared = len(first & second)
        # Divide by at least 4 so very small sets do not count as a
        # perfect match too easily.
        denominator = max(4, len(first), len(second))
        distance = 1.0 - shared / denominator

        if distance < min_distance:
            min_distance = distance

        # A (float-)zero distance cannot be improved upon.
        if float_cmp(distance, 0.0):
            break

    # NOTE(review): because of the early return above, this mismatch flag
    # is expected to be False here, capping same-language results at 0.67
    # -- confirm that this is intended.
    language_mismatch = not right_lang == left_lang
    return 0.67 * min_distance + 0.33 * language_mismatch
def full_cross_compare(expected):
    """Assert the distance of ``a`` and ``b`` in every combination.

    ``a`` against ``b`` must give ``expected`` in both orders, while each
    value compared with itself must give 0.0.
    """
    combinations = (
        (a, b, expected),
        (b, a, expected),
        (a, a, 0.0),
        (b, b, 0.0),
    )
    for lhs, rhs, wanted in combinations:
        self.assertTrue(float_cmp(calc.compute(lhs, rhs), wanted))
def __eq__(self, other):
    """Compare with ``other`` by ``distance`` using ``float_cmp``.

    :param other: The object to compare with.
    :returns: The result of ``float_cmp`` on both ``distance`` values, or
              ``NotImplemented`` if ``other`` has no ``distance`` attribute.
    """
    # NOTE(review): a class defining __eq__ without __hash__ is unhashable
    # in Python 3 -- check whether the enclosing class needs a __hash__.
    try:
        other_distance = other.distance
    except AttributeError:
        # Not comparable: let Python fall back to the reflected comparison
        # instead of raising from within ``==``.
        return NotImplemented
    return float_cmp(self.distance, other_distance)