def get_disagreement(self, ock):
    """
    Return the distance-weighted total of ock divided by its plain total.

    Every ordered pair (f1, f2) taken from self.flat_data is visited twice:
    once to add up ock[f1][f2], and once to add up
    ock[f1][f2] * Util.distance(f1, f2).

    @param ock: two-level mapping indexed as ock[f1][f2] for all items of
        self.flat_data (presumably the observed coincidence counts -- confirm
        against the caller)
    @return: the distance-weighted sum divided by the unweighted sum
    @raise ZeroDivisionError: when the unweighted sum is zero, e.g. when
        self.flat_data is empty
    """
    # Row-wise totals of the raw entries; consumed by sum() right away.
    plain_rows = (
        sum(ock[row][col] for col in self.flat_data)
        for row in self.flat_data
    )
    total = sum(plain_rows)

    # Same traversal, each entry scaled by the distance between its indices.
    weighted_rows = (
        sum(ock[row][col] * Util.distance(row, col)
            for col in self.flat_data)
        for row in self.flat_data
    )
    weighted_total = sum(weighted_rows)

    return weighted_total / total
def test_distance(self):
    """
    Check Util.distance on empty, empty-vs-non-empty and overlapping items.

    Asserted expectations:
      - distance((), ()) is 0
      - distance((), ("Funk", "Rock")) is 1.0
      - distance(("Funk", "Rock"), ("Funk", "Jazz", "Rock")) is 0.2 to five
        decimal places
      - the distance is the same with its arguments swapped

    Each computed value is also printed for manual inspection.
    """
    # The single-argument parenthesised print form gives the same output
    # under the Python 2 print statement and the Python 3 print function;
    # print("") stands in for a bare `print` (blank line) for that reason.
    print("")
    print("{0}testing distance{0}".format("*" * 6))

    d1 = ()
    empty_vs_empty = Util.distance(d1, d1)
    print("distance({{}}, {{}}) = {0}".format(empty_vs_empty))
    # assertEqual replaces assertEquals, which is a deprecated alias.
    self.assertEqual(empty_vs_empty, 0,
                     "distance({Empty}, {Empty}) = 0")

    d2 = ("Funk", "Rock")
    empty_vs_pair = Util.distance(d1, d2)
    # The label now names the operands actually compared (d1 is empty);
    # it previously read "distance({Funk}, {Rock})".
    print("distance({{}}, {{Funk, Rock}}) = {0}".format(empty_vs_pair))
    self.assertEqual(empty_vs_pair, 1.0,
                     "distance({Empty}, {Rock, Funk}) = 1")

    d3 = ("Funk", "Rock")
    d4 = ("Funk", "Jazz", "Rock")
    pair_vs_triple = Util.distance(d3, d4)
    print("distance({{Funk, Rock}}, {{Funk, Rock, Jazz}}) = {0}".format(
        pair_vs_triple))
    self.assertAlmostEqual(
        pair_vs_triple, 0.2, 5,
        "distance({Funk, Rock}, {Funk, Rock, Jazz}) = 0.2")
    self.assertEqual(pair_vs_triple, Util.distance(d4, d3),
                     "distance is symmetric")

    print("{0}tested distance{0}".format("*" * 6))
    print("")
def get_single_case_ed(self, item1, item2, p, n, d):
    """
    Compute the expected disagreement of the ordered pair (item1, item2).

    @param item1: multi-value item1 (compared against () for the empty item)
    @param item2: multi-value item2 (compared against () for the empty item)
    @param p: output of get_num_p; looked up by item length
    @param n: output of get_single_item_n; looked up by each single value
        and by () for the empty item
    @param d: output of get_ways_of_pair; looked up as
        d[len(item1)][len(item2)]
    @return: expected disagreement of item1 and item2, or 0 when the length
        of either item is not a key of p (note: this is not symmetric, i.e.,
        get_single_case_ed(item1, item2) != get_single_case_ed(item2, item1))
    """
    size1 = len(item1)
    size2 = len(item2)

    # `in` replaces dict.has_key(), which is deprecated in Python 2 and
    # absent from Python 3 dictionaries.
    if size1 not in p or size2 not in p:
        return 0
    ratio1 = p[size1]
    ratio2 = p[size2]

    # A zero entry would make the final division fail; 1 is used instead.
    denominator = d[size1][size2]
    if denominator == 0:
        denominator = 1

    if item1 == () and item2 == ():
        numerator = n[()] * (n[()] - 1)
    elif item1 == ():
        numerator = n[()] * Util.prod([n[x] for x in item2])
    elif item2 == ():
        numerator = Util.prod([n[x] for x in item1]) * n[()]
    else:
        # Values present in both items contribute (n[x] - 1).
        shared = [x for x in item1 if x in item2]
        # Values of item2 that are keys of n but not members of item1.
        # This equals intersecting item2 with the complement of item1 over
        # n's keys, without rebuilding that complement list on every call.
        only_in_item2 = [x for x in item2 if x in n and x not in item1]
        numerator = (Util.prod([n[x] for x in item1])
                     * Util.prod([n[x] for x in only_in_item2])
                     * Util.prod([(n[x] - 1) for x in shared]))

    delta = Util.distance(item1, item2)
    return ratio1 * ratio2 * numerator * delta / denominator
def get_single_case_ed(self, item1, item2, p, n, d):
    """
    Compute the expected disagreement of the ordered pair (item1, item2).

    @param item1: multi-value item1 (compared against () for the empty item)
    @param item2: multi-value item2 (compared against () for the empty item)
    @param p: output of get_num_p; looked up by item length
    @param n: output of get_single_item_n; looked up by each single value
        and by () for the empty item
    @param d: output of get_ways_of_pair; looked up as
        d[len(item1)][len(item2)]
    @return: expected disagreement of item1 and item2, or 0 when the length
        of either item is not a key of p (note: this is not symmetric, i.e.,
        get_single_case_ed(item1, item2) != get_single_case_ed(item2, item1))
    """
    size1 = len(item1)
    size2 = len(item2)

    # `in` replaces dict.has_key(), which is deprecated in Python 2 and
    # absent from Python 3 dictionaries.
    if size1 not in p or size2 not in p:
        return 0
    ratio1 = p[size1]
    ratio2 = p[size2]

    # A zero entry would make the final division fail; 1 is used instead.
    denominator = d[size1][size2]
    if denominator == 0:
        denominator = 1

    if item1 == () and item2 == ():
        numerator = n[()] * (n[()] - 1)
    elif item1 == ():
        numerator = n[()] * Util.prod([n[x] for x in item2])
    elif item2 == ():
        numerator = Util.prod([n[x] for x in item1]) * n[()]
    else:
        # Values present in both items contribute (n[x] - 1).
        shared = [x for x in item1 if x in item2]
        # Values of item2 that are keys of n but not members of item1.
        # This equals intersecting item2 with the complement of item1 over
        # n's keys, without rebuilding that complement list on every call.
        only_in_item2 = [x for x in item2 if x in n and x not in item1]
        numerator = (Util.prod([n[x] for x in item1])
                     * Util.prod([n[x] for x in only_in_item2])
                     * Util.prod([(n[x] - 1) for x in shared]))

    delta = Util.distance(item1, item2)
    return ratio1 * ratio2 * numerator * delta / denominator
def test_distance(self):
    """
    Check Util.distance on empty, empty-vs-non-empty and overlapping items.

    Asserted expectations:
      - distance((), ()) is 0
      - distance((), ("Funk", "Rock")) is 1.0
      - distance(("Funk", "Rock"), ("Funk", "Jazz", "Rock")) is 0.2 to five
        decimal places
      - the distance is the same with its arguments swapped

    Each computed value is also printed for manual inspection.
    """
    # The single-argument parenthesised print form gives the same output
    # under the Python 2 print statement and the Python 3 print function;
    # print("") stands in for a bare `print` (blank line) for that reason.
    print("")
    print("{0}testing distance{0}".format("*" * 6))

    d1 = ()
    empty_vs_empty = Util.distance(d1, d1)
    print("distance({{}}, {{}}) = {0}".format(empty_vs_empty))
    # assertEqual replaces assertEquals, which is a deprecated alias.
    self.assertEqual(empty_vs_empty, 0,
                     "distance({Empty}, {Empty}) = 0")

    d2 = ("Funk", "Rock")
    empty_vs_pair = Util.distance(d1, d2)
    # The label now names the operands actually compared (d1 is empty);
    # it previously read "distance({Funk}, {Rock})".
    print("distance({{}}, {{Funk, Rock}}) = {0}".format(empty_vs_pair))
    self.assertEqual(empty_vs_pair, 1.0,
                     "distance({Empty}, {Rock, Funk}) = 1")

    d3 = ("Funk", "Rock")
    d4 = ("Funk", "Jazz", "Rock")
    pair_vs_triple = Util.distance(d3, d4)
    print("distance({{Funk, Rock}}, {{Funk, Rock, Jazz}}) = {0}".format(
        pair_vs_triple))
    self.assertAlmostEqual(
        pair_vs_triple, 0.2, 5,
        "distance({Funk, Rock}, {Funk, Rock, Jazz}) = 0.2")
    self.assertEqual(pair_vs_triple, Util.distance(d4, d3),
                     "distance is symmetric")

    print("{0}tested distance{0}".format("*" * 6))
    print("")
def get_disagreement(self, ock):
    """
    Return the distance-weighted total of ock divided by its plain total.

    Every ordered pair (f1, f2) taken from self.flat_data is visited twice:
    once to add up ock[f1][f2], and once to add up
    ock[f1][f2] * Util.distance(f1, f2).

    @param ock: two-level mapping indexed as ock[f1][f2] for all items of
        self.flat_data (presumably the observed coincidence counts -- confirm
        against the caller)
    @return: the distance-weighted sum divided by the unweighted sum
    @raise ZeroDivisionError: when the unweighted sum is zero, e.g. when
        self.flat_data is empty
    """
    # Row-wise totals of the raw entries; consumed by sum() right away.
    plain_rows = (
        sum(ock[row][col] for col in self.flat_data)
        for row in self.flat_data
    )
    total = sum(plain_rows)

    # Same traversal, each entry scaled by the distance between its indices.
    weighted_rows = (
        sum(ock[row][col] * Util.distance(row, col)
            for col in self.flat_data)
        for row in self.flat_data
    )
    weighted_total = sum(weighted_rows)

    return weighted_total / total