def count_ways_to_obtain_largest_subpopulation(n, m):
    """Tally, per maximum element, the ways to obtain that largest group.

    Inputs
      :n: total number (e.g., total number of highest scoring results)
      :m: number of non-negative integers to sum to n (e.g., number of
          workers)

    Output
      :ways: defaultdict(int) keyed by the maximum value of a multiset.
          Each value accumulates, over every multiset produced by
          Multiset.uniq_msets(n, m) that shares that maximum, the product
          multinomial_coeff(multiset) * number_of_arrangements(multiset).

    Implementation
      The tally is a plain accumulation keyed by max(), so it does not
      depend on the order in which uniq_msets() produces the multisets.
    """
    helper = Multiset(n)
    tally = defaultdict(int)
    for group in helper.uniq_msets(n, m):
        largest = max(group)
        orderings = helper.multinomial_coeff(group)
        arrangements = helper.number_of_arrangements(group)
        tally[largest] += orderings * arrangements
    return tally
def run_example():
    """Print a short and a long worked example.

    The sample session below is deliberately inactive as a doctest (note
    the two-character ``>>`` prompt); the long example takes a while.

    ::

        >> run_example() # ADD > to re-activate doctest (runs in ~20 sec)
        Short example, involving 108 multisets
        Printing the probability of missing 1 or more results from the
        top 20 results, given 4 workers, as a function of the number of
        top results requested per worker.
        Probability of 5 or more of top 20 from one of 4 sets is 1.0000e+00.
        Probability of 6 or more of top 20 from one of 4 sets is 9.8933e-01.
        Probability of 7 or more of top 20 from one of 4 sets is 7.5516e-01.
        Probability of 8 or more of top 20 from one of 4 sets is 3.9874e-01.
        Probability of 9 or more of top 20 from one of 4 sets is 1.6346e-01.
        Probability of 10 or more of top 20 from one of 4 sets is 5.5457e-02.
        Probability of 11 or more of top 20 from one of 4 sets is 1.5769e-02.
        Probability of 12 or more of top 20 from one of 4 sets is 3.7416e-03.
        Probability of 13 or more of top 20 from one of 4 sets is 7.3482e-04.
        Probability of 14 or more of top 20 from one of 4 sets is 1.1805e-04.
        Probability of 15 or more of top 20 from one of 4 sets is 1.5252e-05.
        Probability of 16 or more of top 20 from one of 4 sets is 1.5461e-06.
        Probability of 17 or more of top 20 from one of 4 sets is 1.1842e-07.
        Probability of 18 or more of top 20 from one of 4 sets is 6.4429e-09.
        Probability of 19 or more of top 20 from one of 4 sets is 2.2192e-10.
        Probability of 20 or more of top 20 from one of 4 sets is 3.6380e-12.
        computing longer example, involving 6292069 multisets ...
        Longer example
        Chance of omitting documents from top 100 when returning 20 results
        from each of 10 workers is 8.0721981476e-03
    """
    mset = Multiset()

    # Short example: 20 top results spread over 4 workers.
    short_total, short_workers = 20, 4
    short_header = (
        "Short example, involving %d multisets "
        "Printing the probability of missing 1 or more results from the "
        "top %d results, given %d workers, as a function of the number of "
        "top results requested per worker."
    )
    short_count = mset.num_uniq_msets(total=short_total, length=short_workers)
    print(short_header % (short_count, short_total, short_workers))
    print_cumulative_prob(n=short_total, m=short_workers)

    # Long example: 100 top results spread over 10 workers, 20 per worker.
    long_total, long_workers = 100, 10
    per_worker = 20
    long_count = mset.num_uniq_msets(total=long_total, length=long_workers)
    print('computing longer example, involving %d multisets ...' % long_count)

    # A result is only omitted once a worker holds more than was requested,
    # hence the threshold of one past the per-worker request.
    threshold = per_worker + 1
    long_template = ' '.join(['Longer example\nChance of omitting documents',
                              'from top %d when returning %d results\nfrom each of',
                              '%d workers is %0.10e'])
    for stats in compute_probabilities(n=long_total, m=long_workers,
                                       t=threshold):
        if stats['count'] != threshold:
            continue
        print(long_template
              % (long_total, per_worker, long_workers, stats['p']))
def compute_probabilities(n, m, t=()):
    """Compute probability that a result is missed.

    This is a generator: it yields one stats dict per (count, ways) pair
    produced by Multiset.num_ways(n, m).

    Inputs
      :n: total number (e.g., total number of highest scoring results)
      :m: number of non-negative integers to sum to n (e.g., number of
          workers, each returning an integer number of results)
      :t: optional threshold to short-circuit computation

          * integer t is the maximum number of results to return per
            worker
          * any value rejected by is_nonneg_int() (including the default
            ``()``) means "no threshold": every count is yielded

    Output
      :stats: dict (a fresh copy per yield) containing fields:

          * count is the number of results returned per worker
          * n is the total number of highest scoring results
          * m is the number of workers
          * p is the cumulative probability that a result is missed; it
            starts at 1.0 (m ** n over itself) and shrinks by
            ways / m ** n after each yielded count

    With a threshold, iteration ends right after the entry whose count
    equals t is yielded, or, if a count greater than t shows up first,
    before yielding it.
    """
    # An explicit None replaces the old "t = ()" sentinel, which only worked
    # because Python 2 orders every int below every tuple; Python 3 raises
    # TypeError for that comparison.
    limit = t if is_nonneg_int(t) else None

    numerator = m ** n
    denominator = float(numerator)
    stats = {'n': n, 'm': m, 'count': 0, 'p': 0}
    mset = Multiset(n)
    for cnt, ways in mset.num_ways(n, m):
        if limit is not None and cnt > limit:
            # Past the threshold: finish with a plain return.  Raising
            # StopIteration inside a generator is converted to RuntimeError
            # by PEP 479 (Python 3.7+).
            return
        stats['count'] = cnt
        stats['p'] = numerator / denominator
        yield stats.copy()
        if limit is not None and cnt == limit:
            return
        # Remove the ways accounted for by this count before the next one.
        numerator -= ways
def setUp(self):
    """Create a new Multiset and store it on ``self.mset``."""
    self.mset = Multiset()
class TestMultisetMath(unittest.TestCase):
    """Test Multiset calculations."""

    # Append custom failure messages to the default ones instead of
    # replacing them (this is already the default on Python 3).
    longMessage = True

    def setUp(self):
        """Create a new Multiset for each test."""
        self.mset = Multiset()

    def tearDown(self):
        """Reset the Multiset via clear() and drop the reference."""
        self.mset.clear()
        self.mset = None

    def test_factorial_random_inputs(self):
        """Test factorial random inputs."""
        # The sample is unseeded, so the failing value is reported to make
        # a failure reproducible.
        for val in random.sample(range(300), 5):
            result = self.mset.factorial(val)
            expected = math.factorial(val)
            self.assertEqual(result, expected, 'factorial(%d)' % val)

    def test_factorial_bad_inputs(self):
        """Test factorial bad inputs."""
        for value in (-1, None):
            self.assertRaises(ValueError, self.mset.factorial, value)

    def test_factorial_small_inputs(self):
        """Test factorial small inputs."""
        pairs = ((0, 1), (1, 1), (2, 2), (3, 6))
        for (value, expected) in pairs:
            self.assertEqual(self.mset.factorial(value), expected,
                             'factorial(%d)' % value)

    def test_clear_method(self):
        """Test clear method."""
        self.mset.factorial(10)
        self.assertGreater(len(self.mset._data), 10)
        self.mset.clear()
        self.assertEqual(len(self.mset._data), 1)

    def test_is_nonneg_int_on_several_inputs(self):
        """Test is_nonneg_int on several inputs."""
        pairs = ((None, False), (-1, False), (0, True), (1, True),
                 (5.0, True))
        for (value, expected) in pairs:
            self.assertEqual(is_nonneg_int(value), expected,
                             'is_nonneg_int(%r)' % (value,))

    def test_uniq_msets_on_bad_input(self):
        """Test uniq_msets on bad input."""
        def collect(total, length):
            # uniq_msets is consumed with list() so that any error raised
            # during iteration surfaces inside assertRaises.
            return list(self.mset.uniq_msets(total, length))

        self.assertRaises(TypeError, collect, 10, None)
        self.assertRaises(ValueError, collect, -3, 2)

    def test_uniq_msets_on_several_inputs(self):
        """Test uniq_msets on several inputs."""
        pairs = {(10, 0): [()], (10, 1): [(10,)]}
        for (value, expected) in pairs.items():
            result = list(self.mset.uniq_msets(*value))
            self.assertEqual(result, expected,
                             'uniq_msets%r' % (value,))

    def test_uniq_msets_contains_unique_elements(self):
        """Test uniq_msets(5, 2) yields exactly the expected multisets."""
        expected = {(3, 2), (4, 1), (5, 0)}
        result = set(self.mset.uniq_msets(5, 2))
        self.assertEqual(result, expected)

    def test_uniq_msets_contains_correct_number_of_elements(self):
        """Test uniq_msets(5, 2) yields no duplicate multisets."""
        result = list(self.mset.uniq_msets(5, 2))
        self.assertEqual(len(result), len(set(result)))

    def test_num_ways_n_tuple_key(self):
        """Test num_ways n tuple key."""
        expected = (4, 5, 5)
        num_ways = self.mset.num_ways
        result = tuple(len(list(num_ways(4, 4, x))) for x in range(1, 4))
        self.assertEqual(result, expected)

    def test_number_of_arrangements_bad_input(self):
        """Test number_of_arrangements bad input."""
        num_arrange = self.mset.number_of_arrangements
        self.assertRaises(TypeError, num_arrange, 5)
        self.assertRaises(TypeError, num_arrange, None)
        self.assertRaises(ValueError, num_arrange, ())

    def test_number_of_arrangements_good_input(self):
        """Test number_of_arrangements good input."""
        pairs = (((3,), 1), ((2, 3), 2), ((1, 2, 3), 6))
        num_arrange = self.mset.number_of_arrangements
        for (value, expected) in pairs:
            self.assertEqual(num_arrange(value), expected,
                             'number_of_arrangements(%r)' % (value,))

    def test_iterate_through_number_of_arrangements_list_input(self):
        """Test iterate through number_of_arrangements list input."""
        groups = [(0, 5), (1, 4), (2, 3)]
        result = {grp: self.mset.number_of_arrangements(grp)
                  for grp in groups}
        expected = {(0, 5): 2, (1, 4): 2, (2, 3): 2}
        self.assertEqual(result, expected)

    def test_iterate_through_number_of_arrangements_by_uniq_msets(self):
        """Test iterate through number_of_arrangements by uniq_msets."""
        result = {grp: self.mset.number_of_arrangements(grp)
                  for grp in self.mset.uniq_msets(5, 2)}
        expected = {(5, 0): 2, (4, 1): 2, (3, 2): 2}
        self.assertEqual(result, expected)

    def test_multinomial_coeff_bad_inputs(self):
        """Test multinomial_coeff bad inputs."""
        m_coeff = self.mset.multinomial_coeff
        self.assertRaises(TypeError, m_coeff, None)
        self.assertRaises(ValueError, m_coeff, ())

    def test_multinomial_coeff_good_inputs(self):
        """Test multinomial_coeff good inputs."""
        pairs = (((0,), 1), ((3,), 1), ((2, 3), 10), ((1, 2, 3), 60))
        m_coeff = self.mset.multinomial_coeff
        for (value, expected) in pairs:
            self.assertEqual(m_coeff(value), expected,
                             'multinomial_coeff(%r)' % (value,))

    def test_number_arrangements_of_uniq_msets_is_mset_number(self):
        """Test number_arrangements of uniq_msets is mset number."""
        for n in (5, 15, 30):
            for m in (3, 6):
                l1 = self.mset.multiset_number(n, m)
                l2 = sum(self.mset.number_of_arrangements(ms)
                         for ms in self.mset.uniq_msets(n, m))
                self.assertEqual(l1, l2, 'n=%d, m=%d' % (n, m))

    def test_num_uniq_msets_is_equal_to_calculated_number(self):
        """Test num_uniq_msets is equal to calculated number."""
        for n in (5, 15, 30):
            for m in (3, 6):
                l1 = self.mset.num_uniq_msets(n, m)
                l2 = sum(1 for _ in self.mset.uniq_msets(n, m))
                self.assertEqual(l1, l2, 'n=%d, m=%d' % (n, m))
class TestMultisetMath(unittest.TestCase):
    """Test Multiset calculations."""

    # Append custom failure messages to the default ones instead of
    # replacing them (this is already the default on Python 3).
    longMessage = True

    def setUp(self):
        """Create a new Multiset for each test."""
        self.mset = Multiset()

    def tearDown(self):
        """Reset the Multiset via clear() and drop the reference."""
        self.mset.clear()
        self.mset = None

    def test_factorial_random_inputs(self):
        """Test factorial random inputs."""
        # The sample is unseeded, so the failing value is reported to make
        # a failure reproducible.
        for val in random.sample(range(300), 5):
            result = self.mset.factorial(val)
            expected = math.factorial(val)
            self.assertEqual(result, expected, 'factorial(%d)' % val)

    def test_factorial_bad_inputs(self):
        """Test factorial bad inputs."""
        for value in (-1, None):
            self.assertRaises(ValueError, self.mset.factorial, value)

    def test_factorial_small_inputs(self):
        """Test factorial small inputs."""
        pairs = ((0, 1), (1, 1), (2, 2), (3, 6))
        for (value, expected) in pairs:
            self.assertEqual(self.mset.factorial(value), expected,
                             'factorial(%d)' % value)

    def test_clear_method(self):
        """Test clear method."""
        self.mset.factorial(10)
        self.assertGreater(len(self.mset._data), 10)
        self.mset.clear()
        self.assertEqual(len(self.mset._data), 1)

    def test_is_nonneg_int_on_several_inputs(self):
        """Test is_nonneg_int on several inputs."""
        pairs = ((None, False), (-1, False), (0, True), (1, True),
                 (5.0, True))
        for (value, expected) in pairs:
            self.assertEqual(is_nonneg_int(value), expected,
                             'is_nonneg_int(%r)' % (value,))

    def test_uniq_msets_on_bad_input(self):
        """Test uniq_msets on bad input."""
        def collect(total, length):
            # uniq_msets is consumed with list() so that any error raised
            # during iteration surfaces inside assertRaises.
            return list(self.mset.uniq_msets(total, length))

        self.assertRaises(TypeError, collect, 10, None)
        self.assertRaises(ValueError, collect, -3, 2)

    def test_uniq_msets_on_several_inputs(self):
        """Test uniq_msets on several inputs."""
        pairs = {(10, 0): [()], (10, 1): [(10,)]}
        for (value, expected) in pairs.items():
            result = list(self.mset.uniq_msets(*value))
            self.assertEqual(result, expected,
                             'uniq_msets%r' % (value,))

    def test_uniq_msets_contains_unique_elements(self):
        """Test uniq_msets(5, 2) yields exactly the expected multisets."""
        expected = {(3, 2), (4, 1), (5, 0)}
        result = set(self.mset.uniq_msets(5, 2))
        self.assertEqual(result, expected)

    def test_uniq_msets_contains_correct_number_of_elements(self):
        """Test uniq_msets(5, 2) yields no duplicate multisets."""
        result = list(self.mset.uniq_msets(5, 2))
        self.assertEqual(len(result), len(set(result)))

    def test_num_ways_n_tuple_key(self):
        """Test num_ways n tuple key."""
        expected = (4, 5, 5)
        num_ways = self.mset.num_ways
        result = tuple(len(list(num_ways(4, 4, x))) for x in range(1, 4))
        self.assertEqual(result, expected)

    def test_number_of_arrangements_bad_input(self):
        """Test number_of_arrangements bad input."""
        num_arrange = self.mset.number_of_arrangements
        self.assertRaises(TypeError, num_arrange, 5)
        self.assertRaises(TypeError, num_arrange, None)
        self.assertRaises(ValueError, num_arrange, ())

    def test_number_of_arrangements_good_input(self):
        """Test number_of_arrangements good input."""
        pairs = (((3,), 1), ((2, 3), 2), ((1, 2, 3), 6))
        num_arrange = self.mset.number_of_arrangements
        for (value, expected) in pairs:
            self.assertEqual(num_arrange(value), expected,
                             'number_of_arrangements(%r)' % (value,))

    def test_iterate_through_number_of_arrangements_list_input(self):
        """Test iterate through number_of_arrangements list input."""
        groups = [(0, 5), (1, 4), (2, 3)]
        result = {grp: self.mset.number_of_arrangements(grp)
                  for grp in groups}
        expected = {(0, 5): 2, (1, 4): 2, (2, 3): 2}
        self.assertEqual(result, expected)

    def test_iterate_through_number_of_arrangements_by_uniq_msets(self):
        """Test iterate through number_of_arrangements by uniq_msets."""
        result = {grp: self.mset.number_of_arrangements(grp)
                  for grp in self.mset.uniq_msets(5, 2)}
        expected = {(5, 0): 2, (4, 1): 2, (3, 2): 2}
        self.assertEqual(result, expected)

    def test_multinomial_coeff_bad_inputs(self):
        """Test multinomial_coeff bad inputs."""
        m_coeff = self.mset.multinomial_coeff
        self.assertRaises(TypeError, m_coeff, None)
        self.assertRaises(ValueError, m_coeff, ())

    def test_multinomial_coeff_good_inputs(self):
        """Test multinomial_coeff good inputs."""
        pairs = (((0,), 1), ((3,), 1), ((2, 3), 10), ((1, 2, 3), 60))
        m_coeff = self.mset.multinomial_coeff
        for (value, expected) in pairs:
            self.assertEqual(m_coeff(value), expected,
                             'multinomial_coeff(%r)' % (value,))

    def test_number_arrangements_of_uniq_msets_is_mset_number(self):
        """Test number_arrangements of uniq_msets is mset number."""
        for n in (5, 15, 30):
            for m in (3, 6):
                l1 = self.mset.multiset_number(n, m)
                l2 = sum(self.mset.number_of_arrangements(ms)
                         for ms in self.mset.uniq_msets(n, m))
                self.assertEqual(l1, l2, 'n=%d, m=%d' % (n, m))

    def test_num_uniq_msets_is_equal_to_calculated_number(self):
        """Test num_uniq_msets is equal to calculated number."""
        for n in (5, 15, 30):
            for m in (3, 6):
                l1 = self.mset.num_uniq_msets(n, m)
                l2 = sum(1 for _ in self.mset.uniq_msets(n, m))
                self.assertEqual(l1, l2, 'n=%d, m=%d' % (n, m))