예제 #1
0
def count_ways_to_obtain_largest_subpopulation(n, m):
    """Tally weighted multiset counts, grouped by largest element.

    Inputs
      :n: total number (e.g., total number of highest scoring results)
      :m: number of non-negative integers to sum to n (e.g., number of
          workers)

    Output
      :ways: defaultdict(int) keyed by the largest element of each
             multiset yielded by Multiset.uniq_msets(n, m).  The value
             stored under a key is the sum, over every multiset sharing
             that largest element, of
             multinomial_coeff(multiset) * number_of_arrangements(multiset).

    Implementation
        Contributions are accumulated per key, so the result does not
        depend on the order in which uniq_msets() yields its tuples.

    """
    multiset = Multiset(n)
    totals = defaultdict(int)
    for group in multiset.uniq_msets(n, m):
        largest = max(group)
        weight = (multiset.multinomial_coeff(group) *
                  multiset.number_of_arrangements(group))
        totals[largest] += weight
    return totals
예제 #2
0
def run_example():
    """Print the output of a short and a longer worked example.

    The short example uses 20 top results and 4 workers: it prints a
    header and then delegates to print_cumulative_prob().  The longer
    example uses 100 top results, 10 workers and 20 results requested
    per worker: it prints the probability that compute_probabilities()
    reports for a count of 21 (one more than the per-worker request).

    Expected output::

        >> run_example()    # ADD > to re-activate doctest (runs in ~20 sec)
        Short example, involving 108 multisets
        Printing the probability of missing 1 or more results from the top 20
        results, given 4 workers, as a function of the number of top results
        requested per worker.
        Probability of  5 or more of top 20 from one of 4 sets is 1.0000e+00.
        Probability of  6 or more of top 20 from one of 4 sets is 9.8933e-01.
        Probability of  7 or more of top 20 from one of 4 sets is 7.5516e-01.
        Probability of  8 or more of top 20 from one of 4 sets is 3.9874e-01.
        Probability of  9 or more of top 20 from one of 4 sets is 1.6346e-01.
        Probability of 10 or more of top 20 from one of 4 sets is 5.5457e-02.
        Probability of 11 or more of top 20 from one of 4 sets is 1.5769e-02.
        Probability of 12 or more of top 20 from one of 4 sets is 3.7416e-03.
        Probability of 13 or more of top 20 from one of 4 sets is 7.3482e-04.
        Probability of 14 or more of top 20 from one of 4 sets is 1.1805e-04.
        Probability of 15 or more of top 20 from one of 4 sets is 1.5252e-05.
        Probability of 16 or more of top 20 from one of 4 sets is 1.5461e-06.
        Probability of 17 or more of top 20 from one of 4 sets is 1.1842e-07.
        Probability of 18 or more of top 20 from one of 4 sets is 6.4429e-09.
        Probability of 19 or more of top 20 from one of 4 sets is 2.2192e-10.
        Probability of 20 or more of top 20 from one of 4 sets is 3.6380e-12.
        computing longer example, involving 6292069 multisets ...
        Longer example
        Chance of omitting documents from top 100 when returning 20 results
        from each of 10 workers is 8.0721981476e-03

    """

    multiset = Multiset()

    # --- short example ---------------------------------------------------
    short_total, short_workers = 20, 4
    short_count = multiset.num_uniq_msets(total=short_total,
                                          length=short_workers)
    short_header = """Short example, involving %d multisets
Printing the probability of missing 1 or more results from the top %d
results, given %d workers, as a function of the number of top results
requested per worker."""
    # A single parenthesised expression prints identically whether print
    # is a statement or a function.
    print(short_header % (short_count, short_total, short_workers))
    print_cumulative_prob(n=short_total, m=short_workers)

    # --- longer example --------------------------------------------------
    long_total, long_workers = 100, 10
    per_worker = 20
    # add one because result is omitted only when set size exceeds request
    cutoff = per_worker + 1
    long_count = multiset.num_uniq_msets(total=long_total,
                                         length=long_workers)
    print('computing longer example, involving %d multisets ...' % long_count)
    summary = ' '.join(['Longer example\nChance of omitting documents',
                        'from top %d when returning %d results\nfrom each of',
                        '%d workers is %0.10e'])
    for stats in compute_probabilities(n=long_total, m=long_workers,
                                       t=cutoff):
        if stats['count'] != cutoff:
            continue
        print(summary % (long_total, per_worker, long_workers, stats['p']))
예제 #3
0
def compute_probabilities(n, m, t=()):
    """Yield the cumulative probability that a result is missed.

    Inputs
      :n: total number (e.g., total number of highest scoring results)
      :m: number of non-negative integers to sum to n (e.g., number of
          workers, each returning an integer number of results)
      :t: optional threshold to short-circuit computation
          * integer t is the maximum number of results to return per worker
          * any value rejected by is_nonneg_int(), including the default,
            disables the threshold so every count is reported

    Output
      Generator yielding one :stats: dict (a fresh copy each time) per
      (count, ways) pair produced by Multiset.num_ways(n, m), with fields:
          * count is the number of results returned per worker
          * n is the total number of highest scoring results
          * m is the number of workers
          * p is the cumulative probability that a result is missed,
            computed as (m ** n - sum of ways for all earlier counts)
            divided by m ** n

      Iteration ends after the item whose count equals t, or as soon as a
      count greater than t is seen (that item is not yielded).

    """

    # None means "no threshold".  The comparison below must not rely on
    # ordering an int against the () default, which only works on Python 2.
    limit = t if is_nonneg_int(t) else None

    remaining = m ** n
    total = float(remaining)
    stats = {'n': n, 'm': m, 'count': 0, 'p': 0}
    mset = Multiset(n)
    # NOTE(review): the early exits assume num_ways() yields counts in
    # increasing order -- confirm against Multiset.num_ways.
    for (cnt, ways) in mset.num_ways(n, m):
        # End the generator with a plain return: raising StopIteration
        # inside a generator is a RuntimeError on Python 3.7+ (PEP 479).
        if limit is not None and cnt > limit:
            return
        stats['count'] = cnt
        stats['p'] = remaining / total
        yield stats.copy()
        if limit is not None and cnt == limit:
            return
        # Outcomes whose largest count is exactly cnt no longer contribute
        # to the probability reported for larger counts.
        remaining -= ways
예제 #4
0
 def setUp(self):
     """Create the Multiset instance for the test and store it as self.mset."""
     self.mset = Multiset()
예제 #5
0
class TestMultisetMath(unittest.TestCase):
    """Exercise Multiset's combinatorial helpers and is_nonneg_int()."""

    def setUp(self):
        """Give each test its own Multiset instance."""
        self.mset = Multiset()

    def tearDown(self):
        """Call clear() on the instance, then drop the reference."""
        self.mset.clear()
        self.mset = None

    def test_factorial_random_inputs(self):
        """Compare factorial with math.factorial on five random values."""
        for number in random.sample(xrange(300), 5):
            self.assertEqual(self.mset.factorial(number),
                             math.factorial(number))

    def test_factorial_bad_inputs(self):
        """Check that factorial raises ValueError for -1 and None."""
        for bad_value in (-1, None):
            self.assertRaises(ValueError, self.mset.factorial, bad_value)

    def test_factorial_small_inputs(self):
        """Check factorial against known answers for 0 through 3."""
        known = ((0, 1), (1, 1), (2, 2), (3, 6))
        for (argument, answer) in known:
            self.assertEqual(self.mset.factorial(argument), answer)

    def test_clear_method(self):
        """Check that clear() leaves exactly one entry in _data."""
        self.mset.factorial(10)
        size_after_use = len(self.mset._data)
        self.assertTrue(size_after_use > 10)
        self.mset.clear()
        size_after_clear = len(self.mset._data)
        self.assertTrue(size_after_clear == 1)

    def test_is_nonneg_int_on_several_inputs(self):
        """Check is_nonneg_int on None, a negative, zero, one and 5.0."""
        cases = (
            (None, False),
            (-1, False),
            (0, True),
            (1, True),
            (5.0, True),
        )
        for (candidate, verdict) in cases:
            self.assertEqual(is_nonneg_int(candidate), verdict)

    def test_uniq_msets_on_bad_input(self):
        """Check the exceptions uniq_msets raises for bad arguments."""
        def collect(total, length):
            # Force the iteration so any exception actually surfaces.
            return list(self.mset.uniq_msets(total, length))

        self.assertRaises(TypeError, collect, 10, None)
        self.assertRaises(ValueError, collect, -3, 2)

    def test_uniq_msets_on_several_inputs(self):
        """Check uniq_msets output for lengths 0 and 1."""
        cases = (
            ((10, 0), [()]),
            ((10, 1), [(10, )]),
        )
        for (arguments, answer) in cases:
            (total, length) = arguments
            produced = list(self.mset.uniq_msets(total, length))
            self.assertEqual(produced, answer)

    def test_uniq_msets_contains_unique_elements(self):
        """Check the set of multisets produced for total 5, length 2."""
        produced = set(self.mset.uniq_msets(5, 2))
        self.assertEqual(produced, set([(3, 2), (4, 1), (5, 0)]))

    def test_uniq_msets_contains_correct_number_of_elements(self):
        """Check that uniq_msets(5, 2) yields no duplicate multisets."""
        produced = list(self.mset.uniq_msets(5, 2))
        distinct = set(produced)
        self.assertEqual(len(produced), len(distinct))

    def test_num_ways_n_tuple_key(self):
        """Check how many items num_ways(4, 4, x) yields for x = 1, 2, 3."""
        lengths = []
        for x in xrange(1, 4):
            lengths.append(len(list(self.mset.num_ways(4, 4, x))))
        self.assertEqual(tuple(lengths), (4, 5, 5))

    def test_number_of_arrangements_bad_input(self):
        """Check the exceptions number_of_arrangements raises."""
        failures = ((TypeError, 5), (TypeError, None), (ValueError, ()))
        for (exc_type, bad_value) in failures:
            self.assertRaises(
                exc_type, self.mset.number_of_arrangements, bad_value)

    def test_number_of_arrangements_good_input(self):
        """Check number_of_arrangements on tuples of distinct values."""
        cases = (((3, ), 1), ((2, 3), 2), ((1, 2, 3), 6))
        for (group, answer) in cases:
            self.assertEqual(
                self.mset.number_of_arrangements(group), answer)

    def test_iterate_through_number_of_arrangements_list_input(self):
        """Check number_of_arrangements over a hand-written group list."""
        counts = {}
        for group in [(0, 5), (1, 4), (2, 3)]:
            counts[group] = self.mset.number_of_arrangements(group)
        self.assertEqual(counts, {(0, 5): 2, (1, 4): 2, (2, 3): 2})

    def test_iterate_through_number_of_arrangements_by_uniq_msets(self):
        """Check number_of_arrangements over the output of uniq_msets."""
        counts = {}
        for group in self.mset.uniq_msets(5, 2):
            counts[group] = self.mset.number_of_arrangements(group)
        self.assertEqual(counts, {(5, 0): 2, (4, 1): 2, (3, 2): 2})

    def test_multinomial_coeff_bad_inputs(self):
        """Check the exceptions multinomial_coeff raises."""
        failures = ((TypeError, None), (ValueError, ()))
        for (exc_type, bad_value) in failures:
            self.assertRaises(
                exc_type, self.mset.multinomial_coeff, bad_value)

    def test_multinomial_coeff_good_inputs(self):
        """Check multinomial_coeff against known answers."""
        cases = (((0, ), 1), ((3, ), 1), ((2, 3), 10), ((1, 2, 3), 60))
        for (group, answer) in cases:
            self.assertEqual(self.mset.multinomial_coeff(group), answer)

    def test_number_arrangements_of_uniq_msets_is_mset_number(self):
        """Check summed arrangement counts against multiset_number."""
        for n in (5, 15, 30):
            for m in (3, 6):
                closed_form = self.mset.multiset_number(n, m)
                enumerated = 0
                for ms in self.mset.uniq_msets(n, m):
                    enumerated += self.mset.number_of_arrangements(ms)
                self.assertEqual(closed_form, enumerated)

    def test_num_uniq_msets_is_equal_to_calculated_number(self):
        """Check num_uniq_msets against a count of uniq_msets output."""
        for n in (5, 15, 30):
            for m in (3, 6):
                reported = self.mset.num_uniq_msets(n, m)
                enumerated = 0
                for _ in self.mset.uniq_msets(n, m):
                    enumerated += 1
                self.assertEqual(reported, enumerated)
예제 #6
0
 def setUp(self):
     """Create the Multiset instance for the test and store it as self.mset."""
     self.mset = Multiset()
예제 #7
0
class TestMultisetMath(unittest.TestCase):

    """Exercise Multiset's combinatorial helpers and is_nonneg_int()."""

    def setUp(self):
        """Give each test its own Multiset instance."""
        self.mset = Multiset()

    def tearDown(self):
        """Call clear() on the instance, then drop the reference."""
        self.mset.clear()
        self.mset = None

    def test_factorial_random_inputs(self):
        """Compare factorial with math.factorial on five random values."""
        for number in random.sample(xrange(300), 5):
            self.assertEqual(self.mset.factorial(number),
                             math.factorial(number))

    def test_factorial_bad_inputs(self):
        """Check that factorial raises ValueError for -1 and None."""
        for bad_value in (-1, None):
            self.assertRaises(ValueError, self.mset.factorial, bad_value)

    def test_factorial_small_inputs(self):
        """Check factorial against known answers for 0 through 3."""
        known = ((0, 1), (1, 1), (2, 2), (3, 6))
        for (argument, answer) in known:
            self.assertEqual(self.mset.factorial(argument), answer)

    def test_clear_method(self):
        """Check that clear() leaves exactly one entry in _data."""
        self.mset.factorial(10)
        size_after_use = len(self.mset._data)
        self.assertTrue(size_after_use > 10)
        self.mset.clear()
        size_after_clear = len(self.mset._data)
        self.assertTrue(size_after_clear == 1)

    def test_is_nonneg_int_on_several_inputs(self):
        """Check is_nonneg_int on None, a negative, zero, one and 5.0."""
        cases = (
            (None, False),
            (-1, False),
            (0, True),
            (1, True),
            (5.0, True),
        )
        for (candidate, verdict) in cases:
            self.assertEqual(is_nonneg_int(candidate), verdict)

    def test_uniq_msets_on_bad_input(self):
        """Check the exceptions uniq_msets raises for bad arguments."""
        def collect(total, length):
            # Force the iteration so any exception actually surfaces.
            return list(self.mset.uniq_msets(total, length))

        self.assertRaises(TypeError, collect, 10, None)
        self.assertRaises(ValueError, collect, -3, 2)

    def test_uniq_msets_on_several_inputs(self):
        """Check uniq_msets output for lengths 0 and 1."""
        cases = (
            ((10, 0), [()]),
            ((10, 1), [(10,)]),
        )
        for (arguments, answer) in cases:
            (total, length) = arguments
            produced = list(self.mset.uniq_msets(total, length))
            self.assertEqual(produced, answer)

    def test_uniq_msets_contains_unique_elements(self):
        """Check the set of multisets produced for total 5, length 2."""
        produced = set(self.mset.uniq_msets(5, 2))
        self.assertEqual(produced, set([(3, 2), (4, 1), (5, 0)]))

    def test_uniq_msets_contains_correct_number_of_elements(self):
        """Check that uniq_msets(5, 2) yields no duplicate multisets."""
        produced = list(self.mset.uniq_msets(5, 2))
        distinct = set(produced)
        self.assertEqual(len(produced), len(distinct))

    def test_num_ways_n_tuple_key(self):
        """Check how many items num_ways(4, 4, x) yields for x = 1, 2, 3."""
        lengths = []
        for x in xrange(1, 4):
            lengths.append(len(list(self.mset.num_ways(4, 4, x))))
        self.assertEqual(tuple(lengths), (4, 5, 5))

    def test_number_of_arrangements_bad_input(self):
        """Check the exceptions number_of_arrangements raises."""
        failures = ((TypeError, 5), (TypeError, None), (ValueError, ()))
        for (exc_type, bad_value) in failures:
            self.assertRaises(
                exc_type, self.mset.number_of_arrangements, bad_value)

    def test_number_of_arrangements_good_input(self):
        """Check number_of_arrangements on tuples of distinct values."""
        cases = (((3,), 1), ((2, 3), 2), ((1, 2, 3), 6))
        for (group, answer) in cases:
            self.assertEqual(
                self.mset.number_of_arrangements(group), answer)

    def test_iterate_through_number_of_arrangements_list_input(self):
        """Check number_of_arrangements over a hand-written group list."""
        counts = {}
        for group in [(0, 5), (1, 4), (2, 3)]:
            counts[group] = self.mset.number_of_arrangements(group)
        self.assertEqual(counts, {(0, 5): 2, (1, 4): 2, (2, 3): 2})

    def test_iterate_through_number_of_arrangements_by_uniq_msets(self):
        """Check number_of_arrangements over the output of uniq_msets."""
        counts = {}
        for group in self.mset.uniq_msets(5, 2):
            counts[group] = self.mset.number_of_arrangements(group)
        self.assertEqual(counts, {(5, 0): 2, (4, 1): 2, (3, 2): 2})

    def test_multinomial_coeff_bad_inputs(self):
        """Check the exceptions multinomial_coeff raises."""
        failures = ((TypeError, None), (ValueError, ()))
        for (exc_type, bad_value) in failures:
            self.assertRaises(
                exc_type, self.mset.multinomial_coeff, bad_value)

    def test_multinomial_coeff_good_inputs(self):
        """Check multinomial_coeff against known answers."""
        cases = (((0,), 1), ((3,), 1), ((2, 3), 10), ((1, 2, 3), 60))
        for (group, answer) in cases:
            self.assertEqual(self.mset.multinomial_coeff(group), answer)

    def test_number_arrangements_of_uniq_msets_is_mset_number(self):
        """Check summed arrangement counts against multiset_number."""
        for n in (5, 15, 30):
            for m in (3, 6):
                closed_form = self.mset.multiset_number(n, m)
                enumerated = 0
                for ms in self.mset.uniq_msets(n, m):
                    enumerated += self.mset.number_of_arrangements(ms)
                self.assertEqual(closed_form, enumerated)

    def test_num_uniq_msets_is_equal_to_calculated_number(self):
        """Check num_uniq_msets against a count of uniq_msets output."""
        for n in (5, 15, 30):
            for m in (3, 6):
                reported = self.mset.num_uniq_msets(n, m)
                enumerated = 0
                for _ in self.mset.uniq_msets(n, m):
                    enumerated += 1
                self.assertEqual(reported, enumerated)