Example #1
    def test_choice(self):
        """ test that choice() works correctly.
        
        Since WeightedChoice is a weighted random sampler, we can't rely on
        getting exact values out, so repeated samples are expected to obtain
        proportions of values equivalent to their weight value. The deviation
        from the expected proportion shrinks with larger sample sets, but at
        the cost of making the test take > 1 second for 1 million samples,
        or > 10 s for 10 million samples.
        """

        iterations = 1000000

        choices = WeightedChoice()
        choices.add_choice(1, 1)
        choices.add_choice(2, 5)
        s = [choices.choice() for x in range(iterations)]
        self.assertAlmostEqual(s.count(1) / len(s), 0.1667, places=2)

        # add another choice, then check that all of the choices have been
        # sampled at the expected proportions
        choices.add_choice(3, 4)
        s = [choices.choice() for x in range(iterations)]
        self.assertAlmostEqual(s.count(1) / len(s), 0.100, places=2)
        self.assertAlmostEqual(s.count(2) / len(s), 0.500, places=2)
        self.assertAlmostEqual(s.count(3) / len(s), 0.400, places=2)

        # check that all the choices have been made from the inserted values
        self.assertEqual(set(s), set([1, 2, 3]))
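The proportions asserted in this example follow directly from each weight divided by the summed rate: with weights 1 and 5 the total is 6, so value 1 is expected in 1/6 ≈ 0.1667 of draws, and once a weight of 4 is added for value 3 the totals become 0.1, 0.5 and 0.4. A minimal plain-Python sketch of that arithmetic, independent of the WeightedChoice implementation:

# expected sampling proportions implied by the weights used in test_choice()
weights = {1: 1, 2: 5}
total = sum(weights.values())
assert abs(weights[1] / total - 0.1667) < 0.001   # 1 / 6

weights[3] = 4
total = sum(weights.values())
expected = {value: weight / total for value, weight in weights.items()}
assert expected == {1: 0.1, 2: 0.5, 3: 0.4}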
Example #2
    def test_choice_small_numbers(self):
        """ test that choice() works correctly.
        """

        iterations = 1000000

        # very small numbers at the end still have expected proportions
        choices = WeightedChoice()
        choices.add_choice(1, 1)
        choices.add_choice(2, 5)
        choices.add_choice(3, 0.0001)
        s = [choices.choice() for x in range(iterations)]
        self.assertAlmostEqual(s.count(3) / len(s), 0.0001, places=3)

        # very small numbers at the start still have expected proportions
        choices = WeightedChoice()
        choices.add_choice(1, 0.0001)
        choices.add_choice(2, 1)
        choices.add_choice(3, 5)
        s = [choices.choice() for x in range(iterations)]
        self.assertAlmostEqual(s.count(1) / len(s), 0.0001, places=3)

        # check that the sampling works correctly at low weight values
        choices = WeightedChoice()

        numbers = range(1000, 3000)
        small = [x * 0.000000000001 for x in numbers]
        for (name, prob) in zip(numbers, small):
            choices.add_choice(name, prob)

        s = [choices.choice() for x in range(iterations)]
        self.assertAlmostEqual(s.count(numbers[0]) / len(s), 0.0001, places=3)
Example #3
    def test_choice_small_numbers(self):
        """ test that choice() works correctly.
        """

        iterations = 1000000

        # very small numbers at the end still have expected proportions
        choices = WeightedChoice()
        choices.add_choice(1, 1)
        choices.add_choice(2, 5)
        choices.add_choice(3, 0.0001)
        s = [choices.choice() for x in range(iterations)]
        self.assertAlmostEqual(s.count(3) / len(s), 0.0001, places=3)

        # very small numbers at the start still have expected proportions
        choices = WeightedChoice()
        choices.add_choice(1, 0.0001)
        choices.add_choice(2, 1)
        choices.add_choice(3, 5)
        s = [choices.choice() for x in range(iterations)]
        self.assertAlmostEqual(s.count(1) / len(s), 0.0001, places=3)

        # check that the sampling works correctly at low weight values
        choices = WeightedChoice()

        numbers = range(1000, 3000)
        small = [x * 0.000000000001 for x in numbers]
        for (name, prob) in zip(numbers, small):
            choices.add_choice(name, prob)

        s = [choices.choice() for x in range(iterations)]
        self.assertAlmostEqual(s.count(numbers[0]) / len(s), 0.0001, places=3)
Example #4
    def test_choice(self):
        """ test that choice() works correctly.

        Since WeightedChoice is a weighted random sampler, we can't rely on
        getting exact values out, so repeated samples are expected to obtain
        proportions of values equivalent to their weight value. The deviation
        from the expected proportion shrinks with larger sample sets, but at
        the cost of making the test take > 1 second for 1 million samples,
        or > 10 s for 10 million samples.
        """

        iterations = 1000000

        choices = WeightedChoice()
        choices.add_choice(1, 1)
        choices.add_choice(2, 5)
        s = [choices.choice() for x in range(iterations)]
        self.assertAlmostEqual(s.count(1) / len(s), 0.1667, places=2)

        # add another choice, then check that all of the choices have been
        # sampled at the expected proportions
        choices.add_choice(3, 4)
        s = [choices.choice() for x in range(iterations)]
        self.assertAlmostEqual(s.count(1) / len(s), 0.100, places=2)
        self.assertAlmostEqual(s.count(2) / len(s), 0.500, places=2)
        self.assertAlmostEqual(s.count(3) / len(s), 0.400, places=2)

        # check that all the choices have been made from the inserted values
        self.assertEqual(set(s), set([1, 2, 3]))
Example #5
    def test_analyse_sample_zero(self):
        ''' test we raise an error if the de novo count is zero
        '''
        rates = WeightedChoice()
        rates.add_choice(200, 1e-5, 'A', 'G')
        rates.add_choice(201, 2e-5, 'C', 'T')

        severity = [5, 10]
        with self.assertRaises(ValueError):
            analyse(rates, severity, 0, 0, iterations=10000)
Example #6
    def test_analyse_mismatch(self):
        ''' test for error when the rates and severity lengths are different
        '''

        rates = WeightedChoice()
        rates.add_choice(200, 1e-5, 'A', 'G')
        rates.add_choice(201, 2e-5, 'C', 'T')

        severity = [5, 10, 5]

        with self.assertRaises(ValueError):
            analyse(rates, severity, 8, 1, iterations=100000)
Example #7
class TestSimulationsPy(unittest.TestCase):
    """ unit test the simulation functions
    """
    
    def setUp(self):
        """
        """
        
        # set up a range of possible positions, all with a uniform probability
        # of being selected
        self.choices = WeightedChoice()
        for x in range(1000):
            self.choices.add_choice(x, 0.0001)
        
        self.iterations = 100000
    
    def test_analyse_de_novos_dispersed(self):
        """ test analyse_de_novos() works correctly for dispersed de novos
        """
        
        # spread sites throughout a 1000 bp transcript
        positions = [100, 300, 600]
        distances = get_distances(positions)
        observed = geomean(distances)
        
        p_val = analyse_de_novos(self.choices, self.iterations, len(positions), observed)
        
        self.assertAlmostEqual(p_val, 0.635, places=2)
    
    def test_analyse_de_novos_clustered(self):
        """ test analyse_de_novos() works correctly for clustered de novos
        """
        
        # cluster sites within 20 bp in a 1000 bp transcript
        positions = [100, 110, 120]
        distances = get_distances(positions)
        observed = geomean(distances)
        
        p_val = analyse_de_novos(self.choices, 1000000, len(positions), observed)
        
        self.assertAlmostEqual(p_val, 0.002, places=3)
    
    def test_simulate_distribution(self):
        ''' check that simulate_distribution works correctly
        '''
        
        # repeated function calls should give different samples
        first = simulate_distribution(self.choices, iterations=5, de_novos_count=3)
        second = simulate_distribution(self.choices, iterations=5, de_novos_count=3)
        
        self.assertNotEqual(first, second)
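The clustering statistic used in these tests is the geometric mean of the distances between the de novo sites, so the dispersed positions give a much larger observed value than the clustered ones, which is why only the clustered case yields a small p-value. A hedged sketch of that calculation in plain Python, assuming get_distances() returns the absolute pairwise distances between sites and geomean() is their geometric mean (the real functions come from the package and may differ in detail):

from itertools import combinations

def pairwise_distances(positions):
    # assumed behaviour of get_distances(): absolute distance for every pair of sites
    return [abs(a - b) for a, b in combinations(positions, 2)]

def geometric_mean(values):
    # assumed behaviour of geomean(): the n-th root of the product of the values
    product = 1.0
    for value in values:
        product *= value
    return product ** (1.0 / len(values))

dispersed = geometric_mean(pairwise_distances([100, 300, 600]))  # distances 200, 500, 300
clustered = geometric_mean(pairwise_distances([100, 110, 120]))  # distances 10, 20, 10
assert clustered < dispersed  # tighter clustering gives a smaller observed statistic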
Example #8
    def test_choice_with_alleles(self):
        """ test that choice_with_alleles() works correctly.
        """

        # if you add a choice with alleles, then check that we get back alleles,
        # and that they are the same
        choices = WeightedChoice()
        choices.add_choice(1, 1, "A", "T")
        self.assertEqual(choices.choice_with_alleles(),
            {'alt': 'T', 'ref': 'A', 'pos': 1, 'offset': 0})
        self.assertEqual(choices.choice(), 1)

        # if you add choices without alleles, then default the alleles to "N"
        choices = WeightedChoice()
        choices.add_choice(1, 1)
        self.assertEqual(choices.choice_with_alleles(),
            {'alt': 'N', 'ref': 'N', 'pos': 1, 'offset': 0})

        # make sure you can't add multi-base alleles to the choices; each bad
        # call gets its own context manager so both are actually checked
        with self.assertRaises(TypeError):
            choices.add_choice(1, 1, "AA", "A")
        with self.assertRaises(TypeError):
            choices.add_choice(1, 1, "A", "AG")

        # make sure non-zero offsets are returned correctly
        choices = WeightedChoice()
        choices.add_choice(1, 1, "A", "T", 3)
        self.assertEqual(choices.choice_with_alleles(),
            {'alt': 'T', 'ref': 'A', 'pos': 1, 'offset': 3})
        self.assertEqual(choices.choice(), 1)
Example #9
    def test_analyse_bigger(self):
        ''' test a more realistically sized data set
        '''

        seed(0)
        rates = WeightedChoice()
        pos = sorted(set([randint(1000, 3000) for x in range(2000)]))

        for x in pos:
            rates.add_choice(x, uniform(1e-10, 1e-7), 'A', 'G')

        severity = [randint(0, 40) for x in pos]

        p = analyse(rates, severity, 150, 4, iterations=10000)
        self.assertAlmostEqual(p, 3e-4, places=2)
Example #10
    def test_append(self):
        """ test that append() works correctly
        """

        # construct two objects
        a = WeightedChoice()
        a.add_choice(1, 0.5)

        b = WeightedChoice()
        b.add_choice(2, 1)

        # add one object to the other
        a.append(b)

        # check that the first object has changed correctly, but the other
        # remains unchanged
        self.assertEqual(a.get_summed_rate(), 1.5)
        self.assertEqual(b.get_summed_rate(), 1.0)
Example #11
    def test_append(self):
        """ test that append() works correctly
        """

        # construct two objects
        a = WeightedChoice()
        a.add_choice(1, 0.5)

        b = WeightedChoice()
        b.add_choice(2, 1)

        # add one object to the other
        a.append(b)

        # check that the first object has changed correctly, but the other
        # remains unchanged
        self.assertEqual(a.get_summed_rate(), 1.5)
        self.assertEqual(b.get_summed_rate(), 1.0)
Example #12
    def test_analyse(self):
        ''' test that we run the simulations correctly
        '''

        rates = WeightedChoice()
        rates.add_choice(200, 1e-5, 'A', 'G')
        rates.add_choice(201, 2e-5, 'C', 'T')
        rates.add_choice(202, 1e-5, 'C', 'G')

        severity = [5, 10, 5]

        # define a test where the observed score will fall at the midpoint of
        # the simulated null distribution
        p = analyse(rates, severity, 8, 1, iterations=100000)
        self.assertAlmostEqual(p, 0.5, places=2)

        # now check when we sample two de novo mutations
        p = analyse(rates, severity, 15, 2, iterations=100000)
        self.assertAlmostEqual(p, 0.25, places=2)
Example #13
    def test_analyse_extreme_p_value(self):
        ''' test when the observed severity score exceeds all possible values
        '''

        rates = WeightedChoice()
        rates.add_choice(200, 1e-5, 'A', 'G')
        rates.add_choice(201, 2e-5, 'C', 'T')
        rates.add_choice(202, 1e-5, 'C', 'G')

        severity = [5, 10, 5]

        # now check when the observed severity score exceeds all possible
        # values from the severity distribution. This test gives an absurd
        # p-value at 1e-6, but that is because the observed value is
        # unachievable given the existing severity scores. In practice the
        # observed score will always be theoretically achievable in the null
        # distribution, since the observed score is calculated from the
        # existing scores.
        p = analyse(rates, severity, 20, 1, iterations=100000)
        self.assertAlmostEqual(p, 1e-6, places=4)
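Both of these analyse() tests follow the usual logic of a simulation-based (empirical) p-value: the observed severity score is ranked against scores simulated from the per-site mutation rates, so an observed value at the midpoint of the null distribution gives p close to 0.5, while a value larger than anything achievable drives p to its lower bound. The sketch below only illustrates that logic; the exact formula inside analyse() (pseudocounts, tie handling, the 1e-6 floor seen above) is an assumption here, not taken from this page.

import random

def empirical_p_value(simulated_scores, observed, floor=1e-6):
    # hypothetical ranking step: fraction of simulated scores at least as
    # extreme as the observed score, with a pseudocount so p is never zero
    n = len(simulated_scores)
    hits = sum(1 for score in simulated_scores if score >= observed)
    return max((hits + 1) / (n + 1), floor)

random.seed(0)
null = [random.gauss(10, 2) for _ in range(100000)]

print(empirical_p_value(null, 10))    # near the midpoint of the null, roughly 0.5
print(empirical_p_value(null, 1e6))   # exceeds every simulated score, roughly 1 / (n + 1)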
Example #14
    def test_choice_with_alleles(self):
        """ test that choice_with_alleles() works correctly.
        """

        # if you add a choice with alleles, then check that we get back alleles,
        # and that they are the same
        choices = WeightedChoice()
        choices.add_choice(1, 1, "A", "T")
        self.assertEqual(choices.choice_with_alleles(), {
            'alt': 'T',
            'ref': 'A',
            'pos': 1,
            'offset': 0
        })
        self.assertEqual(choices.choice(), 1)

        # if you add choices without alleles, then default the alleles to "N"
        choices = WeightedChoice()
        choices.add_choice(1, 1)
        self.assertEqual(choices.choice_with_alleles(), {
            'alt': 'N',
            'ref': 'N',
            'pos': 1,
            'offset': 0
        })

        # make sure you can't add multi-base alleles to the choices; each bad
        # call gets its own context manager so both are actually checked
        with self.assertRaises(TypeError):
            choices.add_choice(1, 1, "AA", "A")
        with self.assertRaises(TypeError):
            choices.add_choice(1, 1, "A", "AG")

        # make sure non-zero offsets are returned correctly
        choices = WeightedChoice()
        choices.add_choice(1, 1, "A", "T", 3)
        self.assertEqual(choices.choice_with_alleles(), {
            'alt': 'T',
            'ref': 'A',
            'pos': 1,
            'offset': 3
        })
        self.assertEqual(choices.choice(), 1)
Example #15
    def test_add_choice(self):
        """ test that add_choice() works correctly
        """

        # check the cumulative sum while adding in new values
        choices = WeightedChoice()
        choices.add_choice(1, 1)
        self.assertEqual(choices.get_summed_rate(), 1)
        choices.add_choice(2, 5)
        self.assertEqual(choices.get_summed_rate(), 6)
        choices.add_choice(3, 10)
        self.assertEqual(choices.get_summed_rate(), 16)

        # check that it works for unsorted probabilities
        choices = WeightedChoice()
        choices.add_choice(1, 1)
        choices.add_choice(2, 10)
        choices.add_choice(3, 5)
        self.assertEqual(choices.get_summed_rate(), 16)

        # check for very low values, with very high precision (but not
        # necessarily exactly equal)
        choices = WeightedChoice()
        choices.add_choice(1, 5e-9)
        choices.add_choice(2, 1e-8)
        choices.add_choice(3, 1.000000000000005e-10)
        self.assertAlmostEqual(choices.get_summed_rate(),
                               1.51000000000000005e-8,
                               places=23)
Example #16
    def test_add_choice(self):
        """ test that add_choice() works correctly
        """

        # check the cumulative sum while adding in new values
        choices = WeightedChoice()
        choices.add_choice(1, 1)
        self.assertEqual(choices.get_summed_rate(), 1)
        choices.add_choice(2, 5)
        self.assertEqual(choices.get_summed_rate(), 6)
        choices.add_choice(3, 10)
        self.assertEqual(choices.get_summed_rate(), 16)

        # check that it works for unsorted probabilities
        choices = WeightedChoice()
        choices.add_choice(1, 1)
        choices.add_choice(2, 10)
        choices.add_choice(3, 5)
        self.assertEqual(choices.get_summed_rate(), 16)

        # check for very low values, with very high precision (but not
        # necessarily exactly equal)
        choices = WeightedChoice()
        choices.add_choice(1, 5e-9)
        choices.add_choice(2, 1e-8)
        choices.add_choice(3, 1.000000000000005e-10)
        self.assertAlmostEqual(choices.get_summed_rate(),
                               1.51000000000000005e-8,
                               places=23)