Example #1
0
 def test_ibd_segments_in_family(self):
     '''Check the IBD segments found between parent and child haplotypes.

     ip.ibd_segments_in_family() is run on self.haplotype and self.family with
     PhaseParam.HET_FILL_THRESHOLD, its output is concatenated into a single
     segment set, and both the segments and the set's genotype errors are
     compared with hard-coded reference values.
     '''
     family_segments = ip.ibd_segments_in_family(
         self.haplotype, self.family, PhaseParam.HET_FILL_THRESHOLD)
     concatenated = ibd.concatenate_segments(family_segments)

     # Reference segments, one row per segment.
     # NOTE(review): rows look like ((snp range), (bp info), (sample pair)) --
     # confirm the exact layout against assert_segments_almost_equal.
     expected_segments = [
         ((3,    172),  (17080378, 18541497, 1.461,  0), ((1, 1), (2, 1))),
         ((193,  3218), (18996226, 51156934, 32.161, 1), ((1, 0), (2, 1))),
         ((0,    3218), (16484792, 51156934, 34.672, 4), ((1, 0), (3, 1))),
         ((3,    240),  (17080378, 19593301, 2.513,  0), ((1, 0), (4, 1))),
         ((256,  3218), (19873357, 51156934, 31.284, 0), ((4, 1), (1, 1))),
         ((0,    3218), (16484792, 51156934, 34.672, 0), ((5, 1), (1, 0))),
         ((3,    2696), (17080378, 46630634, 29.550, 0), ((6, 1), (1, 1))),
         ((2718, 3218), (46853292, 51156934, 4.304,  1), ((1, 0), (6, 1))),
         ((0,    3218), (16484792, 51156934, 34.672, 0), ((1, 0), (7, 1))),
         ((3,    1908), (17080378, 36572515, 19.492, 0), ((8, 1), (1, 0))),
         ((1939, 3218), (36804282, 51156934, 14.353, 0), ((8, 1), (1, 1))),
     ]
     assert_segments_almost_equal(concatenated, expected_segments,
                                  full_data=True, decimal=3,
                                  err_msg='Wrong IBD segments')

     # Reference value of the set's ``errors`` attribute: two parallel lists.
     expected_errors = [
         [2997, 2997, 132, 1364, 2493, 2800, 132, 1364, 2493, 2800, 2997, 2997],
         [1, 2, 1, 1, 1, 1, 3, 3, 3, 3, 1, 6],
     ]
     assert_equal(concatenated.errors, expected_errors, 'Wrong genotype errors')
 def test_ibd_segments(self):
     '''Check the IBD segments calculated from child recombinations.

     Child recombinations are computed for parent type PATERNAL and template 2
     with remove_errors=True; the resulting info object is handed to
     self.comparator.phase_parent_by_template(), and its concatenated
     ibs_segments() are compared with hard-coded reference segments.
     '''
     parent_kind = PATERNAL
     chosen_template = 2
     _, _, recombination_info = self.child_recombinations(
         parent_kind, chosen_template, remove_errors=True)
     self.comparator.phase_parent_by_template(recombination_info)
     # Alternative call kept for reference:
     #     ibd.phase_by_ibd(self.problem, info.ibs_segments())
     concatenated = ibd.concatenate_segments(
         segment for segment in recombination_info.ibs_segments())
     # Debugging aid: print concatenated.pprint_segments(True)

     # Every reference row carries the same placeholder as its middle element,
     # so only the SNP range and the sample pair are listed per row.
     placeholder_bp = (-1, -1, -1.000, 0)
     snp_ranges_and_samples = (
         ((0, 2800), ((2, 0), (0, 0))),
         ((2800, 3218), ((0, 1), (2, 0))),
         ((0, 3218), ((3, 0), (0, 0))),
         ((0, 3218), ((0, 0), (4, 0))),
         ((0, 3218), ((0, 1), (5, 0))),
         ((0, 3218), ((0, 0), (6, 0))),
         ((0, 55), ((0, 0), (7, 0))),
         ((55, 2981), ((0, 1), (7, 0))),
         ((2981, 3218), ((0, 0), (7, 0))),
         ((0, 637), ((8, 0), (0, 0))),
         ((637, 2447), ((0, 1), (8, 0))),
         ((2447, 3218), ((8, 0), (0, 0))),
         ((0, 3218), ((9, 0), (0, 0))),
         ((0, 3218), ((10, 0), (0, 0))),
         ((0, 3046), ((11, 0), (0, 0))),
         ((3046, 3218), ((0, 1), (11, 0))),
         ((0, 3081), ((0, 1), (12, 0))),
         ((3081, 3218), ((0, 0), (12, 0))),
         ((0, 199), ((0, 0), (13, 0))),
         ((199, 3089), ((0, 1), (13, 0))),
         ((3089, 3218), ((0, 0), (13, 0))),
         ((0, 860), ((0, 0), (14, 0))),
         ((860, 3218), ((0, 1), (14, 0))),
     )
     expected_segments = [(snp_range, placeholder_bp, samples)
                          for (snp_range, samples) in snp_ranges_and_samples]
     assert_segments_almost_equal(concatenated, expected_segments,
                                  decimal=3, err_msg='Wrong IBD segments')
Example #3
0
 def test_ibd_segments_in_family(self):
     '''Verify parent-child IBD segments located via recombination locations.

     The output of ip.ibd_segments_in_family() for self.haplotype and
     self.family (using PhaseParam.HET_FILL_THRESHOLD) is concatenated, then
     the segments and the ``errors`` attribute of the resulting set are
     checked against fixed reference data.
     '''
     raw_segments = ip.ibd_segments_in_family(
         self.haplotype,
         self.family,
         PhaseParam.HET_FILL_THRESHOLD,
     )
     ibd_set = ibd.concatenate_segments(raw_segments)

     # Fixed reference segments the computed set must match to 3 decimals.
     reference_segments = [
         ((3, 172), (17080378, 18541497, 1.461, 0), ((1, 1), (2, 1))),
         ((193, 3218), (18996226, 51156934, 32.161, 1), ((1, 0), (2, 1))),
         ((0, 3218), (16484792, 51156934, 34.672, 4), ((1, 0), (3, 1))),
         ((3, 240), (17080378, 19593301, 2.513, 0), ((1, 0), (4, 1))),
         ((256, 3218), (19873357, 51156934, 31.284, 0), ((4, 1), (1, 1))),
         ((0, 3218), (16484792, 51156934, 34.672, 0), ((5, 1), (1, 0))),
         ((3, 2696), (17080378, 46630634, 29.550, 0), ((6, 1), (1, 1))),
         ((2718, 3218), (46853292, 51156934, 4.304, 1), ((1, 0), (6, 1))),
         ((0, 3218), (16484792, 51156934, 34.672, 0), ((1, 0), (7, 1))),
         ((3, 1908), (17080378, 36572515, 19.492, 0), ((8, 1), (1, 0))),
         ((1939, 3218), (36804282, 51156934, 14.353, 0), ((8, 1), (1, 1))),
     ]
     assert_segments_almost_equal(
         ibd_set,
         reference_segments,
         full_data=True,
         decimal=3,
         err_msg='Wrong IBD segments',
     )

     # Fixed reference for the genotype errors: two parallel lists.
     reference_errors = [
         [2997, 2997, 132, 1364, 2493, 2800, 132, 1364, 2493, 2800, 2997, 2997],
         [1, 2, 1, 1, 1, 1, 3, 3, 3, 3, 1, 6],
     ]
     assert_equal(ibd_set.errors, reference_errors, 'Wrong genotype errors')
 def test_ibd_segments(self):
     '''Verify the IBD segments obtained from child recombinations.

     self.child_recombinations() is called for PATERNAL / template 2 with
     remove_errors=True. Its third result is passed to
     self.comparator.phase_parent_by_template(), and the concatenation of its
     ibs_segments() is checked against fixed reference segments.
     '''
     which_parent = PATERNAL
     template_value = 2
     _, _, rec_info = self.child_recombinations(which_parent, template_value,
                                                remove_errors=True)
     self.comparator.phase_parent_by_template(rec_info)
     # Alternative call kept for reference:
     #     ibd.phase_by_ibd(self.problem, info.ibs_segments())
     ibd_set = ibd.concatenate_segments(seg for seg in rec_info.ibs_segments())
     # Debugging aid: print ibd_set.pprint_segments(True)

     # The middle element of every reference row is this same placeholder
     # tuple, so the table below lists only (SNP range, sample pair).
     unset_bp = (-1, -1, -1.000, 0)
     reference_rows = [
         ((0,    2800), ((2, 0),  (0, 0))),
         ((2800, 3218), ((0, 1),  (2, 0))),
         ((0,    3218), ((3, 0),  (0, 0))),
         ((0,    3218), ((0, 0),  (4, 0))),
         ((0,    3218), ((0, 1),  (5, 0))),
         ((0,    3218), ((0, 0),  (6, 0))),
         ((0,    55),   ((0, 0),  (7, 0))),
         ((55,   2981), ((0, 1),  (7, 0))),
         ((2981, 3218), ((0, 0),  (7, 0))),
         ((0,    637),  ((8, 0),  (0, 0))),
         ((637,  2447), ((0, 1),  (8, 0))),
         ((2447, 3218), ((8, 0),  (0, 0))),
         ((0,    3218), ((9, 0),  (0, 0))),
         ((0,    3218), ((10, 0), (0, 0))),
         ((0,    3046), ((11, 0), (0, 0))),
         ((3046, 3218), ((0, 1),  (11, 0))),
         ((0,    3081), ((0, 1),  (12, 0))),
         ((3081, 3218), ((0, 0),  (12, 0))),
         ((0,    199),  ((0, 0),  (13, 0))),
         ((199,  3089), ((0, 1),  (13, 0))),
         ((3089, 3218), ((0, 0),  (13, 0))),
         ((0,    860),  ((0, 0),  (14, 0))),
         ((860,  3218), ((0, 1),  (14, 0))),
     ]
     reference_segments = [(snp, unset_bp, pair) for (snp, pair) in reference_rows]
     assert_segments_almost_equal(ibd_set, reference_segments,
                                  decimal=3, err_msg='Wrong IBD segments')
    def test_ibd_segments_in_family(self):
        '''Test locating recombination locations and subsequently IBD regions between
        parent and child haplotypes.

        The checks run in this order, and the order matters because
        group_to_disjoint() and merge_consecutive() are called with their return
        values discarded and the same objects are asserted on afterwards:

        1. Concatenate the output of ip.ibd_segments_in_family() and compare it
           with the reference segments.
        2. Call segment_set.group_to_disjoint(True) and compare the regrouped set
           with a second reference list.
        3. Compare segment_set.errors with the reference genotype errors.
        4. Assert that no SNP range occurs in more than 4 segments.
        5. For the samples (3, 0) and (3, 1), build the set of segments containing
           that sample, merge consecutive segments and compare with references.
        '''
        # NOTE(review): each expected entry looks like
        # ((snp_start, snp_stop), (bp_start, bp_stop, length, error count?), samples)
        # -- TODO confirm the layout against assert_segments_almost_equal.
        segment_set = ibd.concatenate_segments(ip.ibd_segments_in_family(self.problem.haplotype, self.family,
                                                                         PhaseParam.HET_FILL_THRESHOLD))
        assert_segments_almost_equal(segment_set,
                                   [((4   , 1411), (17087656, 32978753, 15.891, 0), ((2, 0), (0, 0))),
                                    ((1413, 3215), (33013062, 51103692, 18.091, 1), ((0, 1), (2, 0))),
                                    ((0   , 3218), (16484792, 51156934, 34.672, 1), ((0, 1), (3, 0))),
                                    ((4   , 2551), (17087656, 45007348, 27.920, 1), ((0, 1), (4, 0))),
                                    ((2569, 3215), (45198494, 51103692, 5.905, 0), ((0, 0), (4, 0))),
                                    ((0   , 3218), (16484792, 51156934, 34.672, 1), ((0, 0), (5, 0))),
                                    ((4   , 795), (17087656, 27011420, 9.924, 0), ((0, 0), (6, 0))),
                                    ((805 , 3215), (27119061, 51103692, 23.985, 1), ((0, 1), (6, 0))),
                                    ((2   , 2650), (17075353, 45892433, 28.817, 0), ((1, 1), (2, 1))),
                                    ((2657, 3218), (45940934, 51156934, 5.216, 2), ((1, 0), (2, 1))),
                                    ((1   , 576), (17065079, 25228375, 8.163, 0), ((3, 1), (1, 1))),
                                    ((600 , 3218), (25444874, 51156934, 25.712, 2), ((1, 0), (3, 1))),
                                    ((2   , 1978), (17075353, 37228277, 20.153, 2), ((1, 0), (4, 1))),
                                    ((2019, 3218), (37509844, 51156934, 13.647, 0), ((4, 1), (1, 1))),
                                    ((0   , 3218), (16484792, 51156934, 34.672, 3), ((5, 1), (1, 0))),
                                    ((2   , 301), (17075353, 20993766, 3.918, 0), ((6, 1), (1, 1))),
                                    ((334 , 3218), (21363960, 51156934, 29.793, 2), ((1, 0), (6, 1)))],
                                    full_data=True, decimal=3, err_msg='Wrong IBD segments')

        # Check that there are at most 4 distinct haplotypes at each SNP.
        # group_to_disjoint() is called for its effect on segment_set (its return
        # value is discarded); the expected list below repeats each SNP range once
        # per group of samples, and a group may hold more than two samples.
        segment_set.group_to_disjoint(True)
        assert_segments_almost_equal(segment_set,
                                     [((0   , 1), (16484792, 17065079, 0.580, 0), ((5, 1), (1, 0))),
                                      ((0   , 1), (16484792, 17065079, 0.580, 0), ((0, 1), (3, 0))),
                                      ((0   , 1), (16484792, 17065079, 0.580, 0), ((0, 0), (5, 0))),
                                      ((1   , 2), (17065079, 17075353, 0.010, 0), ((5, 1), (1, 0))),
                                      ((1   , 2), (17065079, 17075353, 0.010, 0), ((3, 1), (1, 1))),
                                      ((1   , 2), (17065079, 17075353, 0.010, 0), ((0, 1), (3, 0))),
                                      ((1   , 2), (17065079, 17075353, 0.010, 0), ((0, 0), (5, 0))),
                                      ((2   , 4), (17075353, 17087656, 0.012, 0), ((0, 0), (5, 0))),
                                      ((2   , 4), (17075353, 17087656, 0.012, 0), ((0, 1), (3, 0))),
                                      ((2   , 4), (17075353, 17087656, 0.012, 0), ((6, 1), (3, 1), (1, 1), (2, 1))),
                                      ((2   , 4), (17075353, 17087656, 0.012, 0), ((5, 1), (1, 0), (4, 1))),
                                      ((4   , 301), (17087656, 20993766, 3.906, 0), ((2, 0), (0, 0), (6, 0), (5, 0))),
                                      ((4   , 301), (17087656, 20993766, 3.906, 0), ((0, 1), (3, 0), (4, 0))),
                                      ((4   , 301), (17087656, 20993766, 3.906, 0), ((6, 1), (3, 1), (1, 1), (2, 1))),
                                      ((4   , 301), (17087656, 20993766, 3.906, 0), ((5, 1), (1, 0), (4, 1))),
                                      ((301 , 334), (20993766, 21363960, 0.370, 0), ((2, 0), (0, 0), (6, 0), (5, 0))),
                                      ((301 , 334), (20993766, 21363960, 0.370, 0), ((0, 1), (3, 0), (4, 0))),
                                      ((301 , 334), (20993766, 21363960, 0.370, 0), ((3, 1), (1, 1), (2, 1))),
                                      ((301 , 334), (20993766, 21363960, 0.370, 0), ((5, 1), (1, 0), (4, 1))),
                                      ((334 , 576), (21363960, 25228375, 3.864, 0), ((2, 0), (0, 0), (6, 0), (5, 0))),
                                      ((334 , 576), (21363960, 25228375, 3.864, 0), ((0, 1), (3, 0), (4, 0))),
                                      ((334 , 576), (21363960, 25228375, 3.864, 0), ((3, 1), (1, 1), (2, 1))),
                                      ((334 , 576), (21363960, 25228375, 3.864, 0), ((5, 1), (6, 1), (4, 1), (1, 0))),
                                      ((576 , 600), (25228375, 25444874, 0.216, 0), ((1, 1), (2, 1))),
                                      ((576 , 600), (25228375, 25444874, 0.216, 0), ((2, 0), (0, 0), (6, 0), (5, 0))),
                                     ((576 , 600), (25228375, 25444874, 0.216, 0), ((0, 1), (3, 0), (4, 0))),
                                     ((576 , 600), (25228375, 25444874, 0.216, 0), ((5, 1), (6, 1), (4, 1), (1, 0))),
                                     ((600 , 795), (25444874, 27011420, 1.567, 0), ((5, 1), (6, 1), (3, 1), (4, 1), (1, 0))),
                                     ((600 , 795), (25444874, 27011420, 1.567, 0), ((2, 0), (0, 0), (6, 0), (5, 0))),
                                     ((600 , 795), (25444874, 27011420, 1.567, 0), ((0, 1), (3, 0), (4, 0))),
                                     ((600 , 795), (25444874, 27011420, 1.567, 0), ((1, 1), (2, 1))),
                                     ((795 , 805), (27011420, 27119061, 0.108, 0), ((5, 1), (6, 1), (3, 1), (4, 1), (1, 0))),
                                     ((795 , 805), (27011420, 27119061, 0.108, 0), ((2, 0), (0, 0), (5, 0))),
                                     ((795 , 805), (27011420, 27119061, 0.108, 0), ((0, 1), (3, 0), (4, 0))),
                                     ((795 , 805), (27011420, 27119061, 0.108, 0), ((1, 1), (2, 1))),
                                     ((805 , 1411), (27119061, 32978753, 5.860, 0), ((5, 1), (6, 1), (3, 1), (4, 1), (1, 0))),
                                     ((805 , 1411), (27119061, 32978753, 5.860, 0), ((2, 0), (0, 0), (5, 0))),
                                     ((805 , 1411), (27119061, 32978753, 5.860, 0), ((0, 1), (3, 0), (6, 0), (4, 0))),
                                     ((805 , 1411), (27119061, 32978753, 5.860, 0), ((1, 1), (2, 1))),
                                     ((1411, 1413), (32978753, 33013062, 0.034, 0), ((5, 1), (6, 1), (3, 1), (4, 1), (1, 0))),
                                     ((1411, 1413), (32978753, 33013062, 0.034, 0), ((1, 1), (2, 1))),
                                     ((1411, 1413), (32978753, 33013062, 0.034, 0), ((0, 1), (3, 0), (6, 0), (4, 0))),
                                     ((1411, 1413), (32978753, 33013062, 0.034, 0), ((0, 0), (5, 0))),
                                     ((1413, 1978), (33013062, 37228277, 4.215, 0), ((5, 1), (6, 1), (3, 1), (4, 1), (1, 0))),
                                     ((1413, 1978), (33013062, 37228277, 4.215, 0), ((0, 1), (3, 0), (4, 0), (6, 0), (2, 0))),
                                     ((1413, 1978), (33013062, 37228277, 4.215, 0), ((1, 1), (2, 1))),
                                     ((1413, 1978), (33013062, 37228277, 4.215, 0), ((0, 0), (5, 0))),
                                     ((1978, 2019), (37228277, 37509844, 0.282, 0), ((1, 1), (2, 1))),
                                     ((1978, 2019), (37228277, 37509844, 0.282, 0), ((0, 1), (3, 0), (4, 0), (6, 0), (2, 0))),
                                     ((1978, 2019), (37228277, 37509844, 0.282, 0), ((5, 1), (6, 1), (3, 1), (1, 0))),
                                     ((1978, 2019), (37228277, 37509844, 0.282, 0), ((0, 0), (5, 0))),
                                     ((2019, 2551), (37509844, 45007348, 7.498, 0), ((0, 1), (3, 0), (4, 0), (6, 0), (2, 0))),
                                     ((2019, 2551), (37509844, 45007348, 7.498, 0), ((0, 0), (5, 0))),
                                     ((2019, 2551), (37509844, 45007348, 7.498, 0), ((5, 1), (6, 1), (3, 1), (1, 0))),
                                     ((2019, 2551), (37509844, 45007348, 7.498, 0), ((4, 1), (1, 1), (2, 1))),
                                     ((2551, 2569), (45007348, 45198494, 0.191, 0), ((0, 0), (5, 0))),
                                     ((2551, 2569), (45007348, 45198494, 0.191, 0), ((0, 1), (3, 0), (6, 0), (2, 0))),
                                     ((2551, 2569), (45007348, 45198494, 0.191, 0), ((5, 1), (6, 1), (3, 1), (1, 0))),
                                     ((2551, 2569), (45007348, 45198494, 0.191, 0), ((4, 1), (1, 1), (2, 1))),
                                     ((2569, 2650), (45198494, 45892433, 0.694, 0), ((0, 0), (5, 0), (4, 0))),
                                     ((2569, 2650), (45198494, 45892433, 0.694, 0), ((0, 1), (3, 0), (6, 0), (2, 0))),
                                     ((2569, 2650), (45198494, 45892433, 0.694, 0), ((5, 1), (6, 1), (3, 1), (1, 0))),
                                     ((2569, 2650), (45198494, 45892433, 0.694, 0), ((4, 1), (1, 1), (2, 1))),
                                     ((2650, 2657), (45892433, 45940934, 0.049, 0), ((0, 0), (5, 0), (4, 0))),
                                     ((2650, 2657), (45892433, 45940934, 0.049, 0), ((0, 1), (3, 0), (6, 0), (2, 0))),
                                     ((2650, 2657), (45892433, 45940934, 0.049, 0), ((5, 1), (6, 1), (3, 1), (1, 0))),
                                     ((2650, 2657), (45892433, 45940934, 0.049, 0), ((4, 1), (1, 1))),
                                     ((2657, 3215), (45940934, 51103692, 5.163, 0), ((5, 1), (6, 1), (3, 1), (1, 0), (2, 1))),
                                     ((2657, 3215), (45940934, 51103692, 5.163, 0), ((0, 1), (3, 0), (6, 0), (2, 0))),
                                     ((2657, 3215), (45940934, 51103692, 5.163, 0), ((0, 0), (5, 0), (4, 0))),
                                     ((2657, 3215), (45940934, 51103692, 5.163, 0), ((4, 1), (1, 1))),
                                     ((3215, 3218), (51103692, 51156934, 0.053, 0), ((5, 1), (6, 1), (3, 1), (1, 0), (2, 1))),
                                     ((3215, 3218), (51103692, 51156934, 0.053, 0), ((4, 1), (1, 1))),
                                     ((3215, 3218), (51103692, 51156934, 0.053, 0), ((0, 1), (3, 0))),
                                     ((3215, 3218), (51103692, 51156934, 0.053, 0), ((0, 0), (5, 0)))],
                                     full_data=True, decimal=3, err_msg='Wrong IBD segments')
        # segment_set.errors is compared with two parallel lists.
        # NOTE(review): presumably SNP indices and sample indices of the genotype
        # errors -- confirm against the SegmentSet implementation.
        assert_equal(segment_set.errors,
                     [[2162, 2162, 2162, 2162, 2162, 2162, 2162, 2162, 2162, 2162, 2846, 3202, 2846,
                       3202, 1092, 3202, 1092, 3202, 3, 1092, 3, 1092, 3, 1092, 3202, 3, 1092, 3202,
                        1092, 3202, 1092, 3202],
                      [0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 1, 1, 2, 2, 1, 1, 3, 3, 1, 1, 4, 4, 1, 1, 1, 5, 5,
                       5, 1, 1, 6, 6]],
                     'Wrong genotype errors')

        # Count the segments sharing each SNP range and require at most 4.
        # itertools.groupby only groups *consecutive* items with equal keys, so
        # this relies on segment_set iterating with equal-snp segments adjacent
        # -- presumably guaranteed after group_to_disjoint(); TODO confirm.
        self.assertTrue(max(len(list(g)) for (_, g) in itertools.groupby(segment_set, lambda segment: segment.snp)) <= 4,
                        'Too many distinct haplotypes in nuclear family')

        # Extract IBD segments of child's haplotype vs. sibs: keep only segments
        # whose samples include (3, 0), and drop the four samples (0, 0), (0, 1),
        # (1, 0), (1, 1) from each kept segment.
        # NOTE(review): those four are presumably the two parents' haplotypes
        # -- confirm the (sample index, allele) convention.
        segments_of_30 = SegmentSet(Segment(x.snp, x.samples - set([(0, 0), (0, 1), (1, 0), (1, 1)]), x.bp, error_snps=x.error_snps)
                                   for x in segment_set if (3, 0) in x.samples)
        # Called for its effect on segments_of_30; the return value is discarded.
        segments_of_30.merge_consecutive()
        assert_segments_almost_equal(segments_of_30,
                                     [((0   , 4), (16484792, 17087656, 0.603, 0), ((3, 0),)),
                                      ((4   , 805), (17087656, 27119061, 10.031, 0), ((3, 0), (4, 0))),
                                      ((805 , 1413), (27119061, 33013062, 5.894, 0), ((3, 0), (6, 0), (4, 0))),
                                      ((1413, 2551), (33013062, 45007348, 11.994, 0), ((3, 0), (2, 0), (6, 0), (4, 0))),
                                      ((2551, 3215), (45007348, 51103692, 6.096, 0), ((3, 0), (2, 0), (6, 0))),
                                      ((3215, 3218), (51103692, 51156934, 0.053, 0), ((3, 0),))],
                                     full_data=True, decimal=3, err_msg='Wrong IBD segments')

        # Same extraction for sample (3, 1). The expected list has no entry for
        # the SNP range 576-600, so the merged set is not contiguous there.
        segments_of_31 = SegmentSet(Segment(x.snp, x.samples - set([(0, 0), (0, 1), (1, 0), (1, 1)]), x.bp, error_snps=x.error_snps)
                                   for x in segment_set if (3, 1) in x.samples)
        segments_of_31.merge_consecutive()
        assert_segments_almost_equal(segments_of_31,
                                     [((1   , 2), (17065079, 17075353, 0.010, 0), ((3, 1),)),
                                      ((2   , 301), (17075353, 20993766, 3.918, 0), ((6, 1), (3, 1), (2, 1))),
                                      ((301 , 576), (20993766, 25228375, 4.235, 0), ((3, 1), (2, 1))),
                                      ((600 , 1978), (25444874, 37228277, 11.783, 0), ((5, 1), (6, 1), (3, 1), (4, 1))),
                                      ((1978, 2657), (37228277, 45940934, 8.713, 0), ((5, 1), (6, 1), (3, 1))),
                                      ((2657, 3218), (45940934, 51156934, 5.216, 0), ((5, 1), (6, 1), (3, 1), (2, 1)))],
                                     full_data=True, decimal=3, err_msg='Wrong IBD segments')