def test_match_maternal(self):
    '''Check _match() on the first 10 columns of the maternal haplotype matrix (rows x snps).'''
    hap_matrix = ig._HapMatrix(self.problem, self.sibs, parent_type=MATERNAL)
    haps = hap_matrix.h

    # Sanity-check the extracted matrix before matching on it
    assert_equal(len(hap_matrix.snps), 2929, 'Wrong # filled snps')
    expected_shape = (len(self.sibs), len(hap_matrix.snps))
    assert_equal(haps.shape, expected_shape, 'Wrong extracted hap matrix size')

    matched_pairs = self.ibd_computer._match(haps[:, 0:10])
    assert_equal(matched_pairs, set([(1, 2)]), 'Wrong match set')
def test_ibd_segments_both_parents(self):
    '''Check the GERMLINE IBD segments of the sibs, first raw and then grouped to disjoint.'''
    segments = self.ibd_computer.ibd_segments(ig._HapMatrix(self.problem, self.sibs))

    expected_raw = [
        ((0, 344), (16484792, 21449028, 4.964, 0), ((2, 0), (4, 0))),
        ((0, 344), (16484792, 21449028, 4.964, 0), ((1, 0), (4, 0))),
        ((0, 780), (16484792, 26920270, 10.435, 0), ((4, 1), (2, 1))),
        ((2145, 2996), (39608193, 48759228, 9.151, 0), ((1, 1), (2, 1))),
        ((2145, 2996), (39608193, 48759228, 9.151, 0), ((4, 1), (2, 1))),
        ((885, 3218), (27425790, 51156933, 23.731, 0), ((4, 1), (1, 1))),
        ((0, 3218), (16484792, 51156933, 34.672, 0), ((2, 0), (1, 0))),
    ]
    assert_segments_almost_equal(segments, expected_raw,
                                 full_data=True, decimal=3, err_msg='Wrong IBD segments, raw')

    # A transitive-logic test to see that we don't miss any intervals with GERMLINE
    segments.group_to_disjoint(False)
    expected_grouped = [
        ((0, 344), (16484792, 21449028, 4.964, 0), ((2, 0), (1, 0), (4, 0))),
        ((0, 344), (16484792, 21449028, 4.964, 0), ((4, 1), (2, 1))),
        ((344, 780), (21449028, 26920270, 5.471, 0), ((2, 0), (1, 0))),
        ((344, 780), (21449028, 26920270, 5.471, 0), ((4, 1), (2, 1))),
        ((780, 885), (26920270, 27425790, 0.506, 0), ((2, 0), (1, 0))),
        ((885, 2145), (27425790, 39608193, 12.182, 0), ((2, 0), (1, 0))),
        ((885, 2145), (27425790, 39608193, 12.182, 0), ((4, 1), (1, 1))),
        ((2145, 2996), (39608193, 48759228, 9.151, 0), ((4, 1), (1, 1), (2, 1))),
        ((2145, 2996), (39608193, 48759228, 9.151, 0), ((2, 0), (1, 0))),
        ((2996, 3218), (48759228, 51156933, 2.398, 0), ((2, 0), (1, 0))),
        ((2996, 3218), (48759228, 51156933, 2.398, 0), ((4, 1), (1, 1))),
    ]
    assert_segments_almost_equal(segments, expected_grouped,
                                 full_data=True, decimal=3, err_msg='Wrong IBD segments, grouped')

    # Rank by IBD set size, ties broken by length (both descending); first index is the best
    size_and_length = np.array([(len(seg.samples), seg.length) for seg in segments])
    ranking = np.lexsort((-size_and_length[:, 1], -size_and_length[:, 0]))
    best_segment = ranking[0]
    assert_equal(best_segment, 7, 'Wrong best segment (IBD set size + length)')

    best = segments[best_segment]
    assert_almost_equal(best.length, 9.15, decimal=2,
                        err_msg='Wrong best segment (IBD set size + length)')
    assert_equal(segments[best_segment].samples, set([(4, 1), (1, 1), (2, 1)]),
                 err_msg='Wrong best segment (IBD set size + length)')
def test_match(self):
    '''Check _match() on the first 10 columns of the maternal haplotype matrix (rows x snps).'''
    hap_matrix = ig._HapMatrix(self.problem, self.sibs, parent_type=MATERNAL)
    haps = hap_matrix.h

    # Sanity-check the extracted matrix before matching on it
    assert_equal(len(hap_matrix.snps), 2915, 'Wrong # filled snps')
    expected_shape = (len(self.sibs), len(hap_matrix.snps))
    assert_equal(haps.shape, expected_shape, 'Wrong extracted hap matrix size')

    matched_pairs = self.ibd_computer._match(haps[:, 0:10])
    assert_equal(matched_pairs, set([(1, 2), (1, 3), (2, 3)]), 'Wrong match set')
def test_match(self):
    '''Check _match() on the first 10 columns of the full haplotype matrix (two rows per sib).'''
    hap_matrix = ig._HapMatrix(self.problem, self.sibs)
    haps = hap_matrix.h

    # Sanity-check the extracted matrix before matching on it
    assert_equal(len(hap_matrix.snps), 3128, 'Wrong # filled snps')
    expected_shape = (2 * len(self.sibs), len(hap_matrix.snps))
    assert_equal(haps.shape, expected_shape, 'Wrong extracted hap matrix size')

    expected_pairs = set([(1, 3), (3, 7), (0, 6), (0, 9), (1, 5),
                          (1, 7), (3, 5), (5, 7), (2, 4), (6, 9)])
    matched_pairs = self.ibd_computer._match(haps[:, 0:10])
    assert_equal(matched_pairs, expected_pairs, 'Wrong match set')
def test_match(self):
    '''Check _match() on the first 10 columns of the full haplotype matrix (two rows per sib).'''
    hap_matrix = ig._HapMatrix(self.problem, self.sibs)
    haps = hap_matrix.h

    # Sanity-check the extracted matrix before matching on it
    assert_equal(len(hap_matrix.snps), 3128, 'Wrong # filled snps')
    expected_shape = (2 * len(self.sibs), len(hap_matrix.snps))
    assert_equal(haps.shape, expected_shape, 'Wrong extracted hap matrix size')

    expected_pairs = set([(1, 3), (3, 7), (0, 6), (0, 9), (1, 5),
                          (1, 7), (3, 5), (5, 7), (2, 4), (6, 9)])
    matched_pairs = self.ibd_computer._match(haps[:, 0:10])
    assert_equal(matched_pairs, expected_pairs, 'Wrong match set')
def test_ibd_segments(self):
    '''Check the disjoint-grouped GERMLINE IBD segments of the sibs and the best-ranked one.'''
    segments = self.ibd_computer.ibd_segments(ig._HapMatrix(self.problem, self.sibs))
    segments.group_to_disjoint()

    expected = [
        ((0, 235), (16484792, 19567818, 3.083, 0), ((3, 0), (1, 0), (5, 0))),
        ((0, 235), (16484792, 19567818, 3.083, 0), ((5, 1), (3, 1), (2, 1))),
        ((235, 345), (19567818, 21809185, 2.241, 0), ((3, 0), (1, 0), (5, 0))),
        ((235, 345), (19567818, 21809185, 2.241, 0), ((5, 1), (3, 1))),
        ((345, 450), (21809185, 23817240, 2.008, 0), ((3, 0), (1, 0))),
        ((345, 450), (21809185, 23817240, 2.008, 0), ((5, 1), (3, 1))),
        ((345, 450), (21809185, 23817240, 2.008, 0), ((1, 1), (2, 1))),
        ((450, 781), (23817240, 26924312, 3.107, 0), ((3, 0), (1, 0))),
        ((450, 781), (23817240, 26924312, 3.107, 0), ((5, 1), (3, 1))),
        ((450, 781), (23817240, 26924312, 3.107, 0), ((1, 1), (2, 1))),
        ((450, 781), (23817240, 26924312, 3.107, 0), ((2, 0), (5, 0))),
        ((781, 886), (26924312, 27427698, 0.503, 0), ((3, 0), (1, 0))),
        ((781, 886), (26924312, 27427698, 0.503, 0), ((1, 1), (2, 1))),
        ((781, 886), (26924312, 27427698, 0.503, 0), ((2, 0), (5, 0))),
        ((886, 2157), (27427698, 39797178, 12.369, 0), ((5, 1), (1, 1), (2, 1))),
        ((886, 2157), (27427698, 39797178, 12.369, 0), ((3, 0), (1, 0))),
        ((886, 2157), (27427698, 39797178, 12.369, 0), ((2, 0), (5, 0))),
        ((2157, 2791), (39797178, 47440321, 7.643, 0), ((5, 1), (3, 1), (1, 1), (2, 1))),
        ((2157, 2791), (39797178, 47440321, 7.643, 0), ((3, 0), (1, 0))),
        ((2157, 2791), (39797178, 47440321, 7.643, 0), ((2, 0), (5, 0))),
        ((2791, 2901), (47440321, 48282594, 0.842, 0), ((5, 1), (3, 1), (1, 1), (2, 1))),
        ((2791, 2901), (47440321, 48282594, 0.842, 0), ((3, 0), (1, 0))),
        ((2901, 3012), (48282594, 48855412, 0.573, 0), ((5, 1), (3, 1), (1, 1), (2, 1))),
        ((2901, 3012), (48282594, 48855412, 0.573, 0), ((3, 0), (2, 0), (1, 0))),
        ((3012, 3218), (48855412, 51156933, 2.302, 0), ((3, 0), (2, 0), (1, 0))),
        ((3012, 3218), (48855412, 51156933, 2.302, 0), ((5, 1), (1, 1), (2, 1))),
    ]
    assert_segments_almost_equal(segments, expected,
                                 full_data=True, decimal=3, err_msg='Wrong IBD segments')

    # Rank by IBD set size, ties broken by length (both descending); first index is the best
    size_and_length = np.array([(len(seg.samples), seg.length) for seg in segments])
    ranking = np.lexsort((-size_and_length[:, 1], -size_and_length[:, 0]))
    best_segment = ranking[0]
    assert_equal(best_segment, 17, 'Wrong best segment (IBD set size + length)')
def test_ibd_segments(self):
    '''Check the disjoint-grouped GERMLINE IBD segments of the sibs and the best-ranked one.'''
    hap_matrix = ig._HapMatrix(self.problem, self.sibs)
    segments = self.ibd_computer.ibd_segments(hap_matrix).group_to_disjoint()

    expected = [
        ((0, 20), (16484792, 17318333, 0.834, 0), ((3, 1), (4, 1), (1, 1), (2, 1))),
        ((0, 20), (16484792, 17318333, 0.834, 0), ((0, 1), (3, 0), (5, 0))),
        ((0, 20), (16484792, 17318333, 0.834, 0), ((2, 0), (0, 0), (4, 0))),
        ((0, 20), (16484792, 17318333, 0.834, 0), ((5, 1), (1, 0))),
        ((20, 121), (17318333, 18235305, 0.917, 0), ((5, 1), (4, 1), (1, 1), (2, 1))),
        ((20, 121), (17318333, 18235305, 0.917, 0), ((0, 1), (3, 0), (5, 0))),
        ((20, 121), (17318333, 18235305, 0.917, 0), ((2, 0), (0, 0), (4, 0))),
        ((121, 224), (18235305, 19455836, 1.221, 0), ((5, 1), (4, 1), (1, 1), (2, 1))),
        ((121, 224), (18235305, 19455836, 1.221, 0), ((0, 1), (3, 0), (5, 0))),
        ((121, 224), (18235305, 19455836, 1.221, 0), ((0, 0), (4, 0))),
        ((121, 224), (18235305, 19455836, 1.221, 0), ((1, 0), (3, 1))),
        ((224, 828), (19455836, 27236071, 7.780, 0), ((0, 1), (3, 0), (5, 0), (2, 0))),
        ((224, 828), (19455836, 27236071, 7.780, 0), ((5, 1), (4, 1), (1, 1), (2, 1))),
        ((224, 828), (19455836, 27236071, 7.780, 0), ((0, 0), (4, 0))),
        ((224, 828), (19455836, 27236071, 7.780, 0), ((1, 0), (3, 1))),
        ((828, 2039), (27236071, 37707962, 10.472, 0), ((0, 1), (3, 0), (5, 0), (2, 0))),
        ((828, 2039), (27236071, 37707962, 10.472, 0), ((1, 0), (3, 1), (4, 1))),
        ((828, 2039), (27236071, 37707962, 10.472, 0), ((5, 1), (1, 1), (2, 1))),
        ((828, 2039), (27236071, 37707962, 10.472, 0), ((0, 0), (4, 0))),
        ((2039, 2442), (37707962, 44264502, 6.557, 0), ((0, 1), (3, 0), (5, 0), (2, 0))),
        ((2039, 2442), (37707962, 44264502, 6.557, 0), ((5, 1), (3, 1), (1, 1), (2, 1))),
        ((2039, 2442), (37707962, 44264502, 6.557, 0), ((0, 0), (4, 0))),
        ((2039, 2442), (37707962, 44264502, 6.557, 0), ((1, 0), (4, 1))),
        ((2442, 2542), (44264502, 44930087, 0.666, 0), ((0, 1), (3, 0), (5, 0), (2, 0))),
        ((2442, 2542), (44264502, 44930087, 0.666, 0), ((5, 1), (3, 1), (1, 1), (2, 1))),
        ((2442, 2542), (44264502, 44930087, 0.666, 0), ((1, 0), (4, 1))),
        ((2542, 2642), (44930087, 45823327, 0.893, 0), ((0, 1), (3, 0), (4, 0), (5, 0), (2, 0))),
        ((2542, 2642), (44930087, 45823327, 0.893, 0), ((5, 1), (3, 1), (1, 1), (2, 1))),
        ((2542, 2642), (44930087, 45823327, 0.893, 0), ((1, 0), (4, 1))),
        ((2642, 2742), (45823327, 47169675, 1.346, 0), ((0, 1), (3, 0), (5, 0), (4, 0))),
        ((2642, 2742), (45823327, 47169675, 1.346, 0), ((5, 1), (3, 1), (1, 1), (2, 1))),
        ((2642, 2742), (45823327, 47169675, 1.346, 0), ((1, 0), (4, 1))),
        ((2742, 2942), (47169675, 48530211, 1.361, 0), ((0, 1), (3, 0), (5, 0), (4, 0))),
        ((2742, 2942), (47169675, 48530211, 1.361, 0), ((5, 1), (3, 1), (1, 1), (2, 1))),
        ((2742, 2942), (47169675, 48530211, 1.361, 0), ((2, 0), (0, 0))),
        ((2742, 2942), (47169675, 48530211, 1.361, 0), ((1, 0), (4, 1))),
        ((2942, 3042), (48530211, 49168613, 0.638, 0), ((5, 1), (3, 1), (1, 1), (2, 1))),
        ((2942, 3042), (48530211, 49168613, 0.638, 0), ((0, 1), (3, 0), (4, 0))),
        ((2942, 3042), (48530211, 49168613, 0.638, 0), ((2, 0), (0, 0))),
        ((2942, 3042), (48530211, 49168613, 0.638, 0), ((1, 0), (4, 1))),
        ((3042, 3218), (49168613, 51156933, 1.988, 0), ((5, 1), (3, 1), (1, 1), (2, 1))),
        ((3042, 3218), (49168613, 51156933, 1.988, 0), ((0, 1), (3, 0), (4, 0))),
        ((3042, 3218), (49168613, 51156933, 1.988, 0), ((2, 0), (0, 0), (5, 0))),
        ((3042, 3218), (49168613, 51156933, 1.988, 0), ((1, 0), (4, 1))),
    ]
    assert_segments_almost_equal(segments, expected,
                                 full_data=True, decimal=3, err_msg='Wrong IBD segments, grouped')

    # Rank by IBD set size, ties broken by length (both descending); first index is the best
    size_and_length = np.array([(len(seg.samples), seg.length) for seg in segments])
    ranking = np.lexsort((-size_and_length[:, 1], -size_and_length[:, 0]))
    best_segment = ranking[0]
    assert_equal(best_segment, 26, 'Wrong best segment (IBD set size + length)')
def test_hash(self):
    '''Test that equal haplotype rows collapse onto the same dictionary key via Hashable.

    Uses the first 10 columns of the maternal haplotype matrix; rows 1 and 2 are
    expected to be identical there, and the rows should form exactly 2 distinct groups.
    '''
    h_mat = ig._HapMatrix(self.problem, self.sibs, parent_type=MATERNAL)
    h = h_mat.h[:, 0:10]
    assert_equal(h[1], h[2], 'These two test rows should be equal')

    def hash_array(x):
        # Must return the byte string: without the return, both sides of the comparison
        # below are None and the assertion can never fail.
        # NOTE(review): newer NumPy releases spell this tobytes(); kept as in the original.
        return x.tostring()

    assert_equal(hash_array(h[1]), hash_array(h[2]),
                 'Test objects should be equal, if not, rerun test')

    # Group row indices by row content; identical rows must land under the same key
    d = {}
    for (i, hi) in enumerate(h):
        d.setdefault(Hashable(hi), []).append(i)
    assert_equal(len(d), 2, 'Hashing haps into dictionary did not work')
def test_ibd_segments_both_parents(self):
    '''Check the GERMLINE IBD segments of the sibs, first raw and then grouped to disjoint.'''
    segments = self.ibd_computer.ibd_segments(ig._HapMatrix(self.problem, self.sibs))

    expected_raw = [
        ((0, 344), (16484792, 21449028, 4.964, 0), ((2, 0), (4, 0))),
        ((0, 344), (16484792, 21449028, 4.964, 0), ((1, 0), (4, 0))),
        ((0, 780), (16484792, 26920270, 10.435, 0), ((4, 1), (2, 1))),
        ((2145, 2996), (39608193, 48759228, 9.151, 0), ((1, 1), (2, 1))),
        ((2145, 2996), (39608193, 48759228, 9.151, 0), ((4, 1), (2, 1))),
        ((885, 3218), (27425790, 51156933, 23.731, 0), ((4, 1), (1, 1))),
        ((0, 3218), (16484792, 51156933, 34.672, 0), ((2, 0), (1, 0))),
    ]
    assert_segments_almost_equal(segments, expected_raw,
                                 full_data=True, decimal=3, err_msg='Wrong IBD segments, raw')

    # A transitive-logic test to see that we don't miss any intervals with GERMLINE
    segments.group_to_disjoint(False)
    expected_grouped = [
        ((0, 344), (16484792, 21449028, 4.964, 0), ((2, 0), (1, 0), (4, 0))),
        ((0, 344), (16484792, 21449028, 4.964, 0), ((4, 1), (2, 1))),
        ((344, 780), (21449028, 26920270, 5.471, 0), ((2, 0), (1, 0))),
        ((344, 780), (21449028, 26920270, 5.471, 0), ((4, 1), (2, 1))),
        ((780, 885), (26920270, 27425790, 0.506, 0), ((2, 0), (1, 0))),
        ((885, 2145), (27425790, 39608193, 12.182, 0), ((2, 0), (1, 0))),
        ((885, 2145), (27425790, 39608193, 12.182, 0), ((4, 1), (1, 1))),
        ((2145, 2996), (39608193, 48759228, 9.151, 0), ((4, 1), (1, 1), (2, 1))),
        ((2145, 2996), (39608193, 48759228, 9.151, 0), ((2, 0), (1, 0))),
        ((2996, 3218), (48759228, 51156933, 2.398, 0), ((2, 0), (1, 0))),
        ((2996, 3218), (48759228, 51156933, 2.398, 0), ((4, 1), (1, 1))),
    ]
    assert_segments_almost_equal(segments, expected_grouped,
                                 full_data=True, decimal=3, err_msg='Wrong IBD segments, grouped')

    # Rank by IBD set size, ties broken by length (both descending); first index is the best
    size_and_length = np.array([(len(seg.samples), seg.length) for seg in segments])
    ranking = np.lexsort((-size_and_length[:, 1], -size_and_length[:, 0]))
    best_segment = ranking[0]
    assert_equal(best_segment, 7, 'Wrong best segment (IBD set size + length)')

    best = segments[best_segment]
    assert_almost_equal(best.length, 9.15, decimal=2,
                        err_msg='Wrong best segment (IBD set size + length)')
    assert_equal(segments[best_segment].samples, set([(4, 1), (1, 1), (2, 1)]),
                 err_msg='Wrong best segment (IBD set size + length)')
def test_ibd_segments(self):
    '''Check the disjoint-grouped GERMLINE IBD segments of the sibs and the best-ranked one.'''
    segments = self.ibd_computer.ibd_segments(ig._HapMatrix(self.problem, self.sibs))
    segments.group_to_disjoint()

    expected = [
        ((0, 235), (16484792, 19567818, 3.083, 0), ((3, 0), (1, 0), (5, 0))),
        ((0, 235), (16484792, 19567818, 3.083, 0), ((5, 1), (3, 1), (2, 1))),
        ((235, 345), (19567818, 21809185, 2.241, 0), ((3, 0), (1, 0), (5, 0))),
        ((235, 345), (19567818, 21809185, 2.241, 0), ((5, 1), (3, 1))),
        ((345, 450), (21809185, 23817240, 2.008, 0), ((3, 0), (1, 0))),
        ((345, 450), (21809185, 23817240, 2.008, 0), ((5, 1), (3, 1))),
        ((345, 450), (21809185, 23817240, 2.008, 0), ((1, 1), (2, 1))),
        ((450, 781), (23817240, 26924312, 3.107, 0), ((3, 0), (1, 0))),
        ((450, 781), (23817240, 26924312, 3.107, 0), ((5, 1), (3, 1))),
        ((450, 781), (23817240, 26924312, 3.107, 0), ((1, 1), (2, 1))),
        ((450, 781), (23817240, 26924312, 3.107, 0), ((2, 0), (5, 0))),
        ((781, 886), (26924312, 27427698, 0.503, 0), ((3, 0), (1, 0))),
        ((781, 886), (26924312, 27427698, 0.503, 0), ((1, 1), (2, 1))),
        ((781, 886), (26924312, 27427698, 0.503, 0), ((2, 0), (5, 0))),
        ((886, 2157), (27427698, 39797178, 12.369, 0), ((5, 1), (1, 1), (2, 1))),
        ((886, 2157), (27427698, 39797178, 12.369, 0), ((3, 0), (1, 0))),
        ((886, 2157), (27427698, 39797178, 12.369, 0), ((2, 0), (5, 0))),
        ((2157, 2791), (39797178, 47440321, 7.643, 0), ((5, 1), (3, 1), (1, 1), (2, 1))),
        ((2157, 2791), (39797178, 47440321, 7.643, 0), ((3, 0), (1, 0))),
        ((2157, 2791), (39797178, 47440321, 7.643, 0), ((2, 0), (5, 0))),
        ((2791, 2901), (47440321, 48282594, 0.842, 0), ((5, 1), (3, 1), (1, 1), (2, 1))),
        ((2791, 2901), (47440321, 48282594, 0.842, 0), ((3, 0), (1, 0))),
        ((2901, 3012), (48282594, 48855412, 0.573, 0), ((5, 1), (3, 1), (1, 1), (2, 1))),
        ((2901, 3012), (48282594, 48855412, 0.573, 0), ((3, 0), (2, 0), (1, 0))),
        ((3012, 3218), (48855412, 51156933, 2.302, 0), ((3, 0), (2, 0), (1, 0))),
        ((3012, 3218), (48855412, 51156933, 2.302, 0), ((5, 1), (1, 1), (2, 1))),
    ]
    assert_segments_almost_equal(segments, expected,
                                 full_data=True, decimal=3, err_msg='Wrong IBD segments')

    # Rank by IBD set size, ties broken by length (both descending); first index is the best
    size_and_length = np.array([(len(seg.samples), seg.length) for seg in segments])
    ranking = np.lexsort((-size_and_length[:, 1], -size_and_length[:, 0]))
    best_segment = ranking[0]
    assert_equal(best_segment, 17, 'Wrong best segment (IBD set size + length)')
def test_ibd_segments(self):
    '''Check the disjoint-grouped GERMLINE IBD segments of the sibs and the best-ranked one.'''
    hap_matrix = ig._HapMatrix(self.problem, self.sibs)
    segments = self.ibd_computer.ibd_segments(hap_matrix).group_to_disjoint()

    expected = [
        ((0, 20), (16484792, 17318333, 0.834, 0), ((3, 1), (4, 1), (1, 1), (2, 1))),
        ((0, 20), (16484792, 17318333, 0.834, 0), ((0, 1), (3, 0), (5, 0))),
        ((0, 20), (16484792, 17318333, 0.834, 0), ((2, 0), (0, 0), (4, 0))),
        ((0, 20), (16484792, 17318333, 0.834, 0), ((5, 1), (1, 0))),
        ((20, 121), (17318333, 18235305, 0.917, 0), ((5, 1), (4, 1), (1, 1), (2, 1))),
        ((20, 121), (17318333, 18235305, 0.917, 0), ((0, 1), (3, 0), (5, 0))),
        ((20, 121), (17318333, 18235305, 0.917, 0), ((2, 0), (0, 0), (4, 0))),
        ((121, 224), (18235305, 19455836, 1.221, 0), ((5, 1), (4, 1), (1, 1), (2, 1))),
        ((121, 224), (18235305, 19455836, 1.221, 0), ((0, 1), (3, 0), (5, 0))),
        ((121, 224), (18235305, 19455836, 1.221, 0), ((0, 0), (4, 0))),
        ((121, 224), (18235305, 19455836, 1.221, 0), ((1, 0), (3, 1))),
        ((224, 828), (19455836, 27236071, 7.780, 0), ((0, 1), (3, 0), (5, 0), (2, 0))),
        ((224, 828), (19455836, 27236071, 7.780, 0), ((5, 1), (4, 1), (1, 1), (2, 1))),
        ((224, 828), (19455836, 27236071, 7.780, 0), ((0, 0), (4, 0))),
        ((224, 828), (19455836, 27236071, 7.780, 0), ((1, 0), (3, 1))),
        ((828, 2039), (27236071, 37707962, 10.472, 0), ((0, 1), (3, 0), (5, 0), (2, 0))),
        ((828, 2039), (27236071, 37707962, 10.472, 0), ((1, 0), (3, 1), (4, 1))),
        ((828, 2039), (27236071, 37707962, 10.472, 0), ((5, 1), (1, 1), (2, 1))),
        ((828, 2039), (27236071, 37707962, 10.472, 0), ((0, 0), (4, 0))),
        ((2039, 2442), (37707962, 44264502, 6.557, 0), ((0, 1), (3, 0), (5, 0), (2, 0))),
        ((2039, 2442), (37707962, 44264502, 6.557, 0), ((5, 1), (3, 1), (1, 1), (2, 1))),
        ((2039, 2442), (37707962, 44264502, 6.557, 0), ((0, 0), (4, 0))),
        ((2039, 2442), (37707962, 44264502, 6.557, 0), ((1, 0), (4, 1))),
        ((2442, 2542), (44264502, 44930087, 0.666, 0), ((0, 1), (3, 0), (5, 0), (2, 0))),
        ((2442, 2542), (44264502, 44930087, 0.666, 0), ((5, 1), (3, 1), (1, 1), (2, 1))),
        ((2442, 2542), (44264502, 44930087, 0.666, 0), ((1, 0), (4, 1))),
        ((2542, 2642), (44930087, 45823327, 0.893, 0), ((0, 1), (3, 0), (4, 0), (5, 0), (2, 0))),
        ((2542, 2642), (44930087, 45823327, 0.893, 0), ((5, 1), (3, 1), (1, 1), (2, 1))),
        ((2542, 2642), (44930087, 45823327, 0.893, 0), ((1, 0), (4, 1))),
        ((2642, 2742), (45823327, 47169675, 1.346, 0), ((0, 1), (3, 0), (5, 0), (4, 0))),
        ((2642, 2742), (45823327, 47169675, 1.346, 0), ((5, 1), (3, 1), (1, 1), (2, 1))),
        ((2642, 2742), (45823327, 47169675, 1.346, 0), ((1, 0), (4, 1))),
        ((2742, 2942), (47169675, 48530211, 1.361, 0), ((0, 1), (3, 0), (5, 0), (4, 0))),
        ((2742, 2942), (47169675, 48530211, 1.361, 0), ((5, 1), (3, 1), (1, 1), (2, 1))),
        ((2742, 2942), (47169675, 48530211, 1.361, 0), ((2, 0), (0, 0))),
        ((2742, 2942), (47169675, 48530211, 1.361, 0), ((1, 0), (4, 1))),
        ((2942, 3042), (48530211, 49168613, 0.638, 0), ((5, 1), (3, 1), (1, 1), (2, 1))),
        ((2942, 3042), (48530211, 49168613, 0.638, 0), ((0, 1), (3, 0), (4, 0))),
        ((2942, 3042), (48530211, 49168613, 0.638, 0), ((2, 0), (0, 0))),
        ((2942, 3042), (48530211, 49168613, 0.638, 0), ((1, 0), (4, 1))),
        ((3042, 3218), (49168613, 51156933, 1.988, 0), ((5, 1), (3, 1), (1, 1), (2, 1))),
        ((3042, 3218), (49168613, 51156933, 1.988, 0), ((0, 1), (3, 0), (4, 0))),
        ((3042, 3218), (49168613, 51156933, 1.988, 0), ((2, 0), (0, 0), (5, 0))),
        ((3042, 3218), (49168613, 51156933, 1.988, 0), ((1, 0), (4, 1))),
    ]
    assert_segments_almost_equal(segments, expected,
                                 full_data=True, decimal=3, err_msg='Wrong IBD segments, grouped')

    # Rank by IBD set size, ties broken by length (both descending); first index is the best
    size_and_length = np.array([(len(seg.samples), seg.length) for seg in segments])
    ranking = np.lexsort((-size_and_length[:, 1], -size_and_length[:, 0]))
    best_segment = ranking[0]
    assert_equal(best_segment, 26, 'Wrong best segment (IBD set size + length)')
def test_parent_segments(self):
    '''Check that the disjoint-grouped GERMLINE IBD segment set of the parents has length 0.'''
    parent_haps = ig._HapMatrix(self.problem, self.family.parents)
    segments = self.ibd_computer.ibd_segments(parent_haps).group_to_disjoint()
    assert_equal(segments.length, 0, 'Wrong best segment (IBD set size + length)')
def test_parent_segments(self):
    '''Check that the disjoint-grouped GERMLINE IBD segment set of the parents has length 0.'''
    parent_haps = ig._HapMatrix(self.problem, self.family.parents)
    segments = self.ibd_computer.ibd_segments(parent_haps).group_to_disjoint()
    assert_equal(segments.length, 0, 'Wrong best segment (IBD set size + length)')
return d  # NOTE(review): tail of a definition whose header lies above this view; left untouched
def longest_segment(d):
    '''Return the (key, len(value)) pair of d whose value is the longest.'''
    return max([(snp, len(ibd)) for (snp,ibd) in d.iteritems()], key=operator.itemgetter(1))
def hash_ibd_list(ibd):
    '''Map every sample to the index of the group in ibd that contains it.

    If a sample occurs in several groups, the last such group index wins.
    '''
    return dict(((sample, i) for (i, samples) in enumerate(ibd) for sample in samples))
def highest_degree_node(ibd):
    '''Return the member i of sibs with the largest combined size of the two ibd groups
    holding its haplotypes (i,0) and (i,1).

    Reads the module-level names sibs and hap_index, which are only assigned further
    down in this script, so both must be set before this is called.
    '''
    return max([(i,len(ibd[hap_index[(i,0)]])+len(ibd[hap_index[(i,1)]])) for i in sibs], key=operator.itemgetter(1))[0]
# Load the problem and build the haplotype matrix of the first family's filled members
p = im.io.read_npz(itu.SIB_FOUNDERS_STAGE3)
f = p.families(genotyped=False)[0]
sibs = ig._filled_members(p, f)
h_mat = ig._HapMatrix(p, sibs)
print h_mat
# Compute the IBD segments and group them to disjoint
c = ig.GermlineIbdComputer()
segments = c.ibd_segments(h_mat)
segments.group_to_disjoint()
print segments
# All (sib, allele) combinations
haps = set(itertools.product(sibs, ALLELES))
# compress() is defined outside this view; presumably it returns a dict keyed by segment
# whose values are lists of sample groups -- TODO confirm
d = compress(segments, haps)
(best_segment, num_haps) = longest_segment(d)
print best_segment, num_haps
ibd = d[best_segment]
print ibd
# hap_index must be assigned before highest_degree_node() runs, since it reads this global
hap_index = hash_ibd_list(ibd)
template = highest_degree_node(ibd)
print template