Example #1
0
 def print_table(self):
     '''Print a table of probabilities at each SNP.

     Writes to standard output: the model parameters, the Viterbi path expressed in frame SNP
     indices and in SNP numbers, a column header, and one table row per entry of self.x.
     NumPy's print options are changed while printing and handed back to
     util.set_printoptions() afterwards, even if printing raises.
     '''
     import sys  # Local import: only needed for the print threshold below

     def column(values):
         # Turn a 1-D array into an (n, 1) column so it can be joined along axis 1.
         return values[np.newaxis].transpose()

     options = np.get_printoptions()
     # sys.maxsize makes NumPy print arrays in full. np.nan was used here before, but newer
     # NumPy releases reject a NaN threshold with a ValueError.
     np.set_printoptions(precision=3, suppress=True, threshold=sys.maxsize, linewidth=200)
     try:
         # repr(...)[6:-1] drops the first 6 and the last character of the repr
         # (presumably the 'array(' ... ')' wrapper -- confirm Delta is an ndarray).
         print('lambda = %s, Delta = %s, eps = %.1e' % (self.lam, repr(self.Delta)[6:-1], self.e))

         # Runs of equal Viterbi states (shifted by 1 for display) as (state, (first, last))
         # items; computed once and reused for both path lines.
         runs = list(itemutil.groupby_with_range(self.Q_star + 1))
         print('Viterbi path (frame SNPs): ' + ' -> '.join(
             '%d (%d-%d)' % (state, rng[0], rng[1]) for state, rng in runs))
         print('Viterbi path (SNPs):       ' + ' -> '.join(
             '%d (%d-%d)' % (state, self.snps[rng[0]], self.snps[rng[1]]) for state, rng in runs))

         header = ('t', 'SNP#', 'Obs', 'G1', 'G2', 'lam*dx', 'p',
                   'Gam1', 'Gam2', 'Gam3', 'Gam4', 'Gam5', 'Gam6', 'Gam7', 'Gam8', 'Gam9',
                   'p(IBD)', 'Viterbi', 'IBD?')
         # One left-aligned, 10-character wide field per column title.
         print(('    ' + '%-10s' * len(header)) % header)

         state_g = ProbIbdHmmCalculator._T_STATE_G
         table = np.concatenate((column(np.arange(len(self.x))),
                                 column(self.snps),
                                 column(self.Obs),
                                 column(np.array([state_g[t][0] for t in self.Obs])),
                                 column(np.array([state_g[t][1] for t in self.Obs])),
                                 # A trailing 0 is appended to lam_x (presumably because it
                                 # has one entry fewer than the other columns).
                                 column(np.concatenate((self.lam_x, [0]))),
                                 column(self.p),
                                 self.Gamma.transpose(),
                                 column(self.p_ibd_gamma),
                                 column(self.Q_star + 1),
                                 column(self.p_ibd_viterbi)), axis=1)
         print(table)
     finally:
         # Always give the caller's print options back, also when printing failed.
         util.set_printoptions(options)
Example #2
0
 def print_table(self):
     '''Print a table of probabilities at each SNP.

     Writes to standard output: the model parameters (lam, f, e), the Viterbi path expressed
     in frame SNP indices and in SNP numbers, a column header, and one table row per entry of
     self.x. NumPy's print options are changed while printing and handed back to
     util.set_printoptions() afterwards, even if printing raises.
     '''
     import sys  # Local import: only needed for the print threshold below

     def column(values):
         # Turn a 1-D array into an (n, 1) column so it can be joined along axis 1.
         return values[np.newaxis].transpose()

     options = np.get_printoptions()
     # sys.maxsize makes NumPy print arrays in full. np.nan was used here before, but newer
     # NumPy releases reject a NaN threshold with a ValueError.
     np.set_printoptions(precision=3,
                         suppress=True,
                         threshold=sys.maxsize,
                         linewidth=200)
     try:
         print('lambda = %.2f, f = %.2f, eps = %.1e' % (self.lam, self.f, self.e))

         # Runs of equal Viterbi states as (state, (first, last)) items; computed once and
         # reused for both path lines.
         runs = list(itemutil.groupby_with_range(self.Q_star))
         print('Viterbi path (frame SNPs): ' + ' -> '.join(
             '%d (%d-%d)' % (state, rng[0], rng[1]) for state, rng in runs))
         print('Viterbi path (SNPs):       ' + ' -> '.join(
             '%d (%d-%d)' % (state, self.snps[rng[0]], self.snps[rng[1]])
             for state, rng in runs))

         header = ('t', 'SNP#', 'H1', 'H2', 'x', 'lam*dx', 'p', 'Gam0', 'Gam1',
                   'p(IBD)', 'Viterbi', 'IBD?')
         # One left-aligned, 10-character wide field per column title.
         print(('      ' + '%-10s' * len(header)) % header)

         table = np.concatenate(
             (column(np.arange(len(self.x))),
              column(self.snps),
              # H1 and H2 are the quotient and remainder of Obs divided by 2. Floor division
              # is spelled out so the H1 column stays integral under true division too.
              column(self.Obs // 2),
              column(self.Obs % 2),
              column(self.x),
              # np.diff() yields one value fewer than self.x; pad with a trailing 0.
              column(np.concatenate((self.lam * np.diff(self.x), [0]))),
              column(self.p),
              self.Gamma.transpose(),
              column(self.p_ibd_gamma),
              column(self.Q_star),
              column(self.p_ibd_viterbi)),
             axis=1)
         print(table)
     finally:
         # Always give the caller's print options back, also when printing failed.
         util.set_printoptions(options)
Example #3
0
 def test_groupby_with_range(self):
     '''Check groupby_with_range() on an empty input and on a string with repeated runs.'''
     # An empty input yields no runs at all.
     no_runs = iu.groupby_with_range('')
     assert_equal(no_runs, [])

     # Each run is expected as (value, (first index, last index)); the last index is
     # inclusive, and a value that re-appears later starts a new run.
     expected_runs = [
         ('A', (0, 3)),
         ('B', (4, 6)),
         ('C', (7, 8)),
         ('D', (9, 9)),
         ('A', (10, 11)),
         ('B', (12, 14)),
     ]
     actual_runs = iu.groupby_with_range('AAAABBBCCDAABBB')
     assert_equal(actual_runs, expected_runs)
Example #4
0
 def print_table(self):
     '''Print a table of probabilities at each SNP.

     Writes to standard output: the model parameters, the Viterbi path expressed in frame SNP
     indices and in SNP numbers, a column header, and one table row per entry of self.x.
     NumPy's print options are changed while printing and handed back to
     util.set_printoptions() afterwards, even if printing raises.
     '''
     import sys  # Local import: only needed for the print threshold below

     options = np.get_printoptions()
     # sys.maxsize makes NumPy print arrays in full. np.nan was used here before, but newer
     # NumPy releases reject a NaN threshold with a ValueError.
     np.set_printoptions(precision=3,
                         suppress=True,
                         threshold=sys.maxsize,
                         linewidth=200)
     try:
         # repr(...)[6:-1] drops the first 6 and the last character of the repr
         # (presumably the 'array(' ... ')' wrapper -- confirm Delta is an ndarray).
         print('lambda = %s, Delta = %s, eps = %.1e' % (
             self.lam, repr(self.Delta)[6:-1], self.e))

         # Displayed states are the Viterbi states shifted by 1.
         shown_states = self.Q_star + 1
         # (state, (first, last)) runs; computed once and reused for both path lines.
         runs = list(itemutil.groupby_with_range(shown_states))
         frame_path = ['%d (%d-%d)' % (state, rng[0], rng[1]) for state, rng in runs]
         snp_path = ['%d (%d-%d)' % (state, self.snps[rng[0]], self.snps[rng[1]])
                     for state, rng in runs]
         print('Viterbi path (frame SNPs): ' + ' -> '.join(frame_path))
         print('Viterbi path (SNPs):       ' + ' -> '.join(snp_path))

         header = ('t', 'SNP#', 'Obs', 'G1', 'G2', 'lam*dx', 'p',
                   'Gam1', 'Gam2', 'Gam3', 'Gam4', 'Gam5', 'Gam6', 'Gam7', 'Gam8', 'Gam9',
                   'p(IBD)', 'Viterbi', 'IBD?')
         # One left-aligned, 10-character wide field per column title.
         print(('    ' + '%-10s' * len(header)) % header)

         # Look up the two entries of _T_STATE_G for every observation.
         state_g = ProbIbdHmmCalculator._T_STATE_G
         g1 = np.array([state_g[t][0] for t in self.Obs])
         g2 = np.array([state_g[t][1] for t in self.Obs])

         # column_stack() turns every 1-D array into one column and keeps the 2-D
         # transposed Gamma as a group of columns.
         table = np.column_stack(
             (np.arange(len(self.x)),
              self.snps,
              self.Obs,
              g1,
              g2,
              # A trailing 0 is appended to lam_x (presumably because it has one entry
              # fewer than the other columns).
              np.concatenate((self.lam_x, [0])),
              self.p,
              self.Gamma.transpose(),
              self.p_ibd_gamma,
              shown_states,
              self.p_ibd_viterbi))
         print(table)
     finally:
         # Always give the caller's print options back, also when printing failed.
         util.set_printoptions(options)
Example #5
0
 def print_table(self):
     '''Print a table of probabilities at each SNP.

     Writes to standard output: the model parameters (lam, f, e), the Viterbi path expressed
     in frame SNP indices and in SNP numbers, a column header, and one table row per entry of
     self.x. NumPy's print options are changed while printing and handed back to
     util.set_printoptions() afterwards, even if printing raises.
     '''
     import sys  # Local import: only needed for the print threshold below

     options = np.get_printoptions()
     # sys.maxsize makes NumPy print arrays in full. np.nan was used here before, but newer
     # NumPy releases reject a NaN threshold with a ValueError.
     np.set_printoptions(precision=3, suppress=True, threshold=sys.maxsize, linewidth=200)
     try:
         print('lambda = %.2f, f = %.2f, eps = %.1e' % (self.lam, self.f, self.e))

         # (state, (first, last)) runs of equal Viterbi states; computed once and reused
         # for both path lines.
         runs = list(itemutil.groupby_with_range(self.Q_star))
         frame_path = ['%d (%d-%d)' % (state, rng[0], rng[1]) for state, rng in runs]
         snp_path = ['%d (%d-%d)' % (state, self.snps[rng[0]], self.snps[rng[1]])
                     for state, rng in runs]
         print('Viterbi path (frame SNPs): ' + ' -> '.join(frame_path))
         print('Viterbi path (SNPs):       ' + ' -> '.join(snp_path))

         header = ('t', 'SNP#', 'H1', 'H2', 'x', 'lam*dx', 'p', 'Gam0', 'Gam1',
                   'p(IBD)', 'Viterbi', 'IBD?')
         # One left-aligned, 10-character wide field per column title.
         print(('      ' + '%-10s' * len(header)) % header)

         # column_stack() turns every 1-D array into one column and keeps the 2-D
         # transposed Gamma as a group of columns.
         table = np.column_stack((np.arange(len(self.x)),
                                  self.snps,
                                  # H1 and H2 are the quotient and remainder of Obs divided
                                  # by 2. Floor division is spelled out so the H1 column
                                  # stays integral under true division too.
                                  self.Obs // 2,
                                  self.Obs % 2,
                                  self.x,
                                  # np.diff() yields one value fewer than self.x; pad with
                                  # a trailing 0.
                                  np.concatenate((self.lam * np.diff(self.x), [0])),
                                  self.p,
                                  self.Gamma.transpose(),
                                  self.p_ibd_gamma,
                                  self.Q_star,
                                  self.p_ibd_viterbi))
         print(table)
     finally:
         # Always give the caller's print options back, also when printing failed.
         util.set_printoptions(options)
Example #6
0
def hap_segments(h):
    '''Return IBD segments between two phased sample haplotypes (i,a),(j,b) encapsulated by
    the IbdProblem object ''h''.

    Outline:
      1. Compute the IBD posterior at the frame SNPs with prob_ibd_hmm_from_raw_input(h) and
         mark frame SNPs whose posterior is positive.
      2. For every run of marked frame SNPs, translate it to a SNP index range and keep it only
         if its length (in h.params.len_unit) is at least h.params.min_len.
      3. Inside a kept range, filter h.d with im.ibd.filter_diff(d, 'median', 10) and create one
         Segment per run of truthy filtered values whose length is at least
         h.params.min_len_sub_segment.

    Reads from h: hap1, hap2, num_snps, snps, bp, cm, d, all_h1, all_h2 and
    params.{debug, min_len, min_len_sub_segment, len_unit}. When h.params.debug is true,
    progress information is printed to standard output.

    Returns the im.segment.SegmentSet holding all created segments.
    '''
    import sys  # Local import: only needed for the debug print threshold below

    # Load data
    (i, a), (j, b), debug, num_snps, frame = h.hap1, h.hap2, h.params.debug, h.num_snps, h.snps
    if debug:
        print('-' * 70)
        print('IBD Segments between (%d,%d), (%d,%d)' % (i, a, j, b))
        print('-' * 70)

    # Calculate IBD posterior using the largest frame
    prob_ibd = prob_ibd_hmm_from_raw_input(h)
    is_ibd = prob_ibd > 0.0
    if debug:
        # A stop index at or beyond the end of the frame is reported as num_snps.
        frame_segments = [(frame[x[START]], frame[x[STOP]] if x[STOP] < len(frame) else num_snps)
                          for x in im.segment.segments_with_value(is_ibd, True, 3)]
        print('Frame IBD segments %s' % (frame_segments,))

    # Output sufficiently-long segments
    # If a segment is identified by HMM and is above params.min_len, allow smaller sub-segments
    # in which the haps fit
    min_len_sub_segment = h.params.min_len_sub_segment
    bp, cm, len_unit = h.bp, h.cm, h.params.len_unit
    # Coordinate used for length thresholds: cm as-is, or bp scaled by MEGA_BASE_PAIR
    coord = cm if (len_unit == 'cm') else bp / (1.0 * im.constants.MEGA_BASE_PAIR)
    pair = [h.hap1, h.hap2]
    long_segments = im.segment.SegmentSet([])
    # (start, stop) coordinates of the SNP index range [x, y] in each coordinate system
    coord_bp = lambda x, y: (bp[x], im.segment.stop_bp(bp, y, num_snps))
    coord_cm = lambda x, y: (cm[x], im.segment.stop_bp(cm, y, num_snps))
    coord_unit = lambda x, y: (coord[x], im.segment.stop_bp(coord, y, num_snps))
    for fr in im.segment.segments_with_value(is_ibd, True):
        full_segment = __frame_segment_to_snp(frame, fr, num_snps)
        # Interpolate prob_ibd from frame SNPs to all SNPs in the segments
        start, stop = full_segment[START], full_segment[STOP]  # Original indices of segment boundaries
        cm_start, cm_stop = coord_unit(start, stop)
        if debug:
            print('%d:%d coordinates %f:%f' % (start, stop, cm_start, cm_stop))
        # Threshold should be really small to catch everything, since we believe the IBD HMM
        # to give the correct answer
        if cm_stop - cm_start >= h.params.min_len:
            fr_start, fr_stop = fr[START], fr[STOP]  # Frame indices of segment boundaries
            confidence = np.interp(coord[start:stop], coord[frame[fr_start:fr_stop]], prob_ibd[fr_start:fr_stop])
            # Check if haplotypes fit in ALL SNPs, not just SNP frames. Restrict to
            # sub-segments of consecutive "1"s found in [start,stop] (allowing genotype error mismatches)
            d = h.d[start:stop]
            d_filtered = im.ibd.filter_diff(d, 'median', 10)
            if debug:
                # Print the arrays below in full. sys.maxsize replaces np.nan, which newer
                # NumPy releases reject as a threshold.
                # NOTE: this changes NumPy's global print options and is not undone here.
                np.set_printoptions(threshold=sys.maxsize)
                print('    %-4s %-4s %-4s %-4s %-4s' % ('SNP', 'h1', 'h2', 'd', 'filt'))
                # Builtin int replaces the np.int alias, which newer NumPy releases removed.
                print(np.concatenate((np.arange(start, stop)[np.newaxis].transpose(),
                                      h.all_h1[start:stop][np.newaxis].transpose(),
                                      h.all_h2[start:stop][np.newaxis].transpose(),
                                      d.astype(int)[np.newaxis].transpose(),
                                      d_filtered.astype(int)[np.newaxis].transpose()
                                      ), axis=1))

            # Use all sufficiently-long sub-segments
            # ind: ranges of truthy runs of d_filtered, relative to ''start''
            ind = [v for k, v in itemutil.groupby_with_range(d_filtered) if k]
            # s: the same ranges shifted to original SNP indices (segments of ones)
            s = [x + start for x in ind]
            candidates = (Segment((int(seg[START]), int(seg[STOP])), pair, coord_bp(seg[START], seg[STOP]),
                                  confidence=confidence[rel[START]:rel[STOP]],
                                  cm=coord_cm(seg[START], seg[STOP]))
                          for seg, rel in zip(s, ind) if seg[STOP] > seg[START])
            sub_segments = [segment for segment in candidates
                            if (segment.length_cm if (len_unit == 'cm') else segment.length) >= min_len_sub_segment]
            if stop > start:
                long_segments += sub_segments
            if debug:
                print('segments of ones %s' % (s,))
                print('ind %s' % (ind,))
                print('sub-segments %s' % (sub_segments,))
        else:
            if debug:
                print('Segment shorter than %.2f threhold' % (h.params.min_len,))
    if debug:
        print('Long-enough segments (segment >= %.2f %s, sub-segment >= %.2f %s):' %
              (h.params.min_len, len_unit, h.params.min_len_sub_segment, len_unit))
        print(long_segments.pprint_segments(show_bp=True) if long_segments else '\t-')
        print('')
    return long_segments