def ibd_posterior_hh(lam, Delta, m):
    """Run ``ProbIbdHmmHapCalculator`` on a uniform segment and return ``prob()``.

    Described by the original author as the simple case of sibs with (1,1)
    genotypes along the segment.

    Parameters:
        lam: forwarded unchanged as the calculator's ``lam`` argument.
        Delta: passed through ``kinship`` to produce the ``f`` argument.
        m: argument to ``np.arange``; the positions are ``np.arange(m)``.

    Returns:
        Whatever ``ProbIbdHmmHapCalculator(...).prob()`` returns.
    """
    positions = np.arange(m)
    # Size every per-position array from the positions actually generated.
    size = len(positions)
    settings = dict(
        lam=lam,
        f=kinship(Delta),
        x=positions,
        p=0.2 * np.ones(size),  # constant 0.2 at every position
        h1=np.zeros((size,), dtype=np.uint),  # both haplotype arrays all zero
        h2=np.zeros((size,), dtype=np.uint),
        e=0.01,
    )
    return ProbIbdHmmHapCalculator(**settings).prob()
def ibd_posterior_hh(lam, Delta, m):
    """Return the ``prob()`` result of the haplotype IBD HMM on a test segment.

    Original author's description: simple case, sibs with (1,1) genotypes
    along the segment.

    Args:
        lam: handed to ``ProbIbdHmmHapCalculator`` as ``lam``.
        Delta: converted with ``kinship(Delta)`` and handed over as ``f``.
        m: the segment positions are ``np.arange(m)``.

    Returns:
        The value of ``ProbIbdHmmHapCalculator(...).prob()``.
    """
    x = np.arange(m)
    count = len(x)
    f = kinship(Delta)
    # Same constant value (0.2) at every position.
    p = np.ones(count) * 0.2
    # Two independent all-zero unsigned arrays, one per haplotype argument.
    h1 = np.zeros(count, dtype=np.uint)
    h2 = np.zeros(count, dtype=np.uint)
    calculator = ProbIbdHmmHapCalculator(
        lam=lam, f=f, x=x, p=p, h1=h1, h2=h2, e=0.01
    )
    return calculator.prob()
    m = 1024
    lam, Delta = models['sibs']
    gg = ibd_posterior_gg(lam, Delta, m)
    hh = ibd_posterior_hh(lam, Delta, m)
    a = [gg, hh]
    print m, np.max(gg), np.max(hh)

    # Create a confidence vs. x plot for various segment sizes M
    np.set_printoptions(precision=2, suppress=True)
    n = 5
    fig_num = 0
    for model, (lam, Delta) in models.iteritems():
        fig_num += 1
        P.figure(fig_num)
        P.clf()
        [plot_posterior(lam, Delta, n, i, 4**i) for i in xrange(1, n + 1)]

        P.subplot(n, 1, 1)
        P.title(
            'IBD Posterior Probability\n$\lambda = %.2f, \Delta = %s, f = %.2f$'
            % (lam, repr(Delta)[6:-1], kinship(Delta)))
        P.subplot(n, 1, n)

        P.xlabel('Genetic Distance')
        # Crappy aspect ratio but doesn't matter for now
        P.show()
        P.savefig('%s/ibd_%s.png' % (
            out_dir,
            model,
        ))
    np.set_printoptions(precision=15, suppress=True)
    m = 1024
    lam, Delta = models["sibs"]
    gg = ibd_posterior_gg(lam, Delta, m)
    hh = ibd_posterior_hh(lam, Delta, m)
    a = [gg, hh]
    print m, np.max(gg), np.max(hh)

    # Create a confidence vs. x plot for various segment sizes M
    np.set_printoptions(precision=2, suppress=True)
    n = 5
    fig_num = 0
    for model, (lam, Delta) in models.iteritems():
        fig_num += 1
        P.figure(fig_num)
        P.clf()
        [plot_posterior(lam, Delta, n, i, 4 ** i) for i in xrange(1, n + 1)]

        P.subplot(n, 1, 1)
        P.title(
            "IBD Posterior Probability\n$\lambda = %.2f, \Delta = %s, f = %.2f$"
            % (lam, repr(Delta)[6:-1], kinship(Delta))
        )
        P.subplot(n, 1, n)

        P.xlabel("Genetic Distance")
        # Crappy aspect ratio but doesn't matter for now
        P.show()
        P.savefig("%s/ibd_%s.png" % (out_dir, model))
# Example no. 5
# 0
def parse_line(line):
    """Parse a segment file line into a key and a segment set.

    Every field of ``line`` is converted with ``int``. The first two integers
    form the key ``(sample1, sample2)``; the remaining integers are consumed
    pairwise (3rd with 4th, 5th with 6th, ...) and handed to
    ``im.segment.DisjointSegmentSet``.

    Returns:
        A ``(key, segment_set)`` tuple, where ``key`` is a tuple of at most
        two ints.
    """
    values = list(map(int, line))
    sample_pair = tuple(values[:2])
    segment_pairs = zip(values[2::2], values[3::2])
    return sample_pair, im.segment.DisjointSegmentSet(segment_pairs)

####################################################################################
if __name__ == '__main__':
    # --------------------------------------------------
    # Main program
    # --------------------------------------------------
    # Reads space-delimited records from stdin, parses each with parse_line,
    # and for every record whose segment set has non-zero length writes
    #   <key[0]> <key[1]> <kinship> <p_ibd> <length>
    # as one line on stdout.
    options = parse_command_line_args()
    param = im.PhaseParam()
    if options.id_coef_file:
        param.id_coef_file = options.id_coef_file
    try:
        for line in csv.reader(sys.stdin, delimiter=' ', skipinitialspace=True):
            key, A = parse_line(line)
            # Output statistics. Only log pairs for which samples were found
            len_A = A.length
            if len_A == 0:
                continue
            # Kinship is derived from the identity coefficients via kinship()
            # rather than by calling param.kinship(key[0], key[1]).
            _, Delta = param.id_coefs(key[0], key[1])
            f = kinship(Delta)
            pibd = p_ibd(Delta)
            sys.stdout.write('%d %d %f %f %d\n' % (key[0], key[1], f, pibd, len_A))
    except Exception:
        # Catch Exception rather than using a bare except, so that
        # KeyboardInterrupt and SystemExit propagate normally. As before, the
        # traceback goes to stdout and the process exits with the failure code.
        traceback.print_exc(file=sys.stdout)
        sys.exit(util.EXIT_FAILURE)
# Example no. 6
# 0
        zip(items[2::2], items[3::2]))


####################################################################################
if __name__ == '__main__':
    # --------------------------------------------------
    # Main program
    # --------------------------------------------------
    # For each space-delimited record on stdin: parse it into a sample-pair
    # key and a segment set, skip it when the set's length is zero, otherwise
    # print the pair, its kinship, its p_ibd value and the length.
    options = parse_command_line_args()
    param = im.PhaseParam()
    if options.id_coef_file:
        param.id_coef_file = options.id_coef_file
    try:
        reader = csv.reader(sys.stdin, delimiter=' ', skipinitialspace=True)
        for line in reader:
            key, A = parse_line(line)
            # Output statistics. Only log pairs for which samples were found
            len_A = A.length
            if len_A != 0:
                # Identity coefficients are looked up per pair; kinship is
                # computed from them instead of via param.kinship(...).
                _, Delta = param.id_coefs(key[0], key[1])
                f = kinship(Delta)
                pibd = p_ibd(Delta)
                sys.stdout.write('%d %d %f %f %d\n' %
                                 (key[0], key[1], f, pibd, len_A))
    except Exception:
        # Narrowed from a bare except so that KeyboardInterrupt/SystemExit
        # are not intercepted. Errors are still reported on stdout and the
        # process still exits with util.EXIT_FAILURE.
        traceback.print_exc(file=sys.stdout)
        sys.exit(util.EXIT_FAILURE)